I have developed a log parser. The goal is to take any type of log I can and convert it into a MongoDB collection.
I think I've done a pretty good job of reading whatever types of logs are thrown at it, but:
In some cases, such as when I used it at work, I had to add "special cases" (example: the date is "YYYYMMDD" and a second field holds the time as "hhmmss", but the collection should store a single timestamp). I doubt everyone will want to read the code, understand the logic, and add their own little piece of Node.js to handle their special case.
There are three config files, which may confuse a lot of people. How can I make the concept easier to grasp?
It was first developed for one specific job and then adapted to do more, so there may still be some code that nobody needs.
As it stands, what can I do to make the code more versatile and usable for someone who just wants their logs loaded into MongoDB with nothing more than download -> edit a config file -> start?
How can I make this script something really useful for people, and not just another tool thrown in the middle of all the already existing tools?
The code with config example is available here.
Here is a copy paste of the current version of the code:
//Dependencies, followed by the module-level state shared by the functions below
var fs = require('fs'),
mongoose = require('mongoose'),
db, //mongoose.connection, assigned in connect()
config, //Parsed main config file; init() reads logfile, config_log and config_mongo from it
config_log, //Parsed log-format config; the code uses its `separator` and `data` (ordered field names)
config_mongo, //Parsed MongoDB config; the code uses its `url`, `data`, `base` and `index`
S_logs, //Mongoose schema built from config_mongo.data in connect()
M_logs; //Mongoose model compiled from S_logs in connect()
/*
IN :
S date string with the year at offsets 0-3, the month at 5-6 and the day at 8-9 (e.g. 2013-05-21)
T time string with the hour at offsets 0-1, the minutes at 3-4 and the seconds at 6-7 (e.g. 14:03:59)
OUT:
Timestamp in seconds
COM:
Cut both strings at fixed offsets and hand the numbers to Date, which reads them as local time
*/
function parse_date(S, T) {
    var year = parseInt(S.substring(0, 4), 10);
    //The month text is coerced to a number before the -1; Date counts months from 0
    var month = parseInt(Number(S.substring(5, 7)) - 1, 10);
    var day = parseInt(S.substring(8, 10), 10);
    var hours = parseInt(T.substring(0, 2), 10);
    var minutes = parseInt(T.substring(3, 5), 10);
    var seconds = parseInt(T.substring(6, 8), 10);
    var moment = new Date(year, month, day, hours, minutes, seconds, 0);
    //getTime() is in milliseconds
    return moment.getTime() / 1000;
}
/*
IN :
logs array of raw log lines
i index in logs of the line to parse
stats current stats of the parsing (p: progress marks printed, line: saved lines, error: rejected lines)
callback called with stats once every line has been handled
OUT:
stats (through callback)
COM:
Split one line with config_log.separator, name the fields after config_log.data, keep the fields listed in config_mongo.data, save the result through M_logs, then move on to the next line.
*/
function parse_line(logs, i, stats, callback) {
    var fields,
        parsed = {},
        doc = {},
        k,
        next = function () {
            parse_line(logs, i + 1, stats, callback);
        };

    //Catch the progress bar up with the current position
    while (i > (stats.p * logs.length) / 100) {
        process.stdout.write('-');
        stats.p += 1;
    }

    //Nothing left to parse: close the progress bar and report
    if (!(i < logs.length)) {
        process.stdout.write('\n');
        callback(stats);
        return;
    }

    fields = logs[i].split(config_log.separator);

    //A line with the wrong number of fields is counted as an error and skipped
    if (fields.length !== config_log.data.length) {
        stats.error += 1;
        //Every 1000th error the next step is deferred so node.js can clear its queue
        if (stats.error % 1000 === 0) {
            process.nextTick(next);
        } else {
            next();
        }
        return;
    }

    //Name every field after its position in config_log.data (last to first)
    for (k = fields.length - 1; k >= 0; k -= 1) {
        parsed[config_log.data[k]] = fields[k].trim();
    }

    //Keep the fields of the mongoose model; a missing or empty one is stored as ''
    //Insert special case here (String to number, truncate, round, other check...)
    Object.keys(config_mongo.data || {}).forEach(function (name) {
        doc[name] = parsed[name] || '';
    });

    new M_logs(doc).save(function (err) {
        if (err) {
            stats.error += 1;
        } else {
            stats.line += 1;
        }
        next();
    });
}
/*
IN :
i index in config.logfile of the file to parse
stats current stats of the parsing (file, errfile, line, error)
callback called with stats once every file has been handled
OUT:
stats (through callback)
COM:
Read one log file, split it on '\n', run parse_line over it, add the file stats to the total, then go to the next file. A file that cannot be read is logged, counted in errfile and skipped.
*/
function parse_file(i, stats, callback) {
    var file,
        proceed;

    //No file left: hand the totals back
    if (!(i < config.logfile.length)) {
        callback(stats);
        return;
    }

    file = config.logfile[i];
    proceed = function () {
        parse_file(i + 1, stats, callback);
    };

    console.log('\nParsing :' + file);
    fs.readFile(file, function (err, data) {
        var lines;
        if (err) {
            console.log(err);
            stats.errfile += 1;
            proceed();
            return;
        }
        lines = String(data).split('\n');
        //Header of the progress bar that parse_line fills in
        console.log(lines.length + ' lines\n\nProgress:');
        console.log('|0% |10% |20% |30% |40% |50% |60% |70% |80% |90% |100%');
        //Time the parsing of this file
        console.time('Done in ');
        parse_line(lines, 0, {p: 0, line: 0, error: 0}, function (stat) {
            console.timeEnd('Done in ');
            console.log('Lines parsed:' + stat.line);
            console.log('Error :' + stat.error);
            //Fold the file stats into the total stats
            stats.line += stat.line;
            stats.error += stat.error;
            stats.file += 1;
            proceed();
        });
    });
}
/*
IN :
callback called without arguments once the connection is open and the model is compiled
OUT:
db
S_logs
M_logs
COM:Connect to config_mongo.url, build the mongoose schema from config_mongo.data, declare the unique index config_mongo.index, then compile the model on the collection config_mongo.base.
*/
function connect(callback) {
    //Connection errors are only printed; callback is not called for them
    var on_error = console.error.bind(console, 'connection error:'),
        on_open = function () {
            var schema_options = {
                strict: false,
                collection: config_mongo.base
            };
            console.log('Connected');
            S_logs = new mongoose.Schema(config_mongo.data, schema_options);
            S_logs.index(config_mongo.index, {unique: true});
            //config_mongo.base is used both as model name and as collection name
            M_logs = mongoose.model(config_mongo.base, S_logs, config_mongo.base);
            callback();
        };

    console.log('Connecting to MongoDB...');
    mongoose.connect(config_mongo.url);
    db = mongoose.connection;
    db.on('error', on_error);
    db.once('open', on_open);
}
/*
IN :
number value to be padded
OUT:
string of at least two characters
COM:
Turn the value into a string and prefix it with '0' until it is two characters long. Longer values are returned unchanged.
*/
function pad(number) {
    var text = String(number);
    while (text.length < 2) {
        text = '0' + text;
    }
    return text;
}
/*
IN :
pattern value to repeat (converted to a string first)
count number of time to repeat (rounded down to an integer)
OUT:
repeated string, '' when count is below 1, NaN or infinite
COM:Repeat a string a set number of time.
Exemple: repeat('abc', 5) => 'abcabcabcabcabc'
*/
function repeat(pattern, count) {
    //Work on a string so that doubling never turns into a numeric addition
    var chunk = String(pattern),
        //The bit operations below are only right for integers
        remaining = Math.floor(Number(count)),
        result = '';
    //!(x >= 1) also rejects NaN
    if (!(remaining >= 1) || remaining === Infinity) {
        return '';
    }
    //Binary decomposition: chunk doubles at every step and is appended to
    //result whenever the current low bit of the count is set
    while (remaining > 1) {
        if (remaining & 1) {
            result += chunk;
        }
        remaining >>= 1;
        chunk += chunk;
    }
    return result + chunk;
}
/*
IN :
config name of the main config file
log array of log file names (may be empty)
cf_mongo config_mongo filename
cf_log config log filename
OUT:
/
COM:Print a box listing what will be parsed and which config files are used. Every value is padded to the length of the longest name.
*/
function print_params(config, log, cf_mongo, cf_log) {
    var names = [config, cf_log, cf_mongo].concat(log),
        size = 0,
        print,
        i;

    //One row of the box: label, value, padding up to size, right border
    function row(label, value) {
        return label + value + repeat(' ', size - value.length) + '|\n';
    }

    //Width of the value column = longest name to display
    for (i = 0; i < names.length; i = i + 1) {
        size = Math.max(size, names[i].length);
    }

    print = '._____________________' + repeat('_', size) + '.\n' +
        '|Configuration ' + repeat(' ', size) + '|\n' +
        '|_____________________' + repeat('_', size) + '|\n' +
        row('|Configuration file : ', config) +
        //An empty log list still gets its (empty) row instead of crashing on log[0]
        row('|Log file : ', log.length > 0 ? log[0] : '');
    for (i = 1; i < log.length; i = i + 1) {
        print = print + row('| ', log[i]);
    }
    print = print +
        row('|Config MongoDB file: ', cf_mongo) +
        //This row shows the log-format config file (it used to repeat the main config name)
        row('|Config Logs file : ', cf_log) +
        '|_____________________' + repeat('_', size) + '/\n';
    console.log(print);
}
/*
IN :
stats Stats from the parsing (file, errfile, line, error)
OUT:
/
COM:Print a box with the number of parsed / failed / total files and lines, each partial count followed by its percentage of the total.
*/
function print_stats (stats) {
    var files_total = stats.file + stats.errfile,
        lines_total = stats.line + stats.error,
        parsed_files,
        failed_files,
        all_files,
        parsed_lines,
        failed_lines,
        all_lines,
        size,
        frame,
        rows;

    //Percentage of whole taken by part; an empty total gives 0 instead of NaN
    function share(part, whole) {
        return (whole == 0) ? 0 : (part / whole) * 100;
    }

    //'count (xx.xx%)'
    function with_share(part, whole) {
        return '' + part + ' (' + share(part, whole).toFixed(2) + '%)';
    }

    //One row of the box: label, value, padding up to size, right border
    function row(label, value) {
        return label + value + repeat(' ', size - value.length) + '|\n';
    }

    parsed_files = with_share(stats.file, files_total);
    failed_files = with_share(stats.errfile, files_total);
    all_files = '' + files_total;
    parsed_lines = with_share(stats.line, lines_total);
    failed_lines = with_share(stats.error, lines_total);
    all_lines = '' + lines_total;

    //The value column is as wide as the longest count-with-percentage
    size = Math.max(
        parsed_files.length,
        failed_files.length,
        parsed_lines.length,
        failed_lines.length
    );
    frame = repeat('_', size);

    rows = [
        '._______________' + frame + '.\n',
        '|Result ' + repeat(' ', size) + '|\n',
        '|_______________' + frame + '|\n',
        row('|Parsed files : ', parsed_files),
        row('|Error files : ', failed_files),
        row('|Total files : ', all_files),
        row('|Parsed lines : ', parsed_lines),
        row('|Error lines : ', failed_lines),
        row('|Total lines : ', all_lines),
        '|_______________' + frame + '/\n'
    ];
    console.log(rows.join(''));
}
/*
IN :
file name of a JSON config file
callback called with the parsed content
OUT:
/
COM:
Read and parse one JSON config file. A read error or invalid JSON is reported with the real file name and the script exits with status 1.
*/
function read_json_config(file, callback) {
    fs.readFile(file, function (err, data) {
        var parsed;
        if (err) {
            console.log('Error in ' + file + ':' + err);
            process.exit(1);
            return;
        }
        try {
            parsed = JSON.parse(data);
        } catch (parse_err) {
            //Invalid JSON used to surface as an uncaught exception
            console.log('Error in ' + file + ':' + parse_err);
            process.exit(1);
            return;
        }
        callback(parsed);
    });
}
/*
IN :
/
OUT:
config
config_mongo
config_log
COM:
Print the banner, read the main config file (process.argv[2], or config.js by default), then the log-format and MongoDB config files it names, connect, parse every log file, print the total stats and exit.
*/
function init() {
    var config_file = process.argv[2] || 'config.js',
        print;
    print = '.______________________.\n' +
        '|Stats Parser |\n' +
        '|______________________|\n' +
        '|Version: 1.0|\n' +
        '| D219|\n' +
        '|______________________/\n';
    console.log(print);
    read_json_config(config_file, function (main_config) {
        var cf_mongo,
            cf_log,
            log;
        config = main_config;
        //Get config_log, config_mongo and logfiles filename, with their defaults
        cf_mongo = config.config_mongo || 'config_mongo.js';
        cf_log = config.config_log || 'config_log.js';
        log = config.logfile || ['log.log'];
        //Accept a single file name as well as a list
        if (!Array.isArray(log)) {
            log = [log];
        }
        //parse_file reads config.logfile, so the default has to be stored there too
        config.logfile = log;
        print_params(config_file, log, cf_mongo, cf_log);
        read_json_config(cf_log, function (parsed_log) {
            config_log = parsed_log;
            read_json_config(cf_mongo, function (parsed_mongo) {
                config_mongo = parsed_mongo;
                //Valid JSON can still be an empty value such as null
                if (!config_mongo || !config_log) {
                    console.log('Erreur de config');
                    process.exit(1);
                    return;
                }
                //Print the log format
                console.log('Logs:\n' + config_log.data.join(config_log.separator));
                connect(function () {
                    //Start timer of total execution
                    console.time('\nTotal execution time: ');
                    parse_file(0, {file: 0, errfile: 0, line: 0, error: 0}, function (stats) {
                        //Print total stats and exit
                        console.timeEnd('\nTotal execution time: ');
                        print_stats(stats);
                        process.exit();
                    });
                });
            });
        });
    });
}
//Script entry point: start the parser when the file is run
init();