Take the 2-minute tour ×
Code Review Stack Exchange is a question and answer site for peer programmer code reviews. It's 100% free, no registration required.

I have developed a log parser. The goal is to take whatever type of log I can and convert it into a MongoDB collection.

I think I've done a pretty good job at reading whatever type of log is thrown at it, but:

  • In some cases, like when I used it at work, I had to add "special cases" (example: the date is YYYYMMDD and a second field holds the time as "hhmmss", but the collection should have a single timestamp), and I doubt everyone will want to look into the code, understand the logic, and add their own little piece of Node.js to get their special case handled.

    • There are three config files, which may confuse a lot of people. How can I make the concept easier to grasp?

    • It was first developed for a specific job, then adapted to do more, and there may still be some code that nobody needs.

As it is, what can I do to make the code more versatile and usable for someone who just wants their logs thrown into MongoDB with nothing more than download -> edit a config file -> start?

How can I make this script something really useful for people, and not just another tool thrown into the middle of all the already existing tools?

The code with config example is available here.

Here is a copy paste of the current version of the code:

var fs = require('fs'),
    mongoose = require('mongoose'),
    db,                             //Mongoose connection to the collection
    config,                         //config file parsed
    config_log,                     //config_log parsed
    config_mongo,                   //config_mongo parsed
    S_logs,                         //Mongoose schema
    M_logs;                         //Mongoose model

/*
    IN :
        S   date laid out as YYYY?MM?DD: 4-digit year, one separator character,
            2-digit month, one separator character, 2-digit day (e.g. "2013-04-25")
        T   time laid out as hh?mm?ss with one separator character between
            the fields (e.g. "13:37:05")
    OUT:
        Timestamp in seconds (local time zone of the machine running the script)
    COM:
        Cut the fixed-width fields out of both strings and build a Date from them.
        Unparseable input yields NaN.
*/
function parse_date(S, T) {
    //Read a fixed-width piece of text as a base-10 integer
    function number_at(text, start, width) {
        return parseInt(text.substr(start, width), 10);
    }

    var year = number_at(S, 0, 4),
        //Months are zero-based in a JavaScript Date; the subtraction coerces the text to a number
        month_index = parseInt(S.substr(5, 2) - 1, 10),
        day = number_at(S, 8, 2),
        hours = number_at(T, 0, 2),
        minutes = number_at(T, 3, 2),
        seconds = number_at(T, 6, 2),
        moment = new Date(year, month_index, day, hours, minutes, seconds, 0);

    //getTime() is in milliseconds
    return moment.getTime() / 1000;
}

/*
    IN :
        logs        Array of log lines to parse
        i           index of the line to parse in logs
        stats       running stats of the parsing (p = progress marks printed,
                    line = lines saved, error = lines rejected)
        callback    called with stats once every line has been handled
    OUT:
        stats (through callback)
    COM:
        Handle line i, then move on to line i + 1 until the end of logs.
        A line is split with config_log.separator, mapped to the field names of
        config_log.data, reduced to the fields listed in config_mongo.data and
        saved through the M_logs model.
*/
function parse_line(logs, i, stats, callback) {
    var fields,         //line i cut into its fields
        idx,            //index in fields
        raw = {},       //field name -> trimmed value, see config_log
        doc = {},       //object handed to the model, see config_mongo
        key;

    //Move on to the following line
    function next() {
        parse_line(logs, i + 1, stats, callback);
    }

    //Catch up the progress bar: one mark per percent reached
    while (i > ((stats.p * logs.length) / 100)) {
        process.stdout.write('-');
        stats.p += 1;
    }

    //Past the last line: close the progress bar and report
    if (!(i < logs.length)) {
        process.stdout.write('\n');
        callback(stats);
        return;
    }

    fields = logs[i].split(config_log.separator);

    //Wrong number of fields: count the line as an error and skip it
    if (fields.length !== config_log.data.length) {
        stats.error += 1;
        //Every 1000th error, continue from a fresh tick instead of recursing directly
        if (stats.error % 1000 === 0) {
            process.nextTick(next);
        } else {
            next();
        }
        return;
    }

    //Name the fields, last one first
    for (idx = fields.length - 1; idx >= 0; idx -= 1) {
        raw[config_log.data[idx]] = fields[idx].trim();
    }

    //Keep only what the model declares
    for (key in config_mongo.data) {
        if (config_mongo.data.hasOwnProperty(key)) {
            //Insert special case here (String to number, truncate, round, other check...)
            //A field declared by the model but absent (or empty) in the line becomes ''
            doc[key] = raw[key] || '';
        }
    }

    //Save, update the stats with the outcome, then carry on
    new M_logs(doc).save(function (err) {
        if (err) {
            stats.error += 1;
        } else {
            stats.line += 1;
        }
        next();
    });
}

/*
    IN :
        i           index of the file to parse in config.logfile
        stats       running totals (file, errfile, line, error)
        callback    called with stats once every file has been handled
    OUT:
        stats (through callback)
    COM:
        Read log file i, hand its lines to parse_line, add the per-file stats
        to the totals, then go to the next file. A file that cannot be read is
        counted in stats.errfile and skipped.
*/
function parse_file(i, stats, callback) {
    //init() falls back to ['log.log'] when the config names no log file;
    //use the same default here instead of crashing on a missing list
    var files = (config && config.logfile) || ['log.log'],
        path;

    //No file left
    if (i >= files.length) {
        callback(stats);
        return;
    }

    path = files[i];
    console.log('\nParsing :' + path);

    fs.readFile(path, function (err, data) {
        var lines;

        if (err) {
            console.log(err);
            stats.errfile = stats.errfile + 1;
            parse_file(i + 1, stats, callback);
            return;
        }

        lines = String(data).split('\n');
        //A file ending with a newline yields a final empty string that is not
        //a log line; drop it so it is not reported as an error line
        if (lines.length > 0 && lines[lines.length - 1] === '') {
            lines.pop();
        }

        //Start printing the progress bar
        console.log(lines.length + ' lines\n\nProgress:');
        console.log('|0%      |10%      |20%      |30%      |40%      |50%      |60%      |70%      |80%      |90%      |100%');
        //Start measuring time
        console.time('Done in ');

        parse_line(lines, 0, {p:0, line:0, error:0}, function (stat) {
            //Print the file stats
            console.timeEnd('Done in ');
            console.log('Lines parsed:' + stat.line);
            console.log('Error       :' + stat.error);
            //Add the file stats to the total stats
            stats.line = stats.line + stat.line;
            stats.error = stats.error + stat.error;
            stats.file = stats.file + 1;
            //Parse the next file
            parse_file(i + 1, stats, callback);
        });
    });
}

/*
    IN :
        callback    called without argument once the model is ready
    OUT:
        db          Mongoose connection
        S_logs      Mongoose schema built from config_mongo.data
        M_logs      Mongoose model compiled from S_logs
    COM:
        Open the connection to config_mongo.url. When it is open, build the
        schema (non strict, unique index on config_mongo.index) and compile the
        model on the config_mongo.base collection. Connection errors are only
        printed on stderr.
*/
function connect(callback) {
    //Runs once, when the connection is open
    function on_open() {
        var options = {
            strict: false,
            collection: config_mongo.base
        };

        console.log('Connected');

        //Create the schema with config_mongo
        S_logs = new mongoose.Schema(config_mongo.data, options);
        S_logs.index(config_mongo.index, {unique: true});

        //Compile the model; name and collection are both config_mongo.base
        M_logs = mongoose.model(config_mongo.base, S_logs, config_mongo.base);

        callback();
    }

    console.log('Connecting to MongoDB...');
    mongoose.connect(config_mongo.url);

    db = mongoose.connection;
    db.on('error', console.error.bind(console, 'connection error:'));
    db.once('open', on_open);
}

/*
    IN :
        number  value to be padded
    OUT:
        String of at least 2 characters
    COM:
        Turn the value into a string and prefix it with '0' until it is
        2 characters long. Longer values are returned unchanged.
*/
function pad(number) {
    var digits = String(number);

    while (digits.length < 2) {
        digits = '0' + digits;
    }
    return digits;
}

/*
    IN :
        pattern     string to repeat
        count       number of times to repeat it
    OUT:
        repeated string
    COM:
        Repeat a string a set number of times.
        Example: repeat('abc', 5) => 'abcabcabcabcabc'
        count is rounded down to a whole number; anything below 1, NaN or
        infinite gives ''.
*/
function repeat(pattern, count) {
    var unit = String(pattern),
        remaining = Math.floor(count),
        result = '';

    //Also rejects NaN (e.g. a missing count), which compares false to everything
    if (!isFinite(remaining) || remaining < 1) {
        return '';
    }

    //Doubling: unit holds pattern repeated 1, 2, 4, 8... times, and is added
    //to the result each time the matching binary digit of count is set
    while (remaining > 1) {
        if (remaining % 2 === 1) {
            result += unit;
        }
        remaining = Math.floor(remaining / 2);
        unit += unit;
    }
    return result + unit;
}

/*
    IN :
        config      name of the main config file
        log         Array of log file names
        cf_mongo    config_mongo filename
        cf_log      config_log filename
    OUT:
        /
    COM:
        Print a box listing what will be parsed and which config files are used.
        The box is as wide as the longest name. An empty log list is shown as
        one blank "Log file" row.
*/
function print_params(config, log, cf_mongo, cf_log) {
    //With an empty list log[0] would be undefined; show one blank entry instead
    var files = (log.length > 0) ? log : [''],
        size = Math.max(config.length, cf_log.length, cf_mongo.length),
        print,
        i;

    //One framed row: label, value, then blanks up to the right border
    function row(label, value) {
        return label + value + repeat(' ', size - value.length) + '|\n';
    }

    //The box must fit the longest log file name too
    for (i = 0; i < files.length; i = i + 1) {
        size = Math.max(size, files[i].length);
    }

    //Build the box
    print = '._____________________' + repeat('_', size) + '.\n' +
            '|Configuration        ' + repeat(' ', size) + '|\n' +
            '|_____________________' + repeat('_', size) + '|\n' +
            row('|Configuration file : ', config) +
            row('|Log file           : ', files[0]);

    //Following log files go under the first one, without label
    for (i = 1; i < files.length; i = i + 1) {
        print = print + row('|                     ', files[i]);
    }

    print = print +
            row('|Config MongoDB file: ', cf_mongo) +
            //This row shows the log-format config file, not the main config file
            row('|Config Logs file   : ', cf_log) +
            '|_____________________' + repeat('_', size) + '/\n';
    console.log(print);
}

/*
    IN :
        stats   totals of the parsing (file, errfile, line, error)
    OUT:
        /
    COM:
        Print a box with the number of files and lines parsed, the number that
        failed, the share of each in percent, and the totals.
*/
function print_stats (stats) {
    var files_ok = stats.file,
        files_ko = stats.errfile,
        files_all = files_ok + files_ko,
        lines_ok = stats.line,
        lines_ko = stats.error,
        lines_all = lines_ok + lines_ko,
        fp,
        fe,
        ft,
        lp,
        le,
        lt,
        size,
        print;

    //Share of part in total, in percent; 0 when there is nothing to divide by
    function percent(part, total) {
        return (total == 0) ? 0 : (part / total) * 100;
    }

    //"count (xx.xx%)"
    function cell(count, share) {
        return '' + count + ' (' + share.toFixed(2) + '%)';
    }

    //One framed row: label, text, then blanks up to the right border
    function row(label, text) {
        return label + text + repeat(' ', size - text.length) + '|\n';
    }

    //Text of each row
    fp = cell(files_ok, percent(files_ok, files_all));
    fe = cell(files_ko, percent(files_ko, files_all));
    ft = '' + files_all;
    lp = cell(lines_ok, percent(lines_ok, lines_all));
    le = cell(lines_ko, percent(lines_ko, lines_all));
    lt = '' + lines_all;

    //The box is as wide as the longest "count (xx.xx%)" text
    size = Math.max(fp.length, fe.length, lp.length, le.length);

    //Build the box
    print = '._______________' + repeat('_', size) + '.\n' +
            '|Result         ' + repeat(' ', size) + '|\n' +
            '|_______________' + repeat('_', size) + '|\n' +
            row('|Parsed files : ', fp) +
            row('|Error files  : ', fe) +
            row('|Total files  : ', ft) +
            row('|Parsed lines : ', lp) +
            row('|Error lines  : ', le) +
            row('|Total lines  : ', lt) +
            '|_______________' + repeat('_', size) + '/\n';

    console.log(print);
}

/*
IN :
    path        file to read
    callback    called with the parsed content
OUT:
    parsed content (through callback)
COM:
    Read a file and parse it as JSON. If the file cannot be read or is not
    valid JSON, print the problem with the name of the file and stop the
    script with exit status 1.
*/
function read_json_config(path, callback) {
    fs.readFile(path, function (err, data) {
        var parsed;

        if (err) {
            console.log('Error in ' + path + ':' + err);
            process.exit(1);
        }
        //JSON.parse throws on malformed content; report it like a read error
        try {
            parsed = JSON.parse(String(data));
        } catch (parse_err) {
            console.log('Error in ' + path + ':' + parse_err);
            process.exit(1);
        }
        callback(parsed);
    });
}

/*
IN :
    /
OUT:
    config
    config_mongo
    config_log
COM:
    Read the config files, parse them, and start the script.
    The main config file is process.argv[2], or config.js by default. It may
    name the two other config files (config_log, config_mongo) and the list of
    log files (logfile); config_log.js, config_mongo.js and ['log.log'] are
    used otherwise. Any config problem stops the script with exit status 1.
*/
function init() {
    var config_path = process.argv[2] || 'config.js',
        banner;

    banner = '.______________________.\n' +
             '|Stats Parser          |\n' +
             '|______________________|\n' +
             '|Version:           1.0|\n' +
             '|                  D219|\n' +
             '|______________________/\n';
    console.log(banner);

    read_json_config(config_path, function (parsed) {
        var cf_mongo,
            cf_log,
            log;

        //Valid JSON that is not an object (null, a number...) cannot hold settings
        if (!parsed || typeof parsed !== 'object') {
            console.log('Erreur de config');
            process.exit(1);
        }
        config = parsed;

        //Get config_log, config_mongo and logfiles filename
        cf_mongo = config.config_mongo || 'config_mongo.js';
        cf_log = config.config_log || 'config_log.js';
        log = config.logfile || ['log.log'];
        //Store the default back: the parsing reads the list from config
        config.logfile = log;

        print_params(config_path, log, cf_mongo, cf_log);

        //Read and parse config_log, then config_mongo
        read_json_config(cf_log, function (parsed_log) {
            config_log = parsed_log;

            read_json_config(cf_mongo, function (parsed_mongo) {
                config_mongo = parsed_mongo;

                //Valid JSON, but empty (null, false, 0, "")
                if (!config_mongo || !config_log) {
                    console.log('Erreur de config');
                    process.exit(1);
                }

                //Print the log format
                console.log('Logs:\n' + config_log.data.join(config_log.separator));

                connect(function () {
                    //Start timer of total execution
                    console.time('\nTotal execution time: ');
                    //Parse the files
                    parse_file(0, {file:0, errfile:0, line:0, error:0}, function (stats) {
                        //Print total stats and exit
                        console.timeEnd('\nTotal execution time: ');
                        print_stats(stats);
                        process.exit();
                    });
                });
            });
        });
    });
}

//Entry point: start reading the config files as soon as the script is loaded
init();
share|improve this question

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Browse other questions tagged or ask your own question.