Tell me more ×
Code Review Stack Exchange is a question and answer site for peer programmer code reviews. It's 100% free, no registration required.

I am given the following CSV file, which I extracted from an Excel spreadsheet. As background that may be helpful: it lists AGI numbers (think of them as protein identifiers), the unmodified peptide sequences for those protein identifiers, the modified peptide sequences (the unmodified sequences with modifications applied), the index or indices of those modifications, and the combined spectral count for repeated peptides. The text file is called MASP.GlycoModReader.txt, and its contents are in the following format:

AGI,UnMd Peptide (M) = x,Mod Peptide (oM) = Ox,Index/Indeces of Modification,counts,Combined 
Spectral count for repeated Peptides

AT1G56070.1,NMSVIAHVDHGKSTLTDSLVAAAGIIAQEVAGDVR,NoMSVIAHVDHGKSTLTDSLVAAAGIIAQEVAGDVR,2,17
AT1G56070.1,LYMEARPMEEGLAEAIDDGR,LYoMEARPoMEEGLAEAIDDGR,"3, 9",1
AT1G56070.1,EAMTPLSEFEDKL,EAoMTPLSEFEDKL,3,7
AT1G56070.1,LYMEARPMEEGLAEAIDDGR,LYoMEARPoMEEGLAEAIDDGR,"3, 9",2
AT1G56070.1,EGPLAEENMR,EGPLAEENoMR,9,2
AT1G56070.1,DLQDDFMGGAEIIK,DLQDDFoMGGAEIIK,7,1

The output file produced from the data above needs to be in the following format:

AT1G56070.1,{"peptides": [{"sequence": "NMSVIAHVDHGKSTLTDSLVAAAGIIAQEVAGDVR", "mod_sequence":    
"NoMSVIAHVDHGKSTLTDSLVAAAGIIAQEVAGDVR" , "mod_indeces": 2, "spectral_count": 17}, {"sequence": 
"LYMEARPMEEGLAEAIDDGR" , "mod_sequence": "LYoMEARPoMEEGLAEAIDDGR", "mod_indeces": [3, 9], 
"spectral_count": 3}, {"sequence": "EAMTPLSEFEDKL" , "mod_sequence": "EAoMTPLSEFEDKL", 
"mod_indeces": [3,9], "spectral_count": 7}, {"sequence": "EGPLAEENMR", "mod_sequence": 
"EGPLAEENoMR", "mod_indeces": 9, "spectral_count": 2}, {"sequence": "DLQDDFMGGAEIIK", 
"mod_sequence": "DLQDDFoMGGAEIIK", "mod_indeces": [7], "spectral_count": 1}]}

I have provided my solution below. If anyone has a better solution in another language, or can analyze mine and let me know whether there are more efficient ways of going about this, please comment below. Thank you.

#!/usr/bin/env node

var fs = require('fs');
var csv = require('csv');
var data ="proteins.csv";

/* Uses csv nodejs module to parse the proteins.csv file.
* Parses the csv file row by row and updates the peptide_arr.
* For new entries creates a peptide object, for similar entries it updates the
* counts in the peptide object with the same AGI#.
* Uses a peptide object to store protein ID AGI#, and the associated data.
* Writes all formatted peptide objects to a txt file - output.txt.
*/

// An array of peptide objects stores the information from the csv file,
// one object per AGI#, in order of first appearance.
var peptide_arr = [];
// Lookup from AGI# to its peptide object in peptide_arr. Created without a
// prototype so an AGI# can never collide with an inherited property name.
var peptide_by_id = Object.create(null);

/* Converts the modification-index field(s) of a row into the output value.
 *  - fields: array of the raw column strings that hold the indices
 *    (one string such as "3, 9", or several if the field was split).
 * Returns a single number when there is exactly one index, otherwise an
 * array of numbers (empty when no index could be parsed).
 */
function parseModIndices(fields)
{
    var indices = fields.join(",").split(",").map(function(field) {
        return parseInt(field, 10);
    }).filter(function(value) {
        return !isNaN(value);
    });
    return indices.length === 1 ? indices[0] : indices;
}

/* Folds one parsed csv row into peptide_arr.
 *  - row: array of column strings; row[0] is the AGI#, row[1] the sequence,
 *    row[2] the modified sequence, the last column the spectral count and
 *    everything in between the modification index/indices.
 * Rows with fewer than 5 columns or a non-numeric count (the header line,
 * blank lines) are ignored. A repeated peptide under the same AGI# adds
 * its count to the existing entry instead of creating a new one.
 */
function addRow(row)
{
    var count = parseInt(row[row.length - 1], 10);
    if (row.length < 5 || isNaN(count)) {
        return;
    }

    // Find the peptide object for this AGI#, creating it on first sight
    var cur = peptide_by_id[row[0]];
    if (!cur) {
        cur = new Peptide(row[0]);
        peptide_by_id[row[0]] = cur;
        peptide_arr.push(cur);
    }

    // Repeated peptide: combine the spectral counts
    var entries = cur.data.peptides;
    for (var i = 0; i < entries.length; i++) {
        if (entries[i].sequence === row[1] && entries[i].mod_sequence === row[2]) {
            entries[i].spectral_count += count;
            return;
        }
    }

    // New peptide: add the associated data from the row
    entries.push({
        "sequence": row[1],
        "mod_sequence": row[2],
        "mod_indeces": parseModIndices(row.slice(3, row.length - 1)),
        "spectral_count": count
    });
}

// csv module reads row by row from data
csv()
.from(data)
.to('debug.csv')
.transform(function(row, index) {
    addRow(row);
    // Hand the row back unchanged so it is forwarded to debug.csv
    // (per the csv module's transform contract - confirm for the installed version)
    return row;
})
.on('end', function() {
    // Parsing is asynchronous, so the output can only be built once every
    // row has been seen. Append each peptide's AGI# and its data.
    var output = peptide_arr.map(function(peptide) {
        return peptide.toString();
    }).join("");
    // Write the output to output.txt
    fs.writeFile("output.txt", output, function(err) {
        if (err) {
            throw err;
        }
    });
})
.on('error', function(err) {
    console.error(err.message);
});

/* Peptide Object :
 *  - id: AGI# passed to the constructor
 *  - data: object holding a "peptides" array, initially empty, to which the
 *    per-peptide entries are pushed
 */
function Peptide(id)
{
    this.id = id;
    this.data = {
        peptides: []
    };
}

/* Peptide methods :
 *  - toString : Returns the formatted output record for this object:
 *    the id, a comma, the data serialized as JSON (indented by one space),
 *    and a terminating newline.
 * The method is attached to the existing prototype (rather than replacing
 * the prototype object) so that instances keep their `constructor` link.
 */
Peptide.prototype.toString = function(){
    return this.id + "," + JSON.stringify(this.data, null, " ") + "\n";
};
share|improve this question
1  
“If anyone has a better solution in another language” That's not what this site is for. This site is specifically for reviewing code you have written (so the “If anyone can analyze mine” part of your question could belong here). –  svick Jul 23 at 23:24
 
Okay, well I did include the latter part of the question like you mentioned. And I realized what you've said. I'll keep that in mind. Instead of stating the obvious, maybe actually giving your input would be appreciated too, unless you have none to give. –  Zaheer Syed Jul 23 at 23:34

Know someone who can answer? Share a link to this question via email, Google+, Twitter, or Facebook.

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Browse other questions tagged or ask your own question.