I've just started out writing an app that will visualize genomic data for anybody to understand.
When you get your genome sequenced, the raw data usually comes in the form of a VCF file. I started out by drafting the parser below, which essentially creates a JavaScript object out of a VCF text file. Next, I think I want to write a schema-like JSON file (since there doesn't seem to be any database with an API that correlates genotypes to phenotypes based on GWAS) that I can query for phenotypes using the genotypes generated by the parser. For example, if I want to know whether a specific genome has the genotype for lactose intolerance, red hair or a predisposition to alcoholism, I can consult this manually written schema that maps genotypes to phenotypes.
If you have a genomics background, all the better, but I'm mostly looking for advice concerning my usage of OOP and any other insights. How would you go about the more downstream front-end parts of the app (the actual visualization; I was thinking of something like a timeline and a 'homunculus')? React? D3?
(function iife() {
'use strict'
const fs = require('fs');
const path = require('path');
// Output class for parsing genotype file
/**
 * Output class for parsing a genotype file.
 *
 * Variant data lives directly on the instance: every own key that starts with
 * `chr_` is expected to hold an object mapping an rs-ID to a record of the form
 * `{ genotype, quality, fit }`, where `genotype` is the reference allele
 * followed by the alternate allele and `fit` is the sample's genotype call
 * (e.g. '0/0', '0/1', '1/1').
 *
 * The lookup helpers are prototype methods rather than per-instance properties,
 * so they never show up among the instance's own keys next to the data.
 */
class Genome {
  /**
   * @param {string} name - Label for the sample this genome belongs to.
   */
  constructor(name) {
    this.name = name;
  }

  /**
   * Find a variant record by rs-ID without having to know its chromosome.
   *
   * @param {string} rs_id - Identifier to look up, e.g. 'rs1556032'.
   * @returns {object|undefined} The stored record, or undefined when no
   *   chromosome bucket contains that rs-ID.
   */
  findByRsId(rs_id) {
    for (const key of Object.keys(this)) {
      // Only chromosome buckets hold variants; skip `name` and anything else.
      if (!key.startsWith('chr_')) {
        continue;
      }
      const variants = this[key];
      // Direct keyed lookup instead of scanning every id in the bucket.
      if (variants && Object.prototype.hasOwnProperty.call(variants, rs_id)) {
        return variants[rs_id];
      }
    }
    return undefined;
  }

  /**
   * Get the actual sample's genotype for an rs-ID.
   *
   * '0/0' yields the first character of `genotype` doubled, '1/1' the second
   * character doubled; every other call returns `genotype` unchanged.
   * NOTE(review): indexing single characters assumes one-letter REF and ALT
   * alleles, and haploid calls ('0', '1') currently return REF+ALT as-is —
   * confirm that is the intended result.
   *
   * @param {string} rs_id - Identifier to look up.
   * @returns {string|undefined} Two-letter genotype, or undefined when the
   *   rs-ID is unknown.
   */
  getSampGenotype(rs_id) {
    const rs = this.findByRsId(rs_id);
    if (rs === undefined) {
      return undefined;
    }
    switch (rs.fit) {
      case '0/0':
        return rs.genotype[0] + rs.genotype[0];
      case '1/1':
        return rs.genotype[1] + rs.genotype[1];
      default:
        // '0/1', '0', '1' and any unrecognised call.
        return rs.genotype;
    }
  }

  /**
   * Get the stored reference+alternate genotype string for an rs-ID.
   *
   * @param {string} rs_id - Identifier to look up.
   * @returns {string|undefined} The record's `genotype`, or undefined when the
   *   rs-ID is unknown.
   */
  getRefGenotype(rs_id) {
    const rs = this.findByRsId(rs_id);
    return rs === undefined ? undefined : rs.genotype;
  }
}
// Helpers
/**
 * Store one tab-split VCF row in `myGenotypes`.
 *
 * A row is accepted only when it has exactly 10 columns, is not the `#CHROM`
 * header line, and its ID column (index 2) starts with 'rs'. Accepted rows are
 * filed under `myGenotypes['chr_' + CHROM][ID]`; the first record seen for an
 * rs-ID wins and later duplicates are ignored.
 *
 * @param {string[]} row - Columns of a single VCF line.
 * @returns {void}
 */
const handleRow = (row) => {
  const isVariantRow =
    row.length === 10 && row[0] !== '#CHROM' && row[2].startsWith('rs');
  if (!isVariantRow) {
    return;
  }

  const chrKey = `chr_${row[0]}`;
  const rsId = row[2];

  // If no chr_[num] exists, make one...
  if (!myGenotypes[chrKey]) {
    myGenotypes[chrKey] = {};
  }
  // ...and then always fall through to storing the variant, so the row that
  // creates a chromosome bucket is recorded as well instead of being dropped.
  if (!myGenotypes[chrKey][rsId]) {
    myGenotypes[chrKey][rsId] = {
      genotype: row[3] + row[4], // REF followed by ALT
      quality: row[6],
      fit: row[9],
    };
  }
};
/**
 * Cut a chunk of VCF text into rows and pass each one to `handleRow`.
 *
 * Lines are separated on '\n'; a trailing '\r' is removed from each line so
 * files with CRLF line endings do not leave a stray '\r' glued to the last
 * column. Each line is then split on tabs.
 *
 * Expects `data` to begin and end on line boundaries: a line that is cut in
 * half across two calls is handed to `handleRow` as two separate, incomplete
 * rows.
 *
 * @param {string} data - Text containing zero or more newline-separated lines.
 * @returns {void}
 */
const handleGFGChunk = (data) => {
  const lines = data.split('\n').map((line) => line.replace(/\r$/, ''));
  for (const line of lines) {
    handleRow(line.split('\t'));
  }
};
/**
 * Smoke-check a parsed genome against one known variant.
 *
 * Logs three things for rs1556032, in order: the stored record, whether the
 * reference genotype equals 'CT', and whether the sample genotype equals 'CC'.
 *
 * @param {object} obj - Object exposing findByRsId, getRefGenotype and
 *   getSampGenotype.
 * @returns {void}
 */
const check = (obj) => {
  const probeId = 'rs1556032';

  const record = obj.findByRsId(probeId);
  console.log(record);

  const refIsCT = obj.getRefGenotype(probeId) === 'CT';
  console.log(refIsCT);

  const sampleIsCC = obj.getSampGenotype(probeId) === 'CC';
  console.log(sampleIsCC);
};
/*=====================
Parsing of VCF file
=====================*/
// Genome instance that handleRow fills in while the VCF is streamed.
const myGenotypes = new Genome('edmund');

// Read VCF. path.join inserts the separator between __dirname and 'vcf' that
// plain string concatenation leaves out.
const readFromFile = path.join(__dirname, 'vcf', 'GFG_filtered_unphased_genotypes.vcf');
const GFGfile = fs.createReadStream(readFromFile);
GFGfile.setEncoding('utf8');

// TODO: write the parsed Genome out as code (planned target: 'GFG.js' via
// fs.createWriteStream).

// Start the parsing!
// Stream chunks end at arbitrary offsets, so the text after the last newline
// may be only the first half of a row. Hold it back and prepend it to the next
// chunk so the line handler only ever sees complete lines.
let pendingLine = '';
GFGfile.on('data', (chunk) => {
  const text = pendingLine + chunk;
  const lastBreak = text.lastIndexOf('\n');
  if (lastBreak === -1) {
    // No complete line yet; keep accumulating.
    pendingLine = text;
    return;
  }
  pendingLine = text.slice(lastBreak + 1);
  handleGFGChunk(text.slice(0, lastBreak));
});
// Without an 'error' listener a missing or unreadable file becomes an
// uncaught exception.
GFGfile.on('error', (err) => {
  console.error(`Could not read ${readFromFile}: ${err.message}`);
  process.exitCode = 1;
});
GFGfile.on('close', () => console.log('Checked genotype class'));
GFGfile.on('end', () => {
  // Flush a final line that was not terminated by a newline.
  if (pendingLine !== '') {
    handleGFGChunk(pendingLine);
    pendingLine = '';
  }
  check(myGenotypes);
});
})()