The Needleman–Wunsch algorithm is an algorithm used in bioinformatics to align protein or nucleotide sequences.
Here is an implementation in Scala:
package vu.co.kaiyin.akkconcurrent
import vu.co.kaiyin.akkconcurrent.NeedlemanWunsch.{ResultEntry, Score, SimilarityFunc}
/**
* Created by IDEA on 2/26/16.
*/
case class NeedlemanWunsch(private var s1: String, private var s2: String, gapPenalty: Int, sm: SimilarityFunc) {
val borderDirection = (-9, -9)
val verticalDirection = (-1, 0)
val horizontalDirection = (0, -1)
val matchDirection = (-1, -1)
s1 = " " + s1
s2 = " " + s2
private val alignMat = new Array[Array[ResultEntry]](s1.length)
private var alignResult: (String, String) = ("", "")
// init alignment matrix
def initAlignmentMatrix = {
for (i <- (0 until s1.length)) {
alignMat(i) = new Array[ResultEntry](s2.length)
if (i == 0) {
for (j <- 0 until s2.length) {
alignMat(i)(j) = (j * gapPenalty, borderDirection)
}
} else {
alignMat(i)(0) = (i * gapPenalty, borderDirection)
}
}
}
def fillAlignmentMatrix: Unit = {
def getScore(i: Int, j: Int): Score = {
if (alignMat(i)(j) != null) {
alignMat(i)(j)._1
} else {
val tryMatch = getScore(i - 1, j - 1) + sm(s1(i), s2(j))
val horizontalGap = getScore(i, j - 1) + gapPenalty
val verticalGap = getScore(i - 1, j) + gapPenalty
val m = Set(tryMatch, horizontalGap, verticalGap).max
if (m == tryMatch) {
alignMat(i)(j) = (m, matchDirection)
} else if (m == horizontalGap) {
alignMat(i)(j) = (m, horizontalDirection)
} else {
alignMat(i)(j) = (m, verticalDirection)
}
m
}
}
for (i <- 1 until s1.length) {
for (j <- 1 until s2.length) {
val _ = getScore(i, j)
}
}
}
def align: NeedlemanWunsch = {
initAlignmentMatrix
fillAlignmentMatrix
val alignBuffer1 = new StringBuffer()
val alignBuffer2 = new StringBuffer()
var rowNumber = s1.length - 1
var colNumber = s2.length - 1
var node = alignMat(rowNumber)(colNumber)
while (node._2 != borderDirection) {
val direction = node._2
if(direction == matchDirection) {
alignBuffer1.insert(0, s1(rowNumber))
alignBuffer2.insert(0, s2(colNumber))
rowNumber -= 1
colNumber -= 1
} else if(direction == verticalDirection) {
alignBuffer1.insert(0, s1(rowNumber))
alignBuffer2.insert(0, " ")
rowNumber -= 1
} else {
alignBuffer1.insert(0, " ")
alignBuffer2.insert(0, s2(colNumber))
colNumber -= 1
}
node = alignMat(rowNumber)(colNumber)
}
alignResult = (alignBuffer1.toString, alignBuffer2.toString)
this
}
override def toString = {
s"\n${alignResult._1}\n${alignResult._2}\n"
}
}
object NeedlemanWunsch {
type Direction = (Int, Int)
type Score = Int
type ResultEntry = (Score, Direction)
type SimilarityFunc = (Char, Char) => Int
}
object TestNeedlemanWunsch extends App {
def sm(c1: Char, c2: Char) = if (c1 == c2) 1 else -1
println(NeedlemanWunsch("GATTACA", "GCATGCU", -1, sm).align.toString)
}
Any suggestions for improvement are welcome.