#include <cassert>
#include <string>
#include <vector>
using namespace std;
inline size_t min(size_t x, size_t y, size_t z)
{
if (x < y)
return x < z ? x : z;
else
return y < z ? y : z;
}
size_t edit_distance(const string& A, const string& B)
{
size_t NA = A.size();
size_t NB = B.size();
vector<vector<size_t>> M(NA + 1, vector<size_t>(NB + 1));
for (size_t a = 0; a <= NA; ++a)
M[a][0] = a;
for (size_t b = 0; b <= NB; ++b)
M[0][b] = b;
for (size_t a = 1; a <= NA; ++a)
for (size_t b = 1; b <= NB; ++b)
{
size_t x = M[a-1][b] + 1;
size_t y = M[a][b-1] + 1;
size_t z = M[a-1][b-1] + (A[a-1] == B[b-1] ? 0 : 2);
M[a][b] = min(x,y,z);
}
return M[A.size()][B.size()];
}
int main()
{
assert(edit_distance("ISLANDER", "SLANDER") == 1);
assert(edit_distance("MART", "KARMA") == 5);
assert(edit_distance("KITTEN", "SITTING") == 5);
assert(edit_distance("INTENTION", "EXECUTION") == 8);
}
|
||||
|
But apart from that it’s a very clean and efficient implementation. |
|||||||||||||||||||||
|
I hope you're enjoying the NLP class. I do. :) Min
This could be rewritten as:
Drop
It now works on all types supporting size_t I think Performance: You could improve memory usage by storing only two rows of M. But this would prevent your from backtracking to print the alignment, and isn't a huge win anyway (except if you're working on very large strings). General comments Your code is really straightforward and well-written. The algorithm is simply translated, and the rest of the function is simply the initialization: there's nothing complicated, and the code is easy to read. Good job! |
|||||
|