I just started to learn D and ported my a bit over-templated trie. It looks way better than on C++.
I'm sure there is still room for improvement and to make it be real D code.
I checked an the array appending is doing x2 pre-allocation on resize (although 1.5 seems to friendlier, but for my use both are ok). Does this have other performance problems?
import std.stdio; // I/O
import std.typecons; // Nullable
import std.algorithm; // map
import std.array; // splitter
//Tokenizer
interface Tokenizer(Word, Token) {
Token encode(Word w);
Word decode(Token t);
final:
Token[] encode(ref Word[] ws){
return ws.map!(w => encode(w)).array;
}
Word[] decode(ref Token[] ts){
return ts.map!(t => decode(t)).array;
}
}
class VectorTokenizer(Word) : Tokenizer!(Word, size_t) {
Word[] alphabet;
size_t[Word] index;
size_t encode(Word w) {
if(auto i = w in index)
return *i;
auto t = alphabet.length;
alphabet ~= w;
return index[w] = t;
}
Word decode(size_t i) {
return alphabet[i];
}
}
//Trie
//Node
class TrieNode(Data, Word) {
Nullable!Data data;
TrieNode[Word] children;
Word word;
size_t depth;
TrieNode parent;
this() {}
this(TrieNode p, Word w) {
if(p) {
parent = p;
depth = parent.depth+1;
}
word = w;
}
nothrow
bool put(Word[] path, Data d, size_t i=0) {
if(path.length==i) {
// Update leaf
bool r = data.isNull;
data = d;
return r;
}
// Look for leaf
auto w = path[i];
if(auto cn = w in children) // Inside subtree
return cn.put(path, d, i+1);
else { // On new subtree
children[w] = new TrieNode(this, w);
return children[w].put(path, d, i+1);
}
}
nothrow
Nullable!Data get(Word[] path, size_t i=0) {
if(path.length==i)
return data;
// Look for leaf
if(auto cn = path[i] in children)
return cn.get(path, i+1);
// Not here =/
return Nullable!Data.init;
}
void print(string indent="", bool lastChild=true) {
// Print tree indent
string branch = lastChild ? "┕━" : "┝━";
writef("%s%s '%s'", indent, branch, word);
// Print data
if(!data.isNull)
writef("->'%s'", data);
writef(":\n");
// Print children
string newIndent = indent ~ (lastChild ? " " : "│ ");
auto i=0;
auto lastIndex = children.length;
foreach(c; children)
c.print(newIndent, ++i==lastIndex);
}
}
//Trie
class Trie(Data, Word) {
auto _root = new TrieNode!(Data, Word);
size_t _length;
nothrow
auto put(Word[] path, Data d) {
auto r = _root.put(path, d, 0);
if (r)
_length++;
return r;
}
nothrow
auto get(Word[] path) {
return _root.get(path);
}
void print() {
writef("Trie |%s|:\n", _length);
auto i=0;
auto lastIndex = _root.children.length;
foreach(n; _root.children)
n.print("", ++i==lastIndex);
}
@property nothrow
auto length() {
return _length;
}
}
//Trie Specializations
//Tokenized Trie
class TokenizedTrie(
Data,
Word, Token,
Compressor: Tokenizer!(Word, Token)
) : Trie!(Data, Token) {
Compressor c;
this() {
c = new Compressor();
}
this(Compressor compressor) {
c = compressor;
}
bool put(Word[] path, Data d) {
Token[] tokenPath = path.map!(w => c.encode(w)).array;
return Trie!(Data, Token).put(tokenPath, d);
}
Nullable!Data get(Word[] path) {
Token[] tokenPath = path.map!(w => c.encode(w)).array;
return Trie!(Data, Token).get(tokenPath);
}
}
//Iri Trie
class IriTrie(
Data
) : TokenizedTrie!(Data, string, size_t, VectorTokenizer!(string)) {
bool put(string iri, Data d) {
string[] path = iri.split('/');
return TokenizedTrie!(Data, string, size_t, VectorTokenizer!(string)).put(path, d);
}
Nullable!Data get(string iri) {
string[] path = iri.split('/');
return TokenizedTrie!(Data, string, size_t, VectorTokenizer!(string)).get(path);
}
}
//main
//Sample usage:
void main() {
auto trie = new Trie!(string, string);
trie.put(["http:", "", "doge.ing.puc.cl"], "aoeu");
trie.put(["http:", "", "forum.dlang.org"], "htns");
trie.print();
auto tTrie = new TokenizedTrie!(string, string, size_t, VectorTokenizer!(string));
tTrie.put(["http:", "", "doge.ing.puc.cl"], "aoeu");
tTrie.put(["http:", "", "forum.dlang.org"], "htns");
tTrie.print();
auto iriTrie = new IriTrie!(string);
iriTrie.put("http://doge.ing.puc.cl", "aoeu");
iriTrie.put("http://forum.dlang.org", "htns");
iriTrie.print();
}