D trie implementation

Ask Question

Asked 4 years, 5 months ago

Active 4 years, 2 months ago

Viewed 155 times

I just started to learn D and ported my a bit over-templated trie. It looks way better than on C++.

I'm sure there is still room for improvement and to make it be real D code.

I checked an the array appending is doing x2 pre-allocation on resize (although 1.5 seems to friendlier, but for my use both are ok). Does this have other performance problems?

import std.stdio;     // I/O
import std.typecons;  // Nullable
import std.algorithm; // map
import std.array;     // splitter

//Tokenizer

interface Tokenizer(Word, Token) {
    Token encode(Word  w);
    Word  decode(Token t);

    final:
    Token[] encode(ref Word[]  ws){
        return ws.map!(w => encode(w)).array;
    }
    Word[]  decode(ref Token[] ts){
        return ts.map!(t => decode(t)).array;
    }
}

class VectorTokenizer(Word) : Tokenizer!(Word, size_t) {
    Word[]       alphabet;
    size_t[Word] index;

    size_t encode(Word  w) {
        if(auto i = w in index)
            return *i;

        auto t = alphabet.length;
        alphabet ~= w;
        return index[w] = t;
    }
    Word  decode(size_t i) {
        return alphabet[i];
    }
}

//Trie

//Node

class TrieNode(Data, Word) {
    Nullable!Data data;
    TrieNode[Word] children;

    Word word;
    size_t depth;
    TrieNode parent;

    this() {}
    this(TrieNode p, Word w) {
        if(p) {
            parent = p;
            depth = parent.depth+1;
        }
        word = w;
    }

    nothrow
    bool put(Word[] path, Data d, size_t i=0) {
        if(path.length==i) {
            // Update leaf
            bool r = data.isNull;
            data = d;
            return r;
        }

        // Look for leaf
        auto w = path[i];
        if(auto cn = w in children)  // Inside subtree
            return cn.put(path, d, i+1);
        else {  // On new subtree
            children[w] = new TrieNode(this, w);
            return children[w].put(path, d, i+1);
        }
    }

    nothrow
    Nullable!Data get(Word[] path, size_t i=0) {
        if(path.length==i)
            return data;

        // Look for leaf
        if(auto cn = path[i] in children)
            return cn.get(path, i+1);

        // Not here =/
        return Nullable!Data.init;
    }

    void print(string indent="", bool lastChild=true) {
        // Print tree indent
        string branch = lastChild ? "┕━" : "┝━";
        writef("%s%s '%s'", indent, branch, word);

        // Print data
        if(!data.isNull)
            writef("->'%s'", data);
        writef(":\n");

        // Print children
        string newIndent = indent ~ (lastChild ? "  " : "│ ");
        auto i=0;
        auto lastIndex = children.length;
        foreach(c; children)
            c.print(newIndent, ++i==lastIndex);
    }

}

//Trie

class Trie(Data, Word) {
    auto _root = new TrieNode!(Data, Word);
    size_t _length;

    nothrow
    auto put(Word[] path, Data d) {
        auto r = _root.put(path, d, 0);
        if (r)
            _length++;
        return r;
    }
    nothrow
    auto get(Word[] path) {
        return _root.get(path);
    }

    void print() {
        writef("Trie |%s|:\n", _length);
        auto i=0;
        auto lastIndex = _root.children.length;
        foreach(n; _root.children)
            n.print("", ++i==lastIndex);
    }

    @property nothrow
    auto length() {
        return _length;
    }
}

//Trie Specializations

//Tokenized Trie

class TokenizedTrie(
        Data,
        Word, Token,
        Compressor: Tokenizer!(Word, Token)
        ) : Trie!(Data, Token) {
    Compressor c;

    this() {
        c = new Compressor();
    }
    this(Compressor compressor) {
        c = compressor;
    }

    bool put(Word[] path, Data d) {
        Token[] tokenPath = path.map!(w => c.encode(w)).array;
        return Trie!(Data, Token).put(tokenPath, d);
    }

    Nullable!Data get(Word[] path) {
        Token[] tokenPath = path.map!(w => c.encode(w)).array;
        return Trie!(Data, Token).get(tokenPath);
    }
}

//Iri Trie

class IriTrie(
   Data
   ) : TokenizedTrie!(Data,  string, size_t, VectorTokenizer!(string)) {
 bool put(string iri, Data d) {
   string[] path = iri.split('/');
   return TokenizedTrie!(Data, string, size_t, VectorTokenizer!(string)).put(path, d);
 }
 Nullable!Data get(string iri) {
   string[] path = iri.split('/');
   return TokenizedTrie!(Data, string, size_t, VectorTokenizer!(string)).get(path);
 }
}

//main

//Sample usage:

void main() {
    auto trie = new Trie!(string, string);
    trie.put(["http:", "", "doge.ing.puc.cl"], "aoeu");
    trie.put(["http:", "", "forum.dlang.org"], "htns");
    trie.print();

    auto tTrie = new TokenizedTrie!(string, string, size_t, VectorTokenizer!(string));
    tTrie.put(["http:", "", "doge.ing.puc.cl"], "aoeu");
    tTrie.put(["http:", "", "forum.dlang.org"], "htns");
    tTrie.print();

    auto iriTrie = new IriTrie!(string);
    iriTrie.put("http://doge.ing.puc.cl", "aoeu");
    iriTrie.put("http://forum.dlang.org", "htns");
    iriTrie.print();
}

edited Jun 4 '16 at 22:50

asked Feb 26 '16 at 20:32

Dietr1ch

3011 silver badge6 bronze badges

add a comment |

0 Your Answer

Sign up or log in

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

By clicking “Post Your Answer”, you agree to our terms of service, privacy policy and cookie policy

Stack Exchange Network

current community

your communities

more stack exchange communities

D trie implementation

0

Your Answer

Browse other questions tagged trie d or ask your own question.

Hot Network Questions

D trie implementation

0

Know someone who can answer? Share a link to this question via email, Twitter, or Facebook.

Your Answer

Sign up or log in

Post as a guest

Browse other questions tagged trie d or ask your own question.

Related

Hot Network Questions