Take the 2-minute tour ×
Code Review Stack Exchange is a question and answer site for peer programmer code reviews. It's 100% free, no registration required.

I wrote up a Treap implementation to serve as the base class for a bunch of augmented data structures (interval sets, plane (2D) sets, order statistic trees), replacing my RB-tree base.

Some profiling for performance compared against std::set:

  • 4 times faster insertion (for all cases; random, in order, reverse order)
  • 2 times faster find
  • 2 times faster erase
  • 1.532 times slower iteration (!?)

profiling data with gcc 4.9.2 -O3 (numbers in ms)

basic tree (RB tree)
sequential insert: 4620.9
iteration: 168.516
clear: 1020
reverse order insert: 4516.92
find: 1745.32
find nearby: 1698.18
erase: 2628.6

treap
sequential insert: 1267.88
iteration: 289.564
clear: 1141.23
reverse order insert: 1197.49
find: 1166.52
find nearby: 1032.5
erase: 1269.2

std set
sequential insert: 4368.96
iteration: 169.782
clear: 1039.6
reverse order insert: 4352.46
find: 2214.36
find nearby: 1884.86
erase: 2518.06

My RB-tree's performance is pretty similar to std::set, except it is slightly faster in iteration. However, both my RB-tree and Treap both share the general Tree_iterator, so what could this difference be explained by (tree shape)?

Please take a look at why the iteration might be this slow (I will annotate parts of the code that I have questions on by a [RF] request feedback comment):

general helper tree functions

// either finds key or the location where the key should go
template <typename Node>
Node* tree_find(Node* start, const typename Node::key_type& key) {
    while (start != Node::nil && start->key != key) {
        if (key < start->key) start = start->left;
        else start = start->right;
    }
    return start;
}
template <typename Node>
Node* tree_min(Node* start) {
    while (start->left != Node::nil) start = start->left;
    return start;
}
template <typename Node>
Node* tree_max(Node* start) {
    while (start->right != Node::nil) start = start->right;
    return start;
}
// const versions
template <typename Node>
const Node* tree_find(const Node* start, const typename Node::key_type& key) {
    while (start != Node::nil && start->key != key) {
        if (key < start->key) start = start->left;
        else start = start->right;
    }
    return start;
}
template <typename Node>
const Node* tree_min(const Node* start) {
    while (start->left != Node::nil) start = start->left;
    return start;
}
template <typename Node>
const Node* tree_max(const Node* start) {
    while (start->right != Node::nil) start = start->right;
    return start;
}


// successor is node with smallest key greater than start
template <typename Node>
Node* tree_successor(const Node* start) {
    if (start->right != Node::nil) return tree_min(start->right);
    // else go up until a node that's the left child of parent
    Node* parent {start->parent};
    while (parent != Node::nil && start == parent->right) {
        start = parent;
        parent = parent->parent;
    }
    return parent;
}
template <typename Node>
Node* tree_predecessor(const Node* start) {
    if (start->left != Node::nil) return tree_max(start->left);
    // else go up until a node that's the right child of parent
    Node* parent {start->parent};
    while (parent != Node::nil && start == parent->left) {
        start = parent;
        parent = parent->parent;
    }
    return parent;
}
// convenient and semantically clear functions
template <typename Node>
bool is_left_child(const Node* node) {
    return node == node->parent->left;
}
template <typename Node>
bool is_right_child(const Node* node) {
    return node == node->parent->right;
}

// BST traversals
template <typename Node, typename Op>
void preorder_walk(Node* start, Op&& op) {
    if (start != Node::nil) {
        op(start);
        preorder_walk(start->left, op);
        preorder_walk(start->right, op);
    }
}
template <typename Node, typename Op>
void inorder_walk(Node* start, Op&& op) {
    if (start != Node::nil) {
        inorder_walk(start->left, op);
        op(start);
        inorder_walk(start->right, op);
    }
}
template <typename Node, typename Op>
void postorder_walk(Node* start, Op&& op) {
    if (start != Node::nil) {
        postorder_walk(start->left, op);
        postorder_walk(start->right, op);
        op(start);
    }
}

general tree iterators

// node iterators
// full traversal takes O(n) time and O(1) space
// traverse at most 2(n - 1) edges, where n is # nodes
// non-const bidirectional iterator
template <typename Node>
struct Tree_iterator {
    using key_type = typename Node::key_type;
    using CR = const Tree_iterator<Node>&;
    using adjacent_iterator = Tree_adj_iterator<Node>;

    Node* cur;

    // [RF] I know that people will say the return should be a reference to itself, but I want to discourage chaining with incre/decrement operators
    void operator++() {cur = tree_successor(cur);}
    void operator--() {cur = tree_predecessor(cur);}
    key_type& operator*() {return cur->key;}
    Node* operator->() {return cur;}
    Node* get()     {return cur;}
    bool operator==(CR other) const {return other.cur == cur;}
    bool operator!=(CR other) const {return !(*this == other);}
    adjacent_iterator begin() {
        if (cur->left != Node::nil) return {cur->left};
        else return {cur->right};
    }
    adjacent_iterator end()   {return {Node::nil};}
    friend std::ostream& operator<<(std::ostream& os, CR itr) {os << *itr.cur; return os;}
};
// const bidirectional iterator
template <typename Node>
struct Tree_const_iterator {
    using key_type = typename Node::key_type;
    using CR = const Tree_const_iterator<Node>&;
    using adjacent_const_iterator = Tree_adj_const_iterator<Node>;

    const Node* cur;

    void operator++() {cur = tree_successor(cur);}
    void operator--() {cur = tree_predecessor(cur);}
    const key_type& operator*() const {return cur->key;}
    const Node* operator->() const {return cur;}
    const Node* get()         const {return cur;}
    bool operator==(CR other) const {return other.cur == cur;}
    bool operator!=(CR other) const {return !(*this == other);}
    adjacent_const_iterator begin() const {
        if (cur->left != Node::nil) return {cur->left};
        else return {cur->right};
    }
    adjacent_const_iterator end()   const {return {Node::nil};}
};

// adjacent iterator (expected of all graphs)
// iterators from left to right if they exist
template <typename Node>
struct Tree_adj_iterator {
    using pointer = Node*;
    using key_type = typename Node::key_type;
    using CR = const Tree_adj_iterator<Node>&;

    pointer cur;

    void operator++() {if (is_left_child(cur)) cur = cur->parent->right; else cur = Node::nil;}
    void operator--() {if (is_right_child(cur)) cur = cur->parent->left; else cur = Node::nil;}
    key_type& operator*() {return cur->key;}
    pointer operator->() {return cur;}
    pointer get()       {return cur;}
    bool operator==(CR other) const {return other.cur == cur;}
    bool operator!=(CR other) const {return !(*this == other);} 
    friend std::ostream& operator<<(std::ostream& os, CR itr) {os << *itr.cur; return os;}
};
// const version
template <typename Node>
struct Tree_adj_const_iterator {
    using const_pointer = const Node*;
    using key_type = typename Node::key_type;
    using CR = const Tree_adj_const_iterator<Node>&;

    const_pointer cur;

    void operator++() {if (is_left_child(cur)) cur = cur->parent->right; else cur = Node::nil;}
    void operator--() {if (is_right_child(cur)) cur = cur->parent->left; else cur = Node::nil;}
    const key_type& operator*() const {return cur->key;}
    const_pointer operator->() const {return cur;}
    const_pointer get() const {return cur;}
    bool operator==(CR other) const {return other.cur == cur;}
    bool operator!=(CR other) const {return !(*this == other);} 
};

actual Treap class

template <typename Node>
class Treap {
protected:
    using NP = Node*;
    using T = typename Node::key_type;

    NP root {Node::nil};

    // treap specific methods
    void heap_fix_up(NP node) {
        // climb up via rotations until priority's min-heap property is fulfilled
        while (node != root && node->priority < node->parent->priority) {
            if (is_left_child(node)) rotate_right(node->parent);
            else rotate_left(node->parent);
        }
    }
    template <typename Op>
    void treap_insert(NP node, Op&& fixup) {
        // assumes caller has handle to node so no need to return it
        tree_insert(root, node, fixup);
        heap_fix_up(node);
    }
    template <typename Op>
    void treap_delete(NP node, Op&& fixup) {
        // 4 cases based on the children of node
        // either no children or just one child
        if      (node->left == Node::nil) transplant(node, node->right);    // no left child, become right child
        else if (node->right == Node::nil) transplant(node, node->left);    // no right child, become left child
        // else both children, find successor in right subtree
        else {
            NP successor {tree_min(node->right)};
            if (successor->parent != node) {    // not immediate right child
                // replace successor by its right child, then give successor node's right subtree
                transplant(successor, successor->right);
                successor->right = node->right;
                successor->right->parent = successor;
            }
            transplant(node, successor);
            successor->left = node->left;
            successor->left->parent = successor; 
            // restore priority
            while (successor->priority > successor->left->priority || successor->priority > successor->right->priority) {
                if (successor->left->priority < successor->right->priority) rotate_right(successor);
                else rotate_left(successor);
            }
        }
        fixup(node);
        delete node;
    }

    // core treap utilities
    // rotations to balance tree
    /* rotate left shifts everything left s.t. 
       it becomes the left child of its original right child
       its right child is replaced by its original right child's left child
                 A <--
                / \   |
               /   \  |
              B     C
             / \   / \
            /   \ /   \
           D    E F    G

        rotate_left(A) transforms into

                 C
                / \
               /   \
              A     G
             / \
            /   \
           B     F
          / \
         /   \
        D     E
    */
    virtual void rotate_left(NP node) {
        NP child {node->right};

        node->right = child->left;
        if (child->left != Node::nil) child->left->parent = node;

        child->parent = node->parent;
        if (node->parent == Node::nil) root = child;
        else if (is_left_child(node)) node->parent->left = child;
        else node->parent->right = child;

        child->left = node;
        node->parent = child;   
    }
    // rotate right shifts everything right, inverse of rotate left
    virtual void rotate_right(NP node) {
        NP child {node->left};

        node->left = child->right;
        if (child->right != Node::nil) child->right->parent = node;

        child->parent = node->parent;
        if (node->parent == Node::nil) root = child;
        else if (is_left_child(node)) node->parent->left = child;
        else node->parent->right = child;

        child->right = node;
        node->parent = child;   
    }
    // cannot do a simple tree_find for insert since parent has to be updated
    template <typename Op>
    void tree_insert(NP start, NP node, Op&& fixup) {
        NP parent {Node::nil};
        while (start != Node::nil) {
            fixup(start, node);
            parent = start;
            if (node->key < start->key) start = start->left;
            else start = start->right;
        }
        node->parent = parent;
        if (parent == Node::nil) root = node;   // tree was empty
        else if (node->key < parent->key) parent->left = node;
        else parent->right = node;
    }

    // moves one subtree to replace another one
    void transplant(NP old, NP moved) {
        // move moved into old's position in the tree without deleting any trees
        // no parent must mean that old is root or nil itself
        if (old->parent == Node::nil) root = moved;
        // old is a left child
        else if (is_left_child(old)) old->parent->left = moved;
        else old->parent->right = moved;
        // can assign to parent unconditionally due to sentinel (we know it's not nullptr)
        moved->parent = old->parent;
        // updating moved's children and erasing old is up to the caller
    }

public:
    using value_type = T;
    using pointer = Node*;
    using const_pointer = const Node*;
    using iterator = Tree_iterator<Node>;
    using const_iterator = Tree_const_iterator<Node>;
    Treap() = default;
    Treap(std::initializer_list<T> l) {
        for (const auto& v : l) insert(v);
    }

    // [RF] all my derived classes add no extra data in the tree itself (all in the nodes); should I make this non-virtual?
    virtual ~Treap() {clear();} 

    // modifiers
    void insert(const T& data) {
        NP node {new Node(data)};
        treap_insert(node, skip_insert_fixup<Node>);
    };

    void erase(const T& data) {
        NP node {tree_find(root, data)};
        if (node != Node::nil) treap_delete(node, skip_delete_fixup<Node>);
    }
    void clear() {
        postorder_walk(root, [](NP node){delete node;});
        root = Node::nil;
    }

    // query
    iterator find_and_elevate(const T& key) {
        NP found {tree_find(root, key)};
        // elevate the found node's priority for better temporal locality
        if (found != Node::nil) {
            found->priority >>= 1;
            heap_fix_up(found);
        }
        return iterator{found};
    }
    iterator find(const T& key) {
        NP found {tree_find(root,key)};
        return iterator{found};
    }   
    const_iterator find(const T& key) const {
        NP found {tree_find(root,key)};
        return const_iterator{found};
    }   
    // with hints / starting points
    iterator find(const T& key, const_iterator hint) {
        NP found {tree_find(hint.get(),key)};
        return iterator{found};
    }   
    const_iterator find(const T& key, const_iterator hint) const {
        NP found {tree_find(hint.get(),key)};
        return const_iterator{found};
    }

    size_t size() const {
        size_t num_elems {0};
        inorder_walk(root, [&](const Node* node){++num_elems;});
        return num_elems;
    }
    bool empty() const {return root == Node::nil;}

    // iterators
    iterator begin()             {return iterator{tree_min(root)};}
    iterator end()               {return iterator{Node::nil};}
    const_iterator begin() const {return const_iterator{tree_min(root)};}
    const_iterator end() const   {return const_iterator{Node::nil};}

    void print() const {
        inorder_walk(root, [](NP node){std::cout << node->key << '(' << node->priority << ')' << ' ';});
        std::cout << "root: " << root->key << '(' << root->priority << ')' << std::endl; 
    }

};

template <typename T>
struct Treap_node {
    static Treap_node* nil;
    using key_type = T;

    Treap_node *parent, *left, *right;
    T key;
    int priority;
    // [RF] should the sentinel priority be maximum or minimum?
    Treap_node() : priority{std::numeric_limits<int>::max()} {} // sentinel construction
    Treap_node(T val) : parent{nil}, left{nil}, right{nil}, key{val}, priority{rand()} {}
};


template <typename T>
Treap_node<T>* Treap_node<T>::nil {new Treap_node{}};

template <typename T>
using Basic_treap = Treap<Treap_node<T>>;

profiling code

// somewhere in utility.h with std namespace usage and proper includes
class Timer {
    time_point<system_clock> init, end;
    microseconds elapsed = duration_cast<microseconds>(end - init);
public:
    Timer() : init{system_clock::now()} {}
    void restart() { init = system_clock::now(); }
    double tonow() { 
        end = system_clock::now();
        elapsed = duration_cast<microseconds>(end - init);
        return elapsed.count();
    }
};

constexpr int test_size = 10000000;
template <typename Set>
void profile_set(Set& single_set) {
    Timer time;
    for (int i = 0; i < test_size; ++i)
        single_set.insert(i);
    cout << "sequential insert: " << time.tonow() / 1000.0 << endl;

    time.restart();
    for (auto& elem : single_set) (void)elem;
    cout << "iteration: " << time.tonow() / 1000.0 << endl; 

    time.restart();
    single_set.clear();
    cout << "clear: " << time.tonow() / 1000.0 << endl;

    time.restart();
    for (int i = test_size - 1; i >= 0; --i)
        single_set.insert(i);
    cout << "reverse order insert: " << time.tonow() / 1000.0 << endl;

    time.restart();
    int find_start = randint(test_size - 1);
    for (int i = 0; i < test_size; ++i)
        single_set.find((find_start + i) % test_size);
    cout << "find: " << time.tonow() / 1000.0 << endl;

    time.restart();
    for (int i = 0; i < test_size; i += 5) {
        for (int j = 0; j < 5; ++j) single_set.find((find_start + i) % test_size);
    }
    cout << "find nearby: " << time.tonow() / 1000.0 << endl;

    time.restart();
    for (int i = 0; i < test_size; ++i)
        single_set.erase(i);
    cout << "erase: " << time.tonow() / 1000.0 << endl;
}
void profile_std_set() {
    cout << "std set\n";
    std::set<int> std_set;
    profile_set(std_set);
    cout << endl;
}

void profile_treap() {
    cout << "treap\n";
    Basic_treap<int> treap_set;
    profile_set(treap_set);
    cout << endl;
}

The full code can be found here, which is part of my simple algorithms and data structures library (feel free to use)

share|improve this question
    
I guess that you are comparing against the libstdc++ implementation? –  Deduplicator Aug 16 at 21:47
    
Yes I believe that's the default standard library implementation that GCC uses –  LemonPi Aug 16 at 23:12

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Browse other questions tagged or ask your own question.