I wrote up a Treap implementation to serve as the base class for a bunch of augmented data structures (interval sets, plane (2D) sets, order statistic trees), replacing my RB-tree base.
Some profiling for performance compared against std::set
:
- 4 times faster insertion (for all cases; random, in order, reverse order)
- 2 times faster find
- 2 times faster erase
- 1.532 times slower iteration (!?)
profiling data with gcc 4.9.2 -O3 (numbers in ms)
basic tree (RB tree)
sequential insert: 4620.9
iteration: 168.516
clear: 1020
reverse order insert: 4516.92
find: 1745.32
find nearby: 1698.18
erase: 2628.6
treap
sequential insert: 1267.88
iteration: 289.564
clear: 1141.23
reverse order insert: 1197.49
find: 1166.52
find nearby: 1032.5
erase: 1269.2
std set
sequential insert: 4368.96
iteration: 169.782
clear: 1039.6
reverse order insert: 4352.46
find: 2214.36
find nearby: 1884.86
erase: 2518.06
My RB-tree's performance is pretty similar to std::set
, except it is slightly faster in iteration. However, both my RB-tree and Treap both share the general Tree_iterator
, so what could this difference be explained by (tree shape)?
Please take a look at why the iteration might be this slow (I will annotate parts of the code that I have questions on by a [RF]
request feedback comment):
general helper tree functions
// either finds key or the location where the key should go
template <typename Node>
Node* tree_find(Node* start, const typename Node::key_type& key) {
while (start != Node::nil && start->key != key) {
if (key < start->key) start = start->left;
else start = start->right;
}
return start;
}
template <typename Node>
Node* tree_min(Node* start) {
while (start->left != Node::nil) start = start->left;
return start;
}
template <typename Node>
Node* tree_max(Node* start) {
while (start->right != Node::nil) start = start->right;
return start;
}
// const versions
template <typename Node>
const Node* tree_find(const Node* start, const typename Node::key_type& key) {
while (start != Node::nil && start->key != key) {
if (key < start->key) start = start->left;
else start = start->right;
}
return start;
}
template <typename Node>
const Node* tree_min(const Node* start) {
while (start->left != Node::nil) start = start->left;
return start;
}
template <typename Node>
const Node* tree_max(const Node* start) {
while (start->right != Node::nil) start = start->right;
return start;
}
// successor is node with smallest key greater than start
template <typename Node>
Node* tree_successor(const Node* start) {
if (start->right != Node::nil) return tree_min(start->right);
// else go up until a node that's the left child of parent
Node* parent {start->parent};
while (parent != Node::nil && start == parent->right) {
start = parent;
parent = parent->parent;
}
return parent;
}
template <typename Node>
Node* tree_predecessor(const Node* start) {
if (start->left != Node::nil) return tree_max(start->left);
// else go up until a node that's the right child of parent
Node* parent {start->parent};
while (parent != Node::nil && start == parent->left) {
start = parent;
parent = parent->parent;
}
return parent;
}
// convenient and semantically clear functions
template <typename Node>
bool is_left_child(const Node* node) {
return node == node->parent->left;
}
template <typename Node>
bool is_right_child(const Node* node) {
return node == node->parent->right;
}
// BST traversals
template <typename Node, typename Op>
void preorder_walk(Node* start, Op&& op) {
if (start != Node::nil) {
op(start);
preorder_walk(start->left, op);
preorder_walk(start->right, op);
}
}
template <typename Node, typename Op>
void inorder_walk(Node* start, Op&& op) {
if (start != Node::nil) {
inorder_walk(start->left, op);
op(start);
inorder_walk(start->right, op);
}
}
template <typename Node, typename Op>
void postorder_walk(Node* start, Op&& op) {
if (start != Node::nil) {
postorder_walk(start->left, op);
postorder_walk(start->right, op);
op(start);
}
}
general tree iterators
// node iterators
// full traversal takes O(n) time and O(1) space
// traverse at most 2(n - 1) edges, where n is # nodes
// non-const bidirectional iterator
template <typename Node>
struct Tree_iterator {
using key_type = typename Node::key_type;
using CR = const Tree_iterator<Node>&;
using adjacent_iterator = Tree_adj_iterator<Node>;
Node* cur;
// [RF] I know that people will say the return should be a reference to itself, but I want to discourage chaining with incre/decrement operators
void operator++() {cur = tree_successor(cur);}
void operator--() {cur = tree_predecessor(cur);}
key_type& operator*() {return cur->key;}
Node* operator->() {return cur;}
Node* get() {return cur;}
bool operator==(CR other) const {return other.cur == cur;}
bool operator!=(CR other) const {return !(*this == other);}
adjacent_iterator begin() {
if (cur->left != Node::nil) return {cur->left};
else return {cur->right};
}
adjacent_iterator end() {return {Node::nil};}
friend std::ostream& operator<<(std::ostream& os, CR itr) {os << *itr.cur; return os;}
};
// const bidirectional iterator
template <typename Node>
struct Tree_const_iterator {
using key_type = typename Node::key_type;
using CR = const Tree_const_iterator<Node>&;
using adjacent_const_iterator = Tree_adj_const_iterator<Node>;
const Node* cur;
void operator++() {cur = tree_successor(cur);}
void operator--() {cur = tree_predecessor(cur);}
const key_type& operator*() const {return cur->key;}
const Node* operator->() const {return cur;}
const Node* get() const {return cur;}
bool operator==(CR other) const {return other.cur == cur;}
bool operator!=(CR other) const {return !(*this == other);}
adjacent_const_iterator begin() const {
if (cur->left != Node::nil) return {cur->left};
else return {cur->right};
}
adjacent_const_iterator end() const {return {Node::nil};}
};
// adjacent iterator (expected of all graphs)
// iterators from left to right if they exist
template <typename Node>
struct Tree_adj_iterator {
using pointer = Node*;
using key_type = typename Node::key_type;
using CR = const Tree_adj_iterator<Node>&;
pointer cur;
void operator++() {if (is_left_child(cur)) cur = cur->parent->right; else cur = Node::nil;}
void operator--() {if (is_right_child(cur)) cur = cur->parent->left; else cur = Node::nil;}
key_type& operator*() {return cur->key;}
pointer operator->() {return cur;}
pointer get() {return cur;}
bool operator==(CR other) const {return other.cur == cur;}
bool operator!=(CR other) const {return !(*this == other);}
friend std::ostream& operator<<(std::ostream& os, CR itr) {os << *itr.cur; return os;}
};
// const version
template <typename Node>
struct Tree_adj_const_iterator {
using const_pointer = const Node*;
using key_type = typename Node::key_type;
using CR = const Tree_adj_const_iterator<Node>&;
const_pointer cur;
void operator++() {if (is_left_child(cur)) cur = cur->parent->right; else cur = Node::nil;}
void operator--() {if (is_right_child(cur)) cur = cur->parent->left; else cur = Node::nil;}
const key_type& operator*() const {return cur->key;}
const_pointer operator->() const {return cur;}
const_pointer get() const {return cur;}
bool operator==(CR other) const {return other.cur == cur;}
bool operator!=(CR other) const {return !(*this == other);}
};
actual Treap class
template <typename Node>
class Treap {
protected:
using NP = Node*;
using T = typename Node::key_type;
NP root {Node::nil};
// treap specific methods
void heap_fix_up(NP node) {
// climb up via rotations until priority's min-heap property is fulfilled
while (node != root && node->priority < node->parent->priority) {
if (is_left_child(node)) rotate_right(node->parent);
else rotate_left(node->parent);
}
}
template <typename Op>
void treap_insert(NP node, Op&& fixup) {
// assumes caller has handle to node so no need to return it
tree_insert(root, node, fixup);
heap_fix_up(node);
}
template <typename Op>
void treap_delete(NP node, Op&& fixup) {
// 4 cases based on the children of node
// either no children or just one child
if (node->left == Node::nil) transplant(node, node->right); // no left child, become right child
else if (node->right == Node::nil) transplant(node, node->left); // no right child, become left child
// else both children, find successor in right subtree
else {
NP successor {tree_min(node->right)};
if (successor->parent != node) { // not immediate right child
// replace successor by its right child, then give successor node's right subtree
transplant(successor, successor->right);
successor->right = node->right;
successor->right->parent = successor;
}
transplant(node, successor);
successor->left = node->left;
successor->left->parent = successor;
// restore priority
while (successor->priority > successor->left->priority || successor->priority > successor->right->priority) {
if (successor->left->priority < successor->right->priority) rotate_right(successor);
else rotate_left(successor);
}
}
fixup(node);
delete node;
}
// core treap utilities
// rotations to balance tree
/* rotate left shifts everything left s.t.
it becomes the left child of its original right child
its right child is replaced by its original right child's left child
A <--
/ \ |
/ \ |
B C
/ \ / \
/ \ / \
D E F G
rotate_left(A) transforms into
C
/ \
/ \
A G
/ \
/ \
B F
/ \
/ \
D E
*/
virtual void rotate_left(NP node) {
NP child {node->right};
node->right = child->left;
if (child->left != Node::nil) child->left->parent = node;
child->parent = node->parent;
if (node->parent == Node::nil) root = child;
else if (is_left_child(node)) node->parent->left = child;
else node->parent->right = child;
child->left = node;
node->parent = child;
}
// rotate right shifts everything right, inverse of rotate left
virtual void rotate_right(NP node) {
NP child {node->left};
node->left = child->right;
if (child->right != Node::nil) child->right->parent = node;
child->parent = node->parent;
if (node->parent == Node::nil) root = child;
else if (is_left_child(node)) node->parent->left = child;
else node->parent->right = child;
child->right = node;
node->parent = child;
}
// cannot do a simple tree_find for insert since parent has to be updated
template <typename Op>
void tree_insert(NP start, NP node, Op&& fixup) {
NP parent {Node::nil};
while (start != Node::nil) {
fixup(start, node);
parent = start;
if (node->key < start->key) start = start->left;
else start = start->right;
}
node->parent = parent;
if (parent == Node::nil) root = node; // tree was empty
else if (node->key < parent->key) parent->left = node;
else parent->right = node;
}
// moves one subtree to replace another one
void transplant(NP old, NP moved) {
// move moved into old's position in the tree without deleting any trees
// no parent must mean that old is root or nil itself
if (old->parent == Node::nil) root = moved;
// old is a left child
else if (is_left_child(old)) old->parent->left = moved;
else old->parent->right = moved;
// can assign to parent unconditionally due to sentinel (we know it's not nullptr)
moved->parent = old->parent;
// updating moved's children and erasing old is up to the caller
}
public:
using value_type = T;
using pointer = Node*;
using const_pointer = const Node*;
using iterator = Tree_iterator<Node>;
using const_iterator = Tree_const_iterator<Node>;
Treap() = default;
Treap(std::initializer_list<T> l) {
for (const auto& v : l) insert(v);
}
// [RF] all my derived classes add no extra data in the tree itself (all in the nodes); should I make this non-virtual?
virtual ~Treap() {clear();}
// modifiers
void insert(const T& data) {
NP node {new Node(data)};
treap_insert(node, skip_insert_fixup<Node>);
};
void erase(const T& data) {
NP node {tree_find(root, data)};
if (node != Node::nil) treap_delete(node, skip_delete_fixup<Node>);
}
void clear() {
postorder_walk(root, [](NP node){delete node;});
root = Node::nil;
}
// query
iterator find_and_elevate(const T& key) {
NP found {tree_find(root, key)};
// elevate the found node's priority for better temporal locality
if (found != Node::nil) {
found->priority >>= 1;
heap_fix_up(found);
}
return iterator{found};
}
iterator find(const T& key) {
NP found {tree_find(root,key)};
return iterator{found};
}
const_iterator find(const T& key) const {
NP found {tree_find(root,key)};
return const_iterator{found};
}
// with hints / starting points
iterator find(const T& key, const_iterator hint) {
NP found {tree_find(hint.get(),key)};
return iterator{found};
}
const_iterator find(const T& key, const_iterator hint) const {
NP found {tree_find(hint.get(),key)};
return const_iterator{found};
}
size_t size() const {
size_t num_elems {0};
inorder_walk(root, [&](const Node* node){++num_elems;});
return num_elems;
}
bool empty() const {return root == Node::nil;}
// iterators
iterator begin() {return iterator{tree_min(root)};}
iterator end() {return iterator{Node::nil};}
const_iterator begin() const {return const_iterator{tree_min(root)};}
const_iterator end() const {return const_iterator{Node::nil};}
void print() const {
inorder_walk(root, [](NP node){std::cout << node->key << '(' << node->priority << ')' << ' ';});
std::cout << "root: " << root->key << '(' << root->priority << ')' << std::endl;
}
};
template <typename T>
struct Treap_node {
static Treap_node* nil;
using key_type = T;
Treap_node *parent, *left, *right;
T key;
int priority;
// [RF] should the sentinel priority be maximum or minimum?
Treap_node() : priority{std::numeric_limits<int>::max()} {} // sentinel construction
Treap_node(T val) : parent{nil}, left{nil}, right{nil}, key{val}, priority{rand()} {}
};
template <typename T>
Treap_node<T>* Treap_node<T>::nil {new Treap_node{}};
template <typename T>
using Basic_treap = Treap<Treap_node<T>>;
profiling code
// somewhere in utility.h with std namespace usage and proper includes
class Timer {
time_point<system_clock> init, end;
microseconds elapsed = duration_cast<microseconds>(end - init);
public:
Timer() : init{system_clock::now()} {}
void restart() { init = system_clock::now(); }
double tonow() {
end = system_clock::now();
elapsed = duration_cast<microseconds>(end - init);
return elapsed.count();
}
};
constexpr int test_size = 10000000;
template <typename Set>
void profile_set(Set& single_set) {
Timer time;
for (int i = 0; i < test_size; ++i)
single_set.insert(i);
cout << "sequential insert: " << time.tonow() / 1000.0 << endl;
time.restart();
for (auto& elem : single_set) (void)elem;
cout << "iteration: " << time.tonow() / 1000.0 << endl;
time.restart();
single_set.clear();
cout << "clear: " << time.tonow() / 1000.0 << endl;
time.restart();
for (int i = test_size - 1; i >= 0; --i)
single_set.insert(i);
cout << "reverse order insert: " << time.tonow() / 1000.0 << endl;
time.restart();
int find_start = randint(test_size - 1);
for (int i = 0; i < test_size; ++i)
single_set.find((find_start + i) % test_size);
cout << "find: " << time.tonow() / 1000.0 << endl;
time.restart();
for (int i = 0; i < test_size; i += 5) {
for (int j = 0; j < 5; ++j) single_set.find((find_start + i) % test_size);
}
cout << "find nearby: " << time.tonow() / 1000.0 << endl;
time.restart();
for (int i = 0; i < test_size; ++i)
single_set.erase(i);
cout << "erase: " << time.tonow() / 1000.0 << endl;
}
void profile_std_set() {
cout << "std set\n";
std::set<int> std_set;
profile_set(std_set);
cout << endl;
}
void profile_treap() {
cout << "treap\n";
Basic_treap<int> treap_set;
profile_set(treap_set);
cout << endl;
}
The full code can be found here, which is part of my simple algorithms and data structures library (feel free to use)