I'm currently trying to prove a data structure which I've created to speed up lookups for integral keys. The specific case I have in mind has the following usage case:
- At start is populated with n number of items (where n can be from 1 to thousands), and the specific key is actually an
unsigned int
. - The critical path in the code requires a lookup based on this key.
I've tried the following,
- sorted
vector
with binary search std::map
boost::unordered_map
And now this tree. I've found through profiling that the following holds
- If every item looked up exists, then
unordered_set
is quicker thanstd::map
which is quicker than sorted vector - If the number of successful lookups for the search set is low, then
std::map
is quicker thanunordered_set
(I guess the hash overhead is higher for failure case)
With this tree, I've found that it's quicker in all cases, but what I would like is some critique on the approach - are there any further optimisations that I am missing for example, and here is the tree:
/** Copyright: Nim 2011 */
template <typename KeyType, typename ValueType>
class tree
{
public:
typedef ValueType value_type;
typedef ValueType& reference;
typedef ValueType const& const_reference;
typedef KeyType key_type;
typedef ValueType* iterator;
typedef ValueType const* const_iterator;
private:
static const size_t MAX_SIZE = 256;
typedef unsigned char index_type;
struct node
{
node() : _cKey(), _cValue(), _vNodes() {}
~node()
{
cleanup();
}
void add(key_type index, key_type key, const_reference value)
{
if (!index)
{
_cKey = key;
_cValue.reset(new value_type(value));
return;
}
// get the lsb
index_type lsb = index & 0xff;
if (!_vNodes)
{
_vNodes = new node*[MAX_SIZE]();
_vNodes[lsb] = new node();
}
else if (!_vNodes[lsb])
_vNodes[lsb] = new node();
_vNodes[lsb]->add(index >> 8, key, value);
}
iterator find(key_type index, key_type key)
{
if (_cKey == key)
return _cValue.get();
if (_vNodes)
{
// get the lsb
index_type lsb = index & 0xff;
if (_vNodes[lsb])
return _vNodes[lsb]->find(index >> 8, key);
}
return 0;
}
const_iterator find(key_type index, key_type key) const
{
if (_cKey == key)
return _cValue.get();
if (_vNodes)
{
// get the lsb
index_type lsb = index & 0xff;
if (_vNodes[lsb])
return _vNodes[lsb]->find(index >> 8, key);
}
return 0;
}
void optimize()
{
if (_vNodes)
{
// first see how many nodes we have
size_t count = 0;
for(size_t i = 0; i < MAX_SIZE; ++i)
count += (_vNodes[i] != 0);
switch(count)
{
case 0: return; // nothing to optimize
case 1: pullup(); break;// we could pull up
default:
{
// means we have more than one node at this level, so optimize each
for(size_t i = 0; i < MAX_SIZE; ++i)
if (_vNodes[i])
_vNodes[i]->optimize();
}
}
}
}
void pullup()
{
// try to pullup a leaf node
for(size_t i = 0; i < MAX_SIZE; ++i)
if (_vNodes[i])
{
// if we manage to pullup, then cleanup the nodeset at this level
if (_vNodes[i]->pullup(_cKey, _cValue))
cleanup();
break;
}
}
bool pullup(key_type& key, boost::shared_ptr<value_type>& value)
{
if (!_vNodes)
{
// this is a leaf node -
key = _cKey;
value = _cValue;
return true;
}
// cannot pull up at this level
// see whether we can pull up from levels below
size_t count = 0;
for(size_t i = 0; i < MAX_SIZE; ++i)
count += (_vNodes[i] != 0);
switch(count)
{
case 0:
{
// this is a leaf node -
key = _cKey;
value = _cValue;
return true;
}
case 1:
{
for(size_t i = 0; i < MAX_SIZE; ++i)
if (_vNodes[i])
// if we manage to pullup, then cleanup the nodeset at this level
return _vNodes[i]->pullup(key, value);
break;
}
default:
{
// means we have more than one node at this level so cannot pullup
}
}
return false;
}
void print(size_t index, std::ostream& str)
{
str << "<node i='" << index << '\'';
if (_cValue)
str << " key='" << _cKey << "' value='" << *_cValue << "'";
str << '>' << std::endl;
if (_vNodes != 0)
for(size_t i = 0; i < MAX_SIZE; ++i)
if (_vNodes[i])
_vNodes[i]->print(i, str);
str << "</node>";
}
void cleanup()
{
if (_vNodes)
{
for(size_t i = 0; i < MAX_SIZE; ++i)
if (_vNodes[i])
delete _vNodes[i];
delete[] _vNodes;
_vNodes = 0;
}
}
key_type _cKey;
boost::shared_ptr<value_type> _cValue;
node** _vNodes;
};
public:
tree() : _vNodes() {}
~tree()
{
if (_vNodes != 0)
{
for(size_t i = 0; i < MAX_SIZE; ++i)
if (_vNodes[i])
delete _vNodes[i];
delete[] _vNodes;
_vNodes = 0;
}
}
iterator end() { return 0; }
const_iterator end() const { return 0; }
void add(key_type key, const_reference value)
{
// get the lsb
unsigned char lsb = key & 0xff;
if (!_vNodes)
{
_vNodes = new node*[MAX_SIZE]();
_vNodes[lsb] = new node();
}
else if (!_vNodes[lsb])
_vNodes[lsb] = new node();
_vNodes[lsb]->add(key >> 8, key, value);
}
iterator find(key_type key)
{
if (_vNodes)
{
// get the lsb
index_type lsb = key & 0xff;
if (_vNodes[lsb])
return _vNodes[lsb]->find(key >> 8, key);
}
return 0;
}
const_iterator find(key_type key) const
{
if (_vNodes)
{
// get the lsb
index_type lsb = key & 0xff;
if (_vNodes[lsb])
return _vNodes[lsb]->find(key >> 8, key);
}
return 0;
}
void optimize()
{
if (_vNodes )
// optmize each root node
for(size_t i = 0; i < MAX_SIZE; ++i)
if (_vNodes[i])
_vNodes[i]->optimize();
}
friend
std::ostream& operator<<(std::ostream& str, tree const& t)
{
str << "<?xml version='1.0'?>" << std::endl;
if (!t._vNodes)
return str << "<tree/>" << std::endl;
str << "<tree> " << std::endl;
for(size_t i = 0; i < MAX_SIZE; ++i)
if (t._vNodes[i])
t._vNodes[i]->print(i, str);
return str << "</tree> " << std::endl;
}
private:
node** _vNodes;
};
Some salient points:
- I don't need the container to be iterable - all I want is the fastest possible lookups
- I don't care about insertion performance as it's a start up cost
- I've added an
optimize
method to "prune" the tree and this works nicely, however the performance difference between a pruned and a normal tree is negligible. - Yes, I've considered
boost::scoped_array
. - I am considering using a
std::vector<node*>
rather thannode**
, I find the pointer sytanx strangely appealing.. :)
Thanks for any tips.