I just read and experimented with C++ implementation of the merge sort algorithm from the Wikipedia page. During the experiments I have slightly modified the source code and would like to know how much the algorithm is improved (if it is).
//! \brief Performs a recursive merge sort on the given vector
//! \param vec The vector to be sorted using the merge sort
//! \return The sorted resultant vector after merge sort is
//! complete.
vector<int> merge_sort(vector<int>& vec)
{
// Termination condition: List is completely sorted if it
// only contains a single element.
if(vec.size() == 1)
{
return vec;
}
// Determine the location of the middle element in the vector
std::vector<int>::iterator middle = vec.begin() + (vec.size() / 2);
vector<int> left(vec.begin(), middle);
vector<int> right(middle, vec.end());
// Perform a merge sort on the two smaller vectors
left = merge_sort(left);
right = merge_sort(right);
return merge(vec,left, right);
}
//! \brief Merges two sorted vectors into one sorted vector
//! \param left A sorted vector of integers
//! \param right A sorted vector of integers
//! \return A sorted vector that is the result of merging two sorted
//! vectors.
vector<int> merge(vector<int> &vec,const vector<int>& left, const vector<int>& right)
{
// Fill the resultant vector with sorted results from both vectors
vector<int> result;
unsigned left_it = 0, right_it = 0;
while(left_it < left.size() && right_it < right.size())
{
// If the left value is smaller than the right it goes next
// into the resultant vector
if(left[left_it] < right[right_it])
{
result.push_back(left[left_it]);
left_it++;
}
else
{
result.push_back(right[right_it]);
right_it++;
}
}
// Push the remaining data from both vectors onto the resultant
while(left_it < left.size())
{
result.push_back(left[left_it]);
left_it++;
}
while(right_it < right.size())
{
result.push_back(right[right_it]);
right_it++;
}
//show merge process..
int i;
for(i=0;i<result.size();i++)
{
cout<<result[i]<<" ";
}
// break each line for display purposes..
cout<<"***********"<<endl;
//take a source vector and parse the result to it. then return it.
vec = result;
return vec;
}
The following is my code:
5 typedef std::vector<int> int_v;
6
7 int_v Merge(int_v& vec, int_v& l, int_v& r) {
8 int_v res;
9 res.reserve(l.size() + r.size());
10 unsigned int li = 0;
11 unsigned int ri = 0;
12 if (l.size() > 1 && (l[l.size()-1] < r[0])) {
13 res.insert(res.end(), l.begin(), l.end());
14 res.insert(res.end(), r.begin(), r.end());
15 return res;
16 }
17 while (li < l.size() && ri < r.size()) {
18 if (l[li] < r[ri]) {
19 res.push_back(l[li++]);
20 } else {
21 res.push_back(r[ri++]);
22 }
23 }
24 while (li < l.size()) {
25 res.push_back(l[li++]);
26 }
27 while (ri < r.size()) {
28 res.push_back(r[ri++]);
29 }
30 vec = res;
31 return vec;
32 }
33
34 int_v MergeSort(int_v& v) {
35 if (1 == v.size()) {
36 return v;
37 }
38 int_v::iterator m = v.begin() + v.size()/2;
39 int_v l(v.begin(), m);
40 int_v r(m, v.end());
41
42 l = MergeSort(l);
43 r = MergeSort(r);
44 return Merge(v, l, r);
45 }
The modifications are:
9 res.reserve(l.size() + r.size());
This will help to avoid relocation of the vector each time when there will need to double the size to be able to store the next element.
12 if (l.size() > 1 && (l[l.size()-1] < r[0])) {
13 res.insert(res.end(), l.begin(), l.end());
14 res.insert(res.end(), r.begin(), r.end());
15 return res;
16 }
I think this will help to avoid the loops at lines 17 - 29 when just concatenation of two sides is enough.
Running both versions Wikipedia's one and mine for five million randomly generated integers produces the following differences:
Wikipedia's example:
time ./a.out real 0m26.278s user 0m26.205s sys 0m0.064s
Mine:
time ./a.out real 0m22.129s user 0m22.026s sys 0m0.092s