Take the 2-minute tour ×
Code Review Stack Exchange is a question and answer site for peer programmer code reviews. It's 100% free, no registration required.

With the inclusion of <random> into C++11, we can finally chuck out std::rand and start using generator with much better properties. Still, it's easy to get things wrong (uniform sampling where 0 occurs more than it should due to usage of % size comes to mind). To that end, I've written some wrapper libraries to ease usage of the library for some basic tasks, namely:

  • Selection of values from a sequence with equal probability
  • Selection of values from a sequence according to a weighted distribution
  • Random "choice" - k out of n selection.

The style is very similar to that of most of the algorithms in <algorithm>. Any comments or suggestions (or bug-finding) is welcome:

initialized_generator.hpp:

/*! \file intialized_generator.hpp
*   \brief Helper struct to initialize and "warm up" a random number generator.
*
*/

#ifndef INITIALIZED_GENERATOR_SGL_HPP__
#define INITIALIZED_GENERATOR_SGL_HPP__

#include <random>
#include <array>
#include <algorithm>
#include <type_traits>
#include <ctime>

namespace simplegl
{
namespace detail
{
template <typename GeneratorType>
struct initialized_generator
{
public:

    typedef GeneratorType                        generator_type;
    typedef typename generator_type::result_type result_type;

private:

    generator_type                               generator_;

    //! \brief Seeds the given generator with a given source of randomness.
    /*!  
     *  Some random number generators generate "poor" random numbers until their
     *  internal states are sufficiently "mixed up". This should seed the given
     *  generator with enough randomness to overcome the initial bad statistical
     *  properties that are seen in some of these generators.
     *
     *  The code below was nabbed from 
     *  http://stackoverflow.com/questions/15509270/does-stdmt19937-require-warmup 
     *
     *  \param rd A std::random_device, utilized to generate random seed data.
    */
    void warmup(std::random_device& rd)
    {
        std::array<int, std::mt19937::state_size> seed_data;
        std::generate_n(seed_data.data(), seed_data.size(), std::ref(rd));
        std::seed_seq seq(std::begin(seed_data), std::end(seed_data));
        generator_.seed(seq);
    }

    //! \brief Seeds the given generator utilizing the current time.
    /*!  
     *  Seeds the given generator utilizing the current time as a seed.
     *  If the generator is std::mt19937, then also attempts to initialize
     *  it and warm it up with some additional (not really random) seed
     *  parameters.    
     */
    void warmup()
    {
        std::time_t initial = time(nullptr);
        //This branch will be optimized out at compile time
        if(std::is_same<GeneratorType, std::mt19937>::value) {
            //Even relatively close "non-random" numbers for a sequence should
            //be ok for seeding a mersenne twister.
            std::seed_seq seq{initial, initial + 1, initial + 2, initial + 3,
                              initial + 4, initial + 5};
            generator_.seed(seq);
        } else {
            //For an LCG, time should suffice as a seed. For a subtract with carry,
            //it may not - apparently they are actually quite difficult to seed
            //and are incredibly sensitive to initial state. This should perhaps
            //be changed to deal with that, but I don't really have the expertise
            //to know exactly how.
            generator_.seed(initial);
        }
    }

public:

    initialized_generator()
    {
        warmup();
    }

    initialized_generator(std::random_device& rd)
    {
        warmup(rd);
    }

    //! \brief Returns the minimum possible value the underlying generator 
    /*!        can generate.
     *
     *  Simple wrapper function forwarding to GeneratorType::min().   
     */
    static constexpr result_type min()
    {
        return generator_type::min();
    }

    //! \brief Returns the maximum possible value the underlying generator 
    /*!        can generate.
     *
     *  Simple wrapper function forwarding to GeneratorType::max().   
     */
    static constexpr result_type max()
    {
        return generator_type::max();
    }

    //! \brief Returns the next random number from the underlying generator.
    /*! 
     *  Simple wrapper function forwarding to operator()().
     *  \return The next random number, of type GeneratorType::result_type.
     */
    result_type operator()()
    {
        return generator_();
    }

    //! \brief Allows implicit conversion back to the underlying GeneratorType.
    /*!  
     */
    operator generator_type()
    {
        return generator_;
    }

}; //end struct initialized_generator

} //end namespace detail 
} //end namespace simplegl

#endif //INITIALIZED_GENERATOR_SGL_HPP__

weighted_selection.hpp:

/*! \file weighted_selection.hpp
*   \brief Selects values from a sequence based on a linear weighting.
*
*/

#ifndef WEIGHTED_SELECTION_SGL_HPP__
#define WEIGHTED_SELECTION_SGL_HPP__

#include <iterator>
#include <random>
#include <initializer_list>
#include <utility>

#include "initialized_generator.hpp"

namespace simplegl
{

template <typename GeneratorType = std::mt19937, typename UIntType = std::size_t>
struct weighted_selection
{
private:

    typedef UIntType uint_type;

    std::discrete_distribution<UIntType>         distribution_;
    detail::initialized_generator<GeneratorType> generator_;
    uint_type                                    prob_length_;

public:

    weighted_selection(std::initializer_list<double> init, std::random_device& rd)
      : distribution_(init),
        generator_(rd),
        prob_length_(init.size())
    { }

    weighted_selection(std::initializer_list<double> init)
      : distribution_(init),
        prob_length_(init.size())
    { }

    template <typename Iterator>
    weighted_selection(Iterator begin, Iterator end, std::random_device& rd)
      : distribution_(begin, end),
        generator_(rd),
        prob_length_(std::distance(begin, end))
    { }

    template <typename Iterator>
    weighted_selection(Iterator begin, Iterator end)
      : distribution_(begin, end),
        prob_length_(std::distance(begin, end))
    { }

    //! \brief Selects a random value from the sequence [begin, end) based
    //*        on the weights given on initialization.
    /*!  
     *  Overload that performs error checking based on the distance
     *  between begin and end.
     *
     *  \param begin A parameter supporting the RandomAccessIterator concept,
     *         which dereferences to the start of the given sequenece.
     *  \param end A parameter supporting the RandomAccessIterator concept,
     *         which is one past the end of the sequence.
     *  \return The next random value, with probability given by the 
     *          supplied weights. 
     *  \throw std::length_error when the distance between end and
     *         begin is not equal to the length of the weights initially supplied
     *         on object construction.
    */
    template <typename RandomAccessIter>
    typename std::iterator_traits<RandomAccessIter>::value_type
    operator()(RandomAccessIter begin, RandomAccessIter end)
    {
        auto distance = end - begin;
        uint_type unsigned_distance = 
            (distance > 0) ? static_cast<uint_type>(distance) : 0;
        if(unsigned_distance != prob_length_) {
            throw std::length_error("Iterator distance does not equal probability \
                                     weight distances");
        }
        auto next = distribution_(generator_);
        return *(begin + next);
    }

    //! \brief Selects a random value from the sequence [begin, ...) based
    //*        on the weights given on initialization.
    /*!  
     *  Overloaded version that performs no error checking. It is assumed that
     *  (begin + weights.size() - 1) is dereferencable.
     *
     *  \param begin A parameter supporting the RandomAccessIterator concept,
     *         which dereferences to the start of the given sequenece.
     *  \return The next random value, with probability given by the 
     *          supplied weights. 
    */
    template <typename RandomAccessIter>
    typename std::iterator_traits<RandomAccessIter>::value_type
    operator()(RandomAccessIter begin)
    {
        auto next = distribution_(generator_);
        return *(begin + next);
    }

    //! \brief The number of weights this was initialized with.
    uint_type num_weights() const
    {
        return prob_length_;
    }

}; //end struct weighted_selection

} //end namespace simplegl

#endif //WEIGHTED_SELECTION_SGL_HPP__

uniform_selection.hpp:

/*! \file uniform_selection.hpp
*   \brief Selects values from a sequence with equal probability.
*
*/

#ifndef UNIFORM_SELECTION_SGL_HPP__
#define UNIFORM_SELECTION_SGL_HPP__

#include <iterator>
#include <random>
#include <stdexcept>

#include "initialized_generator.hpp"

namespace simplegl
{

template <typename GeneratorType = std::mt19937, typename UIntType = std::size_t>
struct uniform_selection
{
public:

    typedef UIntType uint_type;

private:

    std::uniform_int_distribution<UIntType>      distribution_;
    detail::initialized_generator<GeneratorType> generator_;

public:

    uniform_selection()
    { }

    uniform_selection(std::random_device& rd)
      : generator_(rd)
    { }

    //! \brief Selects a random value from the sequence [begin, end) with
    //*        equal probability.
    /*!  
     *  \param begin A parameter supporting the RandomAccessIterator concept,
     *         which dereferences to the start of the given sequenece.
     *  \param end A parameter suporting the RandomAccessIterator concept,
     *         which points to one past the end of the sequence.
     *  \return The next random value from [begin, end) with each value
     *          having probability of 1 / distance(begin, end) of being
     *          chosen.
     *  \throw  std::length_error if begin and end are equal, or if
     *          the distance between them is negative.
    */     
    template <typename RandomAccessIter>
    typename std::iterator_traits<RandomAccessIter>::value_type
    operator()(RandomAccessIter begin, RandomAccessIter end)
    {
        typedef typename std::uniform_int_distribution<UIntType> distribution_type;
        typedef typename distribution_type::param_type param_type;

        auto distance = end - begin;
        uint_type unsigned_distance = 
            (distance > 0) ? static_cast<uint_type>(distance) : 0;
        uint_type zero = 0;

        if(distance <= 0) {
            throw std::length_error("Iterator distance is negative or zero.");
        }

        auto next = distribution_(generator_, param_type(zero, unsigned_distance - 1));
        return *(begin + next);
    }
}; //end struct uniform_selection

} //end namespace simplegl

#endif //UNIFORM_SELECTION_SGL_HPP__

intrusive_random_choice.hpp:

/*! \file intrusive_random_choice.hpp
*   \brief Selects k random values from a sequence of length n, k <= n.
*
*/

#ifndef INTRUSIVE_RANDOM_CHOICE_SGL_HPP__
#define INTRUSIVE_RANDOM_CHOICE_SGL_HPP__

#include <algorithm>
#include <random>
#include <limits>

#include "initialized_generator.hpp"

namespace simplegl
{

template <typename GeneratorType = std::mt19937, typename UIntType = std::size_t>
struct intrusive_random_choice
{
public:

    typedef UIntType uint_type;

private:

    detail::initialized_generator<GeneratorType> generator_;

public:

    intrusive_random_choice()
    { }

    intrusive_random_choice(std::random_device& rd)
      : generator_(rd)
    { }

    //! \brief Selects k random values from a sequence, where each value is 
    //*        chosen with equal probability. This will modify the order in the
    //*        underlying sequence [begin, end).
    /*!
     *  k must be less than or equal to the length of the sequence. No checking
     *  is done to enforce this. If k is greater than the sequence length, this
     *  will result in undefined behaviour. Inserts the k values into a sequence
     *  starting at insert. There must be space allocated for at least k elements,
     *  otherwise, undefined behaviour will result. 
     *
     *  This will generate a correct uniform distribution for k when the sequence 
     *  length is less than (roughly) 2500. Any size larger requires more
     *  sophisticated techniques.
     *     
     *  \param begin A parameter supporting the RandomAccessIterator concept,
     *         which dereferences to the start of the given sequenece to select
     *         values from.
     *  \param end A parameter suporting the RandomAccessIterator concept,
     *         which points to one past the end of the sequence to select
     *         values from.
     *  \param insert The start of the sequence where the k random values will
     *         be inserted. 
    */  
    template <typename RandomAccessIter, typename RandomAccessIter2>
    void operator()(RandomAccessIter begin, RandomAccessIter end, 
                    RandomAccessIter2 insert, uint_type k)
    {
        std::shuffle(begin, end, generator_);
        for(uint_type i = 0; i < k; ++i) {
            *insert++ = *begin++;
        }
    }

}; //end struct intrusive_random_choice

} //end namespace simplegl

#endif //INTRUSIVE_RANDOM_CHOICE_SGL_HPP__

example_usage.cpp:

#include <initializer_list>
#include <vector>
#include <string>
#include <iostream>
#include <unordered_map>
#include <deque>

#include "weighted_selection.hpp"
#include "uniform_selection.hpp"
#include "intrusive_random_choice.hpp"

using namespace simplegl;

int main()
{
    //Weighted Selection example
    weighted_selection<> rs {10, 15, 20, 12, 18};

    std::vector<std::string> s {"The", "Cake", "Is", "A", "Lie"};
    std::unordered_map<std::string, std::size_t> strcount;

    //Select 15 random elements:
    for(int i = 0; i < 15; ++i) {
        std::cout << rs(s.begin(), s.end()) << "\n";
    }

    std::cout << "\n--------------------------------------------------\n";

    //Uniform Selection Example
    uniform_selection<> us;

    std::deque<int> d{10, 15, 20, 25, 30};
    std::unordered_map<int, std::size_t> counts;

    for(unsigned i = 0; i < 100000; ++i) {
        ++counts[us(d.begin(), d.end())];
    }

    for(const auto& it : counts) {
        std::cout << it.first << " seen: " << it.second << " times\n";
    }

    std::cout << "\n";

    //The same uniform_selection object can be used with sequences with
    //different lengths
    std::vector<int> ex {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    for(int i = 0; i < 5; ++i) {
        std::cout << "Random element from ex: " << us(ex.begin(), ex.end()) << "\n";
    }

    std::cout << "\n--------------------------------------------------\n";

    //Intrusive random choice example
    intrusive_random_choice<> irc;
    //Make a copy of d so we leave it in order
    std::deque<int> dcopy(d.begin(), d.end());
    //We'll select 3 elements from the 5 in dcopy
    std::vector<int> v(3);
    counts.clear();

    for(int j = 0; j < 100000; ++ j) {
        irc(dcopy.begin(), dcopy.end(), v.begin(), 3);
        for(int i : v) {
            ++counts[i];
        }
    }

    for(const auto& it : counts) {
        std::cout << it.first << " seen: " << it.second << " times\n";
    }
}
share|improve this question
2  
Double underscore in identifiers __ is reserved. –  Loki Astari May 16 '13 at 12:01
add comment

Know someone who can answer? Share a link to this question via email, Google+, Twitter, or Facebook.

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Browse other questions tagged or ask your own question.