mercredi 3 juin 2020

Pseudo-random number generation and performance

This thread is quite long.

It is my understanding that functions like std::random_device::operator() are very slow to generate a random number, so I had an idea. The idea was to create two std::array's of random numbers, each with a size of Number, and generate Number*Number pseudo-random numbers with the XOR bit operator.

So first, I made an abstract class (could have been a namespace) that contains everything I will need:

#include <array>
#include <random>

class RandomTools
{
    public:

        template <typename Type, uint32_t Number>
        static typename std::array<Type, Number> GenerateTable(void);

        static std::random_device Rand;
};

std::random_device RandomTools::Rand;

template <typename Type, uint32_t Number>
typename std::array<Type, Number> RandomTools::GenerateTable(void)
{
    constexpr auto numberOfCalls = (sizeof(Type) + sizeof(uint32_t) - 1u)/sizeof(uint32_t);
    typename std::array<Type, Number> foo;

    for (auto& u : foo)
    {
        for (auto i = 0u; i < numberOfCalls; ++i)
        {
            u <<= 32u;
            u |= static_cast<Type>(RandomTools::Rand());
        }
    }

    return foo;
}

GenerateTable<Type, Number> basically returns an std::array containing Number random values of type Type.

Then I went on with my DualRandomGenerator class:

template <typename Type, uint32_t Number>
class DualRandomGenerator
{
    private:

        const typename std::array<Type, Number> table1 = RandomTools::GenerateTable<Type, Number>();
        const typename std::array<Type, Number> table2 = RandomTools::GenerateTable<Type, Number>();

        const typename std::array<Type, Number>::const_iterator beg1 = table1.cbegin();
        const typename std::array<Type, Number>::const_iterator beg2 = table1.cbegin();

        const typename std::array<Type, Number>::const_iterator end1 = table1.cend();
        const typename std::array<Type, Number>::const_iterator end2 = table1.cend();

        typename std::array<Type, Number>::const_iterator it1 = beg1;
        typename std::array<Type, Number>::const_iterator it2 = beg2;

    public:

        Type getNumber(void)
        {
            ++DualRandomGenerator::it1;

            if (DualRandomGenerator::it1 == DualRandomGenerator::end1)
            {
                DualRandomGenerator::it1 = DualRandomGenerator::beg1;
                ++DualRandomGenerator::it2;

                if (DualRandomGenerator::it2 == DualRandomGenerator::end2)
                    DualRandomGenerator::it2 = DualRandomGenerator::beg2;
            }

            return (*DualRandomGenerator::it1) ^ (*DualRandomGenerator::it2);
        }
};

And then I had another thought. Why only two std::array's? To generate one billion pseudo-random numbers, I would need two std::array's with a size of roughly 45,000. (45,000² is greater than 1,000,000,000.) If I had a third array, then each std::array need only have 1,000 values, since 1,000³ = 1,000,000,000. And of course, I would need even fewer numbers if I used additional arrays. So I went on with a more general class with as many arrays as I would like. To that end, I created a class that would contain the std::array data.

template <typename Type, uint32_t Number>
class TableData
{
    private:

        const typename std::array<Type, Number> table = RandomTools::GenerateTable<Type, Number>();

    public:


        const typename std::array<Type, Number>::const_iterator beg = table.cbegin();
        const typename std::array<Type, Number>::const_iterator end = table.cend();
        typename std::array<Type, Number>::const_iterator   iter = beg;
};

With this, I could make the general class I was talking about:

template <typename Type, uint32_t NumberOfRandomValues, uint32_t NumberOfArrays>
class RandomGenerator
{
    private:

        typename std::array<TableData<Type, NumberOfRandomValues>, NumberOfArrays> tables;

    public:

        Type getNumber(void)
        {
            auto result = static_cast<Type>(0);
            bool incrementNextIterator = true;

            for (auto& f : RandomGenerator::tables)
            {
                if (incrementNextIterator)
                {
                    ++f.iter;

                    if (f.iter != f.end)
                        incrementNextIterator = false;
                    else
                        f.iter = f.beg;
                }

                result ^= *f.iter;
            }

            return result;
        }
};

All the coding is done now. Let's compare the performance of each generator. In the main function, I decided to measure how much time was required for each class to generate 2,000,000,000 pseudo-random numbers. The first generator is the Mersenne Twister generator.

int main(void)
{
    constexpr auto numberOfCalls = 2'000'000'000u;
    std::mt19937 bar(42);

    {
        const auto time = std::chrono::system_clock::now();

        for (auto i = 0u; i < numberOfCalls; ++i)
            bar();

        std::cout << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
    }

    RandomGenerator<uint64_t, 45'000u, 2u> wee;

    {
        const auto time = std::chrono::system_clock::now();

        for (auto i = 0u; i < numberOfCalls; ++i)
            wee.getNumber();

        std::cout << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
    }

    DualRandomGenerator<uint64_t, 45'000u> woo;

    {
        const auto time = std::chrono::system_clock::now();

        for (auto i = 0u; i < numberOfCalls; ++i)
            woo.getNumber();

        std::cout << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
    }

    return 0;
}

And here's the result I got. I could guess that DualRandomGenerator<uint64_t, 45'000u> would outperform RandomGenerator<uint64_t, 45'000u, 2u> but I didn't expect that it would do so by a 5 or 6 factor. Output:

31103.8
47960.2
8253.8

With -03 optimisation:

0.03483
594.445
0.000938

So my question is, why? What could RandomGenerator possibly do that would require 5 times as much time than DualRandomGenerator? They both have the same number of std::array's, which is two.

Edit:

I changed main to the following:

int main(void)
{
    constexpr auto numberOfCalls = 500'000'000u;
    std::vector<uint64_t> v(numberOfCalls);

    std::mt19937 bar(42);
    v.clear();

    {
        const auto time = std::chrono::system_clock::now();

        for (auto i = 0u; i < numberOfCalls; ++i)
            v.push_back(bar());

        std::cout << "Mersenne: " << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
    }

    RandomGenerator<uint64_t, 45'000u, 2u> wee;
    v.clear();

    {
        const auto time = std::chrono::system_clock::now();

        for (auto i = 0u; i < numberOfCalls; ++i)
            v.push_back(wee.getNumber());

        std::cout << "Random2: " << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
    }

    RandomGenerator<uint64_t, 20u, 8u> wii;
    v.clear();

    {
        const auto time = std::chrono::system_clock::now();

        for (auto i = 0u; i < numberOfCalls; ++i)
            v.push_back(wii.getNumber());

        std::cout << "Random8: " << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
    }

    DualRandomGenerator<uint64_t, 45'000u> woo;
    v.clear();

    {
        const auto time = std::chrono::system_clock::now();

        for (auto i = 0u; i < numberOfCalls; ++i)
            v.push_back(woo.getNumber());

        std::cout << "Dual: " << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
    }

    return 0;
}

and this time, I got:

Mersenne: 3274.28
Random2: 1098.78
Random8: 1533.74
Dual: 1088.23

(The full construction of 500,000,000 zeros, before clearing, at the beginning was intentional.) I guess things are looking more normal.




Aucun commentaire:

Enregistrer un commentaire