This thread is quite long.
It is my understanding that functions like std::random_device::operator() are very slow to generate a random number, so I had an idea. The idea was to create two std::array's of random numbers, each with a size of Number, and generate Number*Number pseudo-random numbers with the XOR bit operator.
So first, I made an abstract class (could have been a namespace) that contains everything I will need:
#include <array>
#include <random>
class RandomTools
{
public:
template <typename Type, uint32_t Number>
static typename std::array<Type, Number> GenerateTable(void);
static std::random_device Rand;
};
std::random_device RandomTools::Rand;
template <typename Type, uint32_t Number>
typename std::array<Type, Number> RandomTools::GenerateTable(void)
{
constexpr auto numberOfCalls = (sizeof(Type) + sizeof(uint32_t) - 1u)/sizeof(uint32_t);
typename std::array<Type, Number> foo;
for (auto& u : foo)
{
for (auto i = 0u; i < numberOfCalls; ++i)
{
u <<= 32u;
u |= static_cast<Type>(RandomTools::Rand());
}
}
return foo;
}
GenerateTable<Type, Number> basically returns an std::array containing Number random values of type Type.
Then I went on with my DualRandomGenerator class:
template <typename Type, uint32_t Number>
class DualRandomGenerator
{
private:
const typename std::array<Type, Number> table1 = RandomTools::GenerateTable<Type, Number>();
const typename std::array<Type, Number> table2 = RandomTools::GenerateTable<Type, Number>();
const typename std::array<Type, Number>::const_iterator beg1 = table1.cbegin();
const typename std::array<Type, Number>::const_iterator beg2 = table1.cbegin();
const typename std::array<Type, Number>::const_iterator end1 = table1.cend();
const typename std::array<Type, Number>::const_iterator end2 = table1.cend();
typename std::array<Type, Number>::const_iterator it1 = beg1;
typename std::array<Type, Number>::const_iterator it2 = beg2;
public:
Type getNumber(void)
{
++DualRandomGenerator::it1;
if (DualRandomGenerator::it1 == DualRandomGenerator::end1)
{
DualRandomGenerator::it1 = DualRandomGenerator::beg1;
++DualRandomGenerator::it2;
if (DualRandomGenerator::it2 == DualRandomGenerator::end2)
DualRandomGenerator::it2 = DualRandomGenerator::beg2;
}
return (*DualRandomGenerator::it1) ^ (*DualRandomGenerator::it2);
}
};
And then I had another thought. Why only two std::array's? To generate one billion pseudo-random numbers, I would need two std::array's with a size of roughly 45,000. (45,000² is greater than 1,000,000,000.) If I had a third array, then each std::array need only have 1,000 values, since 1,000³ = 1,000,000,000. And of course, I would need even fewer numbers if I used additional arrays. So I went on with a more general class with as many arrays as I would like. To that end, I created a class that would contain the std::array data.
template <typename Type, uint32_t Number>
class TableData
{
private:
const typename std::array<Type, Number> table = RandomTools::GenerateTable<Type, Number>();
public:
const typename std::array<Type, Number>::const_iterator beg = table.cbegin();
const typename std::array<Type, Number>::const_iterator end = table.cend();
typename std::array<Type, Number>::const_iterator iter = beg;
};
With this, I could make the general class I was talking about:
template <typename Type, uint32_t NumberOfRandomValues, uint32_t NumberOfArrays>
class RandomGenerator
{
private:
typename std::array<TableData<Type, NumberOfRandomValues>, NumberOfArrays> tables;
public:
Type getNumber(void)
{
auto result = static_cast<Type>(0);
bool incrementNextIterator = true;
for (auto& f : RandomGenerator::tables)
{
if (incrementNextIterator)
{
++f.iter;
if (f.iter != f.end)
incrementNextIterator = false;
else
f.iter = f.beg;
}
result ^= *f.iter;
}
return result;
}
};
All the coding is done now. Let's compare the performance of each generator. In the main function, I decided to measure how much time was required for each class to generate 2,000,000,000 pseudo-random numbers. The first generator is the Mersenne Twister generator.
int main(void)
{
constexpr auto numberOfCalls = 2'000'000'000u;
std::mt19937 bar(42);
{
const auto time = std::chrono::system_clock::now();
for (auto i = 0u; i < numberOfCalls; ++i)
bar();
std::cout << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
}
RandomGenerator<uint64_t, 45'000u, 2u> wee;
{
const auto time = std::chrono::system_clock::now();
for (auto i = 0u; i < numberOfCalls; ++i)
wee.getNumber();
std::cout << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
}
DualRandomGenerator<uint64_t, 45'000u> woo;
{
const auto time = std::chrono::system_clock::now();
for (auto i = 0u; i < numberOfCalls; ++i)
woo.getNumber();
std::cout << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
}
return 0;
}
And here's the result I got. I could guess that DualRandomGenerator<uint64_t, 45'000u> would outperform RandomGenerator<uint64_t, 45'000u, 2u> but I didn't expect that it would do so by a 5 or 6 factor. Output:
31103.8
47960.2
8253.8
With -03 optimisation:
0.03483
594.445
0.000938
So my question is, why? What could RandomGenerator possibly do that would require 5 times as much time than DualRandomGenerator? They both have the same number of std::array's, which is two.
Edit:
I changed main to the following:
int main(void)
{
constexpr auto numberOfCalls = 500'000'000u;
std::vector<uint64_t> v(numberOfCalls);
std::mt19937 bar(42);
v.clear();
{
const auto time = std::chrono::system_clock::now();
for (auto i = 0u; i < numberOfCalls; ++i)
v.push_back(bar());
std::cout << "Mersenne: " << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
}
RandomGenerator<uint64_t, 45'000u, 2u> wee;
v.clear();
{
const auto time = std::chrono::system_clock::now();
for (auto i = 0u; i < numberOfCalls; ++i)
v.push_back(wee.getNumber());
std::cout << "Random2: " << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
}
RandomGenerator<uint64_t, 20u, 8u> wii;
v.clear();
{
const auto time = std::chrono::system_clock::now();
for (auto i = 0u; i < numberOfCalls; ++i)
v.push_back(wii.getNumber());
std::cout << "Random8: " << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
}
DualRandomGenerator<uint64_t, 45'000u> woo;
v.clear();
{
const auto time = std::chrono::system_clock::now();
for (auto i = 0u; i < numberOfCalls; ++i)
v.push_back(woo.getNumber());
std::cout << "Dual: " << (std::chrono::system_clock::now() - time).count()/1'000'000.f << std::endl;
}
return 0;
}
and this time, I got:
Mersenne: 3274.28
Random2: 1098.78
Random8: 1533.74
Dual: 1088.23
(The full construction of 500,000,000 zeros, before clearing, at the beginning was intentional.) I guess things are looking more normal.
Aucun commentaire:
Enregistrer un commentaire