dimanche 2 juillet 2017

Is rand() really random?

I am doing a numerical simulation of a problem where I need to need to generate a array of N elements with N/2 0s and other 1s initially. With each iteration the array is shuffled and next array elements are chosen randomly from previous iteration until only 0 or 1 remains. I am recording the number of iteration in T number of trials. To generate random integers I am using discard-modulo method using rand() (got the idea from here).

#include <iostream>
#include <ctime>    
#include <cstdlib>  
#include <fstream>      
#include <algorithm> 
#include <array>

using namespace std;

//generate random integer between 0 and MAX
int randomn(int MAX);

int main()
{
    const int N = 10000;
    const int T = 100;     //Number of trials

    srand((unsigned)time(0));   

    ofstream writefile ("Observation.out");

    for (int indexT = 0; indexT < T; indexT++) {

        //initializing myArray
        array<bool, N> myArray;
        array<bool, N> newArray;

        auto middle = myArray.begin() + myArray.size() / 2;
        fill(myArray.begin(), middle, false);
        fill(middle, myArray.end(), true);

        int counterIt = 0;  //desired Iteration number

        for (;;) {
            int counterF = 0;
            int randompos = 0;
            bool savedata = true;

            //suffling myArray
            random_shuffle(begin(myArray), end(myArray));

            //next Iteration
            for (int indexN = 0; indexN < N; indexN++) {
                randompos = randomn(N-1);
                savedata = myArray[randompos];
                newArray[indexN] = savedata;
                if (savedata == false){
                    counterF += 1;
                }
            }

            copy(begin(newArray), end(newArray), begin(myArray));

            //updating Iteration number
            counterIt += 1;

            if ((counterF == 0)|| (counterF == N)) {
                break;
            }

        }

        writefile << indexT+1 <<"\t"<<counterIt <<endl;
    }

    writefile.close();

    return 0;
}

int randomn (int MAX){
    int temp;
    for (;;){
        temp = rand();
        if ( temp < RAND_MAX - RAND_MAX%(MAX+1) ) 
            return temp%(MAX+1);
    }
}

The output is pretty interesting. The iteration number per trial converges to a oscillation within first 10 trail no matter how many times I run it.

1       15463
2       12886  //oscillation starts
3       25053
4       14531
5       6852
6       9695
7       6165
8       3934
9       6019
10      4698
11      8607
12      12886 //one period
13      25053
14      14531
15      6852
16      9695
17      6165
18      3934
19      6019
20      4698
21      8607
22      12886
23      25053
24      14531
25      6852

Replacing the random_shuffle by Fisher–Yates shuffle in the following code (again using rand()) the period of the oscillation decreases further.

            for (int indexN = N-1; indexN > 0; indexN--) {
                randompos = randomn(indexN);
                savedata = myArray[randompos];
                myArray[randompos] = myArray[indexN] ;
                myArray[indexN] = savedata;
            } 

Output :

1    13583  
2    7308  
3    22580  
4    6307    // oscillation starts
5    42060   
6    10485   
7    8525    
8    31061   
9    6307   // one period 
10   42060  
11   10485 
12   8525   
13   31061  
14   6307  
15   42060 

Now I know that I should use <random> in c++ but is this bias or oscillation is fault of rand() function? Or am I introducing any bias in any stage of the program?




Aucun commentaire:

Enregistrer un commentaire