cuda random number not always return 0 and 1

I am trying to generate a set of random number there are only 1 and zero. The code below almost works. When I do the print for loop I notice that some times I have a number that generates that is not 1 or 0. I know I am missing something just not sure what. I think its a memory misplacement.

#include <stdio.h>
#include <curand.h>
#include <curand_kernel.h>
#include <math.h>
#include <assert.h>
#define MIN 1
#define MAX (2048*20)

#define MOD 2 // only need one and zero for each random value.

__global__ void setup_kernel(curandState *state, unsigned long seed)
  int idx = threadIdx.x+blockDim.x*blockIdx.x;
  curand_init(seed, idx, 0, state+idx);

__global__ void generate_kernel(curandState *state,  unsigned int *result){

  int idx = threadIdx.x + blockDim.x*blockIdx.x;
   result[idx] = curand(state+idx) % MOD;

int main(){

  curandState *d_state;
  cudaMalloc(&d_state, sizeof(curandState));

  unsigned *d_result, *h_result;
  cudaMalloc(&d_result, (MAX-MIN+1) * sizeof(unsigned));
  h_result = (unsigned *)malloc((MAX-MIN+1)*sizeof(unsigned));

  cudaMemset(d_result, 0, (MAX-MIN+1)*sizeof(unsigned));


  generate_kernel<<<MAX/THREADS_PER_BLOCK,THREADS_PER_BLOCK>>>(d_state, d_result);

  cudaMemcpy(h_result, d_result, (MAX-MIN+1) * sizeof(unsigned), cudaMemcpyDeviceToHost);  

  printf("Bin:    Count: \n");
  for (int i = MIN; i <= MAX; i++)
    printf("%d    %d\n", i, h_result[i-MIN]);


  return 0;

What I am attempting to do is transform a genetic algorithm from this site.


I thought it would be a good problem to learn CUDA and have some fun at the same time.

The first part is to generate my random array.

