random: strange behaviour of the optimization function in CUDA (maybe rand?)

I have writte a code which can be seen as below but the question is I do not underestand why the random output is sometimes goes out of range: x1=0.0.71 y=0.028 f(x,y)=0.999|||x2 to x4 are ok|||x5=-107374176.000 y70=-107374176.000 f(x,y)=-107374176.000||| in between some are ok, some not. (f(x,y) should be near to 1. I limited the space between -5,+5, i really do not know how 107374176 appears sometimes!??) This is my first code, that would be nice if you explain in a simple way.


const int NF = 512;                                                 
const int Number_repeat = 100;                              
__device__ float minest = -5.0f;
__device__ float maxest = 5.0f;
__device__ float gama = 1.0f;
__device__ float beta0 = 2.0f;
__device__ float alphadevice = 0.1f;
__device__ int   index = 0;
__device__ int   printNUM = 0;

__device__ float Fitness_Optimization_Device(float x,float y)           
{
    return sin(sqrt(x*x+y*y)) / sqrt(x*x+y*y);

}

float Genarate_Random(float low, float high)                                
{
    return low + float(((high - low) + 1.0f) * (rand() / (RAND_MAX + 1.0f)));       
}
  __global__ void Kernel_Update(float *FGPUT, float *FGPUC, float *BFGPU, float *BFGPUEF,float rand1,float rand2)
     {        
    int j = blockIdx.x * blockDim.x + threadIdx.x;      

     float x1=0,x2=0,y1=0,y2=0,x=0,y=0;
     float rij=0,Beta;


                for(int i=0;i<NF*3;i+=3)    
                {
                x1=FGPUT[i];
                y1=FGPUT[i+1];
                x2=FGPUT[j];
                y2=FGPUT[j+1];

                rij=sqrt((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1)); 
                Beta=beta0*exp(-1*gama*(rij*rij));

                FGPUT[i]=FGPUC[i]+Beta*(FGPUC[j]-FGPUC[i])+alphadevice*0.001f*(rand1-0.5f);
                FGPUT[i+1]=FGPUC[i+1]+Beta*(FGPUC[j+1]-FGPUC[i+1])+alphadevice*0.001f*(rand1-0.5f);

                if(FGPUT[i]<=minest)
                    FGPUT[i]=minest;
                if(FGPUT[i+1]<=minest)
                    FGPUT[i+1]=minest;

                if(FGPUT[i]>=maxest)
                    FGPUT[i]=maxest;
                if(FGPUT[i+1]>=maxest)
                    FGPUT[i+1]=maxest;
                x=FGPUT[i];
                y=FGPUT[i+1];
                FGPUT[i+2]=Fitness_Optimization_Device(x,y);

                if(FGPUT[i+2]>=BFGPUEF[2])
                  {
                    BFGPUEF[0]=FGPUT[i];
                    BFGPUEF[1]=FGPUT[i+1];
                    BFGPUEF[2]=FGPUT[i+2];

                  }
                }                   
    BFGPU[index]=BFGPUEF[0];
    BFGPU[index+1]=BFGPUEF[1];
    BFGPU[index+2]=BFGPUEF[2];


    index+=3;           

    printNUM +=1;
    printf(" PrintNUM %d ,Hello from block %d, thread %d\n", printNUM, blockIdx.x, threadIdx.x);

}

    int ThreadsPerBlock = 64;                   
    int BlocksPerGrid = NF / ThreadsPerBlock; 
    int size = NF *3;
    float *devFGPU;                     
    float *devBFGPU;                    
    float *devBFGPUEF;  
    float *devFGPUT;    

    cudaMalloc((void**)&devFGPU, sizeof(float) * size);
    cudaMalloc((void**)&devBFGPU, sizeof(float) * size);
    cudaMalloc((void**)&devBFGPUEF, sizeof(float) * size);
    cudaMalloc((void**)&devFGPUT, sizeof(float) * size);

    cudaMemcpy(devFGPU, FGPUC, sizeof(float) * size, cudaMemcpyHostToDevice);
    cudaMemcpy(devBFGPU, BFGPU, sizeof(float) * size,cudaMemcpyHostToDevice); 
    cudaMemcpy(devBFGPUEF, BFGPUEF, sizeof(float) * size, cudaMemcpyHostToDevice);
    cudaMemcpy(devFGPUT, FGPUT, sizeof(float) * size, cudaMemcpyHostToDevice);


    for(it=0;it<Number_repeat;it++)
        {
    Kernel_Update<<<BlocksPerGrid,ThreadsPerBlock>>>(devFGPUT, devFGPU, devBFGPU, devBFGPUEF, rand(), rand());
        }

    //-----------------------------------------------------
    // copy result back to host
    //-----------------------------------------------------

    index=0;  
     printf("\nLocation and Cost Updated in GPU After %d repeat:\n\n",Number_repeat);

         for(i=0;i<Number_repeat*3;i+=3)
     {
        printf("x%d=%4.3f\t\ty%d=%4.3f\t\tf(x,y)=%4.3f\n\n",index,BFGPU[i],index,BFGPU[i+1],BFGPU[i+2]);
        index++;
     }

Any suggestion and help will be appreciated.

random

mardi 17 février 2015

strange behaviour of the optimization function in CUDA (maybe rand?)

Aucun commentaire:

Enregistrer un commentaire