mardi 17 février 2015

strange behaviour of the optimization function in CUDA (maybe rand?)

I have writte a code which can be seen as below but the question is I do not underestand why the random output is sometimes goes out of range: x1=0.0.71 y=0.028 f(x,y)=0.999|||x2 to x4 are ok|||x5=-107374176.000 y70=-107374176.000 f(x,y)=-107374176.000||| in between some are ok, some not. (f(x,y) should be near to 1. I limited the space between -5,+5, i really do not know how 107374176 appears sometimes!??) This is my first code, that would be nice if you explain in a simple way.



const int NF = 512;
const int Number_repeat = 100;
__device__ float minest = -5.0f;
__device__ float maxest = 5.0f;
__device__ float gama = 1.0f;
__device__ float beta0 = 2.0f;
__device__ float alphadevice = 0.1f;
__device__ int index = 0;
__device__ int printNUM = 0;

__device__ float Fitness_Optimization_Device(float x,float y)
{
return sin(sqrt(x*x+y*y)) / sqrt(x*x+y*y);

}

float Genarate_Random(float low, float high)
{
return low + float(((high - low) + 1.0f) * (rand() / (RAND_MAX + 1.0f)));
}
__global__ void Kernel_Update(float *FGPUT, float *FGPUC, float *BFGPU, float *BFGPUEF,float rand1,float rand2)
{
int j = blockIdx.x * blockDim.x + threadIdx.x;

float x1=0,x2=0,y1=0,y2=0,x=0,y=0;
float rij=0,Beta;


for(int i=0;i<NF*3;i+=3)
{
x1=FGPUT[i];
y1=FGPUT[i+1];
x2=FGPUT[j];
y2=FGPUT[j+1];

rij=sqrt((x2-x1)*(x2-x1)+(y2-y1)*(y2-y1));
Beta=beta0*exp(-1*gama*(rij*rij));

FGPUT[i]=FGPUC[i]+Beta*(FGPUC[j]-FGPUC[i])+alphadevice*0.001f*(rand1-0.5f);
FGPUT[i+1]=FGPUC[i+1]+Beta*(FGPUC[j+1]-FGPUC[i+1])+alphadevice*0.001f*(rand1-0.5f);

if(FGPUT[i]<=minest)
FGPUT[i]=minest;
if(FGPUT[i+1]<=minest)
FGPUT[i+1]=minest;

if(FGPUT[i]>=maxest)
FGPUT[i]=maxest;
if(FGPUT[i+1]>=maxest)
FGPUT[i+1]=maxest;
x=FGPUT[i];
y=FGPUT[i+1];
FGPUT[i+2]=Fitness_Optimization_Device(x,y);

if(FGPUT[i+2]>=BFGPUEF[2])
{
BFGPUEF[0]=FGPUT[i];
BFGPUEF[1]=FGPUT[i+1];
BFGPUEF[2]=FGPUT[i+2];

}
}
BFGPU[index]=BFGPUEF[0];
BFGPU[index+1]=BFGPUEF[1];
BFGPU[index+2]=BFGPUEF[2];


index+=3;

printNUM +=1;
printf(" PrintNUM %d ,Hello from block %d, thread %d\n", printNUM, blockIdx.x, threadIdx.x);

}

int ThreadsPerBlock = 64;
int BlocksPerGrid = NF / ThreadsPerBlock;
int size = NF *3;
float *devFGPU;
float *devBFGPU;
float *devBFGPUEF;
float *devFGPUT;

cudaMalloc((void**)&devFGPU, sizeof(float) * size);
cudaMalloc((void**)&devBFGPU, sizeof(float) * size);
cudaMalloc((void**)&devBFGPUEF, sizeof(float) * size);
cudaMalloc((void**)&devFGPUT, sizeof(float) * size);

cudaMemcpy(devFGPU, FGPUC, sizeof(float) * size, cudaMemcpyHostToDevice);
cudaMemcpy(devBFGPU, BFGPU, sizeof(float) * size,cudaMemcpyHostToDevice);
cudaMemcpy(devBFGPUEF, BFGPUEF, sizeof(float) * size, cudaMemcpyHostToDevice);
cudaMemcpy(devFGPUT, FGPUT, sizeof(float) * size, cudaMemcpyHostToDevice);


for(it=0;it<Number_repeat;it++)
{
Kernel_Update<<<BlocksPerGrid,ThreadsPerBlock>>>(devFGPUT, devFGPU, devBFGPU, devBFGPUEF, rand(), rand());
}

//-----------------------------------------------------
// copy result back to host
//-----------------------------------------------------

index=0;
printf("\nLocation and Cost Updated in GPU After %d repeat:\n\n",Number_repeat);

for(i=0;i<Number_repeat*3;i+=3)
{
printf("x%d=%4.3f\t\ty%d=%4.3f\t\tf(x,y)=%4.3f\n\n",index,BFGPU[i],index,BFGPU[i+1],BFGPU[i+2]);
index++;
}


Any suggestion and help will be appreciated.


Aucun commentaire:

Enregistrer un commentaire