i am new to CUDA and i got an error when i try to copy the array from host to device.
#include <assert.h>
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <time.h>
#include <unistd.h>
#include <curand.h>
#include <curand_kernel.h>
#define N (1024*1024)
#define M (1000000)
/**************************************************/
/* this GPU kernel function is used to initialize the random states */
__global__ void init(unsigned int seed, curandState_t* states) {
/* we have to initialize the state */
curand_init(seed, /* the seed can be the same for each core, here we pass the time in from the CPU */
blockIdx.x, /* the sequence number should be different for each core (unless you want all
cores to get the same sequence of numbers for some reason - use thread id! */
0, /* the offset is how much extra we advance in the sequence for each call, can be 0 */
&states[blockIdx.x]);
}
/* this GPU kernel takes an array of states, and an array of ints, and puts a random int into each */
__global__ void randoms(curandState_t* states, unsigned int* numbers) {
/* curand works like rand - except that it takes a state as a parameter */
numbers[blockIdx.x] = curand(&states[blockIdx.x]) %2000;
};
/*******************************************************/
__global__ void cudakernel(int *buf)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
// buf[i] = rand();
for(int j = 0; j < M; j++)
buf[i] = buf[i] * buf[i] - 0.25f;
}
int main()
{
/*****************************************************/
/* CUDA's random number library uses curandState_t to keep track of the seed value
we will store a random state for every thread */
curandState_t* states;
/* allocate space on the GPU for the random states */
cudaMalloc((void**) &states, N * sizeof(curandState_t));
/* invoke the GPU to initialize all of the random states */
init<<<N, 1>>>(time(0), states);
/* allocate an array of unsigned ints on the CPU and GPU */
// unsigned int cpu_nums[N];//getting error in median relared to type of int
unsigned int* gpu_nums;
int cpu_nums[N];
cudaMalloc((void**) &gpu_nums, N * sizeof(unsigned int));
/* invoke the kernel to get some random numbers */
randoms<<<N, 1>>>(states, gpu_nums);
/* copy the random numbers back */
cudaMemcpy(cpu_nums, gpu_nums, N * sizeof(unsigned int), cudaMemcpyDeviceToHost);
/******************************************************************************/
int data[N];// int count = 0;
int cpunums[N],i;
for (i=0;i<=N;i++)
cpunums[i]=cpu_nums[i];
cudaMalloc(&cpunums, N * sizeof(int));
cudakernel<<<N/256, 256>>>(cpunums);
cudaMemcpy(data, cpunums, N * sizeof(int), cudaMemcpyDeviceToHost);
cudaFree(cpunums);
int sel;
printf("Enter an index: ");
scanf("%d", &sel);
printf("data[%d] = %f\n", sel, data[sel]);
}
i am trying to copy cpunums[i] array from host to device agine affter i genrate a random numbers from device.
i tried to call the device function but i got many errors.so i tried this way.
Aucun commentaire:
Enregistrer un commentaire