// Review notes. This is free text placed ahead of the first `diff --git` header;
// it is not part of any hunk and no hunk content below has been altered.
//
// The patch below is a unified git diff touching three files. Its line structure
// has been flattened and the last hunk stops mid-statement, so these notes only
// describe what is visible.
//
// README.md
//   - Ticks the "Use opencl to accelerate algorithm execution" todo item.
//
// src/distancekernel.cl -- kernel EuclideanDistance
//   - The `offset1_offset2_imsize_gpumem` int-buffer argument is replaced by two
//     scalar arguments, `offset1` and `imsize`; `distance` moves from argument
//     index 2 to index 3.
//   - The second image's offset is no longer supplied by the host: each
//     work-group computes it as imsize * get_group_id(0).
//   - Each work-item sums squared byte differences over the elements
//     i = local id, local id + local size, ... below imsize. The per-item sums
//     are then combined through `__local unsigned int partial[256]` by a halving
//     loop with a barrier after every step.
//   - Local item 0 stores the group's total in distance[groupID] (the removed
//     code stored it in distance[0]).
//   - No square root is taken: the value written is the sum of squared
//     differences converted to float.
//   - `globalID` and `totalSize` are assigned but never read in the visible kernel.
//   - NOTE(review): `partial` has 256 slots and the reduction starts at
//     get_local_size(0)/2, so this appears to rely on a power-of-two local size
//     of at most 256. The host sets localWorkSize = 256.
//   - NOTE(review): `sum` and `partial` are unsigned int and a single term can be
//     as large as 255*255; confirm the image size cannot make the total wrap.
//
// src/main.c -- computeDistanceOpenCL
//   - The scalar `distance`, the 3-int `offset1_offset2_imsize` array and its
//     device buffer are removed. `distance` becomes a 2000-float host array
//     backed by a 2000-float device buffer.
//   - Kernel arguments 0 (images) and 3 (distances) are set once before the loop;
//     arguments 1 (`offset1`) and 2 (`imsize`) are set on every outer iteration.
//   - globalWorkSize becomes 256*2000 with localWorkSize 256, i.e. 2000
//     work-groups per launch.
//   - The inner `j` loop that wrote the argument buffer, launched the kernel and
//     read back one float per image pair is replaced by one launch and one
//     CL_TRUE (blocking) read of 2000 floats per outer iteration.
//   - NOTE(review): the image buffer is created with
//     nfiles*audioData[0].x*audioData[0].y bytes, but every launch runs 2000
//     groups that each read at imsize*groupID. If nfiles is smaller than 2000
//     this looks like a read past the end of the buffer; if it is larger, the
//     remaining images are never compared. Confirm against the caller.
//   - NOTE(review): the removed code located the second image with
//     audioData[j].index * imsize; the kernel now uses the group id directly.
//     Confirm that `index` always equals the position in the image buffer.
//   - NOTE(review): clCreateKernel and clCreateBuffer are passed NULL for the
//     error-code output and the return values of the clSetKernelArg/clEnqueue*
//     calls are not inspected in the visible lines.
//   - The final added loop (`for(int j=0;j`) is cut off, so how the 2000 results
//     are copied into `distanceArrays` cannot be seen here.
//
diff --git a/README.md b/README.md index 76738e7..18b7f1b 100644 --- a/README.md +++ b/README.md @@ -21,4 +21,4 @@ Used for gathering the image data ### Todo - [x] Load all images from directory - [x] Do knn algorithm -- [ ] Use opencl to accelerate algorithm execution \ No newline at end of file +- [x] Use opencl to accelerate algorithm execution \ No newline at end of file diff --git a/src/distancekernel.cl b/src/distancekernel.cl index 67bb12b..a97d389 100644 --- a/src/distancekernel.cl +++ b/src/distancekernel.cl @@ -1,35 +1,41 @@ __kernel void EuclideanDistance(__global unsigned const char *imgs, - __global int *offset1_offset2_imsize_gpumem, + int offset1, + int imsize, __global float *distance) { + + int img1Offset = offset1; + int imgsize = imsize; + + int groupID = get_group_id(0); + int img2Offset=imsize*groupID; + + int localID = get_local_id(0); + int globalID = get_global_id(0); + int groupSize = get_local_size(0); + int totalSize = get_global_size(0); + unsigned int sum = 0; - int img1Offset = offset1_offset2_imsize_gpumem[0]; - int img2Offset = offset1_offset2_imsize_gpumem[1]; - int imsize = offset1_offset2_imsize_gpumem[2]; - int lid = get_local_id(0); - int gid = get_global_id(0); - int groupSize = get_local_size(0); - - __local unsigned int partial[256]; - - for (int i = lid; i < imsize; i += groupSize) { + // Subtraction + for (int i = localID; i < imgsize; i += groupSize) { int d = (int)imgs[img1Offset + i] - (int)imgs[img2Offset + i]; sum += (unsigned int)(d * d); } - partial[lid] = sum; + __local unsigned int partial[256]; + partial[localID] = sum; barrier(CLK_LOCAL_MEM_FENCE); - for (int stride = groupSize/2; stride > 0; stride >>= 1) { - if (lid < stride) { - partial[lid] += partial[lid + stride]; + for (int stride = get_local_size(0)/2; stride > 0; stride >>= 1) { + if (localID < stride) { + partial[localID] += partial[localID + stride]; } barrier(CLK_LOCAL_MEM_FENCE); } - if (lid == 0) { - distance[0] = (float)partial[0]; + if (localID == 
0) { + distance[groupID] = (float)partial[0]; } } \ No newline at end of file diff --git a/src/main.c b/src/main.c index 0c1a7c7..4eed669 100644 --- a/src/main.c +++ b/src/main.c @@ -127,17 +127,14 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis free(kernelsourcecode); kernel=clCreateKernel(program,(const char*)"EuclideanDistance",NULL); - float distance = 0.0f; - int offset1_offset2_imsize[3] = {0, 0, 0}; + float distance[2000]; cl_mem imgs_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(char)*nfiles*audioData[0].x*audioData[0].y,imgs,NULL); - cl_mem offset1_offset2_imsize_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(int)*3,offset1_offset2_imsize,NULL); - cl_mem distance_gpumem=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float),NULL,NULL); + cl_mem distance_gpumem=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float)*2000,NULL,NULL); clSetKernelArg(kernel,0,sizeof(cl_mem),&imgs_gpumem); - clSetKernelArg(kernel,1,sizeof(cl_mem),&offset1_offset2_imsize_gpumem); - clSetKernelArg(kernel, 2, sizeof(cl_mem), &distance_gpumem); + clSetKernelArg(kernel, 3, sizeof(cl_mem), &distance_gpumem); - size_t globalWorkSize=256; + size_t globalWorkSize=256*2000; size_t localWorkSize = 256; @@ -145,20 +142,19 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis int matrixSize=nfiles*nfiles; unsigned int computeCounter=0; for(int i = 0; i < counter; i++){ - for(int j = 0; j < counter; j++){ - int imsize=audioData[i].x*audioData[i].y; + int imsize=audioData[i].x*audioData[i].y; + int offset1=audioData[i].index*imsize; + + // Sets kernel args + clSetKernelArg(kernel,1,sizeof(int),&offset1); + clSetKernelArg(kernel,2,sizeof(int),&imsize); - offset1_offset2_imsize[0] = audioData[i].index * imsize; - offset1_offset2_imsize[1] = audioData[j].index * imsize; - offset1_offset2_imsize[2] = imsize; - - // Writes to gpu buffer for args - 
clEnqueueWriteBuffer(commandQueue,offset1_offset2_imsize_gpumem,CL_TRUE,0,sizeof(int)*3,offset1_offset2_imsize,0,NULL,NULL); - // Launches kernel - clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0, NULL, NULL); - // reads distance back - clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float), &distance, 0, NULL, NULL); - distanceArrays[i][j]=distance; + // Launches kernel + clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0, NULL, NULL); + // Read buffer back + clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float)*2000, distance, 0, NULL, NULL); + for(int j=0;j