Implemented OpenCL kernel for distance matrix calculation
This commit is contained in:
@@ -21,4 +21,4 @@ Used for gathering the image data
|
||||
### Todo
|
||||
- [x] Load all images from directory
|
||||
- [x] Do knn algorithm
|
||||
- [ ] Use OpenCL to accelerate algorithm execution
|
||||
- [x] Use OpenCL to accelerate algorithm execution
|
||||
@@ -1,35 +1,41 @@
|
||||
__kernel void EuclideanDistance(__global unsigned const char *imgs,
|
||||
__global int *offset1_offset2_imsize_gpumem,
|
||||
int offset1,
|
||||
int imsize,
|
||||
__global float *distance)
|
||||
{
|
||||
|
||||
int img1Offset = offset1;
|
||||
int imgsize = imsize;
|
||||
|
||||
int groupID = get_group_id(0);
|
||||
int img2Offset=imsize*groupID;
|
||||
|
||||
int localID = get_local_id(0);
|
||||
int globalID = get_global_id(0);
|
||||
int groupSize = get_local_size(0);
|
||||
int totalSize = get_global_size(0);
|
||||
|
||||
unsigned int sum = 0;
|
||||
|
||||
int img1Offset = offset1_offset2_imsize_gpumem[0];
|
||||
int img2Offset = offset1_offset2_imsize_gpumem[1];
|
||||
int imsize = offset1_offset2_imsize_gpumem[2];
|
||||
|
||||
int lid = get_local_id(0);
|
||||
int gid = get_global_id(0);
|
||||
int groupSize = get_local_size(0);
|
||||
|
||||
__local unsigned int partial[256];
|
||||
|
||||
for (int i = lid; i < imsize; i += groupSize) {
|
||||
// Subtraction
|
||||
for (int i = localID; i < imgsize; i += groupSize) {
|
||||
int d = (int)imgs[img1Offset + i] - (int)imgs[img2Offset + i];
|
||||
sum += (unsigned int)(d * d);
|
||||
}
|
||||
|
||||
partial[lid] = sum;
|
||||
__local unsigned int partial[256];
|
||||
partial[localID] = sum;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (int stride = groupSize/2; stride > 0; stride >>= 1) {
|
||||
if (lid < stride) {
|
||||
partial[lid] += partial[lid + stride];
|
||||
for (int stride = get_local_size(0)/2; stride > 0; stride >>= 1) {
|
||||
if (localID < stride) {
|
||||
partial[localID] += partial[localID + stride];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (lid == 0) {
|
||||
distance[0] = (float)partial[0];
|
||||
if (localID == 0) {
|
||||
distance[groupID] = (float)partial[0];
|
||||
}
|
||||
}
|
||||
37
src/main.c
37
src/main.c
@@ -127,17 +127,14 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis
|
||||
free(kernelsourcecode);
|
||||
kernel=clCreateKernel(program,(const char*)"EuclideanDistance",NULL);
|
||||
|
||||
float distance = 0.0f;
|
||||
int offset1_offset2_imsize[3] = {0, 0, 0};
|
||||
float distance[2000];
|
||||
cl_mem imgs_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(char)*nfiles*audioData[0].x*audioData[0].y,imgs,NULL);
|
||||
cl_mem offset1_offset2_imsize_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(int)*3,offset1_offset2_imsize,NULL);
|
||||
cl_mem distance_gpumem=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float),NULL,NULL);
|
||||
cl_mem distance_gpumem=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float)*2000,NULL,NULL);
|
||||
|
||||
clSetKernelArg(kernel,0,sizeof(cl_mem),&imgs_gpumem);
|
||||
clSetKernelArg(kernel,1,sizeof(cl_mem),&offset1_offset2_imsize_gpumem);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &distance_gpumem);
|
||||
clSetKernelArg(kernel, 3, sizeof(cl_mem), &distance_gpumem);
|
||||
|
||||
size_t globalWorkSize=256;
|
||||
size_t globalWorkSize=256*2000;
|
||||
size_t localWorkSize = 256;
|
||||
|
||||
|
||||
@@ -145,20 +142,19 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis
|
||||
int matrixSize=nfiles*nfiles;
|
||||
unsigned int computeCounter=0;
|
||||
for(int i = 0; i < counter; i++){
|
||||
for(int j = 0; j < counter; j++){
|
||||
int imsize=audioData[i].x*audioData[i].y;
|
||||
int imsize=audioData[i].x*audioData[i].y;
|
||||
int offset1=audioData[i].index*imsize;
|
||||
|
||||
// Sets kernel args
|
||||
clSetKernelArg(kernel,1,sizeof(int),&offset1);
|
||||
clSetKernelArg(kernel,2,sizeof(int),&imsize);
|
||||
|
||||
offset1_offset2_imsize[0] = audioData[i].index * imsize;
|
||||
offset1_offset2_imsize[1] = audioData[j].index * imsize;
|
||||
offset1_offset2_imsize[2] = imsize;
|
||||
|
||||
// Writes to gpu buffer for args
|
||||
clEnqueueWriteBuffer(commandQueue,offset1_offset2_imsize_gpumem,CL_TRUE,0,sizeof(int)*3,offset1_offset2_imsize,0,NULL,NULL);
|
||||
// Launches kernel
|
||||
clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0, NULL, NULL);
|
||||
// reads distance back
|
||||
clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float), &distance, 0, NULL, NULL);
|
||||
distanceArrays[i][j]=distance;
|
||||
// Launches kernel
|
||||
clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0, NULL, NULL);
|
||||
// Read buffer back
|
||||
clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float)*2000, distance, 0, NULL, NULL);
|
||||
for(int j=0;j<counter;j++){
|
||||
distanceArrays[i][j]=distance[j];
|
||||
computeCounter++;
|
||||
}
|
||||
printf("\rWay through matrix compute: %.2f%%",(computeCounter/(float)matrixSize)*100);
|
||||
@@ -167,7 +163,6 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis
|
||||
printf("\n");
|
||||
// releasing all objects
|
||||
clReleaseMemObject(distance_gpumem);
|
||||
clReleaseMemObject(offset1_offset2_imsize_gpumem);
|
||||
clReleaseMemObject(imgs_gpumem);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
|
||||
Reference in New Issue
Block a user