Implemented OpenCL kernel for distance matrix calculation

This commit is contained in:
2026-02-14 17:26:45 -06:00
parent 275f48fe0e
commit 5571e0ca21
3 changed files with 40 additions and 39 deletions

View File

@@ -21,4 +21,4 @@ Used for gathering the image data
### Todo ### Todo
- [x] Load all images from directory - [x] Load all images from directory
- [x] Do knn algorithm - [x] Do knn algorithm
- [ ] Use opencl to accelerate algorithm execution - [x] Use opencl to accelerate algorithm execution

View File

@@ -1,35 +1,41 @@
// EuclideanDistance kernel, shown as a flattened side-by-side diff: where a
// line holds two statements, the left one appears to be the previous version
// and the right one the new version (TODO confirm against the repository).
//
// New version, as far as the right-hand text shows:
//   - imgs     : byte buffer holding the images; both images are read from it.
//   - offset1  : start index of the first image inside imgs.
//   - imsize   : number of bytes compared per image.
//   - distance : output; one float is written per work-group, at index
//                get_group_id(0).
//   The second image starts at imsize * get_group_id(0), so each work-group
//   compares the image at offset1 with the image selected by its group id.
//   Every work-item accumulates (a - b)^2 over indices localID,
//   localID + groupSize, ... below imsize, stores its sum in partial[localID],
//   and the work-group then folds partial[] in halves between barriers until
//   partial[0] holds the total, which work-item 0 writes out.
//
// Previous version (left-hand text): offsets and size were read from the
// offset1_offset2_imsize_gpumem buffer and the result went to distance[0].
//
// NOTE(review):
//   - partial[] has 256 entries and is indexed by the local id, so the local
//     work size must not exceed 256.
//   - The reduction starts at local_size/2 and halves the stride, so it only
//     adds every slot when the local work size is a power of two.
//   - The value written is the sum of squared differences; no square root is
//     taken in the kernel.
//   - sum and partial[] are unsigned int; each term can be as large as
//     255*255, so the total presumably wraps for sufficiently large imsize --
//     verify against the image sizes actually used.
//   - globalID and totalSize are assigned but not used in the visible body.
__kernel void EuclideanDistance(__global unsigned const char *imgs, __kernel void EuclideanDistance(__global unsigned const char *imgs,
__global int *offset1_offset2_imsize_gpumem, int offset1,
int imsize,
__global float *distance) __global float *distance)
{ {
int img1Offset = offset1;
int imgsize = imsize;
int groupID = get_group_id(0);
int img2Offset=imsize*groupID;
int localID = get_local_id(0);
int globalID = get_global_id(0);
int groupSize = get_local_size(0);
int totalSize = get_global_size(0);
unsigned int sum = 0; unsigned int sum = 0;
int img1Offset = offset1_offset2_imsize_gpumem[0];
int img2Offset = offset1_offset2_imsize_gpumem[1];
int imsize = offset1_offset2_imsize_gpumem[2];
int lid = get_local_id(0); // Subtraction
int gid = get_global_id(0); for (int i = localID; i < imgsize; i += groupSize) {
int groupSize = get_local_size(0);
__local unsigned int partial[256];
for (int i = lid; i < imsize; i += groupSize) {
int d = (int)imgs[img1Offset + i] - (int)imgs[img2Offset + i]; int d = (int)imgs[img1Offset + i] - (int)imgs[img2Offset + i];
sum += (unsigned int)(d * d); sum += (unsigned int)(d * d);
} }
partial[lid] = sum; __local unsigned int partial[256];
partial[localID] = sum;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for (int stride = groupSize/2; stride > 0; stride >>= 1) { for (int stride = get_local_size(0)/2; stride > 0; stride >>= 1) {
if (lid < stride) { if (localID < stride) {
partial[lid] += partial[lid + stride]; partial[localID] += partial[localID + stride];
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if (lid == 0) { if (localID == 0) {
distance[0] = (float)partial[0]; distance[groupID] = (float)partial[0];
} }
} }

View File

@@ -127,17 +127,14 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis
free(kernelsourcecode); free(kernelsourcecode);
kernel=clCreateKernel(program,(const char*)"EuclideanDistance",NULL); kernel=clCreateKernel(program,(const char*)"EuclideanDistance",NULL);
float distance = 0.0f; float distance[2000];
int offset1_offset2_imsize[3] = {0, 0, 0};
cl_mem imgs_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(char)*nfiles*audioData[0].x*audioData[0].y,imgs,NULL); cl_mem imgs_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(char)*nfiles*audioData[0].x*audioData[0].y,imgs,NULL);
cl_mem offset1_offset2_imsize_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(int)*3,offset1_offset2_imsize,NULL); cl_mem distance_gpumem=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float)*2000,NULL,NULL);
cl_mem distance_gpumem=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float),NULL,NULL);
clSetKernelArg(kernel,0,sizeof(cl_mem),&imgs_gpumem); clSetKernelArg(kernel,0,sizeof(cl_mem),&imgs_gpumem);
clSetKernelArg(kernel,1,sizeof(cl_mem),&offset1_offset2_imsize_gpumem); clSetKernelArg(kernel, 3, sizeof(cl_mem), &distance_gpumem);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &distance_gpumem);
size_t globalWorkSize=256; size_t globalWorkSize=256*2000;
size_t localWorkSize = 256; size_t localWorkSize = 256;
@@ -145,20 +142,19 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis
int matrixSize=nfiles*nfiles; int matrixSize=nfiles*nfiles;
unsigned int computeCounter=0; unsigned int computeCounter=0;
for(int i = 0; i < counter; i++){ for(int i = 0; i < counter; i++){
for(int j = 0; j < counter; j++){
int imsize=audioData[i].x*audioData[i].y; int imsize=audioData[i].x*audioData[i].y;
int offset1=audioData[i].index*imsize;
offset1_offset2_imsize[0] = audioData[i].index * imsize; // Sets kernel args
offset1_offset2_imsize[1] = audioData[j].index * imsize; clSetKernelArg(kernel,1,sizeof(int),&offset1);
offset1_offset2_imsize[2] = imsize; clSetKernelArg(kernel,2,sizeof(int),&imsize);
// Writes to gpu buffer for args
clEnqueueWriteBuffer(commandQueue,offset1_offset2_imsize_gpumem,CL_TRUE,0,sizeof(int)*3,offset1_offset2_imsize,0,NULL,NULL);
// Launches kernel // Launches kernel
clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0, NULL, NULL); clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0, NULL, NULL);
// reads distance back // Read buffer back
clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float), &distance, 0, NULL, NULL); clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float)*2000, distance, 0, NULL, NULL);
distanceArrays[i][j]=distance; for(int j=0;j<counter;j++){
distanceArrays[i][j]=distance[j];
computeCounter++; computeCounter++;
} }
printf("\rWay through matrix compute: %.2f%%",(computeCounter/(float)matrixSize)*100); printf("\rWay through matrix compute: %.2f%%",(computeCounter/(float)matrixSize)*100);
@@ -167,7 +163,6 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis
printf("\n"); printf("\n");
// releasing all objects // releasing all objects
clReleaseMemObject(distance_gpumem); clReleaseMemObject(distance_gpumem);
clReleaseMemObject(offset1_offset2_imsize_gpumem);
clReleaseMemObject(imgs_gpumem); clReleaseMemObject(imgs_gpumem);
clReleaseKernel(kernel); clReleaseKernel(kernel);
clReleaseProgram(program); clReleaseProgram(program);