From 73ae0e66d138b664fcdd8ca230ca4f4af63740a7 Mon Sep 17 00:00:00 2001
From: Taylor Oxelgren
Date: Fri, 13 Feb 2026 04:26:47 -0600
Subject: [PATCH] Added initial opencl support, extremely slow and output is not correct yet

---
 src/distancekernel.cl |  57 +++++++++++
 src/main.c            | 227 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 282 insertions(+), 2 deletions(-)
 create mode 100644 src/distancekernel.cl

diff --git a/src/distancekernel.cl b/src/distancekernel.cl
new file mode 100644
index 0000000..bb3b9db
--- /dev/null
+++ b/src/distancekernel.cl
@@ -0,0 +1,57 @@
+/*
+ * Sum of squared byte differences between two equally sized images that
+ * live in one packed buffer.
+ *
+ * imgs                          - all images back to back, one byte per element
+ * offset1_offset2_imsize_gpumem - three ints: offset of the first image,
+ *                                 offset of the second image, bytes per image
+ * distance                      - distance[0] receives the sum as a float
+ *
+ * Launch with a one-dimensional range whose work-group size is a power of
+ * two and at most 256: that is the size of the local scratch array and the
+ * shape the tree reduction below relies on.
+ *
+ * NOTE(review): no square root is taken here - confirm that this matches
+ * what the CPU EuclideanDistance in main.c stores.
+ * NOTE(review): the sums are 32-bit and wrap above 2^32 - 1, which about
+ * 66,000 maximally different samples can reach - widen if images are larger.
+ */
+__kernel void EuclideanDistance(__global unsigned const char *imgs,
+                                __global int *offset1_offset2_imsize_gpumem,
+                                __global float *distance)
+{
+    __local unsigned int partial[256];
+
+    const int img1Offset = offset1_offset2_imsize_gpumem[0];
+    const int img2Offset = offset1_offset2_imsize_gpumem[1];
+    const int imsize = offset1_offset2_imsize_gpumem[2];
+
+    const int lid = get_local_id(0);
+    const int groupSize = get_local_size(0);
+
+    /*
+     * Stride by the local id rather than the global id: each work-group
+     * then covers the whole image by itself, so distance[0] is complete
+     * regardless of how many groups the host launches.
+     */
+    unsigned int sum = 0;
+    for (int i = lid; i < imsize; i += groupSize) {
+        int d = (int)imgs[img1Offset + i] - (int)imgs[img2Offset + i];
+        sum += (unsigned int)(d * d);
+    }
+
+    partial[lid] = sum;
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    /* Tree reduction of the per-work-item sums held in local memory. */
+    for (int stride = groupSize / 2; stride > 0; stride >>= 1) {
+        if (lid < stride) {
+            partial[lid] += partial[lid + stride];
+        }
+        barrier(CLK_LOCAL_MEM_FENCE);
+    }
+
+    if (lid == 0) {
+        distance[0] = (float)partial[0];
+    }
+}
diff --git a/src/main.c b/src/main.c
index ac94844..41d7832 100644
--- a/src/main.c
+++ b/src/main.c
@@ -6,6 +6,7 @@
 #include
 #define CL_TARGET_OPENCL_VERSION 120
 #include
+#include
 
 #define STB_IMAGE_IMPLEMENTATION
 #include "stb_image.h"
@@ -31,13 +32,58 @@ void EuclideanDistance(unsigned char* imgs, int img1Offset,int img2Offset,int im
     *result=(float)distance;
 }
 
+/*
+ * Reads the whole file at filename into a freshly allocated,
+ * NUL-terminated buffer.
+ *
+ * Returns the buffer, which the caller must free(), or NULL (after printing
+ * the reason) when the file cannot be opened, sized or read completely, or
+ * when the allocation fails.
+ */
+char* loadKernel(const char* filename) {
+    char* source = NULL;
+    long size = -1;
+
+    FILE* file = fopen(filename, "rb");
+    if (!file) {
+        printf("Failed to open kernel file\n");
+        return NULL;
+    }
+
+    if (fseek(file, 0, SEEK_END) == 0) {
+        size = ftell(file);
+    }
+    if (size < 0 || fseek(file, 0, SEEK_SET) != 0) {
+        printf("Failed to determine kernel file size\n");
+        goto done;
+    }
+
+    source = (char*)malloc((size_t)size + 1);
+    if (!source) {
+        printf("Failed to allocate memory for kernel source\n");
+        goto done;
+    }
+
+    if (fread(source, 1, (size_t)size, file) != (size_t)size) {
+        printf("Failed to read kernel file\n");
+        free(source);
+        source = NULL;
+        goto done;
+    }
+    source[size] = '\0';
+
+done:
+    fclose(file);
+    return source;
+}
+
 void computeDistanceMatrix(unsigned char* imgs,AudioData *audioData, float **distanceArrays,int nfiles, int counter){
     // Computes distance matrix for all images
     int matrixSize=nfiles*nfiles;
     unsigned int computeCounter=0;
+    float distance;
     for(int i = 0; i < counter; i++){
         for(int j = 0; j < counter; j++){
-            float distance;
             int imsize=audioData[i].x*audioData[i].y;
             EuclideanDistance(imgs, audioData[i].index*imsize,audioData[j].index*imsize,imsize,&distance);
             distanceArrays[i][j] = distance;
@@ -73,6 +119,182 @@ void computeDistanceMatrixOMP(unsigned char* imgs,AudioData *audioData, float **
 }
 
 
+/*
+ * Fills distanceArrays[i][j], for every i and j in [0, counter), with the
+ * value the EuclideanDistance kernel from src/distancekernel.cl produces
+ * for images i and j.
+ *
+ * imgs           - packed image bytes; nfiles * audioData[0].x *
+ *                  audioData[0].y bytes are uploaded to the device
+ * audioData      - per-image records; x, y and index are read
+ * distanceArrays - output matrix, written at [i][j]
+ * nfiles         - number of images uploaded; nfiles * nfiles also scales
+ *                  the progress percentage
+ * counter        - number of rows and columns to compute
+ *
+ * Any OpenCL failure is reported with printf and ends the function early,
+ * in which case distanceArrays may be only partially filled.
+ *
+ * NOTE(review): every pair costs a blocking write, a one-work-group launch
+ * and a blocking read, which is probably why this path is slow; computing
+ * many pairs per launch looks like the next step.
+ */
+void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **distanceArrays,int nfiles, int counter){
+    cl_platform_id platform = NULL;
+    cl_device_id device = NULL;
+    cl_context context = NULL;
+    cl_program program = NULL;
+    cl_command_queue commandQueue = NULL;
+    cl_kernel kernel = NULL;
+    cl_mem imgs_gpumem = NULL;
+    cl_mem offset1_offset2_imsize_gpumem = NULL;
+    cl_mem distance_gpumem = NULL;
+    cl_int errNum = CL_SUCCESS;
+    cl_int offset1_offset2_imsize[3] = {0, 0, 0};
+    char* kernelsourcecode = NULL;
+    char name[256] = "";
+    float distance = 0.0f;
+    // The kernel's local scratch array holds 256 entries, so run exactly one
+    // work-group of 256 work-items per image pair.
+    const size_t globalWorkSize = 256;
+    const size_t localWorkSize = 256;
+    size_t imgsBytes = 0;
+    int matrixSize = nfiles*nfiles;
+    unsigned int computeCounter = 0;
+
+    if(nfiles <= 0 || counter <= 0){
+        return;
+    }
+
+    errNum = clGetPlatformIDs(1,&platform,NULL);
+    if(errNum != CL_SUCCESS){
+        printf("Couldn't find an OpenCL platform (error %d)\n", errNum);
+        goto cleanup;
+    }
+    errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
+    if(errNum != CL_SUCCESS){
+        printf("Couldn't find an OpenCL GPU device (error %d)\n", errNum);
+        goto cleanup;
+    }
+
+    clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(name), name, NULL);
+    printf("Device: %s\n", name);
+
+    context = clCreateContext(NULL, 1, &device, NULL, NULL, &errNum);
+    if(context == NULL){
+        printf("Couldn't create context");
+        goto cleanup;
+    }
+
+    commandQueue=clCreateCommandQueue(context,device,0,&errNum);
+    if(errNum!=CL_SUCCESS){
+        printf("There was an error in the commmand queue");
+        goto cleanup;
+    }
+
+    kernelsourcecode = loadKernel("src/distancekernel.cl");
+    if(kernelsourcecode == NULL){
+        goto cleanup;
+    }
+    program=clCreateProgramWithSource(context,1,(const char**)&kernelsourcecode,NULL,&errNum);
+    if(program == NULL){
+        printf("Couldn't create the kernel program (error %d)\n", errNum);
+        goto cleanup;
+    }
+    errNum = clBuildProgram(program,1,&device,NULL,NULL,NULL);
+    if(errNum != CL_SUCCESS){
+        // Print the compiler output so kernel source errors are visible.
+        size_t logSize = 0;
+        printf("Couldn't build the kernel program (error %d)\n", errNum);
+        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
+        char* buildLog = (char*)malloc(logSize + 1);
+        if(buildLog != NULL && clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, buildLog, NULL) == CL_SUCCESS){
+            buildLog[logSize] = '\0';
+            printf("%s\n", buildLog);
+        }
+        free(buildLog);
+        goto cleanup;
+    }
+    kernel=clCreateKernel(program,"EuclideanDistance",&errNum);
+    if(kernel == NULL){
+        printf("Couldn't create the EuclideanDistance kernel (error %d)\n", errNum);
+        goto cleanup;
+    }
+
+    // size_t arithmetic: the int product can overflow for large collections.
+    imgsBytes = (size_t)nfiles * (size_t)audioData[0].x * (size_t)audioData[0].y;
+    imgs_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,imgsBytes,imgs,NULL);
+    offset1_offset2_imsize_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(offset1_offset2_imsize),offset1_offset2_imsize,NULL);
+    distance_gpumem=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float),NULL,NULL);
+    if(imgs_gpumem == NULL || offset1_offset2_imsize_gpumem == NULL || distance_gpumem == NULL){
+        printf("Couldn't allocate the OpenCL buffers\n");
+        goto cleanup;
+    }
+
+    errNum = clSetKernelArg(kernel,0,sizeof(cl_mem),&imgs_gpumem);
+    errNum |= clSetKernelArg(kernel,1,sizeof(cl_mem),&offset1_offset2_imsize_gpumem);
+    errNum |= clSetKernelArg(kernel,2,sizeof(cl_mem),&distance_gpumem);
+    if(errNum != CL_SUCCESS){
+        printf("Couldn't set the kernel arguments\n");
+        goto cleanup;
+    }
+
+    for(int i = 0; i < counter; i++){
+        int imsize=audioData[i].x*audioData[i].y;
+        for(int j = 0; j < counter; j++){
+            offset1_offset2_imsize[0] = audioData[i].index * imsize;
+            offset1_offset2_imsize[1] = audioData[j].index * imsize;
+            offset1_offset2_imsize[2] = imsize;
+
+            // Upload this pair's offsets, run the kernel, read the sum back.
+            errNum = clEnqueueWriteBuffer(commandQueue,offset1_offset2_imsize_gpumem,CL_TRUE,0,sizeof(offset1_offset2_imsize),offset1_offset2_imsize,0,NULL,NULL);
+            if(errNum == CL_SUCCESS){
+                errNum = clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0, NULL, NULL);
+            }
+            if(errNum == CL_SUCCESS){
+                errNum = clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float), &distance, 0, NULL, NULL);
+            }
+            if(errNum != CL_SUCCESS){
+                printf("\nOpenCL error %d while computing distance for pair (%d, %d)\n", errNum, i, j);
+                goto cleanup;
+            }
+
+            // Record the kernel's result in the output matrix.
+            distanceArrays[i][j] = distance;
+            computeCounter++;
+        }
+        printf("\rWay through matrix compute: %.2f%%",(computeCounter/(float)matrixSize)*100);
+        fflush(stdout);
+    }
+    printf("\n");
+
+cleanup:
+    // Release whatever was created; objects that were never created are NULL.
+    free(kernelsourcecode);
+    if(distance_gpumem != NULL){
+        clReleaseMemObject(distance_gpumem);
+    }
+    if(offset1_offset2_imsize_gpumem != NULL){
+        clReleaseMemObject(offset1_offset2_imsize_gpumem);
+    }
+    if(imgs_gpumem != NULL){
+        clReleaseMemObject(imgs_gpumem);
+    }
+    if(kernel != NULL){
+        clReleaseKernel(kernel);
+    }
+    if(program != NULL){
+        clReleaseProgram(program);
+    }
+    if(commandQueue != NULL){
+        clReleaseCommandQueue(commandQueue);
+    }
+    if(context != NULL){
+        clReleaseContext(context);
+    }
+}
+
+
 int getAmountOfFiles(){
     HANDLE myHandle;
     WIN32_FIND_DATA FindFileData;
@@ -143,8 +365,9 @@ int main(){
     }
 
     time_t now = time(NULL);
-    computeDistanceMatrixOMP(imgData,audioData, distanceArrays,nfiles, counter);
+    // computeDistanceMatrixOMP(imgData,audioData, distanceArrays,nfiles, counter);
     // computeDistanceMatrix(imgData,audioData, distanceArrays,nfiles, counter);
+    computeDistanceOpenCL(imgData,audioData, distanceArrays,nfiles, counter);
     printf("Time it took to compute matrix: %lld seconds\n",time(NULL)-now);
 
     // Searches for index of specific image