Added initial OpenCL support; it is extremely slow and the output is not correct yet

This commit is contained in:
2026-02-13 04:26:47 -06:00
parent dfe446bb22
commit 73ae0e66d1
2 changed files with 140 additions and 2 deletions

35
src/distancekernel.cl Normal file
View File

@@ -0,0 +1,35 @@
/*
 * Sums the squared per-byte differences between two images that live in the
 * same buffer and writes the total to distance[0].
 *
 * imgs:      byte buffer holding the images.
 * offset1_offset2_imsize_gpumem:
 *            three ints: [0] start offset of the first image, [1] start
 *            offset of the second image, [2] number of bytes to compare.
 * distance:  single float receiving the sum of squared differences.
 *
 * Every work-group computes the complete sum on its own (work-items are
 * indexed by their local id), so the value written to distance[0] does not
 * depend on how many work-groups are launched.
 *
 * The scratch array limits the work-group size to at most 256, and the
 * halving reduction only visits every element when that size is a power of
 * two.
 *
 * NOTE(review): no square root is taken here; confirm that matches what the
 * CPU implementation stores.
 */
__kernel void EuclideanDistance(__global unsigned const char *imgs,
                                __global int *offset1_offset2_imsize_gpumem,
                                __global float *distance)
{
    int img1Offset = offset1_offset2_imsize_gpumem[0];
    int img2Offset = offset1_offset2_imsize_gpumem[1];
    int imsize = offset1_offset2_imsize_gpumem[2];
    int lid = get_local_id(0);
    int groupSize = get_local_size(0);

    /* 64-bit accumulators: a single squared difference can be 255*255, so a
     * 32-bit total wraps once an image exceeds roughly 66k bytes. */
    __local ulong partial[256];
    ulong sum = 0;

    /* Start at the local id and step by the group size so the work-items of
     * one group together cover every byte exactly once. */
    for (int i = lid; i < imsize; i += groupSize) {
        int d = (int)imgs[img1Offset + i] - (int)imgs[img2Offset + i];
        sum += (ulong)(d * d);
    }
    partial[lid] = sum;
    barrier(CLK_LOCAL_MEM_FENCE);

    /* Tree reduction in local memory; the barrier sits outside the branch so
     * every work-item reaches it. */
    for (int stride = groupSize / 2; stride > 0; stride >>= 1) {
        if (lid < stride) {
            partial[lid] += partial[lid + stride];
        }
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    if (lid == 0) {
        distance[0] = (float)partial[0];
    }
}

View File

@@ -6,6 +6,7 @@
#include <omp.h>
#define CL_TARGET_OPENCL_VERSION 120
#include <CL/cl.h>
#include <stdlib.h>
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
@@ -31,13 +32,32 @@ void EuclideanDistance(unsigned char* imgs, int img1Offset,int img2Offset,int im
*result=(float)distance;
}
/*
 * Reads a whole file into a newly allocated, NUL-terminated buffer.
 *
 * filename: path of the file to read (opened in binary mode).
 *
 * Returns a malloc'd string that the caller must free(), or NULL when the
 * file cannot be opened, its size cannot be determined, memory cannot be
 * allocated, or fewer bytes than expected are read.
 */
char* loadKernel(const char* filename) {
    char* source = NULL;
    long size = 0;
    size_t bytesRead = 0;

    FILE* file = fopen(filename, "rb");
    if (!file) {
        printf("Failed to open kernel file\n");
        return NULL;
    }

    /* Determine the file length; ftell reports -1 on failure, which must not
     * reach malloc or be used as an index. */
    if (fseek(file, 0, SEEK_END) != 0) {
        printf("Failed to determine kernel file size\n");
        fclose(file);
        return NULL;
    }
    size = ftell(file);
    if (size < 0 || fseek(file, 0, SEEK_SET) != 0) {
        printf("Failed to determine kernel file size\n");
        fclose(file);
        return NULL;
    }

    source = (char*)malloc((size_t)size + 1);
    if (source == NULL) {
        printf("Failed to allocate memory for kernel source\n");
        fclose(file);
        return NULL;
    }

    /* A short read would leave part of the buffer uninitialised, so treat it
     * as a failure instead of returning garbage. */
    bytesRead = fread(source, 1, (size_t)size, file);
    fclose(file);
    if (bytesRead != (size_t)size) {
        printf("Failed to read kernel file\n");
        free(source);
        return NULL;
    }

    source[size] = '\0';
    return source;
}
void computeDistanceMatrix(unsigned char* imgs,AudioData *audioData, float **distanceArrays,int nfiles, int counter){
// Computes distance matrix for all images
int matrixSize=nfiles*nfiles;
unsigned int computeCounter=0;
float distance;
for(int i = 0; i < counter; i++){
for(int j = 0; j < counter; j++){
float distance;
int imsize=audioData[i].x*audioData[i].y;
EuclideanDistance(imgs, audioData[i].index*imsize,audioData[j].index*imsize,imsize,&distance);
distanceArrays[i][j] = distance;
@@ -73,6 +93,88 @@ void computeDistanceMatrixOMP(unsigned char* imgs,AudioData *audioData, float **
}
/*
 * Fills distanceArrays[i][j] for 0 <= i, j < counter by running the
 * "EuclideanDistance" kernel from src/distancekernel.cl on the first GPU of
 * the first OpenCL platform.
 *
 * imgs:           host buffer with all image bytes; nfiles * x * y bytes
 *                 (taken from audioData[0]) are copied to the device.
 * audioData:      per-image records; .index, .x and .y are read.
 * distanceArrays: output matrix, written at [i][j].
 * nfiles:         number of images in imgs; also used for the progress
 *                 percentage.
 * counter:        number of rows/columns to compute.
 *
 * On any OpenCL or file error a message is printed, whatever has been
 * created so far is released, and the function returns with the remaining
 * entries of distanceArrays untouched.
 */
void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **distanceArrays,int nfiles, int counter){
    /* Everything is declared up front so the error paths can jump straight
     * to the cleanup label. */
    cl_platform_id platform = NULL;
    cl_device_id device = NULL;
    cl_context context = NULL;
    cl_program program = NULL;
    cl_command_queue commandQueue = NULL;
    cl_kernel kernel = NULL;
    cl_mem imgs_gpumem = NULL;
    cl_mem offset1_offset2_imsize_gpumem = NULL;
    cl_mem distance_gpumem = NULL;
    cl_int errNum = CL_SUCCESS;
    char* kernelsourcecode = NULL;
    char name[256] = {0};
    float distance = 0.0f;
    int offset1_offset2_imsize[3] = {0, 0, 0};
    /* One work-group of 256 work-items; the kernel's local scratch array
     * holds 256 entries. */
    size_t globalWorkSize = 256;
    size_t localWorkSize = 256;
    size_t imgsBytes = 0;
    int matrixSize = nfiles*nfiles;
    unsigned int computeCounter = 0;

    /* audioData[0] is read below, so there is nothing to do without input. */
    if (nfiles <= 0 || counter <= 0) {
        return;
    }

    errNum = clGetPlatformIDs(1, &platform, NULL);
    if (errNum != CL_SUCCESS) {
        printf("Couldn't find an OpenCL platform (error %d)\n", errNum);
        goto cleanup;
    }
    errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if (errNum != CL_SUCCESS) {
        printf("Couldn't find an OpenCL GPU device (error %d)\n", errNum);
        goto cleanup;
    }
    if (clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(name), name, NULL) == CL_SUCCESS) {
        printf("Device: %s\n", name);
    }

    context = clCreateContext(NULL, 1, &device, NULL, NULL, &errNum);
    if (context == NULL || errNum != CL_SUCCESS) {
        printf("Couldn't create context");
        goto cleanup;
    }
    commandQueue = clCreateCommandQueue(context, device, 0, &errNum);
    if (commandQueue == NULL || errNum != CL_SUCCESS) {
        printf("There was an error in the commmand queue");
        goto cleanup;
    }

    kernelsourcecode = loadKernel("src/distancekernel.cl");
    if (kernelsourcecode == NULL) {
        goto cleanup;
    }
    program = clCreateProgramWithSource(context, 1, (const char**)&kernelsourcecode, NULL, &errNum);
    if (program == NULL || errNum != CL_SUCCESS) {
        printf("Couldn't create program (error %d)\n", errNum);
        goto cleanup;
    }
    errNum = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
    if (errNum != CL_SUCCESS) {
        /* Show the compiler output; without it kernel errors are invisible. */
        size_t logSize = 0;
        char* buildLog = NULL;
        printf("Couldn't build kernel program (error %d)\n", errNum);
        if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize) == CL_SUCCESS) {
            buildLog = (char*)malloc(logSize + 1);
        }
        if (buildLog != NULL) {
            if (clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, logSize, buildLog, NULL) == CL_SUCCESS) {
                buildLog[logSize] = '\0';
                printf("%s\n", buildLog);
            }
            free(buildLog);
        }
        goto cleanup;
    }
    kernel = clCreateKernel(program, "EuclideanDistance", &errNum);
    if (kernel == NULL || errNum != CL_SUCCESS) {
        printf("Couldn't create kernel (error %d)\n", errNum);
        goto cleanup;
    }

    /* Size computed in size_t so large image sets do not overflow int. */
    imgsBytes = sizeof(char) * (size_t)nfiles * (size_t)audioData[0].x * (size_t)audioData[0].y;
    imgs_gpumem = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, imgsBytes, imgs, &errNum);
    if (imgs_gpumem == NULL || errNum != CL_SUCCESS) {
        printf("Couldn't create image buffer (error %d)\n", errNum);
        goto cleanup;
    }
    offset1_offset2_imsize_gpumem = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, sizeof(int)*3, offset1_offset2_imsize, &errNum);
    if (offset1_offset2_imsize_gpumem == NULL || errNum != CL_SUCCESS) {
        printf("Couldn't create argument buffer (error %d)\n", errNum);
        goto cleanup;
    }
    distance_gpumem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float), NULL, &errNum);
    if (distance_gpumem == NULL || errNum != CL_SUCCESS) {
        printf("Couldn't create result buffer (error %d)\n", errNum);
        goto cleanup;
    }

    if (clSetKernelArg(kernel, 0, sizeof(cl_mem), &imgs_gpumem) != CL_SUCCESS ||
        clSetKernelArg(kernel, 1, sizeof(cl_mem), &offset1_offset2_imsize_gpumem) != CL_SUCCESS ||
        clSetKernelArg(kernel, 2, sizeof(cl_mem), &distance_gpumem) != CL_SUCCESS) {
        printf("Couldn't set kernel arguments\n");
        goto cleanup;
    }

    for(int i = 0; i < counter; i++){
        for(int j = 0; j < counter; j++){
            int imsize = audioData[i].x*audioData[i].y;
            offset1_offset2_imsize[0] = audioData[i].index * imsize;
            offset1_offset2_imsize[1] = audioData[j].index * imsize;
            offset1_offset2_imsize[2] = imsize;

            // Writes to gpu buffer for args
            errNum = clEnqueueWriteBuffer(commandQueue, offset1_offset2_imsize_gpumem, CL_TRUE, 0, sizeof(int)*3, offset1_offset2_imsize, 0, NULL, NULL);
            // Launches kernel
            if (errNum == CL_SUCCESS) {
                errNum = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, &globalWorkSize, &localWorkSize, 0, NULL, NULL);
            }
            // reads distance back (blocking, so the value is valid on return)
            if (errNum == CL_SUCCESS) {
                errNum = clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float), &distance, 0, NULL, NULL);
            }
            if (errNum != CL_SUCCESS) {
                printf("\nOpenCL error %d while computing distance (%d, %d)\n", errNum, i, j);
                goto cleanup;
            }

            /* Store the result; without this the output matrix is never
             * filled in. */
            distanceArrays[i][j] = distance;
            computeCounter++;
        }
        printf("\rWay through matrix compute: %.2f%%",(computeCounter/(float)matrixSize)*100);
        fflush(stdout);
    }
    printf("\n");

cleanup:
    // releasing all objects that were actually created
    free(kernelsourcecode);
    if (distance_gpumem != NULL) clReleaseMemObject(distance_gpumem);
    if (offset1_offset2_imsize_gpumem != NULL) clReleaseMemObject(offset1_offset2_imsize_gpumem);
    if (imgs_gpumem != NULL) clReleaseMemObject(imgs_gpumem);
    if (kernel != NULL) clReleaseKernel(kernel);
    if (program != NULL) clReleaseProgram(program);
    if (commandQueue != NULL) clReleaseCommandQueue(commandQueue);
    if (context != NULL) clReleaseContext(context);
}
int getAmountOfFiles(){
HANDLE myHandle;
WIN32_FIND_DATA FindFileData;
@@ -143,8 +245,9 @@ int main(){
}
time_t now = time(NULL);
computeDistanceMatrixOMP(imgData,audioData, distanceArrays,nfiles, counter);
// computeDistanceMatrixOMP(imgData,audioData, distanceArrays,nfiles, counter);
// computeDistanceMatrix(imgData,audioData, distanceArrays,nfiles, counter);
computeDistanceOpenCL(imgData,audioData, distanceArrays,nfiles, counter);
printf("Time it took to compute matrix: %lld seconds\n",time(NULL)-now);
// Searches for index of specific image