Added inital opencl support, extremely slow and output is not correct yet
This commit is contained in:
35
src/distancekernel.cl
Normal file
35
src/distancekernel.cl
Normal file
@@ -0,0 +1,35 @@
|
||||
__kernel void EuclideanDistance(__global unsigned const char *imgs,
|
||||
__global int *offset1_offset2_imsize_gpumem,
|
||||
__global float *distance)
|
||||
{
|
||||
unsigned int sum = 0;
|
||||
|
||||
int img1Offset = offset1_offset2_imsize_gpumem[0];
|
||||
int img2Offset = offset1_offset2_imsize_gpumem[1];
|
||||
int imsize = offset1_offset2_imsize_gpumem[2];
|
||||
|
||||
int lid = get_local_id(0);
|
||||
int gid = get_global_id(0);
|
||||
int groupSize = get_local_size(0);
|
||||
|
||||
__local unsigned int partial[256];
|
||||
|
||||
for (int i = gid; i < imsize; i += groupSize) {
|
||||
int d = (int)imgs[img1Offset + i] - (int)imgs[img2Offset + i];
|
||||
sum += (unsigned int)(d * d);
|
||||
}
|
||||
|
||||
partial[lid] = sum;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
for (int stride = groupSize/2; stride > 0; stride >>= 1) {
|
||||
if (lid < stride) {
|
||||
partial[lid] += partial[lid + stride];
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
if (lid == 0) {
|
||||
distance[0] = (float)partial[0];
|
||||
}
|
||||
}
|
||||
107
src/main.c
107
src/main.c
@@ -6,6 +6,7 @@
|
||||
#include <omp.h>
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#include <CL/cl.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#include "stb_image.h"
|
||||
@@ -31,13 +32,32 @@ void EuclideanDistance(unsigned char* imgs, int img1Offset,int img2Offset,int im
|
||||
*result=(float)distance;
|
||||
}
|
||||
|
||||
char* loadKernel(const char* filename) {
|
||||
FILE* file = fopen(filename, "rb");
|
||||
if (!file) {
|
||||
printf("Failed to open kernel file\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fseek(file, 0, SEEK_END);
|
||||
long size = ftell(file);
|
||||
rewind(file);
|
||||
|
||||
char* source = (char*)malloc(size + 1);
|
||||
fread(source, 1, size, file);
|
||||
source[size] = '\0';
|
||||
|
||||
fclose(file);
|
||||
return source;
|
||||
}
|
||||
|
||||
void computeDistanceMatrix(unsigned char* imgs,AudioData *audioData, float **distanceArrays,int nfiles, int counter){
|
||||
// Computes distance matrix for all images
|
||||
int matrixSize=nfiles*nfiles;
|
||||
unsigned int computeCounter=0;
|
||||
float distance;
|
||||
for(int i = 0; i < counter; i++){
|
||||
for(int j = 0; j < counter; j++){
|
||||
float distance;
|
||||
int imsize=audioData[i].x*audioData[i].y;
|
||||
EuclideanDistance(imgs, audioData[i].index*imsize,audioData[j].index*imsize,imsize,&distance);
|
||||
distanceArrays[i][j] = distance;
|
||||
@@ -73,6 +93,88 @@ void computeDistanceMatrixOMP(unsigned char* imgs,AudioData *audioData, float **
|
||||
}
|
||||
|
||||
|
||||
void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **distanceArrays,int nfiles, int counter){
|
||||
// Computes distance matrix for all images
|
||||
cl_platform_id platform;
|
||||
cl_device_id device = 0;
|
||||
cl_context context = 0;
|
||||
cl_program program = 0;
|
||||
cl_command_queue commandQueue=0;
|
||||
cl_kernel kernel = 0;
|
||||
cl_int errNum;
|
||||
|
||||
clGetPlatformIDs(1,&platform,NULL);
|
||||
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
|
||||
|
||||
char name[256];
|
||||
clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(name), name, NULL);
|
||||
printf("Device: %s\n", name);
|
||||
|
||||
context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
|
||||
if(context == NULL){
|
||||
printf("Couldn't create context");
|
||||
}
|
||||
|
||||
|
||||
commandQueue=clCreateCommandQueue(context,device,0,&errNum);
|
||||
if(errNum!=CL_SUCCESS){
|
||||
printf("There was an error in the commmand queue");
|
||||
}
|
||||
|
||||
char* kernelsourcecode= loadKernel("src/distancekernel.cl");
|
||||
program=clCreateProgramWithSource(context,1,(const char**)&kernelsourcecode ,NULL,NULL);
|
||||
clBuildProgram(program,1,&device,NULL,NULL,NULL);
|
||||
free(kernelsourcecode);
|
||||
kernel=clCreateKernel(program,(const char*)"EuclideanDistance",NULL);
|
||||
|
||||
float distance = 0.0f;
|
||||
int offset1_offset2_imsize[3] = {0, 0, 0};
|
||||
cl_mem imgs_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(char)*nfiles*audioData[0].x*audioData[0].y,imgs,NULL);
|
||||
cl_mem offset1_offset2_imsize_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(int)*3,offset1_offset2_imsize,NULL);
|
||||
cl_mem distance_gpumem=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float),NULL,NULL);
|
||||
|
||||
clSetKernelArg(kernel,0,sizeof(cl_mem),&imgs_gpumem);
|
||||
clSetKernelArg(kernel,1,sizeof(cl_mem),&offset1_offset2_imsize_gpumem);
|
||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &distance_gpumem);
|
||||
|
||||
size_t globalWorkSize=256;
|
||||
size_t localWorkSize = 256;
|
||||
|
||||
|
||||
|
||||
int matrixSize=nfiles*nfiles;
|
||||
unsigned int computeCounter=0;
|
||||
for(int i = 0; i < counter; i++){
|
||||
for(int j = 0; j < counter; j++){
|
||||
int imsize=audioData[i].x*audioData[i].y;
|
||||
|
||||
offset1_offset2_imsize[0] = audioData[i].index * imsize;
|
||||
offset1_offset2_imsize[1] = audioData[j].index * imsize;
|
||||
offset1_offset2_imsize[2] = imsize;
|
||||
|
||||
// Writes to gpu buffer for args
|
||||
clEnqueueWriteBuffer(commandQueue,offset1_offset2_imsize_gpumem,CL_TRUE,0,sizeof(int)*3,offset1_offset2_imsize,0,NULL,NULL);
|
||||
// Launches kernel
|
||||
clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0, NULL, NULL);
|
||||
// reads distance back
|
||||
clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float), &distance, 0, NULL, NULL);
|
||||
computeCounter++;
|
||||
}
|
||||
printf("\rWay through matrix compute: %.2f%%",(computeCounter/(float)matrixSize)*100);
|
||||
fflush(stdout);
|
||||
}
|
||||
printf("\n");
|
||||
// releasing all objects
|
||||
clReleaseMemObject(distance_gpumem);
|
||||
clReleaseMemObject(offset1_offset2_imsize_gpumem);
|
||||
clReleaseMemObject(imgs_gpumem);
|
||||
clReleaseKernel(kernel);
|
||||
clReleaseProgram(program);
|
||||
clReleaseCommandQueue(commandQueue);
|
||||
clReleaseContext(context);
|
||||
}
|
||||
|
||||
|
||||
int getAmountOfFiles(){
|
||||
HANDLE myHandle;
|
||||
WIN32_FIND_DATA FindFileData;
|
||||
@@ -143,8 +245,9 @@ int main(){
|
||||
}
|
||||
|
||||
time_t now = time(NULL);
|
||||
computeDistanceMatrixOMP(imgData,audioData, distanceArrays,nfiles, counter);
|
||||
// computeDistanceMatrixOMP(imgData,audioData, distanceArrays,nfiles, counter);
|
||||
// computeDistanceMatrix(imgData,audioData, distanceArrays,nfiles, counter);
|
||||
computeDistanceOpenCL(imgData,audioData, distanceArrays,nfiles, counter);
|
||||
printf("Time it took to compute matrix: %lld seconds\n",time(NULL)-now);
|
||||
|
||||
// Searches for index of specific image
|
||||
|
||||
Reference in New Issue
Block a user