Compare commits
2 Commits
73ae0e66d1
...
5571e0ca21
| Author | SHA1 | Date | |
|---|---|---|---|
| | 5571e0ca21 | | |
| | 275f48fe0e | | |
@@ -21,4 +21,4 @@ Used for gathering the image data
|
|||||||
### Todo
|
### Todo
|
||||||
- [x] Load all images from directory
|
- [x] Load all images from directory
|
||||||
- [x] Do knn algorithm
|
- [x] Do knn algorithm
|
||||||
- [ ] Use opencl to accelerate algorithm execution
|
- [x] Use opencl to accelerate algorithm execution
|
||||||
@@ -1,35 +1,41 @@
|
|||||||
__kernel void EuclideanDistance(__global unsigned const char *imgs,
|
__kernel void EuclideanDistance(__global unsigned const char *imgs,
|
||||||
__global int *offset1_offset2_imsize_gpumem,
|
int offset1,
|
||||||
|
int imsize,
|
||||||
__global float *distance)
|
__global float *distance)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
int img1Offset = offset1;
|
||||||
|
int imgsize = imsize;
|
||||||
|
|
||||||
|
int groupID = get_group_id(0);
|
||||||
|
int img2Offset=imsize*groupID;
|
||||||
|
|
||||||
|
int localID = get_local_id(0);
|
||||||
|
int globalID = get_global_id(0);
|
||||||
|
int groupSize = get_local_size(0);
|
||||||
|
int totalSize = get_global_size(0);
|
||||||
|
|
||||||
unsigned int sum = 0;
|
unsigned int sum = 0;
|
||||||
|
|
||||||
int img1Offset = offset1_offset2_imsize_gpumem[0];
|
|
||||||
int img2Offset = offset1_offset2_imsize_gpumem[1];
|
|
||||||
int imsize = offset1_offset2_imsize_gpumem[2];
|
|
||||||
|
|
||||||
int lid = get_local_id(0);
|
// Subtraction
|
||||||
int gid = get_global_id(0);
|
for (int i = localID; i < imgsize; i += groupSize) {
|
||||||
int groupSize = get_local_size(0);
|
|
||||||
|
|
||||||
__local unsigned int partial[256];
|
|
||||||
|
|
||||||
for (int i = gid; i < imsize; i += groupSize) {
|
|
||||||
int d = (int)imgs[img1Offset + i] - (int)imgs[img2Offset + i];
|
int d = (int)imgs[img1Offset + i] - (int)imgs[img2Offset + i];
|
||||||
sum += (unsigned int)(d * d);
|
sum += (unsigned int)(d * d);
|
||||||
}
|
}
|
||||||
|
|
||||||
partial[lid] = sum;
|
__local unsigned int partial[256];
|
||||||
|
partial[localID] = sum;
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
|
|
||||||
for (int stride = groupSize/2; stride > 0; stride >>= 1) {
|
for (int stride = get_local_size(0)/2; stride > 0; stride >>= 1) {
|
||||||
if (lid < stride) {
|
if (localID < stride) {
|
||||||
partial[lid] += partial[lid + stride];
|
partial[localID] += partial[localID + stride];
|
||||||
}
|
}
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (lid == 0) {
|
if (localID == 0) {
|
||||||
distance[0] = (float)partial[0];
|
distance[groupID] = (float)partial[0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
36
src/main.c
36
src/main.c
@@ -127,17 +127,14 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis
|
|||||||
free(kernelsourcecode);
|
free(kernelsourcecode);
|
||||||
kernel=clCreateKernel(program,(const char*)"EuclideanDistance",NULL);
|
kernel=clCreateKernel(program,(const char*)"EuclideanDistance",NULL);
|
||||||
|
|
||||||
float distance = 0.0f;
|
float distance[2000];
|
||||||
int offset1_offset2_imsize[3] = {0, 0, 0};
|
|
||||||
cl_mem imgs_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(char)*nfiles*audioData[0].x*audioData[0].y,imgs,NULL);
|
cl_mem imgs_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(char)*nfiles*audioData[0].x*audioData[0].y,imgs,NULL);
|
||||||
cl_mem offset1_offset2_imsize_gpumem=clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(int)*3,offset1_offset2_imsize,NULL);
|
cl_mem distance_gpumem=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float)*2000,NULL,NULL);
|
||||||
cl_mem distance_gpumem=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(float),NULL,NULL);
|
|
||||||
|
|
||||||
clSetKernelArg(kernel,0,sizeof(cl_mem),&imgs_gpumem);
|
clSetKernelArg(kernel,0,sizeof(cl_mem),&imgs_gpumem);
|
||||||
clSetKernelArg(kernel,1,sizeof(cl_mem),&offset1_offset2_imsize_gpumem);
|
clSetKernelArg(kernel, 3, sizeof(cl_mem), &distance_gpumem);
|
||||||
clSetKernelArg(kernel, 2, sizeof(cl_mem), &distance_gpumem);
|
|
||||||
|
|
||||||
size_t globalWorkSize=256;
|
size_t globalWorkSize=256*2000;
|
||||||
size_t localWorkSize = 256;
|
size_t localWorkSize = 256;
|
||||||
|
|
||||||
|
|
||||||
@@ -145,19 +142,19 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis
|
|||||||
int matrixSize=nfiles*nfiles;
|
int matrixSize=nfiles*nfiles;
|
||||||
unsigned int computeCounter=0;
|
unsigned int computeCounter=0;
|
||||||
for(int i = 0; i < counter; i++){
|
for(int i = 0; i < counter; i++){
|
||||||
for(int j = 0; j < counter; j++){
|
int imsize=audioData[i].x*audioData[i].y;
|
||||||
int imsize=audioData[i].x*audioData[i].y;
|
int offset1=audioData[i].index*imsize;
|
||||||
|
|
||||||
offset1_offset2_imsize[0] = audioData[i].index * imsize;
|
// Sets kernel args
|
||||||
offset1_offset2_imsize[1] = audioData[j].index * imsize;
|
clSetKernelArg(kernel,1,sizeof(int),&offset1);
|
||||||
offset1_offset2_imsize[2] = imsize;
|
clSetKernelArg(kernel,2,sizeof(int),&imsize);
|
||||||
|
|
||||||
// Writes to gpu buffer for args
|
// Launches kernel
|
||||||
clEnqueueWriteBuffer(commandQueue,offset1_offset2_imsize_gpumem,CL_TRUE,0,sizeof(int)*3,offset1_offset2_imsize,0,NULL,NULL);
|
clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0, NULL, NULL);
|
||||||
// Launches kernel
|
// Read buffer back
|
||||||
clEnqueueNDRangeKernel(commandQueue,kernel,1,NULL,&globalWorkSize,&localWorkSize,0, NULL, NULL);
|
clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float)*2000, distance, 0, NULL, NULL);
|
||||||
// reads distance back
|
for(int j=0;j<counter;j++){
|
||||||
clEnqueueReadBuffer(commandQueue, distance_gpumem, CL_TRUE, 0, sizeof(float), &distance, 0, NULL, NULL);
|
distanceArrays[i][j]=distance[j];
|
||||||
computeCounter++;
|
computeCounter++;
|
||||||
}
|
}
|
||||||
printf("\rWay through matrix compute: %.2f%%",(computeCounter/(float)matrixSize)*100);
|
printf("\rWay through matrix compute: %.2f%%",(computeCounter/(float)matrixSize)*100);
|
||||||
@@ -166,7 +163,6 @@ void computeDistanceOpenCL(unsigned char* imgs,AudioData *audioData, float **dis
|
|||||||
printf("\n");
|
printf("\n");
|
||||||
// releasing all objects
|
// releasing all objects
|
||||||
clReleaseMemObject(distance_gpumem);
|
clReleaseMemObject(distance_gpumem);
|
||||||
clReleaseMemObject(offset1_offset2_imsize_gpumem);
|
|
||||||
clReleaseMemObject(imgs_gpumem);
|
clReleaseMemObject(imgs_gpumem);
|
||||||
clReleaseKernel(kernel);
|
clReleaseKernel(kernel);
|
||||||
clReleaseProgram(program);
|
clReleaseProgram(program);
|
||||||
@@ -245,8 +241,8 @@ int main(){
|
|||||||
}
|
}
|
||||||
|
|
||||||
time_t now = time(NULL);
|
time_t now = time(NULL);
|
||||||
// computeDistanceMatrixOMP(imgData,audioData, distanceArrays,nfiles, counter);
|
|
||||||
// computeDistanceMatrix(imgData,audioData, distanceArrays,nfiles, counter);
|
// computeDistanceMatrix(imgData,audioData, distanceArrays,nfiles, counter);
|
||||||
|
// computeDistanceMatrixOMP(imgData,audioData, distanceArrays,nfiles, counter);
|
||||||
computeDistanceOpenCL(imgData,audioData, distanceArrays,nfiles, counter);
|
computeDistanceOpenCL(imgData,audioData, distanceArrays,nfiles, counter);
|
||||||
printf("Time it took to compute matrix: %lld seconds\n",time(NULL)-now);
|
printf("Time it took to compute matrix: %lld seconds\n",time(NULL)-now);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user