Jump to content
50cent

open cl

Recommended Posts

OpenCL (Open Computing Language) este un nou standard de programare pentru cod care se executa in paralel pe multiple platforme (CPU, GPU, DSP).

Programele OpenCL au de obicei un fisier extern, numit kernel, in care se gasesc functiile care urmeaza sa fie executate in paralel.

Am sa va arat un mic program OpenCL care ia 2 vectori, a si b, si salveaza intr-un al treilea vector minimul dintre elementele din a si b de pe fiecare pozitie.


#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include <iostream>
using namespace std;
// OpenCL header and import library
#include <CL/cl.h>
#pragma comment (lib,"OpenCL.lib")
// globals
const int ELEMENTS = 200;
// function prototypes
char* readSource(const char *sourceFilename);
void afisare(int *,int n);

int main(int argc, char ** argv)
{
size_t datasize = sizeof(int)*ELEMENTS;//dimensiune memorie de alocat
int *A, *B;//vectori de intrare
int *C;//vectorul final
/* alocare memorie */
A = (int*)malloc(datasize);
B = (int*)malloc(datasize);
C = (int*)malloc(datasize);
//initializare
for(int i =0; i < ELEMENTS; ++i)
{
A[i] = rand() % 100 + 1;
B[i] = rand() % 100 + 1;
}

afisare(A,ELEMENTS);
afisare(B,ELEMENTS);
// variabile specifice open cl
cl_int status; //folosita pentru a verifica erori pentru majoritatea functiilor
cl_uint numPlatforms = 0; //numarul de platforme
cl_platform_id *platforms; //pointer catre platforme
cl_uint numDevices = 0; //numarul de dispozitive open cl
cl_device_id *devices; //pointer catre dispozitive
cl_context context; //contextul open cl
cl_command_queue cmdQueue0,cmdQueue1;//stiva de comenzi
cl_mem d_A0, d_A1, d_B0, d_B1; // bufferele de intrare
cl_mem d_C0, d_C1; // buffere de iesire
cl_program program; //pointer catre programul open cl(folosit pentru executia kernel)
cl_kernel kernel0, kernel1; //kernel
cl_int buildErr; //folosit pentru a stoca erorile de compilare kernel
clGetPlatformIDs(0, NULL, &numPlatforms);//numarul de platforme
platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));//aloca memorie in functie de platformele gasite
clGetPlatformIDs(numPlatforms, platforms, NULL);//stocheaza platofrmele in pointer
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);//numarul de dispozitive
if(numDevices < 2)
{
printf("Cel putin 2 dispozitive open cl, (%d gasite).\n", numDevices);//avem nevoie de cpu+gpu
exit(-1);
}

devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id));//aloca memorie in functie de dispozitive gasite
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL);//stocheaza dispozitivele in pointer
context = clCreateContext(NULL, numDevices, devices, NULL, NULL, &status);//creare context
cmdQueue0 = clCreateCommandQueue(context, devices[0], 0, &status);
cmdQueue1 = clCreateCommandQueue(context, devices[1], 0, &status);

//alocare buffere pentru fiecare vector(jumatate CPU jumatate GPU)

//A
d_A0 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, datasize/2, A, &status);
d_A1 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, datasize/2, &A[ELEMENTS/2], &status);
//B
d_B0 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, datasize/2, B, &status);
d_B1 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, datasize/2, &B[ELEMENTS/2], &status);
//C
d_C0 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize/2, NULL, &status);
d_C1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize/2, NULL, &status);

char *source;//se retine sursa pentru kernel
const char *sourceFile = "kernel.cl";//numele fisierului care contine kernelul
source = readSource(sourceFile);
program = clCreateProgramWithSource(context, 1, (const char**)&source, NULL, &status);//compilare kernel
buildErr = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL);
//select este numele functiei din fisierul kernel.cl
kernel0 = clCreateKernel(program, "select", &status);
kernel1 = clCreateKernel(program, "select", &status);

// asociere buffere cu argumentele functiei din kernel
status = clSetKernelArg(kernel0, 0, sizeof(cl_mem), &d_A0);
status |= clSetKernelArg(kernel0, 1, sizeof(cl_mem), &d_B0);
status |= clSetKernelArg(kernel0, 2, sizeof(cl_mem), &d_C0);

status = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &d_A1);
status |= clSetKernelArg(kernel1, 1, sizeof(cl_mem), &d_B1);
status |= clSetKernelArg(kernel1, 2, sizeof(cl_mem), &d_C1);

size_t globalWorkSize[1];
globalWorkSize[0] = ELEMENTS/2;

// executie kernele
status = clEnqueueNDRangeKernel(cmdQueue0, kernel0, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);//GPU
status = clEnqueueNDRangeKernel(cmdQueue1, kernel1, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);//CPU
//transfera datele din buffer in vectorul c
clEnqueueReadBuffer(cmdQueue0, d_C0, CL_TRUE, 0, datasize/2, C, 0, NULL, NULL);
clEnqueueReadBuffer(cmdQueue1, d_C1, CL_TRUE, 0, datasize/2, &C[ELEMENTS/2], 0, NULL, NULL);

cout<<"rezultat \n";
afisare(C,ELEMENTS);
//eliberare memorie
clReleaseKernel(kernel0);
clReleaseKernel(kernel1);
clReleaseProgram(program);
clReleaseCommandQueue(cmdQueue0);
clReleaseCommandQueue(cmdQueue1);
clReleaseMemObject(d_A0);
clReleaseMemObject(d_A1);
clReleaseMemObject(d_B0);
clReleaseMemObject(d_B1);
clReleaseMemObject(d_C0);
clReleaseMemObject(d_C1);
clReleaseContext(context);
free(A);
free(;
free(C);
free(source);
free(platforms);
free(devices);
_getch();
}

/*
 * Reads the whole file `sourceFilename` into a freshly malloc'ed,
 * NUL-terminated buffer and returns it. The caller owns the buffer and
 * must free() it.
 *
 * The file is opened in binary mode, so the bytes are returned unchanged.
 * On any failure (file cannot be opened, its size cannot be determined,
 * allocation fails, or the read comes up short) a message is printed and
 * the process exits with status -1; the function never returns NULL.
 */
char* readSource(const char *sourceFilename) {
    FILE *fp = fopen(sourceFilename, "rb");
    if (fp == NULL) {
        printf("Nu gasesc fisierul: %s\n", sourceFilename);
        exit(-1);
    }

    /* Determine the file size by seeking to the end; ftell returns long. */
    long size = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
        size = ftell(fp);
    if (size < 0 || fseek(fp, 0, SEEK_SET) != 0) {
        printf("Nu pot determina dimensiunea fisierului: %s\n", sourceFilename);
        fclose(fp);
        exit(-1);
    }

    /* One extra byte for the terminating NUL. */
    char *source = (char*)malloc((size_t)size + 1);
    if (source == NULL) {
        printf("Alocare de memorie esuata pentru fisierul: %s\n", sourceFilename);
        fclose(fp);
        exit(-1);
    }

    if (fread(source, 1, (size_t)size, fp) != (size_t)size) {
        printf("Nu pot citi fisierul: %s\n", sourceFilename);
        free(source);
        fclose(fp);
        exit(-1);
    }
    source[size] = '\0';

    /* The original never closed the stream, leaking one handle per call. */
    fclose(fp);
    return source;
}
/*
 * Prints the first n elements of `a` to standard output on one line,
 * each followed by a single space, then ends the line and flushes.
 * Nothing but the line break is printed when n <= 0.
 */
void afisare(int *a,int n)
{
    int idx = 0;
    while (idx < n) {
        std::cout << a[idx] << " ";
        ++idx;
    }
    std::cout << std::endl;
}

Si fisierul kernel.cl:


/*
 * Element-wise minimum: each work-item takes its global id in dimension 0
 * as an index and stores the smaller of a[index] and b[index] into c[index].
 *
 * There is no bounds check, so the global work size must not exceed the
 * number of elements in the three buffers.
 */
__kernel void select(__global int *a, __global int *b, __global int *c)
{
    const int gid = get_global_id(0);
    const int lhs = a[gid];
    const int rhs = b[gid];

    if (lhs > rhs)
        c[gid] = rhs;
    else
        c[gid] = lhs;
}

Link to comment
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.



×
×
  • Create New...