50cent Posted March 5, 2011 Report Posted March 5, 2011 OpenCL (Open Computing Language) este un nou standard de programare care se executa in paralel pe multiple platforme(CPU ,GPU, DSP).Programele openCL au de obicei un fisier extern numit kernel in care se gasesc functii care urmeaza sa fie executate in paralel.Am sa va arat un mic programel openCL care ia 2 vectori si care salveaza in alt vector minmul dintre elementele din a si b de pe fiecare pozitie#include <stdio.h>#include <stdlib.h>#include <conio.h>#include <iostream>using namespace std;//header si .lib open cl#include <CL/cl.h>#pragma comment (lib,"OpenCL.lib")//globaleconst int ELEMENTS = 200;//prototip functiichar* readSource(const char *sourceFilename);void afisare(int *,int n);int main(int argc, char ** argv){ size_t datasize = sizeof(int)*ELEMENTS;//dimensiune memorie de alocat int *A, *B;//vectori de intrare int *C;//vectorul final /* alocare memorie */ A = (int*)malloc(datasize); B = (int*)malloc(datasize); C = (int*)malloc(datasize); //initializare for(int i =0; i < ELEMENTS; ++i) { A[i] = rand() % 100 + 1; B[i] = rand() % 100 + 1; } afisare(A,ELEMENTS); afisare(B,ELEMENTS); // variabile specifice open cl cl_int status; //folosita pentru a verifica erori pentru majoritatea functiilor cl_uint numPlatforms = 0; //numarul de platforme cl_platform_id *platforms; //pointer catre platforme cl_uint numDevices = 0; //numarul de dispozitive open cl cl_device_id *devices; //pointer catre dispozitive cl_context context; //contextul open cl cl_command_queue cmdQueue0,cmdQueue1;//stiva de comenzi cl_mem d_A0, d_A1, d_B0, d_B1; // bufferele de intrare cl_mem d_C0, d_C1; // buffere de iesire cl_program program; //pointer catre programul open cl(folosit pentru executia kernel) cl_kernel kernel0, kernel1; //kernel cl_int buildErr; //folosit pentru a stoca erorile de compilare kernel clGetPlatformIDs(0, NULL, &numPlatforms);//numarul de platforme platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));//aloca memorie in functie de platformele gasite clGetPlatformIDs(numPlatforms, platforms, NULL);//stocheaza platofrmele in pointer status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);//numarul de dispozitive if(numDevices < 2) { printf("Cel putin 2 dispozitive open cl, (%d gasite).\n", numDevices);//avem nevoie de cpu+gpu exit(-1); } devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id));//aloca memorie in functie de dispozitive gasite status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL);//stocheaza dispozitivele in pointer context = clCreateContext(NULL, numDevices, devices, NULL, NULL, &status);//creare context cmdQueue0 = clCreateCommandQueue(context, devices[0], 0, &status); cmdQueue1 = clCreateCommandQueue(context, devices[1], 0, &status); //alocare buffere pentru fiecare vector(jumatate CPU jumatate GPU) //A d_A0 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, datasize/2, A, &status); d_A1 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, datasize/2, &A[ELEMENTS/2], &status); //B d_B0 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, datasize/2, B, &status); d_B1 = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, datasize/2, &B[ELEMENTS/2], &status); //C d_C0 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize/2, NULL, &status); d_C1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize/2, NULL, &status); char *source;//se retine sursa pentru kernel const char *sourceFile = "kernel.cl";//numele fisierului care contine kernelul source = readSource(sourceFile); program = clCreateProgramWithSource(context, 1, (const char**)&source, NULL, &status);//compilare kernel buildErr = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL); //select este numele functiei din fisierul kernel.cl kernel0 = clCreateKernel(program, "select", &status); kernel1 = clCreateKernel(program, "select", &status); // asociere buffere cu argumentele functiei din kernel status = clSetKernelArg(kernel0, 0, sizeof(cl_mem), &d_A0); status |= clSetKernelArg(kernel0, 1, sizeof(cl_mem), &d_B0); status |= clSetKernelArg(kernel0, 2, sizeof(cl_mem), &d_C0); status = clSetKernelArg(kernel1, 0, sizeof(cl_mem), &d_A1); status |= clSetKernelArg(kernel1, 1, sizeof(cl_mem), &d_B1); status |= clSetKernelArg(kernel1, 2, sizeof(cl_mem), &d_C1); size_t globalWorkSize[1]; globalWorkSize[0] = ELEMENTS/2; // executie kernele status = clEnqueueNDRangeKernel(cmdQueue0, kernel0, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);//GPU status = clEnqueueNDRangeKernel(cmdQueue1, kernel1, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);//CPU //transfera datele din buffer in vectorul c clEnqueueReadBuffer(cmdQueue0, d_C0, CL_TRUE, 0, datasize/2, C, 0, NULL, NULL); clEnqueueReadBuffer(cmdQueue1, d_C1, CL_TRUE, 0, datasize/2, &C[ELEMENTS/2], 0, NULL, NULL); cout<<"rezultat \n"; afisare(C,ELEMENTS); //eliberare memorie clReleaseKernel(kernel0); clReleaseKernel(kernel1); clReleaseProgram(program); clReleaseCommandQueue(cmdQueue0); clReleaseCommandQueue(cmdQueue1); clReleaseMemObject(d_A0); clReleaseMemObject(d_A1); clReleaseMemObject(d_B0); clReleaseMemObject(d_B1); clReleaseMemObject(d_C0); clReleaseMemObject(d_C1); clReleaseContext(context); free(A); free(; free(C); free(source); free(platforms); free(devices); _getch();}char* readSource(const char *sourceFilename) { FILE *fp; int size; char *source; fp = fopen(sourceFilename, "rb"); if(fp == NULL) { printf("Nu gasesc fisierul: %s\n", sourceFilename); exit(-1); } fseek(fp, 0, SEEK_END); size = ftell(fp); fseek(fp, 0, SEEK_SET); source = (char*)malloc(size+1); fread(source, 1, size, fp); source[size] = '\0'; return source;}void afisare(int *a,int n){ for(int i=0;i<n;++i) cout<<a[i]<<" "; cout<<endl;}Si fisierul kernel.cl :__kernel void select(__global int *a, __global int *b, __global int *c) { int i = get_global_id(0); c[i]=a[i]>b[i]?b[i]:a[i];} Quote