opencl 학습(二)
6799 단어 OpenCL
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <fstream>
using namespace std;
int main(int argc, char**argv)
{
cl_platform_id platform;
cl_context context = 0;
cl_command_queue queue = 0;
cl_device_id device = 0;
cl_kernel kernel = 0;
cl_int errNum = 0;
cl_uint numPlatforms = 0;
//Platform
errNum = clGetPlatformIDs( 1, &platform, &numPlatforms );
if( errNum != CL_SUCCESS )
{
cout << "Error getting platform id:" <<endl;
return 1;
}
//Device
errNum = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
if( errNum != CL_SUCCESS )
{
cout << "Error getting device ids:" <<endl;
return 1;
}
//Context
context = clCreateContext(0, 1, &device, NULL, NULL, &errNum);
if( errNum != CL_SUCCESS )
{
cout << "Error creating context:" <<endl;
return 1;
}
//Command-queue
queue = clCreateCommandQueue( context, device, 0, &errNum );
if( errNum != CL_SUCCESS )
{
cout << "Error creating command queue:" <<endl;
return 1;
}
//create memory objects that will be used as arguements to
//kernel. First create host memory arrays that will be
//used to store the arguments to the kernel
const int ARRAY_SIZE = 1024;
float result[ARRAY_SIZE];
float a[ARRAY_SIZE];
float b[ARRAY_SIZE];
for(int i=0; i<ARRAY_SIZE; i++)
{
a[i] = i*1.0;
b[i] = i*2.0;
}
cl_mem memObjects[3] = { 0, 0, 0 };
memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ARRAY_SIZE*sizeof(float), a, &errNum);
memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ARRAY_SIZE*sizeof(float), b, &errNum);
memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, ARRAY_SIZE*sizeof(float), NULL, &errNum);
if( memObjects[0] == NULL || memObjects[1] == NULL || memObjects[2] == NULL)
{
cout << "Error creating memory object." << endl;
return 1;
}
const char* source = "__kernel void helloworld(__global const float *a, \
__global const float *b, \
__global float *result) \
{\
int gid = get_global_id(0);\
result[gid] = a[gid] + b[gid];\
}";
cl_program program = clCreateProgramWithSource( context, 1, &source, NULL, NULL );
//assert( errNum == CL_SUCCESS );
if( errNum != CL_SUCCESS )
{
cout << "Error creating program!" <<endl;
return 1;
}
//builds the program
errNum = clBuildProgram( program, 1, &device, NULL, NULL, NULL );
//assert( errNum == CL_SUCESS );
if( errNum != CL_SUCCESS )
{
cout << "Error building program!" <<endl;
return 1;
}
//Shows the log
char* build_log;
size_t log_size;
//first call to know the proper size
clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size );
build_log = new char[ log_size+1 ];
//second call to know the proper size
clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL );
build_log[log_size] = '\0';
cout << build_log << endl;
delete[] build_log;
//extracting the kernel
kernel = clCreateKernel( program, "helloworld", &errNum);
if( errNum != CL_SUCCESS )
{
switch(errNum){
case CL_INVALID_PROGRAM:printf("1");break;
case CL_INVALID_PROGRAM_EXECUTABLE:printf("2");break;
case CL_INVALID_KERNEL_NAME:printf("3");break;
case CL_INVALID_KERNEL_DEFINITION:printf("4");break;
case CL_INVALID_VALUE:printf("5");break;
case CL_OUT_OF_RESOURCES:printf("6");break;
case CL_OUT_OF_HOST_MEMORY:printf("7");break;
}
cout << "Error create kernel." <<endl;
return 1;
}
//set the kernel argument (result, a, b)
errNum = clSetKernelArg(kernel, 0,
sizeof(cl_mem), &memObjects[0]);
if( errNum != CL_SUCCESS )
{
cout << "Error setting kernel arguments 1." <<endl;
return 1;
}
errNum |= clSetKernelArg(kernel, 1,
sizeof(cl_mem), &memObjects[1]);
if( errNum != CL_SUCCESS )
{
cout << "Error setting kernel arguments 2." <<endl;
return 1;
}
errNum |= clSetKernelArg(kernel, 2,
sizeof(cl_mem), &memObjects[2]);
if( errNum != CL_SUCCESS )
{
cout << "Error setting kernel arguments 3." <<endl;
return 1;
}
size_t globalWorkSize[1] = { ARRAY_SIZE };
size_t localWorkSize[1] = { 1 };
//queue the kernel up for execution across the array
errNum = clEnqueueNDRangeKernel( queue, kernel, 1, NULL,
globalWorkSize, localWorkSize,
0, NULL, NULL);
if( errNum != CL_SUCCESS )
{
cout << "Error queuing kernel for execution." <<endl;
return 1;
}
//read the output buffer back to the Host
errNum = clEnqueueReadBuffer(queue, memObjects[2],
CL_TRUE, 0, ARRAY_SIZE * sizeof(float),
result, 0, NULL, NULL);
if( errNum != CL_SUCCESS )
{
switch(errNum){
case CL_INVALID_COMMAND_QUEUE:printf("1");break;
case CL_INVALID_CONTEXT:printf("2");break;
case CL_INVALID_MEM_OBJECT:printf("3");break;
case CL_INVALID_VALUE:printf("4");break;
case CL_INVALID_EVENT_WAIT_LIST:printf("5");break;
case CL_MISALIGNED_SUB_BUFFER_OFFSET:printf("6");break;
case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:printf("7");break;
case CL_MEM_OBJECT_ALLOCATION_FAILURE:printf("8");break;
case CL_OUT_OF_RESOURCES:printf("9");break;
case CL_OUT_OF_HOST_MEMORY:printf("10");break;
}
printf("
");
cout << "Error reading result buffer." <<endl;
return 1;
}
printf("after output
");
//output the result buffer
for( int i = 0; i < ARRAY_SIZE; i++)
{
cout << result[i] << " ";
}
cout << endl;
cout << "executed program successfully." << endl;
delete[] a;
delete[] b;
delete[] result;
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);
clReleaseMemObject(memObjects[0]);
clReleaseMemObject(memObjects[1]);
clReleaseMemObject(memObjects[2]);
return 0;
}
이 내용에 흥미가 있습니까?
현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:
NVIDIA/AMD GPU에서 OpenCL에서 C++로 커널이 아직 안 된다고 설명 (2021/04 시점) OpenCL 3.0으로 크게 (?) 구문을 사용합니다. 알겠습니다. 현재 상황에 대응하는 GPU가 없습니다. OpenCL C++에는 다음 조건 중 하나가 필요합니다. cl_ext_cxx_for_opencl는 확장...
텍스트를 자유롭게 공유하거나 복사할 수 있습니다. 하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.
CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.