opencl 학습(二)

OpenCL의 구조에 대한 소개를 어느 정도 살펴보았으니, 이제 다른 사람들의 예제를 따라 직접 코드를 작성해 본다. 먼저 입문용으로 자주 쓰이는 간단한 1차원 배열 덧셈 OpenCL 코드를 작성하여 OpenCL 코드의 구조를 익히자. 이 코드는 Freescale i.MX6Q에서 실행되며, 크로스 컴파일 도구에 OpenCL 라이브러리와 헤더 파일 경로 등의 컴파일 옵션을 추가하여 컴파일한다.

#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <fstream>
 
using namespace std;

int main(int argc, char**argv)
{
    cl_platform_id platform;
    cl_context context = 0;
     cl_command_queue queue = 0;
    cl_device_id device = 0;
    cl_kernel kernel = 0;
    cl_int errNum = 0;
     cl_uint numPlatforms = 0;
 
     //Platform
     errNum = clGetPlatformIDs( 1, &platform, &numPlatforms );
    if( errNum != CL_SUCCESS )
    {
         cout << "Error getting platform id:" <<endl;
        return 1;
     }
 
     //Device
	 errNum = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
     if( errNum != CL_SUCCESS )
     {
         cout << "Error getting device ids:"  <<endl;
         return 1;
      }

     //Context
      context = clCreateContext(0, 1, &device, NULL, NULL, &errNum);
     if( errNum != CL_SUCCESS )
     {
         cout << "Error creating context:" <<endl;
         return 1;
     }
 
     //Command-queue
     queue = clCreateCommandQueue( context, device, 0, &errNum );
     if( errNum != CL_SUCCESS )
     {
         cout << "Error creating command queue:" <<endl;
         return 1;
     }

	//create memory objects that will be used as arguements to 
	//kernel. First create host memory arrays that will be 
	//used to store the arguments to the kernel
	const int ARRAY_SIZE = 1024;	
	float result[ARRAY_SIZE];
	float a[ARRAY_SIZE];
	float b[ARRAY_SIZE];
	for(int i=0; i<ARRAY_SIZE; i++)
	{
		a[i] = i*1.0;
		b[i] = i*2.0;
	}
	cl_mem memObjects[3] = { 0, 0, 0 };
	memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ARRAY_SIZE*sizeof(float), a, &errNum);
	memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ARRAY_SIZE*sizeof(float), b, &errNum);
	memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, ARRAY_SIZE*sizeof(float), NULL, &errNum);
	if( memObjects[0] == NULL || memObjects[1] == NULL || memObjects[2] == NULL)
	{
		cout << "Error creating memory object." << endl;
		return 1;
	}
	
	const char* source = "__kernel void helloworld(__global const float *a, \
						 __global const float *b, \
						 __global float *result) \
						{\
							int gid = get_global_id(0);\
							result[gid] = a[gid] + b[gid];\
						}";
	cl_program program = clCreateProgramWithSource( context, 1, &source, NULL, NULL );
	//assert( errNum == CL_SUCCESS );
	if( errNum != CL_SUCCESS )
	{
		cout << "Error creating program!" <<endl;
		return 1;
	}

	//builds the program
	errNum = clBuildProgram( program, 1, &device, NULL, NULL, NULL );
	//assert( errNum == CL_SUCESS );
	if( errNum != CL_SUCCESS )
	{
		cout << "Error building program!" <<endl;
		return 1;
	}

	//Shows the log
	char* build_log;
	size_t log_size;
	//first call to know the proper size
	clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size );
	build_log = new char[ log_size+1 ];
	//second call to know the proper size
	clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL );
	build_log[log_size] = '\0';
	cout << build_log << endl;
	delete[] build_log;
	
	//extracting the kernel
	kernel = clCreateKernel( program, "helloworld", &errNum);	
	if( errNum != CL_SUCCESS )
	{
		switch(errNum){
			case CL_INVALID_PROGRAM:printf("1");break;		
			case CL_INVALID_PROGRAM_EXECUTABLE:printf("2");break;		
			case CL_INVALID_KERNEL_NAME:printf("3");break;
			case CL_INVALID_KERNEL_DEFINITION:printf("4");break;		
			case CL_INVALID_VALUE:printf("5");break;		
			case CL_OUT_OF_RESOURCES:printf("6");break;		
			case CL_OUT_OF_HOST_MEMORY:printf("7");break;		
						}
		cout << "Error create kernel." <<endl;
		return 1;
	}
	

	//set the kernel argument (result, a, b)
	errNum = clSetKernelArg(kernel, 0, 
									sizeof(cl_mem), &memObjects[0]);
	if( errNum != CL_SUCCESS )
	{
		
		cout << "Error setting kernel arguments 1." <<endl;
		return 1;
	}

	errNum |= clSetKernelArg(kernel, 1, 
									sizeof(cl_mem), &memObjects[1]);
	if( errNum != CL_SUCCESS )
	{
		cout << "Error setting kernel arguments 2." <<endl;
		return 1;
	}

	errNum |= clSetKernelArg(kernel, 2, 
									sizeof(cl_mem), &memObjects[2]);

	if( errNum != CL_SUCCESS )
	{
		cout << "Error setting kernel arguments 3." <<endl;
		return 1;
	}

	size_t globalWorkSize[1] = { ARRAY_SIZE };
	size_t localWorkSize[1] = { 1 };

	//queue the kernel up for execution across the array
	errNum = clEnqueueNDRangeKernel( queue, kernel, 1, NULL,
									globalWorkSize, localWorkSize,
									0, NULL, NULL);
	if( errNum != CL_SUCCESS )
	{
		cout << "Error queuing kernel for execution." <<endl;
		return 1;
	}

	
	//read the output buffer back to the Host
	errNum = clEnqueueReadBuffer(queue, memObjects[2],
								CL_TRUE, 0, ARRAY_SIZE * sizeof(float),
								result, 0, NULL, NULL);
	if( errNum != CL_SUCCESS )
	{
					
		switch(errNum){
			case CL_INVALID_COMMAND_QUEUE:printf("1");break;		
			case CL_INVALID_CONTEXT:printf("2");break;		
			case CL_INVALID_MEM_OBJECT:printf("3");break;
			case CL_INVALID_VALUE:printf("4");break;		
			case CL_INVALID_EVENT_WAIT_LIST:printf("5");break;		
			case CL_MISALIGNED_SUB_BUFFER_OFFSET:printf("6");break;		
			case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:printf("7");break;		
			case CL_MEM_OBJECT_ALLOCATION_FAILURE:printf("8");break;		
			case CL_OUT_OF_RESOURCES:printf("9");break;		
			case CL_OUT_OF_HOST_MEMORY:printf("10");break;		
						}
		printf("
");
		cout << "Error reading result buffer." <<endl;
		return 1;
	}

printf("after output
");

	//output the result buffer
	for( int i = 0; i < ARRAY_SIZE; i++)
	{
		cout << result[i] << " ";
	}
	
	cout << endl;
	cout << "executed program successfully." << endl;
	
	delete[] a;
	delete[] b;
	delete[] result;
	clReleaseKernel(kernel);
	clReleaseCommandQueue(queue);
	clReleaseContext(context);
	clReleaseMemObject(memObjects[0]);
	clReleaseMemObject(memObjects[1]);
	clReleaseMemObject(memObjects[2]);
	return 0;

}

이 내용에 흥미가 있습니까?

현재 기사가 여러분의 문제를 해결하지 못하는 경우 AI 엔진은 머신러닝 분석(스마트 모델이 방금 만들어져 부정확한 경우가 있을 수 있음)을 통해 가장 유사한 기사를 추천합니다:

NVIDIA/AMD GPU에서는 OpenCL 커널을 아직 C++로 작성할 수 없다는 설명(2021년 4월 시점)

OpenCL 3.0으로 크게(?) 구문을 사용합니다. 알겠습니다. 현재 상황에 대응하는 GPU가 없습니다. OpenCL C++에는 다음 조건 중 하나가 필요합니다. cl_ext_cxx_for_opencl는 확장...

텍스트를 자유롭게 공유하거나 복사할 수 있습니다. 하지만 이 문서의 URL은 참조 URL로 남겨 두십시오.

CC BY-SA 2.5, CC BY-SA 3.0 및 CC BY-SA 4.0에 따라 라이센스가 부여됩니다.

opencl 학습(二)

좋은 웹페이지 즐겨찾기