OpenCL 학습 (2)

구조에 대한 소개를 어느 정도 살펴보았으니, 이제 다른 사람들의 예제를 따라 직접 코드를 작성해 본다. 먼저 입문용으로 자주 쓰이는 간단한 1차원 배열 덧셈 OpenCL 코드를 작성하면서 OpenCL 코드의 구조를 익혀 보자. 이 코드는 Freescale i.MX6Q에서 실행되며, 크로스 컴파일 도구에 OpenCL 라이브러리와 헤더 파일 경로 등의 컴파일 옵션을 추가하여 컴파일한다.
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <fstream>
using namespace std;

int main(int argc, char**argv)
    cl_platform_id platform;
    cl_context context = 0;
     cl_command_queue queue = 0;
    cl_device_id device = 0;
    cl_kernel kernel = 0;
    cl_int errNum = 0;
     cl_uint numPlatforms = 0;
     errNum = clGetPlatformIDs( 1, &platform, &numPlatforms );
    if( errNum != CL_SUCCESS )
         cout << "Error getting platform id:" <<endl;
        return 1;
	 errNum = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
     if( errNum != CL_SUCCESS )
         cout << "Error getting device ids:"  <<endl;
         return 1;

      context = clCreateContext(0, 1, &device, NULL, NULL, &errNum);
     if( errNum != CL_SUCCESS )
         cout << "Error creating context:" <<endl;
         return 1;
     queue = clCreateCommandQueue( context, device, 0, &errNum );
     if( errNum != CL_SUCCESS )
         cout << "Error creating command queue:" <<endl;
         return 1;

	//create memory objects that will be used as arguements to 
	//kernel. First create host memory arrays that will be 
	//used to store the arguments to the kernel
	const int ARRAY_SIZE = 1024;	
	float result[ARRAY_SIZE];
	float a[ARRAY_SIZE];
	float b[ARRAY_SIZE];
	for(int i=0; i<ARRAY_SIZE; i++)
		a[i] = i*1.0;
		b[i] = i*2.0;
	cl_mem memObjects[3] = { 0, 0, 0 };
	memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ARRAY_SIZE*sizeof(float), a, &errNum);
	memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ARRAY_SIZE*sizeof(float), b, &errNum);
	memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, ARRAY_SIZE*sizeof(float), NULL, &errNum);
	if( memObjects[0] == NULL || memObjects[1] == NULL || memObjects[2] == NULL)
		cout << "Error creating memory object." << endl;
		return 1;
	const char* source = "__kernel void helloworld(__global const float *a, \
						 __global const float *b, \
						 __global float *result) \
							int gid = get_global_id(0);\
							result[gid] = a[gid] + b[gid];\
	cl_program program = clCreateProgramWithSource( context, 1, &source, NULL, NULL );
	//assert( errNum == CL_SUCCESS );
	if( errNum != CL_SUCCESS )
		cout << "Error creating program!" <<endl;
		return 1;

	//builds the program
	errNum = clBuildProgram( program, 1, &device, NULL, NULL, NULL );
	//assert( errNum == CL_SUCESS );
	if( errNum != CL_SUCCESS )
		cout << "Error building program!" <<endl;
		return 1;

	//Shows the log
	char* build_log;
	size_t log_size;
	//first call to know the proper size
	clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size );
	build_log = new char[ log_size+1 ];
	//second call to know the proper size
	clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL );
	build_log[log_size] = '\0';
	cout << build_log << endl;
	delete[] build_log;
	//extracting the kernel
	kernel = clCreateKernel( program, "helloworld", &errNum);	
	if( errNum != CL_SUCCESS )
			case CL_INVALID_PROGRAM:printf("1");break;		
			case CL_INVALID_PROGRAM_EXECUTABLE:printf("2");break;		
			case CL_INVALID_KERNEL_NAME:printf("3");break;
			case CL_INVALID_KERNEL_DEFINITION:printf("4");break;		
			case CL_INVALID_VALUE:printf("5");break;		
			case CL_OUT_OF_RESOURCES:printf("6");break;		
			case CL_OUT_OF_HOST_MEMORY:printf("7");break;		
		cout << "Error create kernel." <<endl;
		return 1;

	//set the kernel argument (result, a, b)
	errNum = clSetKernelArg(kernel, 0, 
									sizeof(cl_mem), &memObjects[0]);
	if( errNum != CL_SUCCESS )
		cout << "Error setting kernel arguments 1." <<endl;
		return 1;

	errNum |= clSetKernelArg(kernel, 1, 
									sizeof(cl_mem), &memObjects[1]);
	if( errNum != CL_SUCCESS )
		cout << "Error setting kernel arguments 2." <<endl;
		return 1;

	errNum |= clSetKernelArg(kernel, 2, 
									sizeof(cl_mem), &memObjects[2]);

	if( errNum != CL_SUCCESS )
		cout << "Error setting kernel arguments 3." <<endl;
		return 1;

	size_t globalWorkSize[1] = { ARRAY_SIZE };
	size_t localWorkSize[1] = { 1 };

	//queue the kernel up for execution across the array
	errNum = clEnqueueNDRangeKernel( queue, kernel, 1, NULL,
									globalWorkSize, localWorkSize,
									0, NULL, NULL);
	if( errNum != CL_SUCCESS )
		cout << "Error queuing kernel for execution." <<endl;
		return 1;

	//read the output buffer back to the Host
	errNum = clEnqueueReadBuffer(queue, memObjects[2],
								CL_TRUE, 0, ARRAY_SIZE * sizeof(float),
								result, 0, NULL, NULL);
	if( errNum != CL_SUCCESS )
			case CL_INVALID_COMMAND_QUEUE:printf("1");break;		
			case CL_INVALID_CONTEXT:printf("2");break;		
			case CL_INVALID_MEM_OBJECT:printf("3");break;
			case CL_INVALID_VALUE:printf("4");break;		
			case CL_INVALID_EVENT_WAIT_LIST:printf("5");break;		
			case CL_MISALIGNED_SUB_BUFFER_OFFSET:printf("6");break;		
			case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:printf("7");break;		
			case CL_MEM_OBJECT_ALLOCATION_FAILURE:printf("8");break;		
			case CL_OUT_OF_RESOURCES:printf("9");break;		
			case CL_OUT_OF_HOST_MEMORY:printf("10");break;		
"); cout << "Error reading result buffer." <<endl; return 1; } printf("after output
"); //output the result buffer for( int i = 0; i < ARRAY_SIZE; i++) { cout << result[i] << " "; } cout << endl; cout << "executed program successfully." << endl; delete[] a; delete[] b; delete[] result; clReleaseKernel(kernel); clReleaseCommandQueue(queue); clReleaseContext(context); clReleaseMemObject(memObjects[0]); clReleaseMemObject(memObjects[1]); clReleaseMemObject(memObjects[2]); return 0; }

