CUDA와 OpenCV를 이용한 이미지 감마(GAMMA) 변환

3632 단어
매우 간단한 CUDA 프로그램으로, CUDA를 처음 접하는 사람이 CUDA의 동작 원리와 OpenCV와 함께 사용하는 기본적인 방법을 이해하기에 적합하다.
#include <stdlib.h>
#include <stdio.h>
#include <cv.h>
#include <highgui.h>
#include "cutil_inline.h"

// Gamma exponent; runTest() passes it to testKernel as the `gamma` argument.
#define GAMMA 0.4

// Forward declaration: runTest() is defined after main(), which calls it.
void runTest(int argc, char** argv);

/*
 * Per-pixel gamma mapping: d_odata[i] = powf(d_idata[i], gamma).
 *
 * Launch layout: a 2D grid of 2D blocks, one thread per pixel. The thread at
 * grid position (x, y) handles flat element y*width + x, i.e. both buffers are
 * treated as tightly packed row-major arrays of width*height floats.
 *
 * Threads whose (x, y) falls outside the width x height image return without
 * touching memory, so the grid may be rounded up to a whole number of blocks.
 *
 * d_idata  device pointer to the input values
 * d_odata  device pointer to the output values (same layout as d_idata)
 * width    number of elements per row
 * height   number of rows
 * gamma    exponent applied to every element
 */
__global__ void testKernel(float* d_idata, float* d_odata, int width, int height, float gamma)
{
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;

    // Reject empty/invalid extents and threads in the grid's overhang.
    if (width <= 0 || height <= 0)
        return;
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // size_t keeps y*width from wrapping in 32 bits on very large images.
    const size_t idx = (size_t)y * (size_t)width + x;

    d_odata[idx] = powf(d_idata[idx], gamma);
}

// Program entry point: forwards the command line to runTest(), then invokes
// CUT_EXIT with the same arguments.
// NOTE(review): CUT_EXIT is not defined in this file (presumably a cutil
// macro); its exact behavior cannot be confirmed from here.
int main(int argc, char** argv)
{
 runTest(argc, argv);
 CUT_EXIT(argc, argv);
}

/*
 * Loads "lena_gray.jpg" as a single-channel 8-bit image, applies the GAMMA
 * power curve to every pixel on the GPU via testKernel, and displays the
 * source and the result in two HighGUI windows until a key is pressed.
 *
 * argc/argv are only used for device selection: if the "device" command-line
 * flag is present, cutilDeviceInit() handles it; otherwise the device id
 * returned by cutGetMaxGflopsDeviceId() is selected.
 *
 * If the image cannot be loaded or host memory cannot be allocated, a message
 * is written to stderr and the function returns after releasing what it had
 * acquired.
 */
void runTest(int argc, char** argv)
{
    if (cutCheckCmdLineFlag(argc, (const char**)argv, "device"))
    {
        cutilDeviceInit(argc, argv);
    }
    else
    {
        CUDA_SAFE_CALL(cudaSetDevice(cutGetMaxGflopsDeviceId()));
    }

    unsigned int timer = 0;
    cutilCheckError(cutCreateTimer(&timer));
    cutilCheckError(cutStartTimer(timer));

    IplImage* pImg = cvLoadImage("lena_gray.jpg", CV_LOAD_IMAGE_GRAYSCALE);
    if (pImg == 0)
    {
        fprintf(stderr, "Failed to load image: lena_gray.jpg\n");
        cutilCheckError(cutDeleteTimer(timer));
        return;
    }

    cvNamedWindow("Image", 1);
    cvShowImage("Image", pImg);

    const int width  = pImg->width;
    const int height = pImg->height;

    // Threads per block edge; must match the dim3 used for the launch below.
    const unsigned int kBlockEdge = 16;

    // Round the grid UP so images whose size is not a multiple of 16 are
    // fully covered (and images smaller than 16 px still get one block).
    const unsigned int num_blocks_x = ((unsigned int)width  + kBlockEdge - 1) / kBlockEdge;
    const unsigned int num_blocks_y = ((unsigned int)height + kBlockEdge - 1) / kBlockEdge;

    // Host buffers are tightly packed (row stride == width) because the
    // kernel indexes with y*width + x; the image rows themselves are
    // widthStep bytes apart and may carry padding.
    const size_t num_pixels = (size_t)width * (size_t)height;
    const size_t host_bytes = num_pixels * sizeof(float);

    // Device buffers are padded to the largest flat index the rounded-up
    // grid can form, (gridH - 1) * width + gridW, so the launch stays inside
    // the allocation whether or not the kernel guards its own bounds.
    const size_t grid_w    = (size_t)num_blocks_x * kBlockEdge;
    const size_t grid_h    = (size_t)num_blocks_y * kBlockEdge;
    const size_t dev_elems = (grid_h - 1) * (size_t)width + grid_w;
    const size_t dev_bytes = dev_elems * sizeof(float);

    float* h_idata = (float*)malloc(host_bytes);
    float* h_odata = (float*)malloc(host_bytes);
    if (h_idata == 0 || h_odata == 0)
    {
        fprintf(stderr, "Failed to allocate host buffers\n");
        free(h_idata);
        free(h_odata);
        cvDestroyWindow("Image");
        cvReleaseImage(&pImg);
        cutilCheckError(cutDeleteTimer(timer));
        return;
    }

    float* d_idata = 0;
    float* d_odata = 0;
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_idata, dev_bytes));
    CUDA_SAFE_CALL(cudaMalloc((void**)&d_odata, dev_bytes));
    // Give the padding tail of the input a defined value.
    CUDA_SAFE_CALL(cudaMemset(d_idata, 0, dev_bytes));

    // Convert 8-bit pixels to floats in [0, 1]. imageData is declared as
    // char*, so each row is read through an unsigned pointer to keep values
    // above 127 from turning negative.
    for (int y = 0; y < height; ++y)
    {
        const uchar* src_row = (const uchar*)(pImg->imageData + (size_t)y * pImg->widthStep);
        float*       dst_row = h_idata + (size_t)y * width;
        for (int x = 0; x < width; ++x)
            dst_row[x] = src_row[x] / 255.0f;
    }

    CUDA_SAFE_CALL(cudaMemcpy(d_idata, h_idata, host_bytes, cudaMemcpyHostToDevice));

    dim3 grid(num_blocks_x, num_blocks_y, 1);
    dim3 threads(kBlockEdge, kBlockEdge, 1);

    testKernel<<<grid, threads>>>(d_idata, d_odata, width, height, GAMMA);

    CUT_CHECK_ERROR("Kernel execution failed");

    CUDA_SAFE_CALL(cudaMemcpy(h_odata, d_odata, host_bytes, cudaMemcpyDeviceToHost));

    // Scale back to 8 bits, rounding to nearest and clamping to [0, 255];
    // rows are written using the output image's own widthStep.
    IplImage* oImg = cvCreateImage(cvSize(width, height), IPL_DEPTH_8U, 1);
    for (int y = 0; y < height; ++y)
    {
        const float* src_row = h_odata + (size_t)y * width;
        uchar*       dst_row = (uchar*)(oImg->imageData + (size_t)y * oImg->widthStep);
        for (int x = 0; x < width; ++x)
        {
            float v = src_row[x] * 255.0f + 0.5f;
            if (!(v > 0.0f))        // also catches NaN
                v = 0.0f;
            else if (v > 255.0f)
                v = 255.0f;
            dst_row[x] = (uchar)v;
        }
    }

    cvNamedWindow("Result", CV_WINDOW_AUTOSIZE);
    cvShowImage("Result", oImg);
//  cvSaveImage("result.jpg", oImg);

    cutilCheckError(cutStopTimer(timer));
    printf("Processing time: %f (ms)\n", cutGetTimerValue(timer));
    cutilCheckError(cutDeleteTimer(timer));

    cvWaitKey(0);
    cvDestroyWindow("Image");
    cvReleaseImage(&pImg);
    cvDestroyWindow("Result");
    cvReleaseImage(&oImg);

    free(h_idata);
    free(h_odata);
    CUDA_SAFE_CALL(cudaFree(d_idata));
    CUDA_SAFE_CALL(cudaFree(d_odata));
}

http://blog.csdn.net/mmjwung/article/details/6273653

좋은 웹페이지 즐겨찾기