0_Simple__simpleSurfaceWrite

使用表面写入函数，结合纹理引用实现图片的旋转
▶ 源代码

 #include <stdio.h>
 #include <stdlib.h>
 #include <windows.h>
 #include <cuda_runtime.h>
 #include "device_launch_parameters.h"
 #include <helper_functions.h>
 #include <helper_cuda.h>

 #define WINDOWS_LEAN_AND_MEAN

 #define NOMINMAX

 #define MIN_EPSILON_ERROR 5e-3f

 // Rotation angle passed to transformKernel, in radians.
 float angle = 0.5f;

 // 2D texture reference sampled by transformKernel via tex2D(); fetches return
 // the stored float element as-is (cudaReadModeElementType).
 texture<float, cudaTextureType2D, cudaReadModeElementType> tex;

 // 2D surface reference written by surfaceWriteKernel via surf2Dwrite().
 surface<void, cudaSurfaceType2D> outputSurface;

 // Surface write: copies the image held in linear global memory (gIData) into the
 // CUDA array bound to outputSurface, one pixel per thread.
 //
 //   gIData  row-major float image of width * height elements
 //   width   image width in pixels
 //   height  image height in pixels
 //
 // Expected launch: a 2D grid of 2D blocks in which thread (x, y) handles pixel
 // (x, y). Threads that fall outside the image return without touching memory.
 __global__ void surfaceWriteKernel(float *gIData, int width, int height)
 {
     const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
     const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

     // Guard the grid tail so an over-sized launch cannot read or write out of bounds.
     if (x >= (unsigned int)width || y >= (unsigned int)height)
     {
         return;
     }

     // The surface x coordinate is a byte offset, so scale the pixel index by the
     // element size; y stays a row index.
     surf2Dwrite(gIData[y * width + x], outputSurface, (int)(x * sizeof(float)), y, cudaBoundaryModeTrap);
 }

 // Texture sampling: rotates the image stored in the CUDA array bound to tex by
 // theta radians about the image centre and writes the result to global memory.
 //
 //   gOData  output buffer, row-major, width * height floats
 //   width   image width in pixels
 //   height  image height in pixels
 //   theta   rotation angle in radians
 //
 // Expected launch: a 2D grid of 2D blocks in which thread (x, y) produces output
 // pixel (x, y). Threads that fall outside the image return without writing.
 __global__ void transformKernel(float *gOData,int width,int height,float theta)
 {
     const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
     const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

     // Guard the grid tail so an over-sized launch cannot write out of bounds.
     if (x >= (unsigned int)width || y >= (unsigned int)height)
     {
         return;
     }

     // Scale the pixel index to [0, 1) and shift so the rotation is about the centre.
     const float u = x / (float)width - 0.5f;
     const float v = y / (float)height - 0.5f;

     const float cosTheta = cosf(theta);
     const float sinTheta = sinf(theta);

     // Rotate, shift back by 0.5 and sample the texture at the rotated coordinate.
     gOData[y * width + x] = tex2D(tex, u * cosTheta - v * sinTheta + 0.5f, v * cosTheta + u * sinTheta + 0.5f);
 }

 // Demo driver: loads a grayscale image, copies it into a CUDA array through a
 // surface write, rotates it by `angle` through texture sampling, saves the result
 // and compares it with a reference image.
 // Returns 0 when the run completes and 1 when an input image cannot be used.
 int main()
 {
     printf("\n\tStart.\n");

     // The device-selection logic of the original sample was removed; device 0 is used.
     const int device = 0;
     checkCudaErrors(cudaSetDevice(device));
     cudaDeviceProp deviceProps;
     checkCudaErrors(cudaGetDeviceProperties(&deviceProps, device));
     printf("\n\tDevice %s, Multi-Processors: %d, SM %d.%d\n", deviceProps.name, deviceProps.multiProcessorCount, deviceProps.major, deviceProps.minor);

     // Load the input image and the reference image. The sdkFindFilePath() lookup
     // of the original sample was removed, so the paths are hard-coded.
     float *h_data = NULL, *h_dataRef = NULL;
     unsigned int width = 0, height = 0;
     unsigned int refWidth = 0, refHeight = 0;
     if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\lena_bw.pgm", &h_data, &width, &height))
     {
         fprintf(stderr, "\n\tFailed to load the input image.\n");
         return 1;
     }
     if (!sdkLoadPGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\ref_rotated.pgm", &h_dataRef, &refWidth, &refHeight))
     {
         fprintf(stderr, "\n\tFailed to load the reference image.\n");
         free(h_data);
         return 1;
     }
     // The reference is compared element by element with the result, so both
     // images must have the same dimensions.
     if (refWidth != width || refHeight != height)
     {
         fprintf(stderr, "\n\tReference image is %u x %u but input image is %u x %u.\n", refWidth, refHeight, width, height);
         free(h_data);
         free(h_dataRef);
         return 1;
     }
     const size_t size = (size_t)width * height * sizeof(float);
     printf("\n\tLoad input files, %u x %u pixels\n", width, height);

     // Allocate device memory: a linear buffer plus a CUDA array that supports
     // surface load/store (one 32-bit float channel per element).
     float *d_data = NULL;
     checkCudaErrors(cudaMalloc((void **) &d_data, size));
     cudaArray *cuArray;
     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
     checkCudaErrors(cudaMallocArray(&cuArray, &channelDesc, width, height, cudaArraySurfaceLoadStore));
     checkCudaErrors(cudaMemcpy(d_data, h_data, size, cudaMemcpyHostToDevice));
     //cudaMemcpyToArray(cuArray,0,0,h_data,size,cudaMemcpyHostToDevice); when only texture memory is used, the data can be copied straight into cuArray

     // Bind the surface reference to the CUDA array.
     checkCudaErrors(cudaBindSurfaceToArray(outputSurface, cuArray, channelDesc));

     // Surface write: global memory -> CUDA array.
     // The grid size uses truncating division, so image dimensions are assumed to
     // be multiples of the block size; a partial tile at the edge is not processed.
     dim3 dimBlock(8, 8, 1);
     dim3 dimGrid(width / dimBlock.x, height / dimBlock.y, 1);
     surfaceWriteKernel<<<dimGrid, dimBlock>>>(d_data, width, height);
     checkCudaErrors(cudaGetLastError());

     // Bind the texture reference: wrap addressing in both dimensions, linear
     // filtering, normalized coordinates.
     tex.addressMode[0] = cudaAddressModeWrap;
     tex.addressMode[1] = cudaAddressModeWrap;
     tex.filterMode = cudaFilterModeLinear;
     tex.normalized = true;
     checkCudaErrors(cudaBindTextureToArray(tex, cuArray, channelDesc));

     // Warm-up run.
     transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
     checkCudaErrors(cudaGetLastError());
     checkCudaErrors(cudaDeviceSynchronize());

     // Timed run.
     StopWatchInterface *timer = NULL;
     sdkCreateTimer(&timer);
     sdkStartTimer(&timer);
     transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width, height, angle);
     checkCudaErrors(cudaGetLastError());
     checkCudaErrors(cudaDeviceSynchronize());
     sdkStopTimer(&timer);
     // Read the elapsed time BEFORE deleting the timer; reading it afterwards is
     // what produced the "0.000000 ms, inf Mpixels/sec" report.
     const float elapsedMs = sdkGetTimerValue(&timer);
     sdkDeleteTimer(&timer);
     printf("\n\tCost time: %f ms, %.2f Mpixels/sec\n", elapsedMs, (width * height / (elapsedMs / 1000.0f)) / 1e6);

     // Copy the result back, save it and check it against the reference.
     checkCudaErrors(cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost));
     sdkSavePGM("D:\\Code\\CUDA\\cudaProjectTemp\\data\\output.pgm", h_data, width, height);
     printf("\n\tSave output file.\n");
     printf("\n\tFinish, return %s.\n", compareData(h_data, h_dataRef, width * height, MIN_EPSILON_ERROR, 0.0f) ? "Passed" : "Failed");

     // Release device and host resources.
     checkCudaErrors(cudaFree(d_data));
     checkCudaErrors(cudaFreeArray(cuArray));
     free(h_data);
     free(h_dataRef);

     getchar();
     return 0;
 }

▶ 输出结果

 Start.

 Device GeForce GTX , Multi-Processors: , SM 6.1

 Load input files,  x  pixels

 Cost time: 0.000000 ms, inf Mpixels/sec

 Save output file.

 Finish, return Passed

▶ 涨姿势

● 使用函数 sdkLoadPGM() 读取图片数据

 // helper_image.h

 // Reads a binary PGM ("P5", 1 channel) or PPM ("P6", 3 channels) file.
 //
 //   file      path of the image file
 //   data      in/out pixel buffer. When *data is NULL a buffer of
 //             width * height * channels bytes is malloc'ed and returned here;
 //             otherwise the caller's buffer is filled and *w / *h must already
 //             equal the image dimensions
 //   w, h      image width / height (written only when the buffer is allocated here)
 //   channels  receives 1 for PGM, 3 for PPM, 0 for an unrecognised file
 //
 // Returns true on success. Returns false when the file cannot be opened, the
 // header cannot be read or is not P5/P6, the dimensions do not match a
 // caller-supplied buffer, allocation fails, or no pixel data could be read.
 // The file is closed on every path.
 inline bool __loadPPM(const char *file, unsigned char **data, unsigned int *w, unsigned int *h, unsigned int *channels)
 {
     FILE *fp = NULL;
     if (FOPEN_FAIL(FOPEN(fp, file, "rb")))
     {
         std::cerr << "__LoadPPM() : Failed to open file: " << file << std::endl;
         return false;
     }

     // check header
     char header[PGMHeaderSize];
     if (fgets(header, PGMHeaderSize, fp) == NULL)
     {
         std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
         fclose(fp);
         return false;
     }

     if (strncmp(header, "P5", 2) == 0)
     {
         *channels = 1;
     }
     else if (strncmp(header, "P6", 2) == 0)
     {
         *channels = 3;
     }
     else
     {
         std::cerr << "__LoadPPM() : File is not a PPM or PGM image" << std::endl;
         *channels = 0;
         fclose(fp);
         return false;
     }

     // parse header, read maxval, width and height
     // The three values may be spread over one to three lines; i counts how many
     // of them have been parsed so far. Lines starting with '#' are comments.
     unsigned int width = 0;
     unsigned int height = 0;
     unsigned int maxval = 0;
     unsigned int i = 0;
     while (i < 3)
     {
         if (fgets(header, PGMHeaderSize, fp) == NULL)
         {
             std::cerr << "__LoadPPM() : reading PGM header returned NULL" << std::endl;
             fclose(fp);
             return false;
         }

         if (header[0] == '#')
         {
             continue;
         }

         int parsed = 0;
         if (i == 0)
         {
             parsed = SSCANF(header, "%u %u %u", &width, &height, &maxval);
         }
         else if (i == 1)
         {
             parsed = SSCANF(header, "%u %u", &height, &maxval);
         }
         else
         {
             parsed = SSCANF(header, "%u", &maxval);
         }

         // A scan that matches nothing can report a negative (EOF) result; adding
         // that to the unsigned counter would wrap it around, so ignore it.
         if (parsed > 0)
         {
             i += parsed;
         }
     }

     const size_t byteCount = (size_t)width * height * *channels;

     // check if given handle for the data is initialized
     bool allocatedHere = false;
     if (NULL != *data)
     {
         if (*w != width || *h != height)
         {
             // Reading into a caller buffer sized for other dimensions could
             // overflow it, so fail instead of continuing.
             std::cerr << "__LoadPPM() : Invalid image dimensions." << std::endl;
             fclose(fp);
             return false;
         }
     }
     else
     {
         *data = (unsigned char *)malloc(sizeof(unsigned char) * byteCount);
         if (NULL == *data)
         {
             std::cerr << "__LoadPPM() : Failed to allocate image buffer." << std::endl;
             fclose(fp);
             return false;
         }
         allocatedHere = true;
         *w = width;
         *h = height;
     }

     // read and close file
     const size_t itemsRead = fread(*data, sizeof(unsigned char), byteCount, fp);
     fclose(fp);
     if (itemsRead == 0)
     {
         std::cerr << "__LoadPPM() read data returned error." << std::endl;
         if (allocatedHere)
         {
             // Do not hand back a buffer that holds no pixel data.
             free(*data);
             *data = NULL;
         }
         return false;
     }

     return true;
 }

 // Loads a PGM/PPM file through __loadPPM() and converts its unsigned-char pixels
 // to element type T using ConverterFromUByte<T>.
 //
 //   file  path of the image file
 //   data  in/out pixel buffer. When *data is NULL a buffer of
 //         (*w) * (*h) * channels elements of T is malloc'ed and returned here;
 //         otherwise the caller's buffer is written to
 //         (NOTE(review): its capacity is not checked here - confirm callers size it correctly)
 //   w, h  image width / height, passed through to __loadPPM()
 //
 // Returns true on success, false when the file cannot be loaded or the output
 // buffer cannot be allocated.
 template <class T> inline bool sdkLoadPGM(const char *file, T **data, unsigned int *w, unsigned int *h)
 {
     unsigned char *idata = NULL;
     unsigned int channels;
     if (!__loadPPM(file, &idata, w, h, &channels))
     {
         return false;
     }

     unsigned int size = *w * *h * channels;

     // If the caller did not supply a buffer, allocate one sized from the image.
     if (*data == NULL)
     {
         *data = (T *)malloc(sizeof(T) * size);
         if (*data == NULL)
         {
             std::cerr << "sdkLoadPGM() : Failed to allocate image buffer." << std::endl;
             free(idata);
             return false;
         }
     }

     // Convert each byte to T and store it in the output buffer.
     std::transform(idata, idata + size, *data, ConverterFromUByte<T>());
     free(idata);
     return true;
 }

● 使用到的表面写入函数原型

 // surface_functions.h

 // Device-side write of one value of type T to a 2D surface reference.
 //
 //   val   value to store
 //   surf  2D surface reference to write to
 //   x, y  coordinates of the write
 //         NOTE(review): the kernel in this article scales x by the element size,
 //         which suggests x is a byte offset - confirm against the CUDA documentation
 //   mode  boundary handling mode, defaults to cudaBoundaryModeTrap
 //
 // The body is compiled only when __CUDA_ARCH__ is defined; otherwise the
 // function is empty.
 template<class T> static __device__ __forceinline__ void surf2Dwrite(T val, surface<void, cudaSurfaceType2D> surf, int x, int y, enum cudaSurfaceBoundaryMode mode = cudaBoundaryModeTrap)

 {

 #ifdef __CUDA_ARCH__

     // Forward to the "__surf2Dwrite_v2" handler, passing the address of val
     // (cast to the trait's cast_type) together with sizeof(T).
     __nv_tex_surf_handler("__surf2Dwrite_v2", (typename __nv_surf_trait<T>::cast_type)&val, (int)sizeof(T), surf, x, y, mode);

 #endif

 }

0_Simple__simpleSurfaceWrite的更多相关文章

0_Simple__simpleTexture + 0_Simple__simpleTextureDrv
使用纹理引用来旋转图片,并在使用了静态编译和运行时编译两种环境. ▶ 源代码:静态编译 #include <stdio.h> #include <windows.h> #inc ...

随机推荐

JVM 加载class文件的原理
PS:类的装载过程是ClassLoader,只有把类加载到 JVM后才能运行: PS:两种加载类的方式: 显示:new 隐式:class.forname() PS:动态的执行,用到谁,加载谁:
stenciljs 学习一 web 组件开发
stenciljs 介绍参考官方网站,或者 https://www.cnblogs.com/rongfengliang/p/9706542.html 创建项目使用脚手架工具 npm init ste ...
POSIX 线程具体解释（3-相互排斥量："固定加锁层次"/“试加锁-回退”）
有时一个相互排斥量是不够的: 比方: 当多个线程同一时候訪问一个队列结构时,你须要2个相互排斥量,一个用来保护队列头,一个用来保护队列元素内的数据. 当为多线程建立一个树结构时.你可能须要为每一个节点 ...
ubuntu 14.04安装OVS虚拟OpenFlow交换机配置总结
一.安装OVS sudo apt-get install openvswitch-controller openvswitch-switch openvswitch-datapath-source ( ...
bat中的“多线程”处理代码
大家都知道,批处理中运行的都是一步步单进程执行, 但如果进程执行比较慢,如PING一个不通的IP地址,那就会大大影响批处理程序的执行效率. 如下内容将简单举例,在WINDOWS下使用批处理做多进程并发 ...
servlet / jsp(一)
2016-03-25 11:34:14 一.实现一个简单的servlet程序 Servlet是在服务器端运行的小程序,这是一个很广泛的概念,并没有说是在web服务器端运行的小程序,除了在web服务器上 ...
Huawei E1750 Asterisk
http://wiki.e1550.mobi/doku.php?id=installation https://wiki.asterisk.org/wiki/display/AST/Mobile+Ch ...
【转】每天一个linux命令（33）：df 命令
原文网址:http://www.cnblogs.com/peida/archive/2012/12/07/2806483.html linux中df命令的功能是用来检查linux服务器的文件系统的磁盘 ...
Microsoft Dynamics CRM 2013 安装报表服务出现“ SQL Server Reporting Services 帐户是本地用户且不受支持 ”错误的解决方法
安装好CRM 2013 之后,还需要安装报表服务,发现出现:SQL Server Reporting Services 帐户是本地用户且不受支持,具体如下图: 经过分析原来发现是需要用域用户,打开对应 ...
python初始环境安装
Python下载地址 Python官网:https://www.python.org/ 在该网可以下载Python最新及历史版本.可以下载基于Windows或其它操作系统的版本. Python安装本 ...

0_Simple__simpleSurfaceWrite

0_Simple__simpleSurfaceWrite的更多相关文章

随机推荐

热门专题