0_Simple__simpleCubemapTexture
立方体纹理贴图
▶ 源代码。用纹理方法把元素按原顺序从 CUDA 3D 数组(立方体贴图)中取出来,取相反数后写入全局内存,再拷回主机输出。
#include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <helper_functions.h>
#include <helper_cuda.h>

// Tolerance passed to compareData() when checking the kernel output.
#define MIN_EPSILON_ERROR 5e-3f

// Cubemap texture reference sampled by transformKernel(); main() binds it to a CUDA array.
// NOTE(review): texture references are a legacy API; newer toolkits expect texture
// objects (cudaTextureObject_t) instead -- confirm the target toolkit version.
texture<float, cudaTextureTypeCubemap> tex;

/**
 * Reads one texel per cubemap face through the texture unit, negates it and
 * stores the result in global memory.
 *
 * Each thread handles the texel (x, y) derived from its 2D block/thread index
 * and loops over the six faces, writing to
 * g_odata[face * width * width + y * width + x].
 *
 * @param g_odata  Output buffer; must hold at least 6 * width * width floats.
 * @param width    Edge length of one cubemap face, in texels.
 */
__global__ void transformKernel(float *g_odata, int width)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads outside the face (grid larger than the data) have nothing to do.
    if (x >= (unsigned int)width || y >= (unsigned int)width)
        return;

    // Map the integer texel index [0, width-1] to the texel centre in
    // [-1 + 1/width, 1 - 1/width] (step 2/width). The 0.5f offset addresses the
    // centre of the texel, so linear filtering returns the stored value itself.
    const float u = ((x + 0.5f) / (float)width) * 2.f - 1.f;
    const float v = ((y + 0.5f) / (float)width) * 2.f - 1.f;

    for (unsigned int face = 0; face < 6; face++)
    {
        // Direction vector selecting texel (x, y) on the current face.
        float cx = 0.0f, cy = 0.0f, cz = 0.0f;

        switch (face)
        {
        case 0: // positive X face
            cx = 1.0f;
            cy = -v;
            cz = -u;
            break;
        case 1: // negative X face
            cx = -1.0f;
            cy = -v;
            cz = u;
            break;
        case 2: // positive Y face
            cx = u;
            cy = 1.0f;
            cz = v;
            break;
        case 3: // negative Y face
            cx = u;
            cy = -1.0f;
            cz = -v;
            break;
        case 4: // positive Z face
            cx = u;
            cy = -v;
            cz = 1.0f;
            break;
        case 5: // negative Z face
            cx = -u;
            cy = -v;
            cz = -1.0f;
            break;
        }

        // Fetch through the texture, negate, and write to global memory.
        g_odata[face * width * width + y * width + x] = -texCubemap(tex, cx, cy, cz);
    }
}

int main(int argc, char** argv)
{
    // Problem size: a single cubemap made of six width x width faces.
    unsigned int width = 64, num_faces = 6, num_layers = 1;
    unsigned int cubemap_size = width * width * num_faces;
    unsigned int size = cubemap_size * num_layers * sizeof(float);

    float *h_data = (float *)malloc(size);
    float *h_data_ref = (float *)malloc(size); // expected output
    if (h_data == NULL || h_data_ref == NULL)
    {
        fprintf(stderr, "Host allocation of %u bytes failed\n", size);
        free(h_data);
        free(h_data_ref);
        return 1;
    }

    float *d_data = NULL;
    checkCudaErrors(cudaMalloc((void **)&d_data, size));

    // Input is the running index; the reference is its negation (plus the layer index).
    for (unsigned int i = 0; i < cubemap_size * num_layers; i++)
        h_data[i] = (float)i;
    for (unsigned int layer = 0; layer < num_layers; layer++)
    {
        for (unsigned int i = 0; i < cubemap_size; i++)
            h_data_ref[layer * cubemap_size + i] = -h_data[layer * cubemap_size + i] + layer;
    }

    // Only the first width * num_faces * num_layers values are printed (a sample,
    // not the whole buffer), with a line break after every `width` values.
    printf("\n\tInput data.\n\t");
    for (unsigned int i = 0; i < width * num_faces * num_layers; i++)
    {
        printf("%2.1f ", h_data[i]);
        if ((i + 1) % width == 0)
            printf("\n\t");
        if ((i + 1) % (width * width) == 0)
            printf("\n\t");
    }
    printf("\n\tIdeal output data\n\t");
    for (unsigned int i = 0; i < width * num_faces * num_layers; i++)
    {
        printf("%2.1f ", h_data_ref[i]);
        if ((i + 1) % width == 0)
            printf("\n\t");
        if ((i + 1) % (width * width) == 0)
            printf("\n\t");
    }

    // Allocate the cubemap CUDA array (single 32-bit float channel) and upload the input.
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
    cudaArray *cu_3darray;
    checkCudaErrors(cudaMalloc3DArray(&cu_3darray, &channelDesc, make_cudaExtent(width, width, num_faces), cudaArrayCubemap));
    cudaMemcpy3DParms myparms = { 0 };
    myparms.srcPos = make_cudaPos(0, 0, 0);
    myparms.dstPos = make_cudaPos(0, 0, 0);
    myparms.srcPtr = make_cudaPitchedPtr(h_data, width * sizeof(float), width, width);
    myparms.dstArray = cu_3darray;
    myparms.extent = make_cudaExtent(width, width, num_faces);
    myparms.kind = cudaMemcpyHostToDevice;
    checkCudaErrors(cudaMemcpy3D(&myparms));

    // Configure the texture reference and bind it to the array.
    tex.addressMode[0] = cudaAddressModeWrap;
    tex.addressMode[1] = cudaAddressModeWrap;
    tex.filterMode = cudaFilterModeLinear;
    tex.normalized = true;
    checkCudaErrors(cudaBindTextureToArray(tex, cu_3darray, channelDesc));

    // One thread per texel of a face; the kernel itself loops over the faces.
    dim3 dimBlock(8, 8, 1);
    dim3 dimGrid(width / dimBlock.x, width / dimBlock.y, 1);
    printf("\n\tCubemap data of %u * %u * %u: Grid size is %u x %u, each block has 8 x 8 threads.\n", width, width, num_layers, dimGrid.x, dimGrid.y);

    // Warm-up launch so the timed run below excludes one-time start-up cost.
    transformKernel<<<dimGrid, dimBlock>>>(d_data, width);
    getLastCudaError("transformKernel warm-up launch failed");
    checkCudaErrors(cudaDeviceSynchronize());

    // Timed launch, measured with the helper_timer.h stopwatch.
    StopWatchInterface *timer = NULL;
    sdkCreateTimer(&timer);
    sdkStartTimer(&timer);

    transformKernel<<<dimGrid, dimBlock, 0>>>(d_data, width);
    getLastCudaError("transformKernel launch failed");
    checkCudaErrors(cudaDeviceSynchronize());

    sdkStopTimer(&timer);
    const float elapsed_ms = sdkGetTimerValue(&timer);
    printf("\n\tTime: %.3f msec, %.2f Mtexlookups/sec\n", elapsed_ms, (cubemap_size / (elapsed_ms / 1000.0f) / 1e6));
    sdkDeleteTimer(&timer);

    // Copy the result back and verify it.
    memset(h_data, 0, size);
    checkCudaErrors(cudaMemcpy(h_data, d_data, size, cudaMemcpyDeviceToHost));
    if (checkCmdLineFlag(argc, (const char **)argv, "regression"))
        sdkWriteFile<float>("./data/regression.dat", h_data, width * width, 0.0f, false);
    else
        printf("Comparing kernel output to expected data return %d\n", compareData(h_data, h_data_ref, cubemap_size, MIN_EPSILON_ERROR, 0.0f));

    printf("\n\tActual output data\n\t");
    for (unsigned int i = 0; i < width * num_faces * num_layers; i++)
    {
        printf("%2.1f ", h_data[i]);
        if ((i + 1) % width == 0)
            printf("\n\t");
        if ((i + 1) % (width * width) == 0)
            printf("\n\t");
    }

    free(h_data);
    free(h_data_ref);
    checkCudaErrors(cudaFree(d_data));
    checkCudaErrors(cudaFreeArray(cu_3darray));

    getchar(); // keep the console window open until a key is pressed
    return 0;
}
▶ 输出结果
Input data.n 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0 24.0 25.0 26.0 27.0 28.0 29.0 30.0 31.0 32.0 33.0 34.0 35.0 36.0 37.0 38.0 39.0 40.0 41.0 42.0 43.0 44.0 45.0 46.0 47.0 48.0 49.0 50.0 51.0 52.0 53.0 54.0 55.0 56.0 57.0 58.0 59.0 60.0 61.0 62.0 63.0
64.0 65.0 66.0 67.0 68.0 69.0 70.0 71.0 72.0 73.0 74.0 75.0 76.0 77.0 78.0 79.0 80.0 81.0 82.0 83.0 84.0 85.0 86.0 87.0 88.0 89.0 90.0 91.0 92.0 93.0 94.0 95.0 96.0 97.0 98.0 99.0 100.0 101.0 102.0 103.0 104.0 105.0 106.0 107.0 108.0 109.0 110.0 111.0 112.0 113.0 114.0 115.0 116.0 117.0 118.0 119.0 120.0 121.0 122.0 123.0 124.0 125.0 126.0 127.0
128.0 129.0 130.0 131.0 132.0 133.0 134.0 135.0 136.0 137.0 138.0 139.0 140.0 141.0 142.0 143.0 144.0 145.0 146.0 147.0 148.0 149.0 150.0 151.0 152.0 153.0 154.0 155.0 156.0 157.0 158.0 159.0 160.0 161.0 162.0 163.0 164.0 165.0 166.0 167.0 168.0 169.0 170.0 171.0 172.0 173.0 174.0 175.0 176.0 177.0 178.0 179.0 180.0 181.0 182.0 183.0 184.0 185.0 186.0 187.0 188.0 189.0 190.0 191.0
192.0 193.0 194.0 195.0 196.0 197.0 198.0 199.0 200.0 201.0 202.0 203.0 204.0 205.0 206.0 207.0 208.0 209.0 210.0 211.0 212.0 213.0 214.0 215.0 216.0 217.0 218.0 219.0 220.0 221.0 222.0 223.0 224.0 225.0 226.0 227.0 228.0 229.0 230.0 231.0 232.0 233.0 234.0 235.0 236.0 237.0 238.0 239.0 240.0 241.0 242.0 243.0 244.0 245.0 246.0 247.0 248.0 249.0 250.0 251.0 252.0 253.0 254.0 255.0
256.0 257.0 258.0 259.0 260.0 261.0 262.0 263.0 264.0 265.0 266.0 267.0 268.0 269.0 270.0 271.0 272.0 273.0 274.0 275.0 276.0 277.0 278.0 279.0 280.0 281.0 282.0 283.0 284.0 285.0 286.0 287.0 288.0 289.0 290.0 291.0 292.0 293.0 294.0 295.0 296.0 297.0 298.0 299.0 300.0 301.0 302.0 303.0 304.0 305.0 306.0 307.0 308.0 309.0 310.0 311.0 312.0 313.0 314.0 315.0 316.0 317.0 318.0 319.0
320.0 321.0 322.0 323.0 324.0 325.0 326.0 327.0 328.0 329.0 330.0 331.0 332.0 333.0 334.0 335.0 336.0 337.0 338.0 339.0 340.0 341.0 342.0 343.0 344.0 345.0 346.0 347.0 348.0 349.0 350.0 351.0 352.0 353.0 354.0 355.0 356.0 357.0 358.0 359.0 360.0 361.0 362.0 363.0 364.0 365.0 366.0 367.0 368.0 369.0 370.0 371.0 372.0 373.0 374.0 375.0 376.0 377.0 378.0 379.0 380.0 381.0 382.0 383.0 Ideal output data
0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 -25.0 -26.0 -27.0 -28.0 -29.0 -30.0 -31.0 -32.0 -33.0 -34.0 -35.0 -36.0 -37.0 -38.0 -39.0 -40.0 -41.0 -42.0 -43.0 -44.0 -45.0 -46.0 -47.0 -48.0 -49.0 -50.0 -51.0 -52.0 -53.0 -54.0 -55.0 -56.0 -57.0 -58.0 -59.0 -60.0 -61.0 -62.0 -63.0
-64.0 -65.0 -66.0 -67.0 -68.0 -69.0 -70.0 -71.0 -72.0 -73.0 -74.0 -75.0 -76.0 -77.0 -78.0 -79.0 -80.0 -81.0 -82.0 -83.0 -84.0 -85.0 -86.0 -87.0 -88.0 -89.0 -90.0 -91.0 -92.0 -93.0 -94.0 -95.0 -96.0 -97.0 -98.0 -99.0 -100.0 -101.0 -102.0 -103.0 -104.0 -105.0 -106.0 -107.0 -108.0 -109.0 -110.0 -111.0 -112.0 -113.0 -114.0 -115.0 -116.0 -117.0 -118.0 -119.0 -120.0 -121.0 -122.0 -123.0 -124.0 -125.0 -126.0 -127.0
-128.0 -129.0 -130.0 -131.0 -132.0 -133.0 -134.0 -135.0 -136.0 -137.0 -138.0 -139.0 -140.0 -141.0 -142.0 -143.0 -144.0 -145.0 -146.0 -147.0 -148.0 -149.0 -150.0 -151.0 -152.0 -153.0 -154.0 -155.0 -156.0 -157.0 -158.0 -159.0 -160.0 -161.0 -162.0 -163.0 -164.0 -165.0 -166.0 -167.0 -168.0 -169.0 -170.0 -171.0 -172.0 -173.0 -174.0 -175.0 -176.0 -177.0 -178.0 -179.0 -180.0 -181.0 -182.0 -183.0 -184.0 -185.0 -186.0 -187.0 -188.0 -189.0 -190.0 -191.0
-192.0 -193.0 -194.0 -195.0 -196.0 -197.0 -198.0 -199.0 -200.0 -201.0 -202.0 -203.0 -204.0 -205.0 -206.0 -207.0 -208.0 -209.0 -210.0 -211.0 -212.0 -213.0 -214.0 -215.0 -216.0 -217.0 -218.0 -219.0 -220.0 -221.0 -222.0 -223.0 -224.0 -225.0 -226.0 -227.0 -228.0 -229.0 -230.0 -231.0 -232.0 -233.0 -234.0 -235.0 -236.0 -237.0 -238.0 -239.0 -240.0 -241.0 -242.0 -243.0 -244.0 -245.0 -246.0 -247.0 -248.0 -249.0 -250.0 -251.0 -252.0 -253.0 -254.0 -255.0
-256.0 -257.0 -258.0 -259.0 -260.0 -261.0 -262.0 -263.0 -264.0 -265.0 -266.0 -267.0 -268.0 -269.0 -270.0 -271.0 -272.0 -273.0 -274.0 -275.0 -276.0 -277.0 -278.0 -279.0 -280.0 -281.0 -282.0 -283.0 -284.0 -285.0 -286.0 -287.0 -288.0 -289.0 -290.0 -291.0 -292.0 -293.0 -294.0 -295.0 -296.0 -297.0 -298.0 -299.0 -300.0 -301.0 -302.0 -303.0 -304.0 -305.0 -306.0 -307.0 -308.0 -309.0 -310.0 -311.0 -312.0 -313.0 -314.0 -315.0 -316.0 -317.0 -318.0 -319.0
-320.0 -321.0 -322.0 -323.0 -324.0 -325.0 -326.0 -327.0 -328.0 -329.0 -330.0 -331.0 -332.0 -333.0 -334.0 -335.0 -336.0 -337.0 -338.0 -339.0 -340.0 -341.0 -342.0 -343.0 -344.0 -345.0 -346.0 -347.0 -348.0 -349.0 -350.0 -351.0 -352.0 -353.0 -354.0 -355.0 -356.0 -357.0 -358.0 -359.0 -360.0 -361.0 -362.0 -363.0 -364.0 -365.0 -366.0 -367.0 -368.0 -369.0 -370.0 -371.0 -372.0 -373.0 -374.0 -375.0 -376.0 -377.0 -378.0 -379.0 -380.0 -381.0 -382.0 -383.0 Cubemap data of 64 * 64 * 1: Grid size is 8 x 8, each block has 8 x 8 threads. Time: 0.098 msec, 249.50 Mtexlookups/sec
Comparing kernel output to expected data return 1 Actual output data
-0.0 -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 -25.0 -26.0 -27.0 -28.0 -29.0 -30.0 -31.0 -32.0 -33.0 -34.0 -35.0 -36.0 -37.0 -38.0 -39.0 -40.0 -41.0 -42.0 -43.0 -44.0 -45.0 -46.0 -47.0 -48.0 -49.0 -50.0 -51.0 -52.0 -53.0 -54.0 -55.0 -56.0 -57.0 -58.0 -59.0 -60.0 -61.0 -62.0 -63.0
-64.0 -65.0 -66.0 -67.0 -68.0 -69.0 -70.0 -71.0 -72.0 -73.0 -74.0 -75.0 -76.0 -77.0 -78.0 -79.0 -80.0 -81.0 -82.0 -83.0 -84.0 -85.0 -86.0 -87.0 -88.0 -89.0 -90.0 -91.0 -92.0 -93.0 -94.0 -95.0 -96.0 -97.0 -98.0 -99.0 -100.0 -101.0 -102.0 -103.0 -104.0 -105.0 -106.0 -107.0 -108.0 -109.0 -110.0 -111.0 -112.0 -113.0 -114.0 -115.0 -116.0 -117.0 -118.0 -119.0 -120.0 -121.0 -122.0 -123.0 -124.0 -125.0 -126.0 -127.0
-128.0 -129.0 -130.0 -131.0 -132.0 -133.0 -134.0 -135.0 -136.0 -137.0 -138.0 -139.0 -140.0 -141.0 -142.0 -143.0 -144.0 -145.0 -146.0 -147.0 -148.0 -149.0 -150.0 -151.0 -152.0 -153.0 -154.0 -155.0 -156.0 -157.0 -158.0 -159.0 -160.0 -161.0 -162.0 -163.0 -164.0 -165.0 -166.0 -167.0 -168.0 -169.0 -170.0 -171.0 -172.0 -173.0 -174.0 -175.0 -176.0 -177.0 -178.0 -179.0 -180.0 -181.0 -182.0 -183.0 -184.0 -185.0 -186.0 -187.0 -188.0 -189.0 -190.0 -191.0
-192.0 -193.0 -194.0 -195.0 -196.0 -197.0 -198.0 -199.0 -200.0 -201.0 -202.0 -203.0 -204.0 -205.0 -206.0 -207.0 -208.0 -209.0 -210.0 -211.0 -212.0 -213.0 -214.0 -215.0 -216.0 -217.0 -218.0 -219.0 -220.0 -221.0 -222.0 -223.0 -224.0 -225.0 -226.0 -227.0 -228.0 -229.0 -230.0 -231.0 -232.0 -233.0 -234.0 -235.0 -236.0 -237.0 -238.0 -239.0 -240.0 -241.0 -242.0 -243.0 -244.0 -245.0 -246.0 -247.0 -248.0 -249.0 -250.0 -251.0 -252.0 -253.0 -254.0 -255.0
-256.0 -257.0 -258.0 -259.0 -260.0 -261.0 -262.0 -263.0 -264.0 -265.0 -266.0 -267.0 -268.0 -269.0 -270.0 -271.0 -272.0 -273.0 -274.0 -275.0 -276.0 -277.0 -278.0 -279.0 -280.0 -281.0 -282.0 -283.0 -284.0 -285.0 -286.0 -287.0 -288.0 -289.0 -290.0 -291.0 -292.0 -293.0 -294.0 -295.0 -296.0 -297.0 -298.0 -299.0 -300.0 -301.0 -302.0 -303.0 -304.0 -305.0 -306.0 -307.0 -308.0 -309.0 -310.0 -311.0 -312.0 -313.0 -314.0 -315.0 -316.0 -317.0 -318.0 -319.0
-320.0 -321.0 -322.0 -323.0 -324.0 -325.0 -326.0 -327.0 -328.0 -329.0 -330.0 -331.0 -332.0 -333.0 -334.0 -335.0 -336.0 -337.0 -338.0 -339.0 -340.0 -341.0 -342.0 -343.0 -344.0 -345.0 -346.0 -347.0 -348.0 -349.0 -350.0 -351.0 -352.0 -353.0 -354.0 -355.0 -356.0 -357.0 -358.0 -359.0 -360.0 -361.0 -362.0 -363.0 -364.0 -365.0 -366.0 -367.0 -368.0 -369.0 -370.0 -371.0 -372.0 -373.0 -374.0 -375.0 -376.0 -377.0 -378.0 -379.0 -380.0 -381.0 -382.0 -383.0
▶ 涨姿势
● helper_time.h 中新定义的计时函数
// Key steps: create the stopwatch, start it, stop it after the measured work,
// read the measured time with sdkGetTimerValue(), then delete the stopwatch.
StopWatchInterface *timer = NULL;
sdkCreateTimer(&timer);
sdkStartTimer(&timer); sdkStopTimer(&timer);
sdkGetTimerValue(&timer);
sdkDeleteTimer(&timer); // helper_time.h
// Abstract stopwatch interface; the sdk*Timer() helpers below operate on it
// through a pointer-to-pointer handle.
class StopWatchInterface
{
    public:
        StopWatchInterface() {};
        virtual ~StopWatchInterface() {};

    public:
        virtual void start() = 0;
        virtual void stop() = 0;
        virtual void reset() = 0;
        virtual float getTime() = 0;        // read the elapsed time (the timer keeps running)
        virtual float getAverageTime() = 0;
};

// Allocates the platform-specific stopwatch and stores it in *timer_interface.
// Returns true when the stored pointer is non-NULL.
inline bool sdkCreateTimer(StopWatchInterface **timer_interface)
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    *timer_interface = (StopWatchInterface *)new StopWatchWin();
#else
    *timer_interface = (StopWatchInterface *)new StopWatchLinux();
#endif
    return *timer_interface != NULL;
}

// Deletes the stopwatch (if any) and resets the handle to NULL. Always returns true.
inline bool sdkDeleteTimer(StopWatchInterface **timer_interface)
{
    if (*timer_interface)
    {
        delete *timer_interface;
        *timer_interface = NULL;
    }
    return true;
}

// Calls start() on the stopwatch; a NULL handle is ignored. Always returns true.
inline bool sdkStartTimer(StopWatchInterface **timer_interface)
{
    if (*timer_interface)
        (*timer_interface)->start();
    return true;
}

// Calls stop() on the stopwatch; a NULL handle is ignored. Always returns true.
inline bool sdkStopTimer(StopWatchInterface **timer_interface)
{
    if (*timer_interface)
        (*timer_interface)->stop();
    return true;
}

// Returns getTime() of the stopwatch, or 0.0f when the handle is NULL.
inline float sdkGetTimerValue(StopWatchInterface **timer_interface)
{
    if (*timer_interface)
        return (*timer_interface)->getTime();
    else
        return 0.0f;
}
● 立方体纹理贴图。六个面分别为 x = 1 正面、x = -1 负面、y = 1 正面、y = -1 负面、z = 1 正面、z = -1 负面,对应前、后、右、左、上、下。按照线性下标 [0, width * width * 6 - 1] 顺序访问时,各元素存储位置如下图所示(以 width == 2 为例)。
0_Simple__simpleCubemapTexture的更多相关文章
随机推荐
- LeetCode Single Number I II Python
Single Number Given an array of integers, every element appears twice except for one. Find that sing ...
- 协程、gevent实现异步io、进程、线程、协程对比
异步io的说白了就是遇到io操作的时候,就停下来去做别的事情.io分网络io和磁盘io,网络io比如说打开一个网站获取数据,下载一首歌等等,磁盘io就是把数据存到一个文件里面,写到磁盘上. 从网站上获 ...
- FastAdmin 教程草稿大纲
FastAdmin 教程草稿大纲 计划 FastAdmin 教程大纲 FastAdmin 环境搭建 phpStudy 2018 安装 一键 CRUD 教程 环境变量配置 环境安装 命令行安装 列出所需 ...
- FastAdmin 绑定的模块禁用路由
为了安全,将后台入口隐藏. 这里出一个问题,因为装了 CMS 插件,使用入口登录后显示的是 CMS 的首页. 这个问题已经修复. https://gitee.com/karson/fastadmin/ ...
- zz 史上最全--各银行借记卡的年费、小额管理费、转账费等!
史上最全--各银行借记卡的年费.小额管理费.转账费等! 发布时间:2015-01-14 17:28:10 还在迷茫借记卡自费的菜主儿们~菜菜特别整理关于各银行借记卡.存折账户等的年费.小额管理费.转账 ...
- Spring boot 使用的注解有哪些?
Spring boot 使用的注解有哪些? 注解 作用 @SpringBootApplication 等价于 @Configuration + @EnableAutoConfiguration + @ ...
- Openwrt TTL线刷
1.接通串口,网线: 2.打开串口软件SecureCRT: 3.按复位键,不断地出现信息: 4.2秒内按任意键停下来,出现uboot> 5.输入httpd 6.打开网页,输入ip 7.开始更新, ...
- linux下的pd
东西叫OpenSystemArchitect 地址在http://www.codebydesign.com/SystemArchitect 运行之前,先sudo apt-get install lib ...
- ASP.NET Web Pages:WebMail 帮助器
ylbtech-.Net-ASP.NET Web Pages:WebMail 帮助器 1.返回顶部 1. ASP.NET Web Pages - WebMail 帮助器 WebMail 帮助器 - 众 ...
- shell_sctipts: 删除mysql备份到最后7日
目前,mysql的备份文件,经过一周左右清理,手动清理会比较费事,所以写了一个简单脚本来实现. 前提介绍: mysql备份文件放在/usr/bak/sql里面,sql文件的备份名称格式为: mysql ...