使用 CUDA 内置结构 cudaFuncAttributes 来观察核函数的共享内存、寄存器数量。 ▶ 源代码: /*cppOverload_kernel.cuh*/ __global__ void simple_kernel(const int *pIn, int *pOut, int a) { __shared__ int sData[THREAD_N]; int tid = threadIdx.x + blockDim.x*blockIdx.x; sData[threadIdx.x] = p…