GPGPU OpenCL 获取kernel函数编译信息

　　使用OpenCL编程时，kernel写成一个单独的文件或者将文件内容保存在一个string中。可以使用clBuildProgram对kernel进行编译链接(compiles & links)，如果失败，可以使用clGetProgramBuildInfo获取OpenCL编译器对kernel的编译信息。

1.clBuildProgram

　cl_int clBuildProgram (

　　　　cl_program program,　　//program
　　　　cl_uint num_devices,　　//the number of device
　　　　const cl_device_id *device_list, 　　//devices id
　　　　const char *options,　　//the option of compiler
　　　　void (CL_CALLBACK *pfn_notify)(cl_program program, void *user_data),　　//the callback function
　　　　void *user_data　　//the data of callback function
　　)

2.clGetProgramBuildInfo

　　cl_int clGetProgramBuildInfo (

　　　　cl_program program, 　　//program
　　　　cl_device_id device,　　//the id of device
　　　　cl_program_build_info param_name,
　　　　size_t param_value_size,
　　　　void *param_value,
　　　　size_t *param_value_size_ret
　　)

3.代码实例(获取编译器对kernel的编译信息)

3.1 kernel(build_info_kernel.cl)

 /* Valid reference kernel: stores a[0] + b[0] into c[0]. */
 __kernel void good(__global float *a, __global float *b, __global float *c) {
     /* Read both operands first, then write the single result element. */
     const float lhs = a[0];
     const float rhs = b[0];
     c[0] = lhs + rhs;
 }

 /*
  * NOTE(review): this kernel looks deliberately ill-formed so that building the
  * program fails and a build log is produced -- confirm with the author.
  * It reuses the name `good` of the kernel defined just above it and contains the
  * two questionable statements annotated below; the final statement is the same
  * single-element addition as in the first kernel.
  */
 __kernel void good(__global float *a,

                    __global float *b,

                    __global float *c) {

     /* The initializer value is missing (possibly lost when the listing was extracted -- TODO confirm). */
     __local int var=;

     /* get_local_sze is not declared anywhere in this file; presumably a misspelling of the
        built-in get_local_size, and it is called here without an argument -- TODO confirm. */
     int size=get_local_sze();

     /* Writes the sum of the first elements of a and b into the first element of c. */
     *c = *a + *b;

 }

3.2 tool.h

 #ifndef TOOLH

 #define TOOLH

 #include <CL/cl.h>

 #include <string.h>

 #include <stdio.h>

 #include <stdlib.h>

 #include <iostream>

 #include <string>

 #include <fstream>

 using namespace std;

 /**
  * Reads the kernel source file named by filename and stores its contents in s.
  * Returns an int status code.
  * NOTE(review): the exact status values are defined by the implementation in
  * tool.cpp -- confirm them there before relying on specific numbers.
  */

 int convertToString(const char *filename, std::string& s);

 /**
  * Queries the available OpenCL platforms and stores the chosen one in platform.
  * The implementation resets platform to NULL before querying, so a NULL value
  * afterwards means that no platform was selected.
  */

 int getPlatform(cl_platform_id &platform);

 /**
  * Queries platform for GPU devices (CL_DEVICE_TYPE_GPU) and returns a
  * malloc-allocated array holding their ids, or NULL when no GPU device is
  * reported. The caller releases the array with free().
  */

 cl_device_id *getCl_device_id(cl_platform_id &platform);

 /**
  * Fetches the compiler build log (CL_PROGRAM_BUILD_LOG) of program for device
  * and prints it; intended to be called after building the program has failed.
  */

 int getProgramBuildInfo(cl_program program,cl_device_id device);

 #endif

　 tool.cpp

 #include <CL/cl.h>

 #include <string.h>

 #include <stdio.h>

 #include <stdlib.h>

 #include <iostream>

 #include <string>

 #include <fstream>

 #include "tool.h"

 using namespace std;

 /**
  * Reads the whole file `filename` into `s`.
  *
  * The file is opened in binary mode so that its bytes reach the OpenCL compiler
  * unchanged. `s` is only modified when the complete file has been read.
  *
  * @param filename path of the kernel source file (a null pointer is reported as an open failure)
  * @param s        receives the file contents on success
  * @return 0 on success, -1 if the file cannot be opened, measured or read
  */
 int convertToString(const char *filename, std::string& s)
 {
     std::ifstream f;
     if (filename != NULL)
         f.open(filename, std::ios::in | std::ios::binary);

     if (!f.is_open())
     {
         std::cout<<"Error: failed to open file\n:"<<(filename != NULL ? filename : "(null)")<<std::endl;
         return -1;
     }

     // Measure the file by seeking to its end; tellg() reports -1 when that fails.
     f.seekg(0, std::ios::end);
     const std::streamoff fileSize = f.tellg();
     if (fileSize < 0)
     {
         std::cout<<"Error: failed to read file\n:"<<filename<<std::endl;
         return -1;
     }
     f.seekg(0, std::ios::beg);

     // Read into a scratch string so the caller's string stays intact on a short read.
     std::string contents(static_cast<std::string::size_type>(fileSize), '\0');
     if (fileSize > 0 && !f.read(&contents[0], static_cast<std::streamsize>(fileSize)))
     {
         std::cout<<"Error: failed to read file\n:"<<filename<<std::endl;
         return -1;
     }

     s.swap(contents);
     return 0;
 }

 /**
  * Selects the first OpenCL platform reported by the runtime.
  *
  * @param platform receives the chosen platform; it is reset to NULL first and
  *                 stays NULL whenever the function fails
  * @return 0 on success, -1 if a platform query fails or no platform is available
  */
 int getPlatform(cl_platform_id &platform)
 {
     platform = NULL; // the chosen platform

     // First query only the number of platforms (no output array supplied).
     cl_uint numPlatforms = 0;
     cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
     if (status != CL_SUCCESS)
     {
         std::cout<<"Error: Getting platforms!"<<std::endl;
         return -1;
     }

     if (numPlatforms == 0)
         return -1;

     // For clarity only the first available platform is used, so a single-entry
     // query is sufficient and no temporary array has to be allocated.
     cl_platform_id first = NULL;
     status = clGetPlatformIDs(1, &first, NULL);
     if (status != CL_SUCCESS || first == NULL)
     {
         std::cout<<"Error: Getting platforms!"<<std::endl;
         return -1;
     }

     platform = first;
     return 0;
 }

 /**
  * Step 2: queries the platform for its GPU devices.
  *
  * @param platform platform to query
  * @return a malloc-allocated array holding the ids of all GPU devices of the
  *         platform (the caller releases it with free()), or NULL when no GPU
  *         device is available, a query fails or the allocation fails
  */
 cl_device_id *getCl_device_id(cl_platform_id &platform)
 {
     // First ask only for the number of GPU devices.
     cl_uint numDevices = 0;
     cl_int status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
     if (status != CL_SUCCESS || numDevices == 0)
         return NULL;

     cl_device_id *devices = static_cast<cl_device_id*>(malloc(numDevices * sizeof(cl_device_id)));
     if (devices == NULL)
         return NULL;

     // Second call fills the array with the device ids.
     status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
     if (status != CL_SUCCESS)
     {
         // Never hand a partially filled array back to the caller.
         free(devices);
         return NULL;
     }

     return devices;
 }

 /**
  * Prints the compiler's build log for a program, typically after building it failed.
  *
  * @param program program object whose build log is requested
  * @param device  device the program was built for
  * @return 0 when the log was retrieved and printed, -1 if a query or the
  *         allocation of the log buffer fails
  */
 int getProgramBuildInfo(cl_program program,cl_device_id device)
 {
     // First call: ask only for the size of the log.
     size_t log_size = 0;
     cl_int status = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
             0, NULL, &log_size);
     if (status != CL_SUCCESS)
     {
         printf("Error: Getting build log size!\n");
         return -1;
     }

     // One extra byte guarantees a terminator even if the runtime does not write one.
     char *program_log = static_cast<char*>(malloc(log_size + 1));
     if (program_log == NULL)
     {
         printf("Error: Allocating build log buffer!\n");
         return -1;
     }
     program_log[log_size] = '\0';

     // Second call: fetch the log text itself.
     status = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
             log_size + 1, program_log, NULL);
     if (status != CL_SUCCESS)
     {
         printf("Error: Getting build log!\n");
         free(program_log);
         return -1;
     }

     printf("%s\n", program_log);
     free(program_log);
     return 0;
 }

3.3 buildInfo.cpp

 #include "tool.h"

 #include <string.h>

 #include <stdio.h>

 #include <stdlib.h>

 #include <iostream>

 #include <string>

 #include <fstream>

 using namespace std;

 /**
  * Build-notification callback: prints the user data, interpreted as a
  * C string, followed by a newline. The program handle is not used.
  */
 void CL_CALLBACK checkData(cl_program platform, void* data){
     const char *message = static_cast<char*>(data);
     printf("%s\n", message);
 }

 int main(int argc, char* argv[])

 {

     cl_int    status;

     /** Getting platforms and choose an available one(first).*/

     cl_platform_id platform;

     getPlatform(platform);

     /**Query the platform and choose the GPU device.*/

     cl_device_id *devices=getCl_device_id(platform);

     /**Create context use the frist device.*/

     cl_context context = clCreateContext(NULL,, devices,NULL,NULL,NULL);

     /**Create program object */

     const char *filename = "build_info_kernel.cl";

     string sourceStr;

     status = convertToString(filename, sourceStr);

     const char *source = sourceStr.c_str();

     size_t sourceSize[] = {strlen(source)};

     cl_program program = clCreateProgramWithSource(context, , &source, sourceSize, NULL);

     /**Build program. */

     //status=clBuildProgram(program, 1,devices,NULL,checkData,"sdf");

     status=clBuildProgram(program, ,devices,NULL,NULL,NULL);

     if(status < )    //get the build info

         getProgramBuildInfo(program ,devices[]);

     else

         printf("Build Success\n");

     status = clReleaseProgram(program);    //Release the program object.

     status = clReleaseContext(context);//Release context.

     free(devices);

     getchar();

     return ;

 }

对kernel的编译结果：

GPGPU OpenCL 获取kernel函数编译信息的更多相关文章

GPGPU OpenCL 获取设备信息
在使用OpenCL编程中,需要对GPU设备的底层理解,这样才能更好的进行代码优化. 比如计算单元CU数量,每个CU的执行单元PE数量,每个CU中的共享内存大小等等.只有了解了这些才能更好的使用共享内存 ...
Linux Kernel ‘mp_get_count()’函数本地信息泄露漏洞
漏洞名称: Linux Kernel ‘mp_get_count()’函数本地信息泄露漏洞 CNNVD编号: CNNVD-201311-054 发布时间: 2013-11-06 更新时间: 2013- ...
python装饰器内获取函数有用信息方法
装饰器内获取函数有用信息方法 .__doc__用于得到函数注释信息 .__name_用于得到函数名在函数引用装饰器的时候,函数名会变为装饰器内部执行该函数的名字,所有在直接执行函数名加.__doc_ ...
QMetaMethod 获取成员函数的元信息
在上一篇中,我们将的是QMetaEnum类,它可以获得一个类中由Q_ENUM宏或Q_FLAG宏声明的枚举类型的元信息.同样,QMetaMethod类是用来获取成员方法的元信息的一个类.通过该类,我们可 ...
C/C++通过WMI和系统API函数获取获取系统硬件配置信息
转载:http://www.cnblogs.com/renyuan/archive/2012/12/29/2838716.html 转载:http://blog.csdn.net/jhqin/arti ...
【并行计算-CUDA开发】GPGPU OpenCL/CUDA 高性能编程的10大注意事项
GPGPU OpenCL/CUDA 高性能编程的10大注意事项 1.展开循环如果提前知道了循环的次数,可以进行循环展开,这样省去了循环条件的比较次数.但是同时也不能使得kernel代码太大. 循环展 ...
GPGPU OpenCL/CUDA 高性能编程的10大注意事项
转载自:http://hc.csdn.net/contents/content_details?type=1&id=341 1.展开循环如果提前知道了循环的次数,可以进行循环展开,这样省去了 ...
kernel(一)编译体验
目录打补丁配置总结配置方式配置体验配置详解 Makefile解析子目录的Makefile 架构下面的Makefile 顶层Makefile Make解析编译链接链接脚本烧写内核 ...
make V=1 查看完整的gcc编译信息
Linux内核make命令选项 2012年5月28日lenky发表评论阅读评论6,289 次浏览升级Linux内核的操作已经变得很简单,基本的几个命令即可搞定:make menuconfig.m ...

随机推荐

mysql 存储过程详解
MySQL 存储过程是从 MySQL 5.0 开始增加的新功能.存储过程的优点有一箩筐.不过最主要的还是执行效率和SQL 代码封装.特别是 SQL 代码封装功能,如果没有存储过程,在外部程序访问数据库 ...
hdu-5023线段树刷题
title: hdu-5023线段树刷题 date: 2018-10-18 13:32:13 tags: acm 刷题 categories: ACM-线段树概述这道题和上次做的那道染色问题一样, ...
ubuntu下安装和破解navicat的方法
ubuntu下安装和破解navicat的方法之前我也在苦苦搜寻ubuntu完美破解navicat的方法,但是大家都说是删除掉~/.Navicat,就可以续用,的确是这样,但是很麻烦. 于是我找到了一 ...
理解事件(Event)
Overview 在前几章,我们已经对委托有了一个完整的了解了,本章将会对事件进行一下介绍: 相对于委托,事件再是我们更加频繁的接触的,比如鼠标的click 事件,键盘的 keydown 事件等等. ...
安装部署VMware vSphere 5.5文档 (6-6) 集群和vMotion
部署VMware vSphere 5.5 实施文档 ########################################################################## ...
【转】高效率编辑器VIM
最近实习的时候需要在服务器上做Debug,不得不用到vim的相关操作.以前对vim这种被码农无数赞扬的神器望而却步,但今天试了之后感觉还是不错的.以后争取少用鼠标,少用insert模式. 这是从网上看 ...
type与instance区别
class Foo(object): pass class Bar(Foo): pass obj = Bar() # isinstance用于判断,对象是否是指定类或其派生类的实例 print(isi ...
sgu 176 上下界网络流最小可行流带输出方案
算法步骤: 1. 先将原图像最大可行流那样变换,唯一不同的是不加dst->src那条边来将它变成无源无汇的网络流图.直接跑一边超级源到超级汇的最大流. 2. 加上刚才没有加上的那条边p 3. 再 ...
PAT甲级1131. Subway Map
PAT甲级1131. Subway Map 题意: 在大城市,地铁系统对访客总是看起来很复杂.给你一些感觉,下图显示了北京地铁的地图.现在你应该帮助人们掌握你的电脑技能!鉴于您的用户的起始位置,您的任 ...
Go语言Web框架gwk介绍 (二)
HttpResult 凡是实现了HttpResult接口的对象,都可以作为gwk返回Web客户端的内容.HttpResult接口定义非常简单,只有一个方法: type HttpResult inter ...

GPGPU OpenCL 获取kernel函数编译信息

GPGPU OpenCL 获取kernel函数编译信息的更多相关文章

随机推荐

热门专题