由于dxva2解码得到的数据不宜copy回内存交给CPU处理,最好的办法是在GPU上直接进行处理。D3D的像素着色器能够直接对像素进行操作,实现点运算极其简单方便,做简单的卷积运算效果也非常好。但D3D9的限制也很多,对于过于复杂的图像处理就显得有些力不从心。

1.点运算

点运算用HLSL非常容易实现,几乎是公式怎么写,代码就怎么写。以RGB转灰度图显示为例:

// Point-operation pixel shader: converts the sampled RGB texel to a gray
// level and applies one of three gray-level transforms selected by iFlag.
// The result is written to R, G and B with alpha forced to 1.
texture Tex0 ;

// Transform selector: 0 = linear, 1 = logarithmic, 2 = power.
// Any other value leaves s at 0, i.e. the output is black.
int iFlag = 0 ;
float aValue = 0.0 ;   // scale factor used by all three transforms
float bValue = 0.0 ;   // linear: additive offset; power: exponent

sampler2D YTex =
sampler_state
{
    Texture = <Tex0> ;
    MipFilter = LINEAR ;
    MinFilter = LINEAR ;
    MagFilter = LINEAR ;

    AddressU = CLAMP ;
    AddressV = CLAMP ;
};

struct PS_INPUT
{
    float2 uvCoords0 : TEXCOORD0 ;
};

float4 Main( PS_INPUT input ) : COLOR0
{
    // Fetch the texel once; the weights below are the ones the original
    // applied to the .r, .g and .b channels respectively.
    float3 rgb = tex2D( YTex, input.uvCoords0 ).rgb ;

    // RGB to gray. (Original author's note: not sure this is exactly how it
    // should be displayed; assuming so for now.)
    float gray = dot( rgb, float3( 0.299, 0.587, 0.114 ) ) ;

    float s = 0 ;
    if(iFlag == 0)
    {
        // Linear transform.
        // NOTE(review): the divisor was lost in the published listing and is
        // reconstructed as 255 (offset given on a 0-255 scale) - confirm.
        s = aValue * gray + bValue / 255 ;
    }
    else if(iFlag == 1)
    {
        // Logarithmic transform; the +1 keeps log() defined at gray == 0.
        s = aValue * log(1 + gray) ;
    }
    else if(iFlag == 2)
    {
        // Power transform; abs() avoids pow() on a negative base.
        s = aValue * pow(abs(gray), bValue) ;
    }

    float4 outColor ;
    outColor.r = s ;
    outColor.g = s ;
    outColor.b = s ;
    outColor.a = 1.0 ;

    return outColor ;
}

点运算如此简单是因为GPU是并行运算的,我个人认为可以看成是每一个像素点(BGRA)对应一个线程,这大概就是OpenCL中所谓的数据并行。这是一个非常简单的程序,指令数少,程序结构也很简单,shader 的版本用2.0就可以轻松编过。

2.卷积运算举例

指令数较多的情况2.0版本的shader就搞不定了,上3.0版本可以做一些简单的卷积运算。以均值滤波(对邻域内的像素取平均)为例:

// Effect file: box (mean) filter over a ksize x ksize window, applied only
// inside the rectangle (fLeft, fTop) - (fRight, fBottom) given in texture
// coordinates. Pixels outside the rectangle, or any unsupported ksize, are
// passed through unchanged.
texture Tex0 ;

matrix WorldMatrix;
matrix ViewMatrix;
matrix ProjMatrix;

sampler2D YTex =
sampler_state
{
    Texture = <Tex0> ;
    MipFilter = LINEAR ;
    MinFilter = LINEAR ;
    MagFilter = LINEAR ;

    AddressU = CLAMP ;
    AddressV = CLAMP ;
};

struct VS_INPUT
{
    float4 pos : POSITION;
    float4 color : COLOR0;
    float2 tex : TEXCOORD0;
};

struct VS_OUTPUT
{
    float4 pos : POSITION;
    float4 color : COLOR0;
    float2 tex : TEXCOORD0;
};

// Size of the source texture in texels; used to derive the one-texel step.
float2 g_v4ScreenSize;

// Kernel edge length. The pixel shader only filters for odd values in
// [3, 11]; anything else returns the unfiltered texel.
// NOTE(review): the default value was lost in the published listing and is
// reconstructed as 3 (smallest supported kernel) - confirm.
int ksize = 3;

// Filtered region in texture coordinates. With the -1 defaults no pixel
// satisfies the region test, so nothing is filtered until the host sets them.
float fLeft = -1.0f ;
float fTop = -1.0f ;
float fRight = -1.0f ;
float fBottom = -1.0f ;

//--------------------------------- BurTechnique --------------------------------------

// Vertex shader: transforms the position by World * View * Proj and passes
// the texture coordinate and colour through unchanged.
VS_OUTPUT MainVS_Screen( VS_INPUT In )
{
    VS_OUTPUT Out = ( VS_OUTPUT )0;

    float4x4 matWorldView = mul(WorldMatrix,ViewMatrix);
    float4x4 matProject = mul(matWorldView,ProjMatrix);
    Out.pos = mul(In.pos,matProject);
    Out.tex = In.tex;
    Out.color = In.color;
    return Out;
}

// One tap of the window: texel at integer offset (ix, iy) from the window
// origin fX0Y0. Relies on fX0Y0, x_off and y_off being in scope at the use
// site. Kept as a macro so the taps stay fully unrolled at compile time.
#define BOX_TAP(ix, iy) tex2D( YTex , fX0Y0 + float2(x_off * (ix), y_off * (iy)) ).rgb

// Pixel shader: averages the ksize x ksize window whose top-left corner is
// fX0Y0. Only In.tex is read from the input structure.
float4 MainPS_Screen( VS_INPUT In ) : COLOR0
{
    float4 outColor = tex2D( YTex, In.tex ).rgba ;

    // Reject non-positive and even kernel sizes.
    if(ksize <= 0 || ksize % 2 == 0)
    {
        return outColor ;
    }

    // Only 3x3 .. 11x11 are unrolled below.
    if( ksize > 11 || ksize < 3 )
    {
        return outColor ;
    }

    // Leave pixels outside the selected rectangle untouched.
    if(!(In.tex.x < fRight && In.tex.y < fBottom && In.tex.x > fLeft && In.tex.y > fTop))
    {
        return outColor ;
    }

    // Texture size
    float2 TexSize = float2( g_v4ScreenSize.x , g_v4ScreenSize.y );

    float x_off = 1.0f / TexSize.x;
    float y_off = 1.0f / TexSize.y;

    // Top-left corner of the window.
    // NOTE(review): x_off * ksize / 2 is evaluated in floating point, so for
    // ksize == 3 the origin sits 1.5 texels (not 1) before In.tex. Whether
    // that is intended depends on how the host maps texels to pixels -
    // confirm before changing.
    float2 fX0Y0 = In.tex - float2(x_off * ksize / 2, y_off * ksize / 2) ;

    float3 sum = {0.0f, 0.0f, 0.0f} ;

    // Each branch adds only the taps that the previous, smaller window did
    // not cover: two new rows across the full width, then two new columns
    // over the rows above them.

    if(ksize >= 3)
    {
        // 3x3 core: 9 taps.
        sum += BOX_TAP(0,0) + BOX_TAP(1,0) + BOX_TAP(2,0);
        sum += BOX_TAP(0,1) + BOX_TAP(1,1) + BOX_TAP(2,1);
        sum += BOX_TAP(0,2) + BOX_TAP(1,2) + BOX_TAP(2,2);
    }

    if(ksize >= 5)
    {
        // Rows 3-4, columns 0-4: 10 taps.
        sum += BOX_TAP(0,3) + BOX_TAP(1,3) + BOX_TAP(2,3) + BOX_TAP(3,3) + BOX_TAP(4,3);
        sum += BOX_TAP(0,4) + BOX_TAP(1,4) + BOX_TAP(2,4) + BOX_TAP(3,4) + BOX_TAP(4,4);

        // Columns 3-4, rows 0-2: 6 taps.
        sum += BOX_TAP(3,0) + BOX_TAP(3,1) + BOX_TAP(3,2);
        sum += BOX_TAP(4,0) + BOX_TAP(4,1) + BOX_TAP(4,2);
    }

    if(ksize >= 7)
    {
        // Rows 5-6, columns 0-6: 14 taps.
        sum += BOX_TAP(0,5) + BOX_TAP(1,5) + BOX_TAP(2,5) + BOX_TAP(3,5)
             + BOX_TAP(4,5) + BOX_TAP(5,5) + BOX_TAP(6,5);
        sum += BOX_TAP(0,6) + BOX_TAP(1,6) + BOX_TAP(2,6) + BOX_TAP(3,6)
             + BOX_TAP(4,6) + BOX_TAP(5,6) + BOX_TAP(6,6);

        // Column 5, rows 0-4: 5 taps.
        sum += BOX_TAP(5,0) + BOX_TAP(5,1) + BOX_TAP(5,2) + BOX_TAP(5,3) + BOX_TAP(5,4);

        // Column 6, rows 0-4: 5 taps.
        sum += BOX_TAP(6,0) + BOX_TAP(6,1) + BOX_TAP(6,2) + BOX_TAP(6,3) + BOX_TAP(6,4);
    }

    if(ksize >= 9)
    {
        // Row 7, columns 0-8: 9 taps.
        sum += BOX_TAP(0,7) + BOX_TAP(1,7) + BOX_TAP(2,7) + BOX_TAP(3,7) + BOX_TAP(4,7)
             + BOX_TAP(5,7) + BOX_TAP(6,7) + BOX_TAP(7,7) + BOX_TAP(8,7);

        // Row 8, columns 0-8: 9 taps.
        sum += BOX_TAP(0,8) + BOX_TAP(1,8) + BOX_TAP(2,8) + BOX_TAP(3,8) + BOX_TAP(4,8)
             + BOX_TAP(5,8) + BOX_TAP(6,8) + BOX_TAP(7,8) + BOX_TAP(8,8);

        // Column 7, rows 0-6: 7 taps.
        sum += BOX_TAP(7,0) + BOX_TAP(7,1) + BOX_TAP(7,2) + BOX_TAP(7,3)
             + BOX_TAP(7,4) + BOX_TAP(7,5) + BOX_TAP(7,6);

        // Column 8, rows 0-6: 7 taps.
        sum += BOX_TAP(8,0) + BOX_TAP(8,1) + BOX_TAP(8,2) + BOX_TAP(8,3)
             + BOX_TAP(8,4) + BOX_TAP(8,5) + BOX_TAP(8,6);
    }

    if(ksize >= 11)
    {
        // Row 9, columns 0-10: 11 taps.
        sum += BOX_TAP(0,9) + BOX_TAP(1,9) + BOX_TAP(2,9) + BOX_TAP(3,9)
             + BOX_TAP(4,9) + BOX_TAP(5,9) + BOX_TAP(6,9) + BOX_TAP(7,9)
             + BOX_TAP(8,9) + BOX_TAP(9,9) + BOX_TAP(10,9);

        // Row 10, columns 0-10: 11 taps.
        sum += BOX_TAP(0,10) + BOX_TAP(1,10) + BOX_TAP(2,10) + BOX_TAP(3,10)
             + BOX_TAP(4,10) + BOX_TAP(5,10) + BOX_TAP(6,10) + BOX_TAP(7,10)
             + BOX_TAP(8,10) + BOX_TAP(9,10) + BOX_TAP(10,10);

        // Column 9, rows 0-8: 9 taps.
        sum += BOX_TAP(9,0) + BOX_TAP(9,1) + BOX_TAP(9,2) + BOX_TAP(9,3) + BOX_TAP(9,4)
             + BOX_TAP(9,5) + BOX_TAP(9,6) + BOX_TAP(9,7) + BOX_TAP(9,8);

        // Column 10, rows 0-8: 9 taps.
        sum += BOX_TAP(10,0) + BOX_TAP(10,1) + BOX_TAP(10,2) + BOX_TAP(10,3) + BOX_TAP(10,4)
             + BOX_TAP(10,5) + BOX_TAP(10,6) + BOX_TAP(10,7) + BOX_TAP(10,8);
    }

    // Mean of the window; alpha is forced to fully opaque.
    outColor = float4(sum/(ksize*ksize),1.0f);

    return outColor ;
}

#undef BOX_TAP

//--------------------------- Technique ---------------------------
technique BurTechnique
{
    pass P0
    {
        LightEnable[0] = false;

        VertexShader = compile vs_3_0 MainVS_Screen();
        PixelShader = compile ps_3_0 MainPS_Screen();
    }
}

由于3.0版本的shader似乎不允许pixel shader单独出现,所以我从点运算时单独使用像素着色器的做法改为用特效(effect)来实现。HLSL语法中有if语句,也有for语句,可是这个程序却不厌其烦地把所有的采样都逐条列了出来,而没有使用for循环。这是因为在实际使用中发现有一些限制:比如if语句的if(A>B),A与B中必须有一个是常量,就像上面见到的那种形式;for循环中间的判断也是如此,只是第二层j循环的上限可以是第一层循环的i。也就是说,下面这种写法是不可以的:

// Illustration only: this form was rejected by the author's compiler when
// ksize / ksize1 are not compile-time constants.
for(int i=0;i<ksize;i++)
{
    for(int j=0;j<ksize1;j++)
    {
        ..........
    }
}

以上代码的ksize与ksize1都必须为常数,例外的情况是ksize1可以为第一层循环的 i 。这个问题不知道后续版本的shader有没有,反正我当前使用的版本有。

另外有一个需要注意的地方是指令数,2.0版本的shader支持的指令数相当少,3.0版本则要多好多,我最长写到了400多条快500条时才导致编译失败。 还有一个需要提醒的是3.0版本的shader只支持D3D 9.0C以后的。如果要求做更为复杂的图像处理,可以的话建议上D3D11,compute shader虽然我没用过,但从介绍来说,应该可以处理一些更为复杂的图像处理。

结合我的上一篇博客(DXVA2解码数据用texture纹理渲染http://www.cnblogs.com/betterwgo/p/6327422.html),就算是实现了从硬解到简单图像处理的完整过程。显卡加速效果非常好,在我的intel 5200上即使4K视频也可以实现比正常播放略快的效果。

工程源码:http://download.csdn.net/download/qq_33892166/9755307

HLSL实现简单的图像处理功能的更多相关文章

  1. Java 从零开始实现一个画图板、以及图像处理功能,代码可复现

    Java 从零开始实现一个画图板.以及图像处理功能,代码可复现 这是一个学习分享博客,带你从零开始实现一个画图板.图像处理的小项目,为了降低阅读难度,本博客将画图板的一步步迭代优化过程展示给读者,篇幅 ...

  2. ASP.NET MVC 学习4、Controller中添加SearchIndex页面,实现简单的查询功能

    参考:http://www.asp.net/mvc/tutorials/mvc-4/getting-started-with-aspnet-mvc4/examining-the-edit-method ...

  3. Web---创建Servlet的3种方式、简单的用户注册功能

    说明: 创建Servlet的方式,在上篇博客中,已经用了方式1(实现Servlet接口),接下来本节讲的是另外2种方式. 上篇博客地址:http://blog.csdn.net/qq_26525215 ...

  4. js+html+css简单的互动功能页面(2015知乎前端笔试题) http://v.youku.com/v_show/id_XMTI0ODQ5NTAyOA==.html?from=y1.7-1.2

    js+html+css实现简单页面交互功能(2015知乎前端笔试题) http://v.youku.com/v_show/id_XMTI0ODQ5NTAyOA==.html? from=y1.7-1. ...

  5. Spring 学习——基于Spring WebSocket 和STOMP实现简单的聊天功能

    本篇主要讲解如何使用Spring websocket 和STOMP搭建一个简单的聊天功能项目,里面使用到的技术,如websocket和STOMP等会简单介绍,不会太深,如果对相关介绍不是很了解的,请自 ...

  6. Django文件上传三种方式以及简单预览功能

    主要内容: 一.文件长传的三种方式 二.简单预览功能实现 一.form表单上传 1.页面代码 <!DOCTYPE html> <html lang="en"> ...

  7. 运用socket实现简单的ssh功能

    在python socket知识点中已经对socket进行了初步的了解,那现在就使用这些知识来实现一个简单的ssh(Secure Shell)功能. 首先同样是建立两个端(服务器端和客户端) 需求是: ...

  8. Jenkins实现简单的CI功能

    步骤一:安装JDK.Tomcat,小儿科的东西不在此详细描述 步骤二:下载安装Jenkins下载链接:https://jenkins.io/download/ 步骤三:将下载的jenkins.war部 ...

  9. 利用python进行简单的图像处理:包括打开,显示以及保存图像

    利用python进行简单的图像处理:包括打开,显示以及保存图像 利用PIL处理 PIL(python image library) 是python用于图片处理的package.但目前这个package ...

随机推荐

  1. JavaScript和jQuery的学习

    还有12天就要回学校了,我的假期计划还能实现吗?在这12天里,需要把JavaScript和jQuery学完.我知道这两个技术对于前端网页开发非常重要.前期把HTML和CSS学完了,学的不是特别深,只是 ...

  2. python - 常用模块 os, sys

    常用模块: os(处理文件和目录), sys(sys 模块包含了与 Python 解释器和它的环境有关的函数.) sys.argv 变量是一个字符串的 列表.特别地,sys.argv 包含了 命令行参 ...

  3. Linux系统——磁盘管理

    磁盘结构 (1)硬盘的物理结构 磁头:每面一个磁盘 盘片:硬盘有多个盘片,每个盘片2面 (2)硬盘的数据结构 扇区:盘片被分为多个扇形区域,每个扇形区存放512字节的数据 磁道:统一盘片不同半径的同心 ...

  4. 命令查看java的class字节码文件

    源代码: public class Math { public static void main(String[] args){ int a=1; int b=2; int c=(a+b)*10; } ...

  5. springer论文模板参考文献的顺序问题

    latex环境 MikTex 2.9 + TeXstudio 2.12.8 (+ Mendeley) 问题 springer提供的latex模板 中最后的参考文献是按照字母顺序排列的.我想要弄成按照文 ...

  6. mongodb研究(mongodb 内存数据库)

    本日志大部分都不是原创的转载复制的会带链接保持版权 工作中使用mongodb已经好久了,讽刺的是到了最后快离职的时候才有时间好好研究下源码.   印象:mongodb是一个内存数据库,数据都是放到内存 ...

  7. 在LAMP的生产环境内添加PHP的cURL扩展模块

    服务器运行一段时间后,可能突然会需求添加某个扩展,如curl.pdo.xmlrpc等, 这就需要在不重新编译 PHP   的情况下独立添加扩展. 下面以安装curl为例,介绍具体安装步骤. 1.安装c ...

  8. <The old man and the sea>

    Every day is a new day. It is better to be lucky. But i would rather be exact. Then when luck comes ...

  9. 爬虫框架Scrapy之案例三图片下载器

    items.py class CoserItem(scrapy.Item): url = scrapy.Field() name = scrapy.Field() info = scrapy.Fiel ...

  10. HDU5299 圆的扫描线 && 树上删边博弈

    HDU5299 圆的扫描线 && 树上删边博弈 标签(空格分隔): 未分类 给出若干个圆,可以互相嵌套但不相交或相切. 每次删去一个圆和它内部的圆,进行博弈,问谁赢. 分成两部分.首先 ...