由于DXVA2解码得到的数据不宜拷贝回系统内存交给CPU处理,所以最好的办法是在GPU上直接进行处理。D3D的像素着色器能够直接对像素进行操作,实现点运算极其简单方便,简单的卷积运算效果也非常好。但D3D9的限制也很多,对于过于复杂的图像处理则显得有些不能胜任。

1.点运算

点运算用HLSL非常容易实现,几乎是公式怎么写,代码就怎么写。以RGB转灰度图显示为例:

// Point-operation pixel shader: converts the sampled RGB texel to a gray
// level and applies one of three gray-level transforms selected by iFlag.
// The result is written to all three colour channels with alpha = 1.
//
// NOTE(review): the integer literals of the published listing were lost in
// extraction ("int iFlag =  ;", "if(iFlag == )", "bValue/ ", "log(+gray)").
// The mode ids 0/1/2, the 255.0 divisor and log(1 + gray) below are
// reconstructions — confirm them against the host code that sets
// iFlag / aValue / bValue before relying on them.

texture Tex0 ;

// Transform selector: 0 = linear, 1 = logarithmic, 2 = power-law.
int iFlag = 0 ;
// Multiplier applied in every mode.
float aValue = 0.0 ;
// Linear mode: additive offset (assumed to be given in 8-bit units, hence /255).
// Power-law mode: exponent.
float bValue = 0.0 ;

sampler2D YTex =
sampler_state
{
    Texture = <Tex0> ;
    MipFilter = LINEAR ;
    MinFilter = LINEAR ;
    MagFilter = LINEAR ;
    AddressU = CLAMP ;
    AddressV = CLAMP ;
};

struct PS_INPUT
{
    float2 uvCoords0 : TEXCOORD0 ;
};

float4 Main( PS_INPUT input ) : COLOR0
{
    // Fetch the texel once; the original listing sampled it three times.
    float3 rgb = tex2D( YTex, input.uvCoords0 ).rgb ;

    // Weighted RGB -> gray conversion (weights 0.299 / 0.587 / 0.114).
    float gray = dot( rgb, float3( 0.299, 0.587, 0.114 ) ) ;

    // Stays 0 (black) when iFlag matches none of the modes below.
    float s = 0.0 ;
    if( iFlag == 0 )
    {
        // Linear: s = a * gray + b
        s = aValue * gray + bValue / 255.0 ;
    }
    else if( iFlag == 1 )
    {
        // Logarithmic: s = a * ln(1 + gray)
        s = aValue * log( 1.0 + gray ) ;
    }
    else if( iFlag == 2 )
    {
        // Power-law: s = a * gray^b ; abs() keeps the pow() base non-negative.
        s = aValue * pow( abs( gray ), bValue ) ;
    }

    return float4( s, s, s, 1.0 ) ;
}

点运算如此简单是因为GPU是并行运算的,我个人认为可以看成是每一个像素点(BGRA)对应一个线程,这大概就是OpenCL中所谓的数据并行。这是一个非常简单的程序,指令数少,程序结构也很简单,用2.0版本的shader就可以轻松编译通过。

2.卷积运算举例

指令数较多的情况下2.0版本的shader就搞不定了,换用3.0版本可以做一些简单的卷积运算。以均值滤波(对ksize×ksize邻域内的像素取平均)为例:

// Mean (box) filter effect for Direct3D 9, shader model 3.0.
//
// MainVS_Screen transforms the quad with World * View * Proj and forwards the
// vertex colour and texture coordinate. MainPS_Screen replaces every pixel
// that lies inside the rectangle (fLeft, fTop)-(fRight, fBottom) with the
// average of the ksize x ksize texels around it; all other pixels, and all
// pixels when ksize is unsupported, are passed through unchanged.
//
// NOTE(review): the integer literals of the published listing were lost in
// extraction. They are reconstructed here from the structure of the code:
// the five tap groups contain 9, 16, 24, 32 and 40 samples, i.e. the rings
// that grow the window to 3x3, 5x5, 7x7, 9x9 and 11x11, which fixes the
// thresholds 3/5/7/9/11 and the supported range [3, 11]. The tap order
// within a ring and the default value of ksize are assumptions.

texture Tex0 ;

matrix WorldMatrix ;
matrix ViewMatrix ;
matrix ProjMatrix ;

sampler2D YTex =
sampler_state
{
    Texture = <Tex0> ;
    MipFilter = LINEAR ;
    MinFilter = LINEAR ;
    MagFilter = LINEAR ;
    AddressU = CLAMP ;
    AddressV = CLAMP ;
};

struct VS_INPUT
{
    float4 pos : POSITION ;
    float4 color : COLOR0 ;
    float2 tex : TEXCOORD0 ;
};

struct VS_OUTPUT
{
    float4 pos : POSITION ;
    float4 color : COLOR0 ;
    float2 tex : TEXCOORD0 ;
};

// Texture size in texels (x = width, y = height). It has no initializer, so
// the host must set it; the pixel shader divides by it.
float2 g_v4ScreenSize ;

// Edge length of the averaging window. Must be odd and within [3, 11];
// any other value makes the pixel shader a pass-through.
int ksize = 3 ;

// Filtered region in texture coordinates (exclusive bounds). With the -1
// defaults no pixel satisfies the test, so nothing is filtered until the
// host sets these.
float fLeft = -1.0f ;
float fTop = -1.0f ;
float fRight = -1.0f ;
float fBottom = -1.0f ;

//--------------------------------- BurTechnique --------------------------------------

// Vertex shader: position = In.pos * World * View * Proj; colour and
// texture coordinate are copied through.
VS_OUTPUT MainVS_Screen( VS_INPUT In )
{
    VS_OUTPUT Out = ( VS_OUTPUT )0 ;

    float4x4 matWorldView = mul( WorldMatrix, ViewMatrix ) ;
    float4x4 matProject = mul( matWorldView, ProjMatrix ) ;
    Out.pos = mul( In.pos, matProject ) ;
    Out.tex = In.tex ;
    Out.color = In.color ;
    return Out ;
}

// One tap of the window; (col, row) are texel indices counted from the
// window's top-left tap fX0Y0.
#define TAP( col, row ) tex2D( YTex, fX0Y0 + float2( x_off * ( col ), y_off * ( row ) ) ).rgb

// Pixel shader: ksize x ksize mean filter, restricted to the configured region.
float4 MainPS_Screen( VS_OUTPUT In ) : COLOR0
{
    float4 outColor = tex2D( YTex, In.tex ) ;

    // Unsupported window size (too small, too large, or even): pass through.
    if( ksize < 3 || ksize > 11 || ksize % 2 == 0 )
    {
        return outColor ;
    }

    // Outside the filtered rectangle: pass through.
    if( !( In.tex.x < fRight && In.tex.y < fBottom && In.tex.x > fLeft && In.tex.y > fTop ) )
    {
        return outColor ;
    }

    // Size of one texel in texture coordinates.
    float x_off = 1.0f / g_v4ScreenSize.x ;
    float y_off = 1.0f / g_v4ScreenSize.y ;

    // Top-left tap of the window. (ksize - 1) / 2 whole texels keeps the
    // window centred on the current pixel; the original "x_off * ksize / 2"
    // evaluated in float and shifted the window by an extra half texel.
    float halfSpan = ( ksize - 1 ) * 0.5f ;
    float2 fX0Y0 = In.tex - float2( x_off * halfSpan, y_off * halfSpan ) ;

    float3 sum = { 0.0f, 0.0f, 0.0f } ;

    // 3x3 core: 9 taps.
    if( ksize >= 3 )
    {
        sum += TAP( 0, 0 ) + TAP( 1, 0 ) + TAP( 2, 0 ) ;
        sum += TAP( 0, 1 ) + TAP( 1, 1 ) + TAP( 2, 1 ) ;
        sum += TAP( 0, 2 ) + TAP( 1, 2 ) + TAP( 2, 2 ) ;
    }

    // Ring 3x3 -> 5x5: 16 taps (columns 3-4 beside rows 0-2, then rows 3-4).
    if( ksize >= 5 )
    {
        sum += TAP( 3, 0 ) + TAP( 4, 0 ) ;
        sum += TAP( 3, 1 ) + TAP( 4, 1 ) ;
        sum += TAP( 3, 2 ) + TAP( 4, 2 ) ;
        sum += TAP( 0, 3 ) + TAP( 1, 3 ) + TAP( 2, 3 ) + TAP( 3, 3 ) + TAP( 4, 3 ) ;
        sum += TAP( 0, 4 ) + TAP( 1, 4 ) + TAP( 2, 4 ) + TAP( 3, 4 ) + TAP( 4, 4 ) ;
    }

    // Ring 5x5 -> 7x7: 24 taps (columns 5-6 beside rows 0-4, then rows 5-6).
    if( ksize >= 7 )
    {
        sum += TAP( 5, 0 ) + TAP( 6, 0 ) ;
        sum += TAP( 5, 1 ) + TAP( 6, 1 ) ;
        sum += TAP( 5, 2 ) + TAP( 6, 2 ) ;
        sum += TAP( 5, 3 ) + TAP( 6, 3 ) ;
        sum += TAP( 5, 4 ) + TAP( 6, 4 ) ;
        sum += TAP( 0, 5 ) + TAP( 1, 5 ) + TAP( 2, 5 ) + TAP( 3, 5 ) + TAP( 4, 5 ) + TAP( 5, 5 ) + TAP( 6, 5 ) ;
        sum += TAP( 0, 6 ) + TAP( 1, 6 ) + TAP( 2, 6 ) + TAP( 3, 6 ) + TAP( 4, 6 ) + TAP( 5, 6 ) + TAP( 6, 6 ) ;
    }

    // Ring 7x7 -> 9x9: 32 taps (columns 7-8 beside rows 0-6, then rows 7-8).
    if( ksize >= 9 )
    {
        sum += TAP( 7, 0 ) + TAP( 8, 0 ) ;
        sum += TAP( 7, 1 ) + TAP( 8, 1 ) ;
        sum += TAP( 7, 2 ) + TAP( 8, 2 ) ;
        sum += TAP( 7, 3 ) + TAP( 8, 3 ) ;
        sum += TAP( 7, 4 ) + TAP( 8, 4 ) ;
        sum += TAP( 7, 5 ) + TAP( 8, 5 ) ;
        sum += TAP( 7, 6 ) + TAP( 8, 6 ) ;
        sum += TAP( 0, 7 ) + TAP( 1, 7 ) + TAP( 2, 7 ) + TAP( 3, 7 ) + TAP( 4, 7 ) + TAP( 5, 7 ) + TAP( 6, 7 ) + TAP( 7, 7 ) + TAP( 8, 7 ) ;
        sum += TAP( 0, 8 ) + TAP( 1, 8 ) + TAP( 2, 8 ) + TAP( 3, 8 ) + TAP( 4, 8 ) + TAP( 5, 8 ) + TAP( 6, 8 ) + TAP( 7, 8 ) + TAP( 8, 8 ) ;
    }

    // Ring 9x9 -> 11x11: 40 taps (columns 9-10 beside rows 0-8, then rows 9-10).
    if( ksize >= 11 )
    {
        sum += TAP( 9, 0 ) + TAP( 10, 0 ) ;
        sum += TAP( 9, 1 ) + TAP( 10, 1 ) ;
        sum += TAP( 9, 2 ) + TAP( 10, 2 ) ;
        sum += TAP( 9, 3 ) + TAP( 10, 3 ) ;
        sum += TAP( 9, 4 ) + TAP( 10, 4 ) ;
        sum += TAP( 9, 5 ) + TAP( 10, 5 ) ;
        sum += TAP( 9, 6 ) + TAP( 10, 6 ) ;
        sum += TAP( 9, 7 ) + TAP( 10, 7 ) ;
        sum += TAP( 9, 8 ) + TAP( 10, 8 ) ;
        sum += TAP( 0, 9 ) + TAP( 1, 9 ) + TAP( 2, 9 ) + TAP( 3, 9 ) + TAP( 4, 9 ) + TAP( 5, 9 ) + TAP( 6, 9 ) + TAP( 7, 9 ) + TAP( 8, 9 ) + TAP( 9, 9 ) + TAP( 10, 9 ) ;
        sum += TAP( 0, 10 ) + TAP( 1, 10 ) + TAP( 2, 10 ) + TAP( 3, 10 ) + TAP( 4, 10 ) + TAP( 5, 10 ) + TAP( 6, 10 ) + TAP( 7, 10 ) + TAP( 8, 10 ) + TAP( 9, 10 ) + TAP( 10, 10 ) ;
    }

    // Average over the window; alpha is forced to 1.
    outColor = float4( sum / ( ksize * ksize ), 1.0f ) ;
    return outColor ;
}

#undef TAP

//--------------------------- Technique ---------------------------
technique BurTechnique
{
    pass P0
    {
        LightEnable[0] = false ;
        VertexShader = compile vs_3_0 MainVS_Screen() ;
        PixelShader = compile ps_3_0 MainPS_Screen() ;
    }
}

由于3.0版本的shader似乎不允许pixel shader单独出现,所以我从点运算时用像素着色器实现改为用特效(effect)来实现。HLSL语法中有if语句,也有for语句,可是这个程序却不厌其烦地把所有的采样都逐条列出来,而没有使用for循环。这是因为在实际使用中发现有一些限制,比如if语句的if(A>B),A与B中必须有一个是常量,就像上面见到的那种形式;for循环中间的判断也是如此,只是在第二层j循环中上界可以是第一层循环的i,即不可以写成:

// Counter-example: per the surrounding text, this form is rejected unless
// ksize and ksize1 are constants (ksize1 may also be the outer index i).
for(int i = 0; i < ksize; i++)
{
    for(int j = 0; j < ksize1; j++)
    {
        // ... per-tap work ...
    }
}

以上代码的ksize与ksize1都必须为常数,例外的情况是ksize1可以为第一层循环的 i 。这个问题不知道后续版本的shader有没有,反正我当前使用的版本有。

另外有一个需要注意的地方是指令数,2.0版本的shader支持的指令数相当少,3.0版本则要多好多,我最长写到了400多条快500条时才导致编译失败。 还有一个需要提醒的是3.0版本的shader只支持D3D 9.0C以后的。如果要求做更为复杂的图像处理,可以的话建议上D3D11,compute shader虽然我没用过,但从介绍来说,应该可以处理一些更为复杂的图像处理。

结合我的上一篇博客(《DXVA2解码数据用texture纹理渲染》,http://www.cnblogs.com/betterwgo/p/6327422.html ),就算是实现了从硬解到简单图像处理的完整过程。显卡加速效果非常好,在我的intel 5200上即使4K视频也可以实现比正常播放略快的效果。

工程源码:http://download.csdn.net/download/qq_33892166/9755307

HLSL实现简单的图像处理功能的更多相关文章

  1. Java 从零开始实现一个画图板、以及图像处理功能,代码可复现

    Java 从零开始实现一个画图板.以及图像处理功能,代码可复现 这是一个学习分享博客,带你从零开始实现一个画图板.图像处理的小项目,为了降低阅读难度,本博客将画图板的一步步迭代优化过程展示给读者,篇幅 ...

  2. ASP.NET MVC 学习4、Controller中添加SearchIndex页面,实现简单的查询功能

    参考:http://www.asp.net/mvc/tutorials/mvc-4/getting-started-with-aspnet-mvc4/examining-the-edit-method ...

  3. Web---创建Servlet的3种方式、简单的用户注册功能

    说明: 创建Servlet的方式,在上篇博客中,已经用了方式1(实现Servlet接口),接下来本节讲的是另外2种方式. 上篇博客地址:http://blog.csdn.net/qq_26525215 ...

  4. js+html+css简单的互动功能页面(2015知乎前端笔试题)http://v.youku.com/v_show/id_XMTI0ODQ5NTAyOA==.html?from=y1.7-1.2

    js+html+css实现简单页面交互功能(2015知乎前端笔试题) http://v.youku.com/v_show/id_XMTI0ODQ5NTAyOA==.html? from=y1.7-1. ...

  5. Spring 学习——基于Spring WebSocket 和STOMP实现简单的聊天功能

    本篇主要讲解如何使用Spring websocket 和STOMP搭建一个简单的聊天功能项目,里面使用到的技术,如websocket和STOMP等会简单介绍,不会太深,如果对相关介绍不是很了解的,请自 ...

  6. Django文件上传三种方式以及简单预览功能

    主要内容: 一.文件长传的三种方式 二.简单预览功能实现 一.form表单上传 1.页面代码 <!DOCTYPE html> <html lang="en"> ...

  7. 运用socket实现简单的ssh功能

    在python socket知识点中已经对socket进行了初步的了解,那现在就使用这些知识来实现一个简单的ssh(Secure Shell)功能. 首先同样是建立两个端(服务器端和客户端) 需求是: ...

  8. Jenkins实现简单的CI功能

    步骤一:安装JDK.Tomcat,小儿科的东西不在此详细描述 步骤二:下载安装Jenkins下载链接:https://jenkins.io/download/ 步骤三:将下载的jenkins.war部 ...

  9. 利用python进行简单的图像处理:包括打开,显示以及保存图像

    利用python进行简单的图像处理:包括打开,显示以及保存图像 利用PIL处理 PIL(python image library) 是python用于图片处理的package.但目前这个package ...

随机推荐

  1. Oracle 通过undo块查看事务信息(转)

      数据库版本:Oracle 11.2.0.3 RAC 实验目的:通过undo块查看Oracle事务信息 实验细节:1 开始一个事务SQL> select * from t1; ID NAME- ...

  2. matplotlib中文乱码解决方法

    每次编写代码时进行参数设置 import matplotlib.pyplot as plt plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签 ...

  3. php等守护进程监控脚本(转载 http://www.9958.pw/post/php_script_scan)

    此脚本用户守护监控进程的执行情况,因为有的时候,我们用各类开发语言做的守护进程可能会因为一些特殊情况被退出,所以此脚本就是为了重启这些进程 代码: #!/bin/bash EMAIL='9958_pw ...

  4. SQL Server 创建游标(cursor)

    游标的定义: 游标则是处理结果集的一种机制,它可以定位到结果集中的某一行,也可以移动游标定位到你所需要的行中进行操作数据.与 select 语句的不同是,select 语句面向的是结果集,游标面向的是 ...

  5. PKU 1226 Substrings(字符串匹配+暴搜KMP模板)

    原题大意:原题链接 给出n个字符串,找出一个最长的串s,使s或者s的反转字符串(只要其中一个符合就行)同时满足是这n个串的子串. 对于样例,第一组ABCD   BCDFF  BRCD最长的串就是CD; ...

  6. 关于js中的取值问题

    像这样是获取不到值的,弹出的消息是 underfined:<html><style type="text/css">input { border: 1px ...

  7. Salesforce中通过SOAP API和Metadata API开发java的web server服务

    1.下载Salesforce平台中WSDL文件 在Salesforce中创建了自己需要用到的对象后,我们想要在别的应用中读写纪录到对象中,首先需要的是自己Salesforce平台的权限通过.登陆自己的 ...

  8. XSS是什么

    1.XSS是跨站脚本攻击(Cross Site Scripting),为不和层叠样式表(Cascading Style Sheets, CSS)的缩写混淆,故将跨站脚本攻击缩写为XSS. 2.恶意攻击 ...

  9. NIO 01 (转)

    本文转自:http://www.cnblogs.com/littlehann/p/3720396.html 目录 1. NIO.NIO.2简介 2. NIO中的关键技术 1. NIO.NIO.2简介 ...

  10. windows上使用clang编译程序

    环境:windows7,64位 1.下载并安装llvm,安装包里除了llvm,也有clang: http://releases.llvm.org/5.0.0/LLVM-5.0.0-win64.exe ...