由于对于dxva2解码得到的数据不宜copy回内存给CPU处理,所以最好的办法是在GPU上直接进行处理。D3D的像素着色器能够对像素直接进行操作,实现点运算极其简单方便,简单的卷积运算效果也非常好。但D3D9的限制也很多,对于过于复杂的图像处理则显得有些不能胜任。

1.点运算

点运算用HLSL非常容易实现,几乎是公式怎么写,代码就怎么写。以RGB转灰度图显示为例:

// Pixel shader applying a point (per-pixel) gray-level transform to Tex0.
// NOTE(review): the integer literals in the published listing were lost; the
// values 0/1/2, log(1 + gray) and the 255 divisor below are reconstructed
// from context and should be confirmed against the original project.

// Source image, bound by the application.
texture Tex0 ;

// Selects the transform applied to the gray value:
//   0 -> linear:      s = aValue * gray + bValue / 255
//   1 -> logarithmic: s = aValue * log(1 + gray)
//   2 -> power law:   s = aValue * pow(|gray|, bValue)
// Any other value leaves s at 0, i.e. the output is black.
int iFlag = 0 ;
float aValue = 0.0 ;
float bValue = 0.0 ;

sampler2D YTex =
sampler_state
{
    Texture = <Tex0> ;
    MipFilter = LINEAR ;
    MinFilter = LINEAR ;
    MagFilter = LINEAR ;
    AddressU = CLAMP ;
    AddressV = CLAMP ;
};

struct PS_INPUT
{
    float2 uvCoords0 : TEXCOORD0 ;
};

// Returns an opaque gray pixel whose intensity is the transformed luminance
// of the texel at input.uvCoords0.
float4 Main( PS_INPUT input ) : COLOR0
{
    float4 yuvColor ;

    // RGB to gray. (Original author's remark: not sure this is exactly how it
    // should be displayed, but assume so for now.)
    // The texture is sampled once; the original fetched the same texel three times.
    float3 rgb = tex2D( YTex, input.uvCoords0 ).rgb ;
    float gray = dot( rgb, float3( 0.299, 0.587, 0.114 ) ) ;

    float s = 0 ;
    if(iFlag == 0)
    {
        // bValue is presumably given on a 0..255 scale -- TODO confirm.
        s = aValue * gray + bValue / 255 ;
    }
    else if(iFlag == 1)
    {
        s = aValue * log(1 + gray) ;
    }
    else if(iFlag == 2)
    {
        // abs() keeps pow() away from a negative base.
        s = aValue * pow(abs(gray), bValue) ;
    }

    yuvColor.r = s ;
    yuvColor.g = s ;
    yuvColor.b = s ;
    yuvColor.a = 1.0 ;

    return yuvColor ;
}

点运算如此简单是因为GPU是并行运算的,我个人认为可以看成是每一个像素点(BGRA)对应一个线程,这大概就是OpenCL中所谓的数据并行。这是一个非常简单的程序,指令数少,程序结构也很简单,shader 的版本用2.0就可以轻松编过。

2.卷积运算举例

指令数较多的情况2.0版本的shader就搞不定了,上3.0版本可以做一些简单的卷积运算。以均值滤波为例(下面的代码对 ksize×ksize 邻域求和后取平均,严格来说是均值滤波而不是中值滤波):

// Effect-level parameters, sampler and vertex formats for the box-filter effect.

// Source image, bound by the application.
texture Tex0 ;

// Transforms combined by the vertex shader as World * View * Proj.
matrix WorldMatrix;
matrix ViewMatrix;
matrix ProjMatrix;

sampler2D YTex =
sampler_state
{
    Texture = <Tex0> ;
    MipFilter = LINEAR ;
    MinFilter = LINEAR ;
    MagFilter = LINEAR ;
    AddressU = CLAMP ;
    AddressV = CLAMP ;
};

struct VS_INPUT
{
    float4 pos : POSITION;
    float4 color : COLOR0;
    float2 tex : TEXCOORD0;
};

// Same layout as VS_INPUT; carried from the vertex shader to the pixel shader.
struct VS_OUTPUT
{
    float4 pos : POSITION;
    float4 color : COLOR0;
    float2 tex : TEXCOORD0;
};

// Size used by the pixel shader to derive the one-texel step (1 / size).
float2 g_v4ScreenSize;

// Side length of the square filter window; the pixel shader only filters for
// odd values.
// NOTE(review): the default was lost in the published listing; 3 is a
// placeholder -- confirm against the original project.
int ksize = 3;

// Texture-coordinate rectangle inside which the filter is applied (strict
// comparisons). With these defaults no coordinate passes the test, so the
// image is left unfiltered until the application sets a region.
float fLeft = -1.0f ;
float fTop = -1.0f ;
float fRight = -1.0f ;
float fBottom = -1.0f ;
//--------------------------------- BurTechnique --------------------------------------
// NOTE(review): the integer literals of the published listing were lost. The
// constants below (kernel sizes 3..11, tap offsets, LightEnable index) are
// reconstructed: the five tap groups contain 9, 16, 24, 32 and 40 samples,
// i.e. the rings that grow a 3x3 window to 5x5, 7x7, 9x9 and 11x11.

// Vertex shader: transforms the position by World * View * Proj and passes
// color and texture coordinate through unchanged.
VS_OUTPUT MainVS_Screen( VS_INPUT In )
{
    VS_OUTPUT Out = (VS_OUTPUT)0;

    float4x4 matWorldView = mul(WorldMatrix, ViewMatrix);
    float4x4 matProject = mul(matWorldView, ProjMatrix);
    Out.pos = mul(In.pos, matProject);
    Out.tex = In.tex;
    Out.color = In.color;
    return Out;
}

// One filter tap at column i, row j of the window whose top-left tap is fX0Y0.
// The taps are spelled out through macros rather than loops so that every
// branch condition compares ksize against a literal.
#define BOX_TAP(i, j)      sum += tex2D( YTex, fX0Y0 + float2( x_off * (i), y_off * (j) ) ).rgb
// Two extra columns a, b on row j (the right-hand part of a ring).
#define BOX_COLS(a, b, j)  BOX_TAP(a, j); BOX_TAP(b, j)
// Complete rows of width 3, 5, 7, 9 and 11.
#define BOX_ROW3(j)        BOX_TAP(0, j); BOX_TAP(1, j); BOX_TAP(2, j)
#define BOX_ROW5(j)        BOX_ROW3(j); BOX_COLS(3, 4, j)
#define BOX_ROW7(j)        BOX_ROW5(j); BOX_COLS(5, 6, j)
#define BOX_ROW9(j)        BOX_ROW7(j); BOX_COLS(7, 8, j)
#define BOX_ROW11(j)       BOX_ROW9(j); BOX_COLS(9, 10, j)

// Pixel shader: ksize x ksize mean (box) filter.
// Returns the unfiltered texel when ksize is even or outside 3..11, or when
// the texture coordinate lies outside the rectangle (fLeft, fTop) -
// (fRight, fBottom). Otherwise returns the average of the window with alpha 1.
// NOTE(review): the parameter is declared as VS_INPUT although it receives the
// vertex shader's output; VS_INPUT and VS_OUTPUT have the same layout.
float4 MainPS_Screen( VS_INPUT In ) : COLOR0
{
    float4 outColor = tex2D( YTex, In.tex ).rgba ;

    // Only odd window sizes from 3 to 11 are supported.
    if( ksize < 3 || ksize > 11 || ksize % 2 == 0 )
    {
        return outColor ;
    }

    // Leave pixels outside the selected region untouched.
    if(!(In.tex.x < fRight && In.tex.y < fBottom && In.tex.x > fLeft && In.tex.y > fTop))
    {
        return outColor ;
    }

    // Texture size and the size of one texel in texture coordinates.
    float2 TexSize = float2( g_v4ScreenSize.x , g_v4ScreenSize.y );
    float x_off = 1.0f / TexSize.x;
    float y_off = 1.0f / TexSize.y;

    // Top-left tap of the window. ksize is odd here, so (ksize - 1) / 2 is a
    // whole number of texels and the window is centered on In.tex. The
    // published code used x_off * ksize / 2, which is evaluated in floating
    // point (1.5 texels for ksize == 3) and shifts the window by half a texel.
    float halfSpan = ( ksize - 1 ) * 0.5f ;
    float2 fX0Y0 = In.tex - float2( x_off * halfSpan, y_off * halfSpan ) ;

    float3 sum = {0.0f, 0.0f, 0.0f} ;

    // 3x3 core: 9 taps.
    if(ksize >= 3)
    {
        BOX_ROW3(0); BOX_ROW3(1); BOX_ROW3(2);
    }

    // Ring extending 3x3 to 5x5: 16 taps.
    if(ksize >= 5)
    {
        BOX_COLS(3, 4, 0); BOX_COLS(3, 4, 1); BOX_COLS(3, 4, 2);
        BOX_ROW5(3); BOX_ROW5(4);
    }

    // Ring extending 5x5 to 7x7: 24 taps.
    if(ksize >= 7)
    {
        BOX_COLS(5, 6, 0); BOX_COLS(5, 6, 1); BOX_COLS(5, 6, 2);
        BOX_COLS(5, 6, 3); BOX_COLS(5, 6, 4);
        BOX_ROW7(5); BOX_ROW7(6);
    }

    // Ring extending 7x7 to 9x9: 32 taps.
    if(ksize >= 9)
    {
        BOX_COLS(7, 8, 0); BOX_COLS(7, 8, 1); BOX_COLS(7, 8, 2);
        BOX_COLS(7, 8, 3); BOX_COLS(7, 8, 4); BOX_COLS(7, 8, 5);
        BOX_COLS(7, 8, 6);
        BOX_ROW9(7); BOX_ROW9(8);
    }

    // Ring extending 9x9 to 11x11: 40 taps.
    if(ksize >= 11)
    {
        BOX_COLS(9, 10, 0); BOX_COLS(9, 10, 1); BOX_COLS(9, 10, 2);
        BOX_COLS(9, 10, 3); BOX_COLS(9, 10, 4); BOX_COLS(9, 10, 5);
        BOX_COLS(9, 10, 6); BOX_COLS(9, 10, 7); BOX_COLS(9, 10, 8);
        BOX_ROW11(9); BOX_ROW11(10);
    }

    // Average over the ksize * ksize taps actually accumulated.
    outColor = float4( sum / (ksize * ksize), 1.0f );

    return outColor ;
}

//--------------------------- Technique ---------------------------
technique BurTechnique
{
    pass P0
    {
        LightEnable[0] = false;

        VertexShader = compile vs_3_0 MainVS_Screen();
        PixelShader = compile ps_3_0 MainPS_Screen();
    }
}

由于3.0版本的shader似乎不允许pixel shader单独出现,所以我从点运算用像素着色器实现改为用特效(effect)来实现。HLSL语法中有if语句,也有for语句,可是这个程序却不厌其烦地把所有的采样都逐条列了出来,而没有使用for循环。这是因为在实际使用中发现有一些限制,比如if语句的if(A>B),A与B中必须有一个是常量,就像上面见到的那种形式;for循环中间的判断也是如此,只是在第二层j循环中可以是第一层循环的i,即不可以

// Illustration of the loop form that fails to compile when ksize / ksize1 are
// not compile-time constants (ksize1 may, however, be the outer index i).
// NOTE(review): the start values were lost in the published listing; 0 is assumed.
for(int i=0;i<ksize;i++)
{
    for(int j=0;j<ksize1;j++)
    {
        // ... loop body ...
    }
}

以上代码的ksize与ksize1都必须为常数,例外的情况是ksize1可以为第一层循环的 i 。这个问题不知道后续版本的shader有没有,反正我当前使用的版本有。

另外有一个需要注意的地方是指令数,2.0版本的shader支持的指令数相当少,3.0版本则要多好多,我最长写到了400多条、接近500条时才导致编译失败(ps_3_0 保证的指令槽下限是512条,与这一观察基本吻合)。还有一个需要提醒的是3.0版本的shader需要D3D 9.0c及以上版本才支持。如果要求做更为复杂的图像处理,可以的话建议上D3D11,compute shader虽然我没用过,但从介绍来说,应该可以胜任一些更为复杂的图像处理。

结合我的上一篇博客(DXVA2解码数据用texture纹理渲染http://www.cnblogs.com/betterwgo/p/6327422.html),就算是实现了从硬解到简单图像处理的完整过程。显卡加速效果非常好,在我的intel 5200上即使4K视频也可以实现比正常播放略快的效果。

工程源码:http://download.csdn.net/download/qq_33892166/9755307

HLSL实现简单的图像处理功能的更多相关文章

  1. Java 从零开始实现一个画图板、以及图像处理功能,代码可复现

    Java 从零开始实现一个画图板.以及图像处理功能,代码可复现 这是一个学习分享博客,带你从零开始实现一个画图板.图像处理的小项目,为了降低阅读难度,本博客将画图板的一步步迭代优化过程展示给读者,篇幅 ...

  2. ASP.NET MVC 学习4、Controller中添加SearchIndex页面,实现简单的查询功能

    参考:http://www.asp.net/mvc/tutorials/mvc-4/getting-started-with-aspnet-mvc4/examining-the-edit-method ...

  3. Web---创建Servlet的3种方式、简单的用户注册功能

    说明: 创建Servlet的方式,在上篇博客中,已经用了方式1(实现Servlet接口),接下来本节讲的是另外2种方式. 上篇博客地址:http://blog.csdn.net/qq_26525215 ...

  4. js+html+css简单的互动功能页面(2015知乎前端笔试题)http://v.youku.com/v_show/id_XMTI0ODQ5NTAyOA==.html?from=y1.7-1.2

    js+html+css实现简单页面交互功能(2015知乎前端笔试题) http://v.youku.com/v_show/id_XMTI0ODQ5NTAyOA==.html? from=y1.7-1. ...

  5. Spring 学习——基于Spring WebSocket 和STOMP实现简单的聊天功能

    本篇主要讲解如何使用Spring websocket 和STOMP搭建一个简单的聊天功能项目,里面使用到的技术,如websocket和STOMP等会简单介绍,不会太深,如果对相关介绍不是很了解的,请自 ...

  6. Django文件上传三种方式以及简单预览功能

    主要内容: 一.文件长传的三种方式 二.简单预览功能实现 一.form表单上传 1.页面代码 <!DOCTYPE html> <html lang="en"> ...

  7. 运用socket实现简单的ssh功能

    在python socket知识点中已经对socket进行了初步的了解,那现在就使用这些知识来实现一个简单的ssh(Secure Shell)功能. 首先同样是建立两个端(服务器端和客户端) 需求是: ...

  8. Jenkins实现简单的CI功能

    步骤一:安装JDK.Tomcat,小儿科的东西不在此详细描述 步骤二:下载安装Jenkins下载链接:https://jenkins.io/download/ 步骤三:将下载的jenkins.war部 ...

  9. 利用python进行简单的图像处理:包括打开,显示以及保存图像

    利用python进行简单的图像处理:包括打开,显示以及保存图像 利用PIL处理 PIL(python image library) 是python用于图片处理的package.但目前这个package ...

随机推荐

  1. Linux命令 lsof使用

    lsof(list open files)是一个列出当前系统打开文件的工具.在linux环境下,任何事物都以文件的形式存在,通过文件不仅仅可以访问常规数据,还可以访问网络连接和硬件. lsof +d ...

  2. Linux系统——VMware克隆

    克隆VMware 1. 关闭防火墙 2. 关闭selinux 3. 删除UUID和Mac地址 4.清空网卡缓存 5.关机 ===================== 关闭防火墙 #service ip ...

  3. XDU 1140 寻找万神(字符串匹配)

    学会strstr的使用 strstr(str1,str2)函数用于判断字符串str2是否是str1的子串.如果是,则该函数返回str2在str1中首次出现的地址:否则,返回NULL. #include ...

  4. CodeForces - 662C Binary Table (FWT)

    题意:给一个N*M的0-1矩阵,可以进行若干次操作,每次操作将一行或一列的0和1反转,求最后能得到的最少的1的个数. 分析:本题可用FWT求解. 因为其0-1反转的特殊性且\(N\leq20\),将每 ...

  5. 带你走进AJAX(1)

    ajax是什么? (1)ajax (asynchronouse javascript and xml) 异步的javascript 和xml (2)ajax是一个粘合剂,将javascript.xml ...

  6. lastIndexOf is not a function

    最近在开发的时候遇到了这个问题lastIndexOf is not a function,细心调试发现我传递进去的参数不是字符串类型,而且object类型,导致出现这种错误.把参数修改成字符串传递进去 ...

  7. 查看ubuntu 各系统的内核版本

    1.查看ubuntu版本号:   cat  /etc/issue 返回结果: Ubuntu 16.04.2 LTS \n \l   2.查看内核版本号:   cat /proc/version 返回结 ...

  8. linq分析

    例如: var sums = modellist .GroupBy(x => x.userId) .Select(group => new { Peo = group.Key, fist ...

  9. 虚拟中没有eth0

    进行虚拟机的软拷贝和硬拷贝,或直接从一台机器上拷贝虚拟机硬盘文件到另一台机子的虚拟机上时,发现通过修改/etc/network/interfaces配置的IP没用,输入ifconfig,发现根本就没有 ...

  10. CSS Outline(轮廓)

    CSS Outline(轮廓) 一.CSS 轮廓(outline) 轮廓(outline)是绘制于元素周围的一条线,位于边框边缘的外围,可起到突出元素的作用. CSS outline 属性规定元素轮廓 ...