因为工作要求, 需要对EMF文件文字内容做分析.....SO, 如下代码出现了

懒得加注释了, 反正对外接口属性就那么几个, 根据英文猜吧, 很容易的

说明一下:

  这个东西结果会对所有文字内容按照左上到右下的顺序排序(EMF内数据顺序是根据画图顺序来的, 所以不一定是什么顺序, 但是数据分析就要得到行列关系)

但是图片没有行列概念, 所以我简单借鉴了纯横排版的模式: 对于2个文字元素, 只要其中一个的显示范围的水平中线落在另一个的显示范围内, 就认为它们在同一行

2015-10-19:

  1.修改了几个排序时的BUG, 增加了对显示区域的处理, 最大限度地减少对排版的影响

  2.修改了获取SmallTextOut的处理方式

{
EMF file analysis unit
Reads the text elements inside an EMF and lays them out. Last modified 2015-10-19 by: Liu Zhilin (刘志林)
E-Mail: lzl_17948876@hotmail.com
} unit Comm.EMFInfo; interface uses
System.Types, System.Generics.Collections,
Vcl.Graphics; type
{ One text element extracted from a metafile. }
TEMFStrInfo = record
DisplayRect: TRect; {display region}
Text: string; {displayed text}
LineKey: string; {row marker: elements judged to be on the same row share one key}
end;
{ Collects the text elements of one or more metafiles. Elements are stored as
  heap-allocated records owned by the list (released in Clear/Destroy). }
PEMFStrInfo = ^TEMFStrInfo; TEMFStrInfoList = Class
private
// All collected elements, in the order Append added them.
FList: TList<PEMFStrInfo>;
// Maps an element's text to its index in FList (a later duplicate text replaces the earlier entry).
FDic: TDictionary<string, UInt32>;
// Running total of the heights reported by every Append call so far.
FMaxHeight: Integer;
FJSONStrs: string; {used for a loose text-based search when the positional lookup fails} function GetItem(Index: UInt32): TEMFStrInfo;
function GetCount: UInt32;
function GetJSONStrs: string;
public
constructor Create;
// Append extracts the text of AEMF, adds it to the list and returns the lowest text bottom edge in AHeight.
destructor Destroy; override; procedure Append(AEMF: TMetafile; var AHeight: Integer);
// Releases every stored element and resets the accumulated height and JSON text.
procedure Clear;
property Count: UInt32 read GetCount;
// Returns a copy of the element at Index.
property Items[Index: UInt32]: TEMFStrInfo read GetItem;
// Looks an element up by its exact text; on success fills AInfo and AIndex.
function TryGetInfo(AInfoName: string; var AInfo: TEMFStrInfo; var AIndex: UInt32): Boolean;
// Applies the regular expressions in ALeavePattern one after another to the JSON text,
// each pattern searching inside the previous match; AResult receives the final match.
function StrAnalyze(ALeavePattern: array of string; var AResult: string): Boolean;
// All collected text as a JSON array of rows (each row is an array of strings).
property JSONStr: string read GetJSONStrs;
property MaxHeight: Integer read FMaxHeight;
end; implementation uses
System.SysUtils, System.Classes, System.Generics.Defaults,
System.RegularExpressions,
Winapi.Windows,
Vcl.Printers,
QJSON; const
// Flag bits tested against the fuOptions field of an EMR_SMALLTEXTOUT record.
// The values equal the EMF ExtTextOutOptions flags ETO_SMALL_CHARS ($200)
// and ETO_NO_RECT ($100).
  // if set use ANSI version else UNICODE
  SMALLTEXT_TYPE_ANSI = $200;
  // if set use EMR_SMALLTEXTOUT else use EMR_SMALLTEXTOUTCLIP
  SMALLTEXT_TYPE_WITHOUT_CLIP = $100; // Structures
type
  { Layouts of the EMR_SMALLTEXTOUT metafile record. The record comes in four
    flavours (ANSI/Unicode text, with/without a clip rectangle) selected by
    bits in fuOptions. All flavours share the same leading fields; the text is
    stored inline at the end of the record, so cString is declared as a
    one-element array that only marks where the characters start. }

  { Fields common to every flavour; used to inspect fuOptions before choosing
    the concrete layout. }
  EMR_SMALLTEXTOUT_HEAD = RECORD
    emr: emr;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD; // this record type
    // == SMALLTEXT_TYPE_WITHOUT_CLIP
    // == SMALLTEXT_TYPE_ANSI
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
  END;
  PEMRSmallTextOutHead = ^EMR_SMALLTEXTOUT_HEAD;

  { ANSI text preceded by a clip rectangle. }
  EMR_SMALLTEXTOUTCLIPA = RECORD
    emr: emr;
    ptlReference: TPoint; // might be in negative numbers, so take abs
    nChars: DWORD;
    fuOptions: DWORD; // this record type
    // != SMALLTEXT_TYPE_WITHOUT_CLIP
    // == SMALLTEXT_TYPE_ANSI
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    rclClip: TRect;
    cString: Array [0 .. 0] of AnsiChar;
    { This is followed by the string array }
  END;
  PEMRSmallTextOutClipA = ^EMR_SMALLTEXTOUTCLIPA;

  { Unicode text preceded by a clip rectangle. }
  EMR_SMALLTEXTOUTCLIPW = RECORD
    emr: emr;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD; // this record type
    // != SMALLTEXT_TYPE_WITHOUT_CLIP
    // != SMALLTEXT_TYPE_ANSI
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    rclClip: TRect;
    cString: Array [0 .. 0] of WideChar;
    { This is followed by the string array }
  END;
  PEMRSmallTextOutClipW = ^EMR_SMALLTEXTOUTCLIPW;

  { ANSI text, no clip rectangle. }
  EMR_SMALLTEXTOUTA = RECORD
    emr: emr;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD; // this record type
    // == SMALLTEXT_TYPE_WITHOUT_CLIP
    // == SMALLTEXT_TYPE_ANSI
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    cString: Array [0 .. 0] of AnsiChar;
    { This is followed by the string array }
  END;
  PEMRSmallTextOutA = ^EMR_SMALLTEXTOUTA;

  { Unicode text, no clip rectangle. }
  EMR_SMALLTEXTOUTW = RECORD
    emr: emr;
    ptlReference: TPoint;
    nChars: DWORD;
    fuOptions: DWORD; // this record type
    // == SMALLTEXT_TYPE_WITHOUT_CLIP
    // != SMALLTEXT_TYPE_ANSI
    // also holds fuOptions like in the ExtTextOut function
    iGraphicsMode: DWORD; // See iMode parameter of SetGraphicsMode
    exScale: Single; { X and Y scales from Page units to .01mm units }
    eyScale: Single; { if graphics mode is GM_COMPATIBLE. }
    cString: Array [0 .. 0] of WideChar;
    { This is followed by the string array }
  END;
  PEMRSmallTextOutW = ^EMR_SMALLTEXTOUTW;

var
FReferenceDC: VCL.Graphics.TBitmap;

{ EnumEnhMetaFile callback: collects the text carried by EMR_EXTTEXTOUTA,
  EMR_EXTTEXTOUTW and EMR_SMALLTEXTOUT records.

  lpData must be the TList<PEMFStrInfo> that receives the elements. Every
  element added to that list is allocated with New and becomes the property of
  the list's owner. Elements whose trimmed text is empty, or whose display
  rectangle is rejected by _IsEffeetiveRect, are skipped.

  Returns a non-zero value so that the enumeration always continues.

  Changes against the previous revision:
  - lpData is declared as LPARAM and all address arithmetic goes through PByte,
    so pointers are no longer truncated to 32 bits on Win64;
  - the ExtTextOut string is located at "record start + offString", which is
    how the EMF format defines offString;
  - the wide-character small-text branches no longer release the ANSI buffer
    or read the clip rectangle through an unassigned pointer. }
function EnumTextProc(DC: HDC; lpHTable: PHANDLETABLE; EMFR: PENHMETARECORD;
  nObj: Integer; lpData: LPARAM): Integer; stdcall;
var
  nEMFElementList: TList<PEMFStrInfo>;

  { True when ARect can be used as a text display region. }
  function _IsEffeetiveRect(const ARect: TRect): Boolean;
  begin
    // NOTE(review): the numeric thresholds were missing from the listing this
    // unit was recovered from; 0 is assumed for all four - please confirm.
    Result := (not ARect.IsEmpty) and (ARect.Right > 0) and (ARect.Left > 0)
      and (ARect.Bottom - ARect.Top > 0) and (ARect.Right - ARect.Left > 0);
  end;

  { Clamps ARect so that it is no wider / taller than ASize (top-left fixed). }
  procedure _ShrinkRect(var ARect: TRect; ASize: TSize);
  var
    v: Integer;
  begin
    v := ARect.Left + ASize.cx;
    if ARect.Right > v then
      ARect.Right := v;
    v := ARect.Top + ASize.cy;
    if ARect.Bottom > v then
      ARect.Bottom := v;
  end;

  { Copies ACount 8-bit characters, cuts at the first #0 and trims the result. }
  function _AnsiText(AChars: PAnsiChar; ACount: DWORD): string;
  var
    nRaw: AnsiString;
  begin
    SetString(nRaw, AChars, Integer(ACount));
    Result := Trim(string(PAnsiChar(nRaw)));
  end;

  { Copies ACount UTF-16 characters, cuts at the first #0 and trims the result. }
  function _WideText(AChars: PWideChar; ACount: DWORD): string;
  var
    nRaw: string;
  begin
    SetString(nRaw, AChars, Integer(ACount));
    Result := Trim(string(PWideChar(nRaw)));
  end;

  { Measures AText with the reference DC, shrinks ARect to the measured size
    and, if the result is usable, appends a new element to the target list. }
  procedure _AddElement(const AText: string; const ARect: TRect);
  var
    nSize: TSize;
    nRect: TRect;
    nInfo: PEMFStrInfo;
  begin
    if AText = '' then
      Exit;

    nSize.cx := 0;
    nSize.cy := 0;
    Winapi.Windows.GetTextExtentPoint32(FReferenceDC.Canvas.Handle,
      PChar(AText), Length(AText), nSize);

    nRect := ARect;
    nRect.NormalizeRect;
    _ShrinkRect(nRect, nSize);
    if not _IsEffeetiveRect(nRect) then
      Exit;

    // Allocate only once the element is known to be kept, so nothing has to
    // be released on the rejection paths.
    New(nInfo);
    nInfo^.Text := AText;
    nInfo^.DisplayRect := nRect;
    nInfo^.LineKey := '';
    nEMFElementList.Add(nInfo);
  end;

var
  nEMRTO: PEMRExtTextOut;
  nHead: PEMRSmallTextOutHead;
  nIsAnsi, nNoClip: Boolean;
  nRect: TRect;
  nText: string;
begin
  nEMFElementList := Pointer(lpData);

  if EMFR.iType = EMR_EXTTEXTOUTA then
  begin
    nEMRTO := PEMRExtTextOut(EMFR);
    // offString is counted in bytes from the start of the record.
    nText := _AnsiText(PAnsiChar(PByte(EMFR) + nEMRTO.EMRText.offString),
      nEMRTO.EMRText.nChars);
    _AddElement(nText, nEMRTO.rclBounds);
  end
  else if EMFR.iType = EMR_EXTTEXTOUTW then
  begin
    nEMRTO := PEMRExtTextOut(EMFR);
    nText := _WideText(PWideChar(PByte(EMFR) + nEMRTO.EMRText.offString),
      nEMRTO.EMRText.nChars);
    _AddElement(nText, nEMRTO.rclBounds);
  end
  else if EMFR.iType = EMR_SMALLTEXTOUT then
  begin
    nHead := PEMRSmallTextOutHead(EMFR);
    nIsAnsi := (nHead^.fuOptions and SMALLTEXT_TYPE_ANSI) = SMALLTEXT_TYPE_ANSI;
    nNoClip := (nHead^.fuOptions and SMALLTEXT_TYPE_WITHOUT_CLIP) = SMALLTEXT_TYPE_WITHOUT_CLIP;

    if nNoClip then
    begin
      // No clip rectangle in the record: start from an open-ended region that
      // _AddElement shrinks to the measured text size.
      nRect := Rect(nHead^.ptlReference.X, nHead^.ptlReference.Y, MAXWORD, MAXWORD);
      if nIsAnsi then
        nText := _AnsiText(@PEMRSmallTextOutA(EMFR)^.cString[0], nHead^.nChars)
      else
        nText := _WideText(@PEMRSmallTextOutW(EMFR)^.cString[0], nHead^.nChars);
    end
    else
    begin
      if nIsAnsi then
      begin
        nRect := PEMRSmallTextOutClipA(EMFR)^.rclClip;
        nText := _AnsiText(@PEMRSmallTextOutClipA(EMFR)^.cString[0], nHead^.nChars);
      end
      else
      begin
        nRect := PEMRSmallTextOutClipW(EMFR)^.rclClip;
        nText := _WideText(@PEMRSmallTextOutClipW(EMFR)^.cString[0], nHead^.nChars);
      end;
      // The text starts at the reference point, not at the clip origin.
      nRect.TopLeft := nHead^.ptlReference;
    end;

    _AddElement(nText, nRect);
  end;

  // Non-zero: keep enumerating the remaining records.
  Result := 1;
end;

type
TEMFStrInfoCompare = class(TComparer<PEMFStrInfo>)
  public
    function Compare(const Left, Right: PEMFStrInfo): Integer; override;
  end;

{ TEMFStrInfoCompare }

{ Orders text elements top-to-bottom, and left-to-right within a row.

  Two elements are treated as being on the same row when the vertical centre
  of either one lies strictly inside the other's display rectangle. In that
  case the comparison also writes a shared LineKey into both records (an
  existing key of Left wins, then an existing key of Right, otherwise a new
  GUID is generated), and the order is decided by the centre X, then centre Y.
  Otherwise the order is decided by where Left's centre lies relative to Right.

  Note that the comparer deliberately modifies the compared records: the
  LineKey values are a by-product of sorting. When both records already carry
  different keys, only Right's key is overwritten; other records that shared
  Right's previous key are not updated. }
function TEMFStrInfoCompare.Compare(const Left, Right: PEMFStrInfo): Integer;
var
  nCPLeft, nCPRight: TPoint;
  nLIR, nRIL: Int8;
begin
  nCPLeft := Left^.DisplayRect.CenterPoint;
  nCPRight := Right^.DisplayRect.CenterPoint;

  // Position of Left's centre relative to Right: -1 above, 1 below, 0 inside.
  if nCPLeft.Y <= Right^.DisplayRect.Top then
    nLIR := -1
  else if nCPLeft.Y >= Right^.DisplayRect.Bottom then
    nLIR := 1
  else
    nLIR := 0;

  // Position of Right's centre relative to Left, same encoding.
  if nCPRight.Y <= Left^.DisplayRect.Top then
    nRIL := -1
  else if nCPRight.Y >= Left^.DisplayRect.Bottom then
    nRIL := 1
  else
    nRIL := 0;

  if (nLIR = 0) or (nRIL = 0) then
  begin
    if Left^.LineKey <> '' then
      Right^.LineKey := Left^.LineKey
    else if Right^.LineKey <> '' then
      Left^.LineKey := Right^.LineKey
    else
    begin
      Left^.LineKey := TGUID.NewGuid.ToString;
      Right^.LineKey := Left^.LineKey;
    end;

    { Either centre lies inside the other element's region: same row, so the
      order is decided by the horizontal position. }
    if nCPLeft.X < nCPRight.X then
      Result := -1
    else if nCPLeft.X > nCPRight.X then
      Result := 1
    else if nCPLeft.Y < nCPRight.Y then
      Result := -1
    else if nCPLeft.Y > nCPRight.Y then
      Result := 1
    else
      Result := 0;
  end
  else
  begin
    Result := nLIR;
  end;
end;

{ TEMFStrInfoList }

procedure TEMFStrInfoList.Append(AEMF: TMetafile; var AHeight: Integer);
{ Extracts every text element of AEMF, sorts the elements top-left to
  bottom-right and appends them to this list.

  AHeight returns the largest Bottom coordinate found among the new elements
  (0 when the metafile holds no usable text). Each new element's rectangle is
  shifted down by the height accumulated from earlier Append calls, and the
  accumulated height then grows by AHeight. The row-grouped text is appended
  to the JSON string exposed through JSONStr. }
var
  nList: TList<PEMFStrInfo>;
  i, nMoved, nIndex: Integer;
  nCompare: TEMFStrInfoCompare;
  nPI: PEMFStrInfo;
  nTmpLineKey, nTmpJSONStr: string;
  nJ, nJLine: TQJson;
begin
  nMoved := 0;
  nList := TList<PEMFStrInfo>.Create;
  try
    { Read the metafile's text elements into the temporary list. }
    EnumEnhMetafile(0, AEMF.Handle, @EnumTextProc, Pointer(nList), Rect(0, 0, 0, 0));

    nCompare := TEMFStrInfoCompare.Create;
    try
      { Sort. }
      try
        nList.Sort(nCompare);
      finally
        nCompare.Free;
      end;
    except
      // Deliberately best-effort: the comparer is not a strict ordering, so a
      // failed sort leaves the elements in whatever order was reached.
    end;

    { Compute the maximum height and register every element. }
    AHeight := 0;
    nJ := TQJson.Create;
    try
      // nJ.TryParse(FJSONStrs);
      nJ.DataType := jdtArray;
      nJLine := nil;
      nTmpLineKey := '';
      for i := 0 to nList.Count - 1 do
      begin
        nPI := nList[i];
        if nPI^.LineKey = '' then
          nPI^.LineKey := TGUID.NewGuid.ToString; { element matched no row: give it its own key }

        { A different key than the previous element's means a new row. }
        if (nTmpLineKey = '') or (not SameText(nTmpLineKey, nPI^.LineKey)) then
          nJLine := nil;
        { Remember the current row key. }
        nTmpLineKey := nPI^.LineKey;

        if nPI^.DisplayRect.Bottom > AHeight then
          AHeight := nPI^.DisplayRect.Bottom;

        // Stack this metafile below the ones appended earlier.
        OffsetRect(nPI^.DisplayRect, 0, FMaxHeight);

        // From here on FList owns the record.
        nIndex := FList.Add(nPI);
        Inc(nMoved);
        FDic.AddOrSetValue(nPI^.Text, nIndex);

        if (nJLine = nil) then
          nJLine := nJ.AddArray('');
        nJLine.Add.AsString := nPI^.Text;
      end;

      // Strip the enclosing '[' and ']' - GetJSONStrs adds them back around
      // the concatenation of all appended fragments.
      nTmpJSONStr := nJ.Encode(False);
      nTmpJSONStr := Copy(nTmpJSONStr, 2, Length(nTmpJSONStr) - 2);
      // A metafile without text yields an empty fragment; appending it would
      // leave a dangling comma and make JSONStr invalid.
      if nTmpJSONStr <> '' then
      begin
        if FJSONStrs = '' then
          FJSONStrs := nTmpJSONStr
        else
          FJSONStrs := FJSONStrs + ',' + nTmpJSONStr;
      end;
    finally
      nJ.Free;
    end;
    FMaxHeight := FMaxHeight + AHeight;
  finally
    // Release the records that never reached FList (only on an error path).
    for i := nMoved to nList.Count - 1 do
      Dispose(nList[i]);
    nList.Free;
  end;
end;

procedure TEMFStrInfoList.Clear;
{ Releases every stored element and resets the list to its freshly created
  state (no elements, no lookup entries, zero height, empty JSON text). }
var
  i: Integer;
begin
  FMaxHeight := 0;
  FJsonStrs := '';
  // The list stores raw record pointers, so each one is released by hand.
  for i := 0 to FList.Count - 1 do
    Dispose(FList[i]);
  FList.Clear;
  FDic.Clear;
end;

constructor TEMFStrInfoList.Create;
{ Creates an empty list with its backing containers. }
begin
  inherited Create;
  FList := TList<PEMFStrInfo>.Create;
  FDic := TDictionary<string, UInt32>.Create;
  FMaxHeight := 0;
  FJsonStrs := '';
end;

destructor TEMFStrInfoList.Destroy;
{ Releases every stored element record, then the containers. }
var
  i: Integer;
begin
  // Destroy also runs when the constructor raised, in which case FList may
  // still be nil.
  if FList <> nil then
    for i := 0 to FList.Count - 1 do
      Dispose(FList[i]);
  FList.Free;
  FDic.Free;
  inherited;
end;

function TEMFStrInfoList.GetCount: UInt32;
{ Number of elements currently stored. }
begin
  Exit(FList.Count);
end;

{ Returns a copy of the element stored at Index. }
function TEMFStrInfoList.GetItem(Index: UInt32): TEMFStrInfo;
var
  nEntry: PEMFStrInfo;
begin
  nEntry := FList[Index];
  Result := nEntry^;
end;

function TEMFStrInfoList.GetJSONStrs: string;
{ Wraps the accumulated row fragments in brackets to form one JSON array. }
begin
  Result := '[' + FJSONStrs + ']';
end;

{ Narrows the accumulated JSON text down with a chain of regular expressions.

  The first pattern is matched against the raw JSON fragments (without the
  enclosing brackets); every following pattern is matched against the previous
  pattern's first match. The chain stops early as soon as a pattern finds
  nothing. With no patterns at all, the raw fragments are returned unchanged.

  AResult receives the final match ('' when nothing matched); the function
  result tells whether AResult is non-empty. Any error raised while matching
  is re-raised as an Exception carrying the original message. }
function TEMFStrInfoList.StrAnalyze(ALeavePattern: array of string; var AResult: string): Boolean;

  { First match of APattern in AData, or '' when there is none. }
  function _RegExAnalyze(AData, APattern: string): string;
  var
    nMatch: TMatch;
  begin
    // A string function result is not cleared automatically, so it must be
    // set explicitly for the "no match" case.
    Result := '';
    nMatch := TRegEx.Match(AData, APattern, [roMultiLine]);
    if nMatch.Success then
      Result := nMatch.Value;
  end;

var
  i: Integer;
  nTmpData: string;
begin
  AResult := '';
  try
    nTmpData := FJSONStrs;
    for i := Low(ALeavePattern) to High(ALeavePattern) do
    begin
      nTmpData := _RegExAnalyze(nTmpData, ALeavePattern[i]);
      if nTmpData = '' then
        Break;
    end;
    AResult := nTmpData;
  except
    on E: Exception do
      raise Exception.CreateFmt('正则分析失败[%s]', [E.Message]);
  end;
  Result := AResult <> '';
end;

function TEMFStrInfoList.TryGetInfo(AInfoName: string; var AInfo: TEMFStrInfo; var AIndex: UInt32): Boolean;
{ Looks up the element whose text equals AInfoName. On success AIndex holds
  its position and AInfo a copy of the element; on failure AInfo is left
  untouched. }
begin
  if not FDic.TryGetValue(AInfoName, AIndex) then
    Exit(False);

  AInfo := FList[AIndex]^;
  Result := True;
end;

initialization
// Unit start-up: create the off-screen bitmap whose canvas handle EnumTextProc
// passes to GetTextExtentPoint32 when measuring text.
FReferenceDC := VCL.Graphics.TBitmap.Create;
with FReferenceDC do
begin
PixelFormat := pf24bit;
// NOTE(review): the numeric values of Width and Height are missing from this
// listing and cannot be recovered from the rest of the unit - restore them
// from the original source before compiling.
Width := ;
Height := ;
// Unit shutdown: release the reference bitmap.
end; finalization
FreeAndNil(FReferenceDC); end.

获取EMF文件内全部文字, 并按照左上到右下的顺序排序的更多相关文章

  1. 从运行时的工作空间获取EMF文件(IFILE)

    //EMFFILE_URI为EMF文件的URI String uriString = EMFFILE_URI.trimFragment().toPlatformString(true); if (ur ...

  2. python 获取excel文件内sheet名称列表

    xl = pd.ExcelFile('foo.xls') xl.sheet_names # see all sheet names xl.parse(sheet_name) # read a spec ...

  3. 使用GridView来获取xml文件数据

    在任何一个系统中,数据的读取和编辑都是至关重要的.无论你是CS还是BS,都需要对数据进行操作.其实 我们可以发现,很多软件和系统最终都是对于数据库中数据的处理.之前在CS的学习过程中我们接触到了很多 ...

  4. 编写Java程序,在硬盘中选取一个 txt 文件,读取该文档的内容后,追加一段文字“[ 来自新华社 ]”,保存到一个新的 txt 文件内

    查看本章节 查看作业目录 需求说明: 在硬盘中选取一个 txt 文件,读取该文档的内容后,追加一段文字"[ 来自新华社 ]",保存到一个新的 txt 文件内 实现思路: 创建 Sa ...

  5. XML序列化 判断是否是手机 字符操作普通帮助类 验证数据帮助类 IO帮助类 c# Lambda操作类封装 C# -- 使用反射(Reflect)获取dll文件中的类型并调用方法 C# -- 文件的压缩与解压(GZipStream)

    XML序列化   #region 序列化 /// <summary> /// XML序列化 /// </summary> /// <param name="ob ...

  6. 在Autodesk Vault 2014中使用VDF(Vault Development Framework) API获取所有文件的属性信息

      这几天在玩儿Vault API, 从Autodesk Vault 2014开始提供了Vault Development Framework(VDF) API,让开发工作更简单了.在Vault 20 ...

  7. 关于 MAXScript 获取全部文件

    MAXScript 官方文档里关于获取文件夹下所有文件的方法 fn getFilesRecursive root pattern = ( dir_array = GetDirectories (roo ...

  8. SNF开发平台WinForm之十三-单独从服务器上获取PDF文件进行显示-SNF快速开发平台3.3-Spring.Net.Framework

    1运行效果: 2开发实现: 如果需要单独显示PDF文件时用下面代码去实现,指定url地址. 地址: . 获取附件管理的实体对象: List<KeyValuePair<string, obj ...

  9. vue 双语言切换中,data内翻译文字不正常切换的解决方案

    背景 有这么一个登录页面,相关功能如下: 支持双语言,点击切换语言 表单内部有一个自定义的select,里面option的label.value都是的名字由外部提供:其中预设的option的label ...

随机推荐

  1. 自定义el函数

    1.1.1 自定义EL函数(EL调用Java的函数) 第一步:创建一个Java类.方法必须是静态方法. public static String sayHello(String name){ retu ...

  2. JTabbedPane 和 JScrollBar 联合使用

    需求:实现一个JTabbed, 当下拉到Tabbed的底部时,自动加载下一次的数据. 下面是具体代码: import java.awt.*; import javax.swing.table.Defa ...

  3. 伪随机数(线性同余法)C语言

    /**Keil Lib*2015.6.12*Pass*by lort*/uint32 Srandx ; uint32 SrandK = 1103515245;//0x41C64E6D;uint32 S ...

  4. web前端学习部落22群开源分享 左边菜单导航

    有大量web前端开发工具及学习资料,可以搜群[ web前端学习部落22群 ]进行下载,遇到学习问题也可以问群内专家以及课程老师哟 <!DOCTYPE html> <html lang ...

  5. LeetCode之387. First Unique Character in a String

    -------------------------------------------------- 最开始的想法是统计每个字符的出现次数和位置,如下: AC代码: public class Solu ...

  6. PHP基础之POST与GET

    post 与 get区别 *.Post传输数据时,不需要在URL中显示出来,而Get方法要在URL中显示.*.Post传输的数据量大,可以达到2M,而Get方法由于受到URL长度的限制,只能传递大约1 ...

  7. CSS3颜色渐变模式

       1.线性渐变:linear-gradient 语法:<linear-gradient> = linear-gradient([ [ <angle> | to <si ...

  8. xml 基础学习备忘

    <?xml version="1.0" encoding="UTF-8"? standalone="yes"> 这里的encod ...

  9. Myeclipse 运行maven控制台中文乱码

    需要在pom中的properties中增加一行配置: <properties> <argLine>-Dfile.encoding=UTF-8</argLine> & ...

  10. Struts2漏洞利用实例

    Struts2漏洞利用实例 如果存在struts2漏洞的站,administrator权限,但是无法加管理组,内网,shell访问500. 1.struts2 漏洞原理:struts2是一个框架,他在 ...