程序调用：

using iTextSharp.text.pdf;

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using System.Threading.Tasks;

namespace TestIText

{

    /// <summary>
    /// Console demo: extracts the text of one page of a PDF with
    /// <c>PdfHelper.LocationTextExtractionStrategyEx</c> and prints it.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Opens the sample PDF, runs the extraction strategy over its first page,
        /// writes the resulting text to the console and waits for Enter.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            PdfReader readerTemp = new PdfReader(@"D:\_Number position.pdf");
            try
            {
                PdfHelper.LocationTextExtractionStrategyEx pz = new PdfHelper.LocationTextExtractionStrategyEx();
                iTextSharp.text.pdf.parser.PdfReaderContentParser p = new iTextSharp.text.pdf.parser.PdfReaderContentParser(readerTemp);

                // iText page numbers are 1-based; this processes the first page only.
                p.ProcessContent<PdfHelper.LocationTextExtractionStrategyEx>(1, pz);

                // Extracted text; per-line position data is available via pz.TextLocationInfo.
                Console.WriteLine(pz.GetResultantText());
            }
            finally
            {
                // Release the underlying file handle even if parsing throws.
                readerTemp.Close();
            }

            Console.ReadLine();
        }
    }

}

PdfHelper帮助类：

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using iTextSharp.text.pdf.parser;

namespace PdfHelper

{

    /// <summary>

    /// Taken from http://www.java-frameworks.com/java/itext/com/itextpdf/text/pdf/parser/LocationTextExtractionStrategy.java.html

    /// </summary>

    class LocationTextExtractionStrategyEx : LocationTextExtractionStrategy

    {

        // Text chunks collected by RenderText. GetResultantText sorts this list in place.
        // readonly: the list instance is never replaced, only mutated.
        private readonly List<TextChunk> m_locationResult = new List<TextChunk>();

        // Per-line text/position records filled in by GetResultantText.
        // readonly: the list instance is never replaced, only mutated.
        private readonly List<TextInfo> m_TextLocationInfo = new List<TextInfo>();

        /// <summary>
        /// The text chunks captured so far (the live backing list, not a copy).
        /// </summary>
        public List<TextChunk> LocationResult
        {
            get { return m_locationResult; }
        }

        /// <summary>
        /// The text/position records produced by <see cref="GetResultantText"/>
        /// (the live backing list, not a copy).
        /// </summary>
        public List<TextInfo> TextLocationInfo
        {
            get { return m_TextLocationInfo; }
        }

        /// <summary>
        /// Creates a new LocationTextExtractionStrategyEx
        /// </summary>
        public LocationTextExtractionStrategyEx()
        {
        }

        /// <summary>
        /// Returns the result so far. Sorts the captured chunks, joins chunks that lie on
        /// the same line (inserting a space where the gap suggests one), separates lines
        /// with '\n', and rebuilds <see cref="TextLocationInfo"/> with one entry per line.
        /// </summary>
        /// <returns>a String with the resulting text</returns>
        public override String GetResultantText()
        {
            m_locationResult.Sort();

            // Rebuild the per-line info from scratch; otherwise every additional call
            // would append a duplicate set of entries.
            m_TextLocationInfo.Clear();

            StringBuilder sb = new StringBuilder();
            TextChunk lastChunk = null;
            TextInfo lastTextInfo = null;

            foreach (TextChunk chunk in m_locationResult)
            {
                if (lastChunk != null && chunk.sameLine(lastChunk))
                {
                    // Gap between the end of the previous chunk and the start of this one,
                    // measured along the text direction (negative when they overlap).
                    float dist = chunk.distanceFromEndOf(lastChunk);

                    bool needsSpace;
                    if (dist < -chunk.CharSpaceWidth)
                    {
                        // This chunk starts more than one space-width before the previous one ended.
                        needsSpace = true;
                    }
                    else
                    {
                        // append a space if the trailing char of the prev string wasn't a space
                        // && the 1st char of the current string isn't a space
                        needsSpace = dist > chunk.CharSpaceWidth / 2.0f
                            && !StartsWithSpace(chunk.Text)
                            && !EndsWithSpace(lastChunk.Text);
                    }

                    if (needsSpace)
                    {
                        sb.Append(' ');
                        lastTextInfo.addSpace();
                    }

                    sb.Append(chunk.Text);
                    lastTextInfo.appendText(chunk);
                }
                else
                {
                    // First chunk overall, or first chunk of a new line.
                    if (lastChunk != null)
                    {
                        sb.Append('\n');
                    }

                    sb.Append(chunk.Text);
                    lastTextInfo = new TextInfo(chunk);
                    m_TextLocationInfo.Add(lastTextInfo);
                }

                lastChunk = chunk;
            }

            return sb.ToString();
        }

        // True when text is non-empty and begins with a space; null/empty counts as "no".
        private static bool StartsWithSpace(string text)
        {
            return !string.IsNullOrEmpty(text) && text[0] == ' ';
        }

        // True when text is non-empty and ends with a space; null/empty counts as "no".
        private static bool EndsWithSpace(string text)
        {
            return !string.IsNullOrEmpty(text) && text[text.Length - 1] == ' ';
        }

        /// <summary>
        /// Captures one rendered piece of text as a <see cref="TextChunk"/> (its text,
        /// baseline start/end points, single-space width, ascent line and descent line)
        /// and appends it to the collected chunks.
        /// </summary>
        /// <param name="renderInfo">render information supplied by the content parser</param>
        public override void RenderText(TextRenderInfo renderInfo)
        {
            LineSegment baseline = renderInfo.GetBaseline();

            string text = renderInfo.GetText();
            Vector start = baseline.GetStartPoint();
            Vector end = baseline.GetEndPoint();
            float spaceWidth = renderInfo.GetSingleSpaceWidth();
            LineSegment ascent = renderInfo.GetAscentLine();
            LineSegment descent = renderInfo.GetDescentLine();

            m_locationResult.Add(new TextChunk(text, start, end, spaceWidth, ascent, descent));
        }

        /// <summary>
        /// A piece of rendered text together with its baseline end points, its orientation,
        /// and its position relative to that orientation. Sortable by orientation, then
        /// perpendicular distance, then position along the text direction.
        /// </summary>
        public class TextChunk : IComparable, ICloneable
        {
            string m_text;
            Vector m_startLocation;
            Vector m_endLocation;

            // Unit vector pointing from the start to the end of the baseline.
            Vector m_orientationVector;

            // Baseline angle (Atan2 of the orientation vector) scaled by 1000 and truncated
            // to an int, so orientations can be compared exactly.
            int m_orientationMagnitude;

            // Truncated perpendicular offset of the baseline; chunks on one line share this value.
            int m_distPerpendicular;

            // Projections of the start / end points onto the orientation vector.
            float m_distParallelStart;
            float m_distParallelEnd;

            float m_charSpaceWidth;

            public LineSegment AscentLine;
            public LineSegment DecentLine;

            /// <summary>
            /// Creates a new chunk from this chunk's text, end points, space width and
            /// ascent/descent lines (the line and vector objects themselves are shared).
            /// </summary>
            /// <returns>the new <see cref="TextChunk"/></returns>
            public object Clone()
            {
                TextChunk copy = new TextChunk(m_text, m_startLocation, m_endLocation, m_charSpaceWidth, AscentLine, DecentLine);
                return copy;
            }

            /// <summary>The text of this chunk.</summary>
            public string Text
            {
                get { return m_text; }
                set { m_text = value; }
            }

            /// <summary>The single-space width recorded for this chunk.</summary>
            public float CharSpaceWidth
            {
                get { return m_charSpaceWidth; }
                set { m_charSpaceWidth = value; }
            }

            /// <summary>Start point of the baseline. Setting it does not recompute the derived orientation data.</summary>
            public Vector StartLocation
            {
                get { return m_startLocation; }
                set { m_startLocation = value; }
            }

            /// <summary>End point of the baseline. Setting it does not recompute the derived orientation data.</summary>
            public Vector EndLocation
            {
                get { return m_endLocation; }
                set { m_endLocation = value; }
            }

            /// <summary>
            /// Represents a chunk of text, it's orientation, and location relative to the orientation vector
            /// </summary>
            /// <param name="txt">the text of the chunk</param>
            /// <param name="startLoc">start point of the baseline</param>
            /// <param name="endLoc">end point of the baseline</param>
            /// <param name="charSpaceWidth">width of a single space for this chunk</param>
            /// <param name="ascentLine">ascent line of the chunk</param>
            /// <param name="decentLine">descent line of the chunk</param>
            public TextChunk(string txt, Vector startLoc, Vector endLoc, float charSpaceWidth, LineSegment ascentLine, LineSegment decentLine)
            {
                m_text = txt;
                m_startLocation = startLoc;
                m_endLocation = endLoc;
                m_charSpaceWidth = charSpaceWidth;
                AscentLine = ascentLine;
                DecentLine = decentLine;

                Vector direction = m_endLocation.Subtract(m_startLocation);

                // A zero-length baseline cannot be normalized (division by zero gives NaN);
                // treat such a chunk as horizontal instead.
                if (direction[Vector.I1] == 0f && direction[Vector.I2] == 0f && direction[Vector.I3] == 0f)
                {
                    direction = new Vector(1, 0, 0);
                }

                m_orientationVector = direction.Normalize();

                // Angle in radians scaled by 1000 and truncated, giving an int that is safe to compare.
                m_orientationMagnitude = (int)(Math.Atan2(m_orientationVector[Vector.I2], m_orientationVector[Vector.I1]) * 1000);

                // see http://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html
                // the two vectors we are crossing are in the same plane, so the result will be purely
                // in the z-axis (out of plane) direction, so we just take the I3 component of the result
                Vector origin = new Vector(0, 0, 1);
                m_distPerpendicular = (int)((m_startLocation.Subtract(origin)).Cross(m_orientationVector)[Vector.I3]);

                m_distParallelStart = m_orientationVector.Dot(m_startLocation);
                m_distParallelEnd = m_orientationVector.Dot(m_endLocation);
            }

            /// <summary>
            /// true if this location is on the the same line as the other text chunk
            /// </summary>
            /// <param name="textChunkToCompare">the location to compare to</param>
            /// <returns>true if this location is on the the same line as the other</returns>
            public bool sameLine(TextChunk textChunkToCompare)
            {
                if (m_orientationMagnitude != textChunkToCompare.m_orientationMagnitude) return false;
                if (m_distPerpendicular != textChunkToCompare.m_distPerpendicular) return false;
                return true;
            }

            /// <summary>
            /// Computes the distance between the end of 'other' and the beginning of this chunk
            /// in the direction of this chunk's orientation vector.  Note that it's a bad idea
            /// to call this for chunks that aren't on the same line and orientation, but we don't
            /// explicitly check for that condition for performance reasons.
            /// </summary>
            /// <param name="other">the chunk that precedes this one</param>
            /// <returns>the distance between the end of 'other' and the beginning of this chunk (negative when they overlap)</returns>
            public float distanceFromEndOf(TextChunk other)
            {
                float distance = m_distParallelStart - other.m_distParallelEnd;
                return distance;
            }

            /// <summary>
            /// Compares based on orientation, perpendicular distance, then parallel distance
            /// </summary>
            /// <param name="obj">the <see cref="TextChunk"/> to compare with</param>
            /// <returns>0 for the same instance; otherwise a negative or positive value giving the sort order</returns>
            /// <exception cref="ArgumentException">obj is null or is not a TextChunk</exception>
            public int CompareTo(object obj)
            {
                TextChunk rhs = obj as TextChunk;
                if (rhs == null) throw new ArgumentException("Object is not a TextChunk");

                // Sorting may compare an element with itself; that must report equality.
                if (ReferenceEquals(this, rhs)) return 0;

                int rslt;

                rslt = m_orientationMagnitude - rhs.m_orientationMagnitude;
                if (rslt != 0) return rslt;

                rslt = m_distPerpendicular - rhs.m_distPerpendicular;
                if (rslt != 0) return rslt;

                // note: it's never safe to check floating point numbers for equality, and if two chunks
                // are truly right on top of each other, which one comes first or second just doesn't matter
                // so we arbitrarily choose this way.
                rslt = m_distParallelStart < rhs.m_distParallelStart ? -1 : 1;
                return rslt;
            }
        }

        /// <summary>
        /// Accumulated text plus two corner points: TopLeft comes from the first chunk's
        /// ascent-line start point, BottomRight from the most recently added chunk's
        /// descent-line end point.
        /// </summary>
        public class TextInfo
        {
            public Vector TopLeft;
            public Vector BottomRight;

            private string m_Text;

            /// <summary>The text accumulated so far.</summary>
            public string Text
            {
                get
                {
                    return this.m_Text;
                }
            }

            /// <summary>
            /// Create a TextInfo.
            /// </summary>
            /// <param name="initialTextChunk">the chunk supplying the initial text and both corner points</param>
            public TextInfo(TextChunk initialTextChunk)
            {
                this.TopLeft = initialTextChunk.AscentLine.GetStartPoint();
                this.BottomRight = initialTextChunk.DecentLine.GetEndPoint();
                this.m_Text = initialTextChunk.Text;
            }

            /// <summary>
            /// Add more text to this TextInfo; BottomRight moves to the end of the added chunk.
            /// </summary>
            /// <param name="additionalTextChunk">the chunk to append</param>
            public void appendText(TextChunk additionalTextChunk)
            {
                this.BottomRight = additionalTextChunk.DecentLine.GetEndPoint();
                this.m_Text = this.m_Text + additionalTextChunk.Text;
            }

            /// <summary>
            /// Add a space to the TextInfo.  This will leave the endpoint out of sync with the text.
            /// The assumption is that you will add more text after the space which will correct the endpoint.
            /// </summary>
            public void addSpace()
            {
                this.m_Text = this.m_Text + ' ';
            }
        }

    }

}

C# 使用itextsharp 读取pdf中文字坐标的更多相关文章

Java 读取PDF中的表格
一.概述本文以Java示例展示读取PDF中的表格的方法.这里导入Spire.PDF for Java中的jar包,并使用其提供的相关类及方法来实现获取表格中的文本内容.下表中整理了本次代码使用到的主 ...
Java 读取PDF中的文本和图片
本文将介绍通过Java程序来读取PDF文档中的文本和图片的方法.分别调用方法extractText()和extractImages()来读取. 使用工具:Free Spire.PDF for Ja ...
读取pdf中的内容
import com.spire.pdf.PdfDocument;import com.spire.pdf.PdfPageBase;import java.io.*; public class Ext ...
利用百度AI OCR图片识别，Java实现PDF中的图片转换成文字
序言:我们在读一些PDF版书籍的时候,如果PDF中不是图片,做起读书笔记的还好:如果PDF中的是图片的话,根本无法编辑,做起笔记来,还是很痛苦的.我是遇到过了.我们搞技术的,当然得自己学着解决现在的痛 ...
SQL 横转竖、竖专横（转载）使用Dapper.Contrib 开发.net core程序，兼容多种数据库 C# 读取PDF多级书签 Json.net日期格式化设置 ASPNET 下载共享文件 ASPNET 文件批量下载递归，循环，尾递归利用IDisposable接口构建包含非托管资源对象《.NET 进阶指南》读书笔记2------定义不可改变类型
SQL 横转竖 .竖专横 (转载) 普通行列转换问题:假设有张学生成绩表(tb)如下: 姓名课程分数张三语文 74 张三数学 83 张三物理 93 李四语文 74 李四数学 84 ...
java读取pdf文本转换html
补充:一下代码基于maven,现将依赖的jar包单独导出地址:pdf jar 完整代码地址也就两个文件 java读取pdf中的纯文字,这里使用的是pdfbox工具包 maven引入如下配置 < ...
C# 读取PDF多级书签
在PDF中,书签作为一种导航的有效工具,能帮助我们快速地定位到文档中的指定段落.同时,书签也能让人对文档结构一目了然,在某种程度上也可作为目录使用.对于C#操作PDF中的书签,在上一篇文章中介绍了具体 ...
Java 添加、替换、删除PDF中的图片
概述本文介绍通过java程序向PDF文档添加图片,以及替换和删除PDF中已有的图片.另外,关于图片的操作还可参考设置PDF 图片背景.设置PDF图片水印.读取PDF中的图片.将PDF保存为图片等文章 ...
在.NET中使用iTextSharp创建/读取PDF报告： Part I [翻译]
原文地址:Create/Read Advance PDF Report using iTextSharp in C# .NET: Part I By Debopam Pal, 27 Nov 20 ...

随机推荐

201521123052《Java程序设计》第8周学习总结
1. 本周学习总结 1.1 以你喜欢的方式(思维导图或其他)归纳总结集合与泛型相关内容. 1.2 选做:收集你认为有用的代码片段 2. 书面作业本次作业题集集合 1.List中指定元素的删除(题目4 ...
201521123035《Java程序设计》第四周学习总结
本周学习总结 1.1 尝试使用思维导图总结有关继承的知识点. 1.2 使用常规方法总结其他上课内容. 本周还讲了注释与类设计.老师用例子向我们展示实际生活中一个类里面包含了哪些属性,并由此联想到如果自 ...
201521123081《java程序设计》第11周学习总结
1. 本周学习总结 1.1 以你喜欢的方式(思维导图或其他)归纳总结多线程相关内容. 参考资料:XMind ============================================== ...
201521123039 《java程序设计》第十周学习总结
1. 本周学习总结 2. 书面作业本次PTA作业题集异常.多线程 finally 题目4-2 1.1 截图你的提交结果(出现学号) 1.2 4-2中finally中捕获异常需要注意什么? 总结:需要 ...
201521123101 《Java程序设计》第10周学习总结
1.本周学习总结 2.书面作业 1.finally,题目4-2 1.1 截图你的提交结果(出现学号) 1.2 4-2中finally中捕获异常需要注意什么? finally不管是否捕获到异常,始终会被 ...
201521123090《Java程序设计》第10周学习总结
本周学习总结 1.1 以你喜欢的方式(思维导图或其他)归纳总结异常与多线程相关内容. 书面作业本次PTA作业题集异常.多线程 finally 题目4-2 1.1 截图你的提交结果(出现学号) 1.2 ...
CSS3 3D环境实现立体魔方效果代码
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8&quo ...
SAP中常用SM系列事务代码总结
SM01 锁定事物 SM02 系统信息 SM04 显示在线用户 SM12 删除,显示锁对象 SM13 看update request SM21 看下系统日志 SM30|SM31 维护table|vi ...
06jQuery-04-DOM操作
jQuery既然是为了帮助你能从js的繁琐中解脱出来,自然在DOM操作上也有自己的一套. 1.修改Text和HTML 之前我们提到过,如果用JS的话,你要修改Text或者HTML需要用到其innerH ...
mybatis-generato的功能扩展
项目代码地址:https://github.com/whaiming/java-generator 我在原有的基础上扩展了和修改了一些功能: 1.增加获取sqlServer数据库字段注释功能 2.Ma ...

C# 使用itextsharp 读取pdf中文字坐标

程序调用：

PdfHelper帮助类：

C# 使用itextsharp 读取pdf中文字坐标的更多相关文章

随机推荐

热门专题