POI实现word文档转html文件

POI word文件转html

package com.feiruo.officeConvert;

import java.io.BufferedWriter;

import java.io.File;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.io.OutputStreamWriter;

import java.util.List;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.TransformerException;

import org.apache.poi.hwpf.usermodel.Picture;

/**
 * Base class for document converters: holds the output directory, the image
 * directory, the converted content and the output encoding, and knows how to
 * write the converted content to disk.
 */
public abstract class OfficeConvert {

    /** Directory the extracted images are stored in (normalized by {@link #checkSetPath}). */
    private String imgPath = null;

    /** Directory the converted file is stored in (normalized by {@link #checkSetPath}). */
    private String parentPath = null;

    /** Converted file content; {@code null} until {@link #setFileContent} is called. */
    private String fileContent = null;

    /** Charset name used when writing the converted file. */
    private String encode = "UTF-8";

    /**
     * Converts the given document.
     *
     * @param docPath
     *            path of the source document
     * @throws FileNotFoundException
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws TransformerException
     */
    public abstract void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException;

    /**
     * Writes the current file content to disk using the configured encoding.
     *
     * <p>The configured parent directory is created first when it is set and
     * does not exist yet. I/O failures are reported on standard error and do
     * not propagate, matching the historical behaviour of this method.
     *
     * @param filepath
     *            path of the file the converted content is saved to
     * @throws NullPointerException
     *             if no file content has been set
     */
    public void writeFile(String filepath) {
        // Fail before the target file is created or truncated.
        if (fileContent == null) {
            throw new NullPointerException(
                    "fileContent is null; nothing to write to " + filepath);
        }

        if (parentPath != null) {
            File dir = new File(parentPath);
            if (!dir.exists()) {
                dir.mkdirs();
            }
        }

        // try-with-resources closes both streams, also when the writer cannot
        // be created (e.g. unsupported encoding) or when the write fails.
        try (FileOutputStream fos = new FileOutputStream(new File(filepath));
                BufferedWriter bw = new BufferedWriter(
                        new OutputStreamWriter(fos, encode))) {
            bw.write(fileContent);
        } catch (IOException ioe) {
            // Covers FileNotFoundException and UnsupportedEncodingException too.
            ioe.printStackTrace();
        }
    }

    /**
     * Normalizes a directory path: removes double quotes, trims surrounding
     * whitespace, makes sure a non-empty path ends with {@code "/"} and
     * replaces {@code >} / {@code <} with {@code &gt;} / {@code &lt;}.
     *
     * <p>Characters are handled at every position, including index 0, and the
     * quotes are removed before the trailing slash is checked so that a quoted
     * path does not end up with a doubled slash.
     *
     * @param path
     *            raw path; must not be {@code null}
     * @return the normalized path; an empty input stays empty
     */
    public String checkSetPath(String path) {
        String cleaned = path.replace("\"", "").trim();
        if (!cleaned.isEmpty() && !cleaned.endsWith("/")) {
            cleaned += "/";
        }
        // TODO decide whether '*' should be stripped from the path as well.
        return cleaned.replace(">", "&gt;").replace("<", "&lt;");
    }

    /**
     * @return the charset name used when writing the converted file
     */
    public String getEncode() {
        return encode;
    }

    /**
     * @param encode
     *            charset name used when writing the converted file
     */
    public void setEncode(String encode) {
        this.encode = encode;
    }

    /**
     * Returns the image directory.
     *
     * @return the normalized image directory, or {@code null} if never set
     */
    public String getImgPath() {
        return imgPath;
    }

    /**
     * Sets the directory the images are stored in.
     *
     * @param imgPath
     *            image directory name; normalized through {@link #checkSetPath}
     */
    public void setImgPath(String imgPath) {
        this.imgPath = checkSetPath(imgPath);
    }

    /**
     * Returns the directory the converted file is stored in.
     *
     * @return the normalized parent directory, or {@code null} if never set
     */
    public String getParentPath() {
        return parentPath;
    }

    /**
     * Sets the directory the converted file is stored in.
     *
     * @param parentPath
     *            output directory; normalized through {@link #checkSetPath}
     */
    public void setParentPath(String parentPath) {
        this.parentPath = checkSetPath(parentPath);
    }

    /**
     * Returns the converted file content.
     *
     * @return the content, or {@code null} if none has been set
     */
    public String getFileContent() {
        return fileContent;
    }

    /**
     * @param content
     *            converted file content to be written by {@link #writeFile}
     */
    public void setFileContent(String content) {
        this.fileContent = content;
    }

}

package com.feiruo.officeConvert;

import java.io.ByteArrayOutputStream;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.FileOutputStream;

import java.io.IOException;

import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.OutputKeys;

import javax.xml.transform.Transformer;

import javax.xml.transform.TransformerException;

import javax.xml.transform.TransformerFactory;

import javax.xml.transform.dom.DOMSource;

import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.converter.PicturesManager;

import org.apache.poi.hwpf.converter.WordToHtmlConverter;

import org.apache.poi.hwpf.usermodel.Picture;

import org.apache.poi.hwpf.usermodel.PictureType;

import org.w3c.dom.Document;

/**
 * Converts a *.doc document into an *.html file.
 *
 * @author Jdk.feiruo.
 * @since JDK 1.7 POI 3.8
 * @version 1.0
 */
public class DocToHtml extends OfficeConvert implements IOfficeConvert {

    /** Pictures of the last converted document; {@code null} until {@link #convert} has run. */
    private List<Picture> pics = null;

    /**
     * @param parentPath
     *            directory the html file is stored in
     * @param imageppth
     *            directory the html images are stored in
     * @param encoding
     *            encoding of the generated html
     */
    public DocToHtml(String parentPath, String imageppth, String encoding) {
        // The setters already normalize through checkSetPath, so the paths
        // are passed in raw instead of being normalized twice.
        setParentPath(parentPath);
        setImgPath(imageppth);
        this.setEncode(encoding);
    }

    public DocToHtml() {
    }

    /**
     * Converts a *.doc document to html and stores the result as this
     * converter's file content; the document's pictures are remembered so
     * that {@link #writeWithName} can save them.
     *
     * @param docPath
     *            location of the *.doc document
     * @throws FileNotFoundException
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws TransformerException
     */
    @Override
    public void convert(String docPath) throws FileNotFoundException,
            IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDocument;
        // Close the input stream deterministically, also when the file is
        // rejected while the document is being loaded.
        try (FileInputStream in = new FileInputStream(docPath)) {
            wordDocument = new HWPFDocument(in);
        }

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder()
                        .newDocument());
        // Pictures are written later by writeWithName; here only the name
        // that ends up in the <img src="..."> attribute is supplied.
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {
            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                return suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);
        pics = wordDocument.getPicturesTable().getAllPictures();

        Document htmlDocument = wordToHtmlConverter.getDocument();
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, this.getEncode());
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

        // Decode with the same charset the serializer was told to write;
        // the platform default would corrupt non-ASCII text when it differs.
        String htmlContent = out.toString(this.getEncode());

        // Prefix every image reference with the image directory. A literal
        // replace is used so '$' or '\' in the path are not treated as
        // regex replacement syntax.
        String imgPath = getImgPath();
        if (imgPath != null) {
            htmlContent = htmlContent.replace("<img src=\"", "<img src=\"" + imgPath);
        }
        setFileContent(htmlContent);
    }

    /**
     * Saves the pictures collected by {@link #convert} into the image
     * directory below the parent directory, then writes the html content to
     * {@code <parentPath><fileName>.html}.
     *
     * @param fileName
     *            name of the html file, without extension
     */
    @Override
    public void writeWithName(String fileName) {
        // Save the pictures of the document first.
        if (pics != null) {
            File imgDir = new File(this.getParentPath() + this.getImgPath());
            // Create the image directory if it does not exist yet.
            if (!imgDir.exists()) {
                imgDir.mkdirs();
            }
            for (Picture pic : pics) {
                File target = new File(imgDir, pic.suggestFullFileName());
                // One stream per picture, closed as soon as it is written.
                try (FileOutputStream fos = new FileOutputStream(target)) {
                    pic.writeImageContent(fos);
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        // Save the html source file.
        this.writeFile(getParentPath() + fileName + ".html");
    }

}

package com.feiruo.Test;

import java.io.FileNotFoundException;

import java.io.IOException;

import javax.xml.parsers.ParserConfigurationException;

import javax.xml.transform.TransformerException;

import com.yinhai.officeConvert.DocToHtml;

/**
 * Manual demo: converts {@code D://test//test.doc} to html and writes the
 * result as {@code feiruo.html} below {@code C://test}, with the pictures in
 * the {@code f} sub-directory.
 *
 * NOTE(review): the import above this class names
 * {@code com.yinhai.officeConvert.DocToHtml}, while DocToHtml is declared in
 * package {@code com.feiruo.officeConvert} - confirm which package is correct.
 */
public class Test {

    public static void main(String[] args) {
        new Test();
    }

    /**
     * Runs the conversion. The output is written only when the conversion
     * succeeded; a failed conversion leaves no content to write.
     */
    public Test() {
        DocToHtml dth = new DocToHtml("C://test", "f", "UTF-8");
        try {
            dth.convert("D://test//test.doc");
            dth.writeWithName("feiruo");
        } catch (IOException | ParserConfigurationException | TransformerException e) {
            // IOException also covers FileNotFoundException.
            e.printStackTrace();
        }
    }

}

package com.feiruo.officeConvert;

/**
 * Implemented by classes that can write their output to disk under a
 * caller-supplied file name.
 */
public interface IOfficeConvert {

    /**
     * Writes the file to disk.
     *
     * @param fileName
     *            name of the file to write
     */
    void writeWithName(String fileName);

}

POI实现word文档转html文件的更多相关文章

POI生成WORD文档
h2:first-child, body>h1:first-child, body>h1:first-child+h2, body>h3:first-child, body>h ...
POI生成word文档完整案例及讲解
一,网上的API讲解其实POI的生成Word文档的规则就是先把获取到的数据转成xml格式的数据,然后通过xpath解析表单式的应用取值,判断等等,然后在把取到的值放到word文档中,最后在输出来. ...
用java语言通过POI实现word文档的按标题提取
最近有一个项目需要将一个word文档中的数据提取到数据库中.就去网上查了好多资料,最靠谱的就是用poi实现word文档的提取. 喝水不忘挖井人,我查了好多资料就这个最靠谱,我的这篇博客主要是借鉴htt ...
POI 生成 word 文档简单版（包括文字、表格、图片、字体样式设置等）
POI 生成word 文档一般有两种方法: ① word模板生成word 文档 : ② 写代码直接生成 word 文档: 我这里演示的是第二种方法,即写代码生成 word文档,不多说废话,直接 ...
Poi之Word文档结构介绍
1.poi之word文档结构介绍之正文段落一个文档包含多个段落,一个段落包含多个Runs,一个Runs包含多个Run,Run是文档的最小单元获取所有段落:List<XWPFParagraph ...
微信公众号怎么添加附件？比如word文档，pdf文件等
微信公众号怎么添加附件?比如word文档,pdf文件等我们都知道创建一个微信公众号,在公众号中发布一些文章是非常简单的,但公众号添加附件下载的功能却被限制,如今可以使用小程序“微附件”进行在公众 ...
Java POI 解析word文档
实现步骤: 1.poi实现word转html 2.模型化解析html 3.html转Map数组 Map数组(数组的操作处理不做说明) 1.导jar包. 2.代码实现 package com.web.o ...
java word文档转 html文件
一.简介一般word文件后缀有doc.docx两种.docx是office word 2007以及以后版本文档的扩展名:doc是office word 2003文档保存的扩展名.对于这两种格式的wo ...
poi 读取word文档
1.导入jar包官网下载地址: https://www.apache.org/dyn/closer.lua/poi/release/bin/poi-bin-3.17-20170915.zip 最开始 ...

随机推荐

Spring实战4：面向切面编程
主要内容面向切面编程的基本知识为POJO创建切面使用@AspectJ注解为AspectJ的aspects注入依赖关系在南方没有暖气的冬天,太冷了,非常想念北方有暖气的冬天.为了取暖,很多朋友 ...
Django数据库设计中字段为空的方式
今天在做数据库设计的时候,设计了如下User表,其中我把email和phone字段设置为允许为空: class User(models.Model): username = models.CharFi ...
【MySQL】技巧之 count(*)、count(1)、count(col)
只看结果的话,Select Count(*) 和 Select Count(1) 两着返回结果是一样的. 假如表沒有主键(Primary key), 那么count(1)比count(*)快,如果有主 ...
Hadoop学习2--Linux准备及环境准备
1.环境安装: 虚拟机:VMware Player 系统:Ubuntu12 注意事项:注意位数,包括系统,java,Hadoop 2.切换账号当前登录账号是自己的账号,如果想切换到root,且是第一 ...
我的wordpress插件总结
酷壳(CoolShell.cn)WordPress的插件注意: 下面的这些插件的链接是其插件主页的链接,你可以在WordPress后台管理中添加插件时直接搜索安装就可以了. 插件不是越多越好.WP的 ...
CSU 1803 2016（数论）
2016 Problem Description: 给出正整数 n 和 m,统计满足以下条件的正整数对 (a,b) 的数量: 1≤a≤n,1≤b≤m; a×b 是 2016 的倍数. Input: 输 ...
jQuery部分源码帮助理解
(function(window){})(window) 为什么要传window给jquery当参数呢? 1.为了压缩有引用 2.加速变量的寻找,当找window对象的时候,默认从本级开始寻找,一级 ...
IT项目管理
IT项目管理是项目管理在IT领域的应用,结合IT行业特点运用项目管理技术.理念和方法,包括9大知识领域(项目综合.范围.时间.成本.质量.人力资源.沟通.风险和采购管理)以及启动.计划.实施.控制和收 ...
PyDev-Python的Eclipse插件安装
PyDev官网:http://marketplace.eclipse.org/node/114 安装方法: 1,打开Eclipse,如果是初次使用,关闭欢迎页面,否则无法按照我说的方法安装. 2,打开 ...
ruby字符串学习笔记5
1获取字符串某部分 s = "My kingdom for a string!" s.slice(3,7) # kingdom s[3,7] # kingdom s[/.ing/] ...

POI实现word文档转html文件

POI实现word文档转html文件的更多相关文章

随机推荐

热门专题