Java 合并多个 Word 2007（.docx）文档：基于 docx4j

参考文章:http://dh.swzhinan.com/post/185.html

引入的 jar 包（Maven 依赖）

 <dependency>

             <groupId>org.docx4j</groupId>

             <artifactId>docx4j</artifactId>

             <version>6.0.1</version>

         </dependency>

         <dependency>

             <groupId>org.apache.commons</groupId>

             <artifactId>commons-compress</artifactId>

             <version>1.8.1</version>

         </dependency>

         <dependency>

             <groupId>com.alibaba</groupId>

             <artifactId>fastjson</artifactId>

             <version>1.2.58</version>

         </dependency>

         <dependency>

             <groupId>org.apache.poi</groupId>

             <artifactId>poi</artifactId>

             <version>3.10-FINAL</version>

         </dependency>

         <dependency>

             <groupId>org.apache.poi</groupId>

             <artifactId>poi-ooxml</artifactId>

             <version>3.10-FINAL</version>

         </dependency>

         <dependency>

             <groupId>org.apache.xmlbeans</groupId>

             <artifactId>xmlbeans</artifactId>

             <version>2.5.0</version>

         </dependency>

         <dependency>

             <groupId>org.apache.xmlgraphics</groupId>

             <artifactId>xmlgraphics-commons</artifactId>

             <version>1.3.1</version>

         </dependency>

代码

 package com.htsoft.oa.action.sjrh.tool;

 import java.io.File;

 import java.io.FileInputStream;

 import java.io.FileOutputStream;

 import java.io.IOException;

 import java.io.InputStream;

 import java.io.OutputStream;

 import java.io.RandomAccessFile;

 import java.nio.MappedByteBuffer;

 import java.nio.channels.FileChannel;

 import java.nio.channels.FileChannel.MapMode;

 import java.text.SimpleDateFormat;

 import java.util.ArrayList;

 import java.util.Date;

 import java.util.Iterator;

 import java.util.List;

 import org.apache.commons.io.IOUtils;

 import org.docx4j.dml.wordprocessingDrawing.Inline;

 import org.docx4j.jaxb.Context;

 import org.docx4j.openpackaging.exceptions.Docx4JException;

 import org.docx4j.openpackaging.packages.WordprocessingMLPackage;

 import org.docx4j.openpackaging.parts.PartName;

 import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart;

 import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage;

 import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;

 import org.docx4j.relationships.Relationship;

 import org.docx4j.wml.Br;

 import org.docx4j.wml.CTAltChunk;

 import org.docx4j.wml.Drawing;

 import org.docx4j.wml.ObjectFactory;

 import org.docx4j.wml.P;

 import org.docx4j.wml.R;

 import org.docx4j.wml.STBrType;

 import com.alibaba.fastjson.JSONObject;

 import com.htsoft.oa.action.sjrh.pojo.MergeResult;

 public class WordMergeUtils {

     // Shared docx4j WordprocessingML object factory; the helpers below use it to create paragraphs, runs and drawings.
     private static ObjectFactory factory = new ObjectFactory();

     /**
      * Merges several .docx documents into a single file.
      *
      * <p>The first non-null stream is written to {@code path} and loaded as the master
      * document. Every later non-null stream is read fully and handed to {@code insertDocx},
      * which attaches it to the master's main document part. The package is then saved back
      * to {@code path}.
      *
      * <p>Failures are propagated instead of being printed and ignored, so a merged file is
      * only returned when every input was read and the master document was loaded. All
      * streams in {@code streams} are closed before this method returns, on success and on
      * failure alike.
      *
      * @param streams input streams of the documents to merge, in merge order; null entries are skipped
      * @param path    path of the merged output file
      * @return the merged file, or {@code null} when {@code streams} contains no non-null stream
      * @throws Docx4JException if the master document cannot be loaded or the merged package cannot be saved
      * @throws IOException     if an input stream cannot be read or the output file cannot be written
      */
     public static File mergeDocx(final List<InputStream> streams, String path) throws Docx4JException, IOException {
         final File generated = new File(path);
         WordprocessingMLPackage target = null;
         int chunkId = 0;
         try {
             for (InputStream is : streams) {
                 if (is == null) {
                     continue;
                 }
                 byte[] content = IOUtils.toByteArray(is);
                 if (target == null) {
                     // The first document becomes the master: copy it to the output path and load it from there.
                     OutputStream os = new FileOutputStream(generated);
                     try {
                         os.write(content);
                     } finally {
                         os.close();
                     }
                     target = WordprocessingMLPackage.load(generated);
                 } else {
                     // To start each appended document on a new page, call
                     // addPageBreak(target.getMainDocumentPart()) before inserting it.
                     insertDocx(target.getMainDocumentPart(), content, chunkId++);
                 }
             }
         } finally {
             // Close every stream, including those never reached because an earlier one failed.
             for (InputStream is : streams) {
                 IOUtils.closeQuietly(is);
             }
         }
         if (target == null) {
             return null;
         }
         target.save(generated);
         return generated;
     }

     /**
      * Attaches a document to the main document part as an altChunk.
      *
      * <p>The bytes are stored in a new {@code AlternativeFormatInputPart} named
      * {@code /part<chunkId>.docx}, the part is added as a target of {@code main}, and a
      * {@code CTAltChunk} carrying the resulting relationship id is added to {@code main}.
      *
      * <p>Failures are reported to the caller rather than printed and ignored, so a chunk
      * that could not be attached is never dropped silently.
      *
      * @param main    main document part of the document that receives the chunk
      * @param bytes   complete content of the document to attach
      * @param chunkId number used to build the part name; callers pass a different value per chunk
      * @throws Docx4JException if the part name is invalid or the part cannot be added to {@code main}
      */
     private static void insertDocx(MainDocumentPart main, byte[] bytes, int chunkId) throws Docx4JException {
         AlternativeFormatInputPart afiPart = new AlternativeFormatInputPart(
                 new PartName("/part" + chunkId + ".docx"));
         afiPart.setBinaryData(bytes);
         Relationship altChunkRel = main.addTargetPart(afiPart);
         CTAltChunk chunk = Context.getWmlObjectFactory().createCTAltChunk();
         chunk.setId(altChunkRel.getId());
         main.addObject(chunk);
     }

     /**
      * Loads a WordprocessingML file with docx4j and saves it to a new temporary file.
      * The source file is not modified.
      *
      * <p>The temporary file name starts with the source file's name and ends in
      * {@code .doc}. NOTE(review): the on-disk format is whatever
      * {@code WordprocessingMLPackage.save(File)} produces for that file name; the
      * {@code .doc} suffix alone does not make it a binary .doc file. Confirm what
      * consumers of the result expect.
      *
      * @param file the WordprocessingML file to convert
      * @return the newly created temporary file; the caller is responsible for deleting it
      * @throws Docx4JException if the file cannot be loaded or the package cannot be saved
      * @throws IOException     if the temporary file cannot be created
      */
     public static File wordMLToWord(File file) throws Docx4JException, IOException {
         WordprocessingMLPackage target = WordprocessingMLPackage.load(file);
         // File.createTempFile rejects prefixes shorter than three characters, so pad short names.
         StringBuilder prefix = new StringBuilder(file.getName());
         while (prefix.length() < 3) {
             prefix.append('_');
         }
         File temp = File.createTempFile(prefix.toString(), ".doc");
         target.save(temp);
         return temp;
     }

     /**
      * Loads an XML (WordprocessingML) file with docx4j and saves it to a new temporary
      * file. The source file is not modified.
      *
      * <p>The temporary file name starts with the source file's name and ends in
      * {@code .doc}. NOTE(review): the on-disk format is whatever
      * {@code WordprocessingMLPackage.save(File)} produces for that file name; the
      * {@code .doc} suffix alone does not make it a binary .doc file. Confirm what
      * consumers of the result expect.
      *
      * @param file the XML file to convert
      * @return the newly created temporary file; the caller is responsible for deleting it
      * @throws Docx4JException if the file cannot be loaded or the package cannot be saved
      * @throws IOException     if the temporary file cannot be created
      */
     public static File xmlToWord(File file) throws Docx4JException, IOException {
         WordprocessingMLPackage target = WordprocessingMLPackage.load(file);
         // File.createTempFile rejects prefixes shorter than three characters, so pad short names.
         StringBuilder prefix = new StringBuilder(file.getName());
         while (prefix.length() < 3) {
             prefix.append('_');
         }
         File temp = File.createTempFile(prefix.toString(), ".doc");
         target.save(temp);
         return temp;
     }

     /**
      * Merges the given document files into one file by opening each of them and
      * delegating to {@code mergeDocx}.
      *
      * <p>Every stream opened here is closed before this method returns, including when
      * opening one of the later files fails or the merge itself throws.
      *
      * @param list files to merge, in merge order
      * @param path path of the merged output file
      * @return whatever {@code mergeDocx} returns for the opened streams
      * @throws Docx4JException if the merge fails inside docx4j
      * @throws IOException     if a file cannot be opened or read
      */
     public static File mergeWordML(List<File> list, String path) throws Docx4JException, IOException {
         final List<InputStream> streams = new ArrayList<InputStream>(list.size());
         try {
             for (File file : list) {
                 streams.add(new FileInputStream(file));
             }
             return WordMergeUtils.mergeDocx(streams, path);
         } finally {
             // mergeDocx closes the streams it consumes; this also covers streams it never
             // reached and the case where opening a later file failed before the merge started.
             for (InputStream is : streams) {
                 IOUtils.closeQuietly(is);
             }
         }
     }

     /**
      * Reads a whole file into a byte array through a read-only memory mapping.
      *
      * <p>An empty file yields an empty array. The file is always closed before this
      * method returns, and the original failure (for example
      * {@code FileNotFoundException} for a missing file) reaches the caller unchanged.
      *
      * @param filename path of the file to read
      * @return the complete content of the file
      * @throws IOException if the file cannot be opened or read, or is larger than
      *                     {@code Integer.MAX_VALUE} bytes and so cannot fit in one array
      */
     public static byte[] fileToByteArray(String filename) throws IOException {
         RandomAccessFile raf = new RandomAccessFile(filename, "r");
         try {
             FileChannel fc = raf.getChannel();
             // Read the size once so the mapping and the array always agree.
             long size = fc.size();
             if (size > Integer.MAX_VALUE) {
                 throw new IOException("File too large to load into a byte array: " + filename);
             }
             byte[] result = new byte[(int) size];
             if (size > 0) {
                 MappedByteBuffer byteBuffer = fc.map(MapMode.READ_ONLY, 0, size);
                 byteBuffer.get(result);
             }
             return result;
         } finally {
             // Closing the RandomAccessFile also closes its channel.
             raf.close();
         }
     }

     /**
      * Adds each image in {@code imageList} to the given Word file, one paragraph per
      * image, and saves the file in place.
      *
      * <p>For every entry the image file is read into memory, turned into an image part of
      * the package, wrapped in an inline drawing, placed in a new paragraph, and added to
      * the main document part with {@code addObject}.
      *
      * <p>Each image entry is read through these keys:
      * <ul>
      *   <li>{@code url} - path of the image file on disk</li>
      *   <li>{@code name} - passed to {@code createImageInline} as the file name hint</li>
      *   <li>{@code keyword} - passed to {@code createImageInline} as the alternative text;
      *       it is not used to locate a placeholder in the document</li>
      * </ul>
      *
      * @param word      the Word file to edit; it is overwritten with the result
      * @param imageList the images to add, in order
      * @throws Exception if the file cannot be loaded or saved, an image cannot be read, or
      *                   docx4j fails to create the image part or inline (docx4j declares
      *                   plain {@code Exception} there)
      */
     public static void addImageToPackage(File word, List<JSONObject> imageList) throws Exception {
         WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(word);
         MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();
         int nextId = 1;
         for (JSONObject image : imageList) {
             byte[] bytes = fileToByteArray(image.getString("url"));
             BinaryPartAbstractImage imagePart = BinaryPartAbstractImage.createImagePart(wordMLPackage, bytes);
             // Give every image its own pair of ids; reusing 1 and 2 for each image produced
             // duplicate drawing ids as soon as more than one image was added.
             // NOTE(review): ids can still collide with drawings already present in the document.
             int docPrId = nextId++;
             int cNvPrId = nextId++;
             Inline inline = imagePart.createImageInline(image.getString("name"), image.getString("keyword"), docPrId,
                     cNvPrId, false);
             documentPart.addObject(addInlineImageToParagraph(inline));
         }
         wordMLPackage.save(word);
     }

     /**
      * Path-based convenience overload: wraps {@code wordFilePath} in a {@link File} and
      * forwards to {@code addImageToPackage(File, List)}.
      *
      * @param wordFilePath path of the Word file to edit
      * @param imageList    the images to add; each entry is read through the keys
      *                     {@code url}, {@code name} and {@code keyword} by the File overload
      * @throws Exception if the File overload fails
      */
     public static void addImageToPackage(String wordFilePath, List<JSONObject> imageList) throws Exception {
         final File word = new File(wordFilePath);
         addImageToPackage(word, imageList);
     }

     /**
      * Wraps an inline image in a new paragraph.
      *
      * <p>The inline is placed in a drawing, the drawing in a run, and the run in a
      * paragraph; all three are created with the shared object factory.
      *
      * @param inline the inline object that contains the image
      * @return a new paragraph whose only run holds the image
      */
     private static P addInlineImageToParagraph(Inline inline) {
         // Build from the inside out: drawing -> run -> paragraph.
         Drawing picture = factory.createDrawing();
         picture.getAnchorOrInline().add(inline);

         R imageRun = factory.createR();
         imageRun.getContent().add(picture);

         P result = factory.createP();
         result.getContent().add(imageRun);
         return result;
     }

     /**
      * Appends a paragraph containing a page break to the end of the document body and
      * saves the file in place.
      *
      * @param word the Word file to edit; it is overwritten with the result
      * @throws Docx4JException if the file cannot be loaded or saved
      */
     public static void addPageBreak(File word) throws Docx4JException {
         WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(word);
         MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();

         Br breakObj = new Br();
         breakObj.setType(STBrType.PAGE);
         // A break is run content, so wrap it in a run rather than adding it directly to the paragraph.
         R run = factory.createR();
         run.getContent().add(breakObj);
         P paragraph = factory.createP();
         paragraph.getContent().add(run);

         documentPart.getJaxbElement().getBody().getContent().add(paragraph);
         wordMLPackage.save(word);
     }

     /**
      * Appends a paragraph containing a page break to the end of the given main document
      * part's body. Nothing is saved; the caller owns the package.
      *
      * @param documentPart the main document part to append the page break to
      */
     public static void addPageBreak(MainDocumentPart documentPart) {
         Br breakObj = new Br();
         breakObj.setType(STBrType.PAGE);
         // A break is run content, so wrap it in a run rather than adding it directly to the paragraph.
         R run = factory.createR();
         run.getContent().add(breakObj);
         P paragraph = factory.createP();
         paragraph.getContent().add(run);
         documentPart.getJaxbElement().getBody().getContent().add(paragraph);
     }

     /**
      * Path-based convenience overload: wraps {@code wordFilePath} in a {@link File} and
      * forwards to {@code addPageBreak(File)}.
      *
      * @param wordFilePath path of the Word file to edit
      * @throws Docx4JException if the File overload fails
      */
     public static void addPageBreak(String wordFilePath) throws Docx4JException {
         final File word = new File(wordFilePath);
         addPageBreak(word);
     }

     /**
      * Entry point for merging Word documents: validates the arguments, works out the
      * output directory and file name, merges the files, and reports the outcome as a
      * {@code MergeResult} instead of throwing.
      *
      * <p>The output directory is built from {@code WordStaticFileds.create_word_path}:
      * {@code word/fixed/<djxh>} when {@code WordStaticFileds.open_Fixed_path} equals
      * {@code "1"}, otherwise {@code word/notFixed/<yyyyMMdd>/<djxh>} using today's date.
      * The directory is created when it does not exist.
      *
      * <p>When {@code mergedFileName} is null or empty, the name is derived from the first
      * source file: its name without the extension (or the whole name when it has no
      * extension) followed by {@code -合并后.docx}.
      *
      * @param djxh           registration number; used as the last directory of the output path
      * @param sourceFiles    paths of the files to merge, in merge order
      * @param mergedFileName name of the merged file, or null/empty to derive it from the first source file
      * @return a result with code 0, the merged file and its absolute path on success;
      *         a result with code -1 and an error message when an argument is missing or the merge fails
      */
     public static MergeResult merge(String djxh, List<String> sourceFiles, String mergedFileName) {
         if (djxh == null || djxh.isEmpty()) {
             return new MergeResult(-1, null, "登记序号为空!", null);
         }
         if (sourceFiles == null || sourceFiles.isEmpty()) {
             return new MergeResult(-1, null, "待合并文件路径为空!", null);
         }
         try {
             List<File> files = new ArrayList<File>(sourceFiles.size());
             for (String filePath : sourceFiles) {
                 files.add(new File(filePath));
             }

             // Base directory for the merged file: fixed per registration number, or dated.
             String path;
             if ("1".equals(WordStaticFileds.open_Fixed_path)) {
                 path = WordStaticFileds.create_word_path + "word/fixed/" + djxh;
             } else {
                 path = WordStaticFileds.create_word_path + "word/notFixed/"
                         + new SimpleDateFormat("yyyyMMdd").format(new Date()) + "/" + djxh;
             }

             if (mergedFileName == null || mergedFileName.isEmpty()) {
                 String oldName = files.get(0).getName();
                 int lastIndexOf = oldName.lastIndexOf(".");
                 // Without an extension keep the whole name; previously the name stayed null
                 // and the output file ended up being called "null".
                 String baseName = lastIndexOf > 0 ? oldName.substring(0, lastIndexOf) : oldName;
                 mergedFileName = baseName + "-合并后.docx";
             }

             File mergedDir = new File(path);
             if (!mergedDir.exists()) {
                 mergedDir.mkdirs();
             }

             String mergedFullPath = path + "/" + mergedFileName;
             File mergeWordML = WordMergeUtils.mergeWordML(files, mergedFullPath);
             if (mergeWordML == null) {
                 // mergeWordML returns null when no document could be used as the master.
                 return new MergeResult(-1, null, "合并word文件出错!错误信息:未生成合并文件", null);
             }
             return new MergeResult(0, mergeWordML, "合并word文件成功!", mergeWordML.getAbsolutePath());
         } catch (Exception e) {
             // Some exceptions carry no message; fall back to the exception itself so the
             // caller never sees a bare "null".
             String detail = e.getMessage() != null ? e.getMessage() : e.toString();
             return new MergeResult(-1, null, "合并word文件出错!错误信息:" + detail, null);
         }
     }

 }

java合并多个word 2007 文档基于docx4j的更多相关文章

Word 2007 文档结构图混乱
Word 2007在编写大型文档时经常出现文档结构图混乱的情况,经过多番检索试验,得出结论: 绝对有效的临时性解决方案:在打开word的时候左下角会有提示word自动更新文档样式,按esc键取消,然后 ...
Microsoft Office Word 2007 文档结构图突然变小的解决办法
前记: 一个word文档不知道修改了什么,突然发现文档结构图显示的文字变得非常的小了. 用ctrl+鼠标滚轮只能放大或是缩小行间距,对文字没有什么变化. 解决办法: 1.打开文档结构图点击视图,勾选 ...
PowerDesigner 125 导致 Word 2007文档内容无法选中以及点击鼠标没用
word 2007 文档中添加文件
1. "插入"->"对象",出现的下拉框中选择"对象" 2.弹出界面 3.切换至"由文件创建",点击"浏 ...
Java解析word,获取文档中图片位置
前言(背景介绍): Apache POI是Apache基金会下一个开源的项目,用来处理office系列的文档,能够创建和解析word.excel.ppt格式的文档. 其中对word文档的处理有两个技术 ...
Word试卷文档模型化解析存储到数据库
最近在搞一套在线的考试系统,有许多人反映试题的新增比较麻烦(需要逐个输入),于是呼就整个了试卷批量导入了 poi实现word转html 模型化解析html html转Map数组 Map数组(数组的操作 ...
php解析word,获得文档中的图片
背景前段时间在写一个功能:用原生php将获得word中的内容并导入到网站系统中.因为文档中存在公式,图片,表格等,因此写的比较麻烦. 思路大体思路是先将word中格式为doc的文档转化为docx, ...
Java SE之XML<一>XML文档规约
[文档整理系列] Java SE之XML<一>XML文档规约 1.xml文档引用: 1.外部私有DTD: <!DOCTYPE rootNodeName SYSTEM "ur ...
Atitit 计算word ppt文档的页数
Atitit 计算word ppt文档的页数 http://localhost:8888/ http://git.oschina.net/attilax/ati_wordutil private vo ...

随机推荐

Django2.2 pymysql 连接mysql数据库的坑
参考链接 https://www.52pojie.cn/thread-921141-1-1.html 1. mysqlclient 1.3版本不对解决办法:注释掉D:\Users\xxx\AppDa ...
main process exited, code=exited, status=203/EXEC
问题描述: Oct :: c_3. systemd[]: Started etcd. Oct :: c_3. systemd[]: Starting etcd... Oct :: c_3. syste ...
Vagrant+VirtualBox虚拟环境
Vagrant+VirtualBox虚拟环境 VagrantVirtualBox 软件安装虚拟机基础配置虚拟机创建共享目录配置网络配置私有网络配置公有网络打包box与添加box 打包bo ...
《TensorFlow2深度学习》学习笔记（一）Tensorflow基础
本系列笔记记录了学习TensorFlow2的过程,主要依据 https://github.com/dragen1860/Deep-Learning-with-TensorFlow-book 进行学习 ...
TestAbstract
public class TestAbstract { public static void main(String[] args) { System.out.println("Hello ...
ES6 的class类笔记
class Person{ // 构造 constructor(x,y){ this.x = x; this.y = y; } toString(){ return (this.x + "的 ...
list转json数组
lights为arraylist java后台代码: try { org.tempuri.TLight[] lights = phlightSoapProxy.getLights(); ...
python--基于socket网络编程
Python 提供了两个基本的 socket 模块. 第一个是 Socket,它提供了标准的 BSD Sockets API. 第二个是 SocketServer, 它提供了服务器中心类,可以简化网络 ...
pytcharm无法debug：pydev debugger: process 15188 is connecting
今天问老师,老师说需要设置断点,果然设置断点后就可以正常调试了.
cpu的发现
system.cpu.discovery 检测到的CPU/CPU内核列表.用于低级发现返回的cpu从0开始编号,其他关于cpu的监控项就可以使用cpu的id进行单个cpu的资源监控

java合并多个word 2007 文档 基于docx4j

java合并多个word 2007 文档 基于docx4j的更多相关文章

随机推荐

热门专题

java合并多个word 2007 文档基于docx4j

java合并多个word 2007 文档基于docx4j的更多相关文章