LIBREOFFICE_DOC_FAMILIES = [
"TextDocument",
"WebDocument",
"Spreadsheet",
"Presentation",
"Graphics"
] LIBREOFFICE_IMPORT_TYPES = {
"docx": {
"FilterName": "MS Word 2007 XML"
},
"pdf": {
"FilterName": "PDF - Portable Document Format"
},
"jpg": {
"FilterName": "JPEG - Joint Photographic Experts Group"
},
"html": {
"FilterName": "HTML Document"
},
"odp": {
"FilterName": "OpenDocument Presentation (Flat XML)"
},
"pptx": {
"FilterName": "Microsoft PowerPoint 2007 XML"
}
} LIBREOFFICE_EXPORT_TYPES = {
"pdf": {
LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "writer_pdf_Export"},
LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "writer_web_pdf_Export"},
LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "calc_pdf_Export"},
LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_pdf_Export"},
LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_pdf_Export"}
},
"jpg": {
LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_jpg_Export"},
LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_jpg_Export"}
},
"html": {
LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "HTML (StarWriter)"},
LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "HTML"},
LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "HTML (StarCalc)"},
LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_html_Export"},
LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_html_Export"}
},
"docx": {
LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "MS Word 2007 XML"}
},
"odp": {
LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress8"}
},
"pptx": {
LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "Impress MS PowerPoint 2007 XML"}
}
}

转:

convert_test

#!/usr/bin/env python3
"""
VIEW COMPLETE CODE AT
=====================
* https://github.com/six519/libreoffice_convert
THANKS
======
* Thanks to: Mirko Nasato for his PyODConverter http://www.artofsolving.com/opensource/pyodconverter
TESTED USING
============
* Fedora release 20 (Heisenbug)
* Python 3.3.2
INSTALL DEPENDENCIES
====================
* yum install libreoffice-sdk
""" import uno
import subprocess
import time
import os from com.sun.star.beans import PropertyValue LIBREOFFICE_DEFAULT_PORT = 6519
LIBREOFFICE_DEFAULT_HOST = "localhost" LIBREOFFICE_DOC_FAMILIES = [
"TextDocument",
"WebDocument",
"Spreadsheet",
"Presentation",
"Graphics"
] LIBREOFFICE_IMPORT_TYPES = {
"docx": {
"FilterName": "MS Word 2007 XML"
},
"pdf": {
"FilterName": "PDF - Portable Document Format"
},
"jpg": {
"FilterName": "JPEG - Joint Photographic Experts Group"
},
"html": {
"FilterName": "HTML Document"
},
"odp": {
"FilterName": "OpenDocument Presentation (Flat XML)"
},
"pptx": {
"FilterName": "Microsoft PowerPoint 2007 XML"
}
} LIBREOFFICE_EXPORT_TYPES = {
"pdf": {
LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "writer_pdf_Export"},
LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "writer_web_pdf_Export"},
LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "calc_pdf_Export"},
LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_pdf_Export"},
LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_pdf_Export"}
},
"jpg": {
LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_jpg_Export"},
LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_jpg_Export"}
},
"html": {
LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "HTML (StarWriter)"},
LIBREOFFICE_DOC_FAMILIES[1]: {"FilterName": "HTML"},
LIBREOFFICE_DOC_FAMILIES[2]: {"FilterName": "HTML (StarCalc)"},
LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress_html_Export"},
LIBREOFFICE_DOC_FAMILIES[4]: {"FilterName": "draw_html_Export"}
},
"docx": {
LIBREOFFICE_DOC_FAMILIES[0]: {"FilterName": "MS Word 2007 XML"}
},
"odp": {
LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "impress8"}
},
"pptx": {
LIBREOFFICE_DOC_FAMILIES[3]: {"FilterName": "Impress MS PowerPoint 2007 XML"}
}
} class PythonLibreOffice(object): def __init__(self, host=LIBREOFFICE_DEFAULT_HOST, port=LIBREOFFICE_DEFAULT_PORT):
self.host = host
self.port = port
self.local_context = uno.getComponentContext()
self.resolver = self.local_context.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", self.local_context)
self.connectionString = "socket,host=%s,port=%s;urp;StarOffice.ComponentContext" % (LIBREOFFICE_DEFAULT_HOST, LIBREOFFICE_DEFAULT_PORT)
self.context = None
self.desktop = None
self.runUnoProcess()
self.__lastErrorMessage = "" try:
self.context = self.resolver.resolve("uno:%s" % self.connectionString)
self.desktop = self.context.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", self.context)
except Exception as e:
self.__lastErrorMessage = str(e) @property
def lastError(self): return self.__lastErrorMessage def terminateProcess(self): try:
if self.desktop:
self.desktop.terminate()
except Exception as e:
self.__lastErrorMessage = str(e)
return False return True def convertFile(self, outputFormat, inputFilename): if self.desktop: tOldFileName = os.path.splitext(inputFilename)
outputFilename = "%s.%s" % (tOldFileName[0], outputFormat)
inputFormat = tOldFileName[1].replace(".","")
inputUrl = uno.systemPathToFileUrl(os.path.abspath(inputFilename))
outputUrl = uno.systemPathToFileUrl(os.path.abspath(outputFilename)) if inputFormat in LIBREOFFICE_IMPORT_TYPES:
inputProperties = {
"Hidden": True
} inputProperties.update(LIBREOFFICE_IMPORT_TYPES[inputFormat]) doc = self.desktop.loadComponentFromURL(inputUrl, "_blank", 0, self.propertyTuple(inputProperties)) try:
doc.refresh()
except:
pass docFamily = self.getDocumentFamily(doc)
if docFamily:
try:
outputProperties = LIBREOFFICE_EXPORT_TYPES[outputFormat][docFamily]
doc.storeToURL(outputUrl, self.propertyTuple(outputProperties))
doc.close(True) return True
except Exception as e:
self.__lastErrorMessage = str(e) self.terminateProcess() return False def propertyTuple(self, propDict):
properties = []
for k,v in propDict.items():
property = PropertyValue()
property.Name = k
property.Value = v
properties.append(property) return tuple(properties) def getDocumentFamily(self, doc):
try:
if doc.supportsService("com.sun.star.text.GenericTextDocument"):
return LIBREOFFICE_DOC_FAMILIES[0]
if doc.supportsService("com.sun.star.text.WebDocument"):
return LIBREOFFICE_DOC_FAMILIES[1]
if doc.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
return LIBREOFFICE_DOC_FAMILIES[2]
if doc.supportsService("com.sun.star.presentation.PresentationDocument"):
return LIBREOFFICE_DOC_FAMILIES[3]
if doc.supportsService("com.sun.star.drawing.DrawingDocument"):
return LIBREOFFICE_DOC_FAMILIES[4]
except:
pass return None def runUnoProcess(self):
subprocess.Popen('soffice --headless --norestore --accept="%s"' % self.connectionString, shell=True, stdin=None, stdout=None, stderr=None)
time.sleep(3) if __name__ == "__main__": test_libreoffice = PythonLibreOffice() #convert MS Word Document file (docx) to PDF
test_libreoffice.convertFile("pdf", "document.docx")

Libreoffice 各类文件转换的filtername的更多相关文章

  1. C# 将多个office文件转换及合并为一个PDF文件

    PDF文件介绍 PDF(Portable Document Format )文件源于20世纪90年代初期,如今早已成为了一种最流行的的文件格式之一.因为PDF文件有很多优点: 支持跨平台和跨设备共享 ...

  2. mpp文件转换成jpg图片,可以用pdf文件做中转站

    用project软件做了一个表,发现不能转换成图片,先把mpp文件转换成pdf文件,然后用PS打开pdf文件,存储为jpg格式就行了

  3. php将文件转换成二进制输出[转]

    header( "Content-type: image/jpeg"); $PSize = filesize('1.jpg'); $picturedata = fread(fope ...

  4. ocx文件转换成C#程序引用的DLL

    将ocx文件转换成C#程序引用的DLL文件的办法  将ocx文件转换成C#程序引用的DLL文件的办法,需要的朋友可以参考一下  1.打开VS2008或VS2010命令提示符(此例用VS2008) 将o ...

  5. nodejs将PDF文件转换成txt文本,并利用python处理转换后的文本文件

    目前公司Web服务端的开发是用Nodejs,所以开发功能的话首先使用Nodejs,这也是为什么不直接用python转换的原因. 由于node对文本的处理(提取所需信息)的能力不强,类似于npm上的包: ...

  6. Python:将utf-8格式的文件转换成gbk格式的文件

    需求:将utf-8格式的文件转换成gbk格式的文件 实现代码如下: def ReadFile(filePath,encoding="utf-8"): with codecs.ope ...

  7. 15个最好的PDF转word的在线转换器,将PDF文件转换成doc文件

    PDF是一种文件格式,包含文本,图像,数据等,这是独立于操作系统的文件类型.它是一个开放的标准,压缩,另一方面DOC文件和矢量图形是由微软文字处理文件.该文件格式将纯文本格式转换为格式化文档.它支持几 ...

  8. Marvel – 将图像和源文件转换成互动,共享的原型

    Marvel 是一款非常简单的工具,将图像和设计源文件转换成互动,共享的原型,无需任何编码.原型可以通过点击几下鼠标就创建出来,能工作在任何设备上的浏览器,包括移动设备,台式机.Marvel 的一个特 ...

  9. 文件转换神器Pandoc使用

    最近记录笔记,改用Markdown格式.但有时需要分享下笔记,对于不懂markdown格式的同学来说阅读感觉不是那么友好.因此就一直在寻找一款文件转换的软件,之前因为用markdownpad来编写,可 ...

随机推荐

  1. Hive| ETL清洗& 查询练习

    ETL清洗数据 导Jar包 <dependencies> <dependency> <groupId>log4j</groupId> <artif ...

  2. 在github上创建新的分支(包括管理分支)

    考虑到前面的项目在master分支上,这个不是太友好,下面在只有master分支的基础上,新建一个dev分支 一:查看 1.查看本地分支 git branch 2.查看远程分支 git branch ...

  3. PAT (Basic Level) Practise - 害死人不偿命的(3n+1)猜想

    题目链接:https://www.patest.cn/contests/pat-b-practise/1001 卡拉兹(Callatz)猜想: 对任何一个自然数n,如果它是偶数,那么把它砍掉一半:如果 ...

  4. union表关联模糊查询servlet,action方法

    2018-11-14 servletxml层 public String getSql(String keyword) { StringBuffer sqlSb = new StringBuffer( ...

  5. html-背景图片

    <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8&quo ...

  6. eclipse tomcat报Several ports(8005 8080 8009)端口被占用问题解决方案

    在启动tomcat的时候eclipse突然报错 Several ports (8005,8080,8009) required by Tomcat v6.0 Server at localhost a ...

  7. HDU 5178 pairs【二分】||【尺取】

    <题目链接> 题目大意: 给定一个整数序列,求出绝对值小于等于k的有序对个数. 解题分析: $O(nlong(n))$的二分很好写,这里就不解释了.本题尺取$O(n)$也能做,并且效率很不 ...

  8. 使用vue iview遇到的一些问题

    使用阿里巴巴图标库 下载代码 这五个文件 iconfount.css 如果导入多个文件记得把@font-face复制到里面 改成./路径 //main.js import './assets/font ...

  9. linux 学习笔记 mysql安装总结

    1 安装方式 下载2禁制源码安装包 mysql-5.5.27-linux2.6-i686.tar.gz 备注:2禁制额包解压缩后直接就可以使用 不用Make 2 步骤 shell>groupad ...

  10. PHP学习笔记 ThinkPHP

    //检测文件是否存在 if(!file_exists(dirname(__FILE__)."/Bo/Admin/Conf/config.php")) { header('Locat ...