





<action name="upload" class="lucenesAction" method="upload">

<!-- The accepted upload file types can be restricted here

<interceptor-ref name="fileUpload">


<param name="maximumSize">500000</param>


<param name="allowedExtensions">.jsp</param>


<param name="allowedTypes">image/pjpeg,image/gif,text/xml,text/plain,application/msword,application/vnd.ms-excel</param>


<interceptor-ref name="defaultStack"/> -->

<result name="input">/demo/lucenes/upload_fail.jsp</result>

<result name="success">/demo/lucenes/upload_ok.jsp</result>




// Stream field; never assigned in the visible code.
// NOTE(review): presumably backs a stream-type result of the upload action — confirm against the action class.
public InputStream tInputStream;

// Uploaded file. This and the two fields below are the three fields required for an upload.
// NOTE(review): presumably populated by the Struts fileUpload interceptor via the data/dataFileName/dataContentType naming convention — confirm.

private File data;

// File name of the upload.

private String dataFileName;

// Content type of the upload.

private String dataContentType;



import java.io.File;

import java.io.FileInputStream;

import java.io.FileOutputStream;

import java.io.IOException;

import org.apache.poi.POIXMLDocument;

import org.apache.poi.POIXMLTextExtractor;

import org.apache.poi.hwpf.HWPFDocument;

import org.apache.poi.hwpf.extractor.WordExtractor;

import org.apache.poi.hwpf.usermodel.CharacterProperties;

import org.apache.poi.hwpf.usermodel.HWPFList;

import org.apache.poi.hwpf.usermodel.ParagraphProperties;

import org.apache.poi.hwpf.usermodel.Range;

import org.apache.poi.openxml4j.opc.OPCPackage;

import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

import org.junit.Test;


/**
 * Extracts the text content of Word documents.
 *
 * @author wangshouhai
 * @version 2014-4-17 12:07:04 PM
 */


public class ReadWord {

public static void main(String[] args) {

File file = new File("C:\\Users\\Administrator\\Desktop\\測试文档.docx");

// readWord2003(file);




* 支持word-2003

* @param args


private static void readWord2003(File file) {

try {

FileInputStream fis = new FileInputStream(file);

// 创建WordExtractor对象

WordExtractor wordExtractor = new WordExtractor(fis);

// 取得全部文本内容

String text = wordExtractor.getText();


} catch (Exception e) {





// 支持word-2003

public static void readWordExtractor(File file) {

try {

FileInputStream fis = new FileInputStream(file);

// 创建WordExtractor对象

WordExtractor wordExtractor = new WordExtractor(fis);

// 通过getParagraphText()提取每一个段落

String[] paragraph = wordExtractor.getParagraphText();

System.out.println("该Word文件共同拥有" + paragraph.length + "段。");

for (int i = 0; i < paragraph.length; i++) {



} catch (Exception e) {





* word 2007解决方式

* @param args



public static void readWord2007(File file) {

try {

// word 2007,读取word中字符

OPCPackage opcPackage = POIXMLDocument.openPackage("D:\\apache-tomcat-6.0.18\\webapps\\GOVWBWeb\\upload\\user\\2014\\04\\18\\08\\193b299f-e8fc-4a32-a7ba-f951beeec1d9");

POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);

String text2007 = extractor.getText();


} catch (Exception e) {






import java.io.File;
import java.io.FileInputStream;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;


/**
 * Reads the content of Excel workbooks.
 *
 * @author wangshouhai
 * @version 2014-4-18 12:56:23 PM
 */


public class ReadExcel {

// 文件上传

public static void readExcel2007(File file) {

try {

// 创建工作区,读取上传文件

XSSFWorkbook wb= new XSSFWorkbook(new FileInputStream(file));

XSSFSheet sheet =wb.getSheetAt(0); 

int rows = sheet.getPhysicalNumberOfRows();// 获取全部的行

if (rows > 0) {

for (int i = 1; i < rows; i++) {

XSSFRow row=sheet.getRow(i);

if (row == null) {



try {

XSSFCell idCell = row.getCell(0);

if (idCell != null) {

double id = idCell.getNumericCellValue();

//int id = Integer.parseInt(idCell.getRichStringCellValue().toString());



// 账号

XSSFCell accountsCell = row.getCell(1);

String accounts = null;

if (accountsCell != null) {

accounts = accountsCell.getRichStringCellValue().toString();



// password

XSSFCell passwordCell = row.getCell(2);

if (passwordCell != null) {

String password = passwordCell.getRichStringCellValue().toString();



// 姓名

XSSFCell nameCell = row.getCell(3);

if (nameCell != null) {

String name = nameCell.getRichStringCellValue().toString();



// 性别

XSSFCell sexCell = row.getCell(4);

if (sexCell != null) {

double sex = idCell.getNumericCellValue();

//String sex = sexCell.getRichStringCellValue().toString();

//int sexs = Integer.parseInt(sex);



// 邮箱

XSSFCell emailCell = row.getCell(5);

if (emailCell != null) {

String email = emailCell.getRichStringCellValue().toString();



// 手机

XSSFCell phoneCell = row.getCell(6);

if (phoneCell != null) {

String phone = phoneCell.getRichStringCellValue().toString();



} catch (Exception e) {

throw new RuntimeException(e);




} catch (Exception e) {

throw new RuntimeException(e);



public static void readExcel2003(File file) {

try {

// 创建工作区,读取上传文件

Workbook wb = WorkbookFactory.create(new FileInputStream(file));

Sheet sheet = wb.getSheetAt(0);

int rows = sheet.getPhysicalNumberOfRows();// 获取全部的行

if (rows > 0) {

for (int i = 1; i < rows; i++) {


HSSFRow row = (HSSFRow) sheet.getRow(i);

if (row == null) {



try {


HSSFCell idCell = row.getCell(0);

if (idCell != null) {

double id = idCell.getNumericCellValue();

// int id =

// Integer.parseInt(idCell.getRichStringCellValue().toString());

System.out.print("id: "+id+",");


// 账号

HSSFCell accountsCell = row.getCell(1);

String accounts = null;

if (accountsCell != null) {

accounts = accountsCell.getRichStringCellValue().toString();

System.out.print("accounts: "+accounts+",");


// password

HSSFCell passwordCell = row.getCell(2);

if (passwordCell != null) {

String password = passwordCell.getRichStringCellValue().toString();

System.out.print("password: "+password+",");


// 姓名

HSSFCell nameCell = row.getCell(3);

if (nameCell != null) {

String name = nameCell.getRichStringCellValue().toString();

System.out.print("name: "+name+",");


// 性别

HSSFCell sexCell = row.getCell(4);

if (sexCell != null) {

double sex = idCell.getNumericCellValue();

// String sex =

// sexCell.getRichStringCellValue().toString();

// int sexs = Integer.parseInt(sex);

System.out.print("sex: "+sex+",");


// 邮箱

HSSFCell emailCell = row.getCell(5);

if (emailCell != null) {

String email = emailCell.getRichStringCellValue().toString();

System.out.print("email: "+email+",");


// 手机

HSSFCell phoneCell = row.getCell(6);

if (phoneCell != null) {

String phone = phoneCell.getRichStringCellValue().toString();

System.out.println("phone: "+phone);


} catch (Exception e) {

throw new RuntimeException(e);




} catch (Exception e) {

throw new RuntimeException(e);




* 读取Excel2007

* @param args


public static void main(String[] args) {

File file = new File("D:\\apache-tomcat-6.0.18\\webapps\\GOVWBWeb\\upload\\user\\2014\\04\\18\\11\\adcc6bc6-bd5e-43e9-9a53-3ba879dfa62d.xlsx");


readExcel2003(new File("C:\\Users\\Administrator\\Desktop\\export.xls"));




import java.io.FileInputStream;

import org.apache.pdfbox.cos.COSDocument;

import org.apache.pdfbox.pdfparser.PDFParser;

import org.apache.pdfbox.pdmodel.PDDocument;

import org.apache.pdfbox.util.PDFTextStripper;


/**
 * Extracts the text content of PDF documents.
 *
 * @author wangshouhai
 * @version 2014-4-18 12:47:27 PM
 */


public class ReadPdf {

public String readFdf(String file) {

try {

PDFParser parser = new PDFParser(new FileInputStream(file));


COSDocument doc=parser.getDocument();

PDFTextStripper stripper = new PDFTextStripper();

String docText = stripper.getText(new PDDocument(doc));

docText= convertorSymbol(docText); 

return docText;

} catch (Exception e) {

throw new RuntimeException(e);




* 处理特殊字符

* @param sub

* @param docText


public static String convertorSymbol(String docText) {

StringBuilder sub = new StringBuilder();

char[] ch = docText.toCharArray();

for (int i = 0; i < ch.length; i++) {

char buf = ch[i];

if (9 == buf || 10 == buf || 13 == buf || 32 <= buf && !Character.isISOControl(buf)) {




return sub.toString().replaceAll("\\s*", "");


public static void main(String args[]) {

String text =new ReadPdf().readFdf("D:\\html2.pdf");





import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStreamReader;

public class ReadText {

    /**
     * Reads a UTF-8 text file and returns its content with unsupported control
     * characters removed.
     *
     * <p>Lines are joined with a single {@code '\n'}, so text from adjacent
     * lines does not run together; a trailing line terminator in the file is
     * not reproduced.
     *
     * @param file the text file to read
     * @return the cleaned file content
     * @throws RuntimeException if the file cannot be opened or read
     */
    public static String readText(File file) {
        StringBuilder sub = new StringBuilder();
        // try-with-resources closes the reader (and the wrapped streams) on every path.
        try (BufferedReader bufReader =
                new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"))) {
            String str = bufReader.readLine();
            while (str != null) {
                sub.append(str);
                str = bufReader.readLine();
                if (str != null) {
                    // Separator only between lines, never after the last one.
                    sub.append('\n');
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return convertorSymbol(sub.toString());
    }

    /**
     * Removes special characters from text.
     *
     * <p>Tab, line feed, carriage return and every non-control character from
     * the space upwards are kept; all other characters are dropped.
     *
     * @param docText the raw text; {@code null} is treated as empty
     * @return the filtered text
     */
    public static String convertorSymbol(String docText) {
        if (docText == null) {
            return "";
        }
        StringBuilder sub = new StringBuilder(docText.length());
        for (int i = 0; i < docText.length(); i++) {
            char buf = docText.charAt(i);
            boolean allowedControl = 9 == buf || 10 == buf || 13 == buf;
            boolean printable = 32 <= buf && !Character.isISOControl(buf);
            if (allowedControl || printable) {
                sub.append(buf);
            }
        }
        return sub.toString();
    }

    /**
     * Demo entry point: prints the cleaned content of a sample text file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File file = new File("C:/Users/Administrator/Desktop/异常信息列表.txt");
        String text = readText(file);
        System.out.println(text);
    }
}





