Lucene搜索引擎例子demo

一.导入相应的jar包

IKAnalyzer3.2.0Stable.jar

lucene-analyzers-3.0.1.jar

lucene-core-3.0.1.jar

lucene-highlighter-3.0.1.jar

lucene-memory-3.0.1.jar

二.写一个完整的demo

1.创建数据表和对应的实体类

-- Start from a clean slate: an existing `article` table (and its rows) is removed first.
DROP TABLE IF EXISTS `article`;

-- One row per article; the three columns match the fields of the Article entity class.
CREATE TABLE `article` (
    `id`      int(11)       NOT NULL AUTO_INCREMENT PRIMARY KEY,  -- generated key
    `title`   varchar(20)   DEFAULT NULL,                         -- optional, up to 20 characters
    `content` varchar(5000) DEFAULT NULL                          -- optional, up to 5000 characters
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

public class Article {

private int id;

private String title;

private String content;

get/set方法省略...

}

2.创建一个提供公共方法的类：

/**
 * Holder for one page of query results plus the total number of matches.
 *
 * <p>The list is deliberately kept as a raw {@code List} so that existing
 * callers can keep assigning or casting it to the element type they expect.
 */
@SuppressWarnings("rawtypes")
public class QueryResult {

    /** Records of the current page. */
    private List list;

    /** Total number of matching records, independent of the page size. */
    private int count;

    /** Creates an empty result; populate it through the setters. */
    public QueryResult() {
    }

    /**
     * Creates a populated result.
     *
     * @param list  records of the current page
     * @param count total number of matching records
     */
    public QueryResult(List list, int count) {
        this.list = list;
        this.count = count;
    }

    /** @return records of the current page, or {@code null} if never set */
    public List getList() {
        return list;
    }

    /** @param list records of the current page */
    public void setList(List list) {
        this.list = list;
    }

    /** @return total number of matching records */
    public int getCount() {
        return count;
    }

    /** @param count total number of matching records */
    public void setCount(int count) {
        this.count = count;
    }
}

3.提供创建索引库目录、分词器以及索引读写对象的工具类：

public class LuceneUtils{

private static Directory directory;// 建立索引库存储目录

private static Analyzer analyzer;// 创建分词器

private static IndexWriter indexWriter; // 在程序启动是初始化,建立索引

private static IndexSearcher indexSearcher;// 查询

static {

try {

// 加载配置文件lucene.properties，该文件中是创建索引库的路径"path=D:\\IindexSearch

Properties prop = new Properties();

InputStream inStream = LuceneUtils.class.getClassLoader().getResourceAsStream("lucene.properties");

//InputStream inStream = ClassLoader.getSystemResourceAsStream("lucene.properties");

prop.load(inStream);

directory = FSDirectory.open(new File(prop.getProperty("path")));

analyzer = new StandardAnalyzer(Version.LUCENE_30);

// 在程序启动是初始化,建立索引

indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);

//程序退出时关闭资源

Runtime.getRuntime().addShutdownHook(new Thread(){

public void run(){

try {

indexWriter.close();

} catch (Exception e) {

e.printStackTrace();

} 

}

});

} catch (Exception e) {

e.printStackTrace();

}

}

public static Document objectToDocument(Object obj) {

Article article = (Article) obj;

// 将文档转为domcment

Document doc = new Document();

String idstr = NumericUtils.intToPrefixCoded(article.getId());

doc.add(new Field("id", idstr, Store.YES, Index.NOT_ANALYZED));

doc.add(new Field("title", article.getTitle(), Store.YES, Index.ANALYZED));

doc.add(new Field("content", article.getContent(), Store.YES, Index.ANALYZED));

return doc;

}

public static Object documentToObject(Document doc) {

Article article = new Article();

//将Document转为Article

//将字符串转化为数字

int id = NumericUtils.prefixCodedToInt(doc.get("id"));

article.setId(id);

article.setTitle(doc.get("title"));

article.setContent(doc.get("content"));

return article;

}

public static IndexWriter getIndexWriter() {

return indexWriter;

}

public static IndexSearcher getIndexSearch() {

// 执行查询

try {

indexSearcher = new IndexSearcher(directory);

} catch (Exception e) {

throw new RuntimeException(e);

}

return indexSearcher;

}

public static Directory getDirectory() {

return directory;

}

public static Analyzer getAnalyzer() {

return analyzer;

}

}

4.创建增删改查方法

public class IndexDao {

/**
 * Adds the given article to the index and commits right away.
 *
 * @param article the article to index
 * @throws RuntimeException wrapping any failure during conversion, indexing or commit
 */
public void save(Article article) {
    try {
        // Convert first, then hand the document to the shared writer.
        final Document articleDoc = LuceneUtils.objectToDocument(article);
        final IndexWriter writer = LuceneUtils.getIndexWriter();
        writer.addDocument(articleDoc);
        writer.commit();
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }
}

/**
 * Removes from the index every document whose {@code id} term matches the
 * id of the given article, then commits.
 *
 * @param article the article to remove; only its id is used
 * @throws RuntimeException wrapping any failure during deletion or commit
 */
public void delete(Article article) {
    // The id is indexed in prefix-coded form, so the term must use the same encoding.
    final Term idTerm = new Term("id", NumericUtils.intToPrefixCoded(article.getId()));

    try {
        final IndexWriter writer = LuceneUtils.getIndexWriter();
        writer.deleteDocuments(idTerm);
        writer.commit();
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }
}

/**
 * Replaces the indexed version of the given article and commits.
 *
 * <p>Uses {@code IndexWriter.updateDocument}, the single-call form of
 * "delete the documents matching the id term, then add the new document".
 *
 * @param article the article carrying the id to match and the new field values
 * @throws RuntimeException wrapping any failure during update or commit
 */
public void update(Article article) {
    // Term identifying the old version; the id is indexed in prefix-coded form.
    final Term idTerm = new Term("id", NumericUtils.intToPrefixCoded(article.getId()));

    // New version of the document.
    final Document replacement = LuceneUtils.objectToDocument(article);

    try {
        final IndexWriter writer = LuceneUtils.getIndexWriter();
        writer.updateDocument(idTerm, replacement);
        writer.commit();
    } catch (Exception cause) {
        throw new RuntimeException(cause);
    }
}

/**
 * Searches the index and returns one page of matching articles.
 *
 * <p>The query string is parsed against the fields {@code id}, {@code title}
 * and {@code content}; hits are sorted by {@code id}. In each returned
 * article the title is replaced by a highlighted fragment (matches wrapped
 * in {@code <font color='red'>}) when the title contains a match.
 *
 * @param queryString the query in Lucene query-parser syntax
 * @param first       zero-based index of the first hit to return
 * @param max         maximum number of hits to return
 * @return the articles of the requested page together with the total hit count
 * @throws RuntimeException wrapping any parse, search or highlighting failure
 */
public QueryResult query(String queryString, int first, int max) {
    IndexSearcher indexSearcher = null;
    try {
        // Hits are fetched from the top, so the search must reach the end of the page.
        int limit = first + max;

        // 1. Turn the query string into a Query spanning several fields.
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30,
                new String[] { "id", "title", "content" }, LuceneUtils.getAnalyzer());
        Query query = parser.parse(queryString);

        // 2. Run the search, sorted by id.
        indexSearcher = LuceneUtils.getIndexSearch();
        Sort sort = new Sort(new SortField("id", SortField.INT));
        TopDocs topDocs = indexSearcher.search(query, null, limit, sort);
        int count = topDocs.totalHits; // total matches, not just the fetched ones
        ScoreDoc[] scoreDoc = topDocs.scoreDocs;

        // Highlighter: prefix/suffix around matches and a fragment size of 100.
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleFragmenter(100));

        // 3. Collect only the requested page: skip the hits before 'first'.
        int startIndex = Math.max(first, 0);
        int endIndex = Math.min(limit, scoreDoc.length);
        List<Article> list = new ArrayList<Article>();
        for (int i = startIndex; i < endIndex; i++) {
            Document doc = indexSearcher.doc(scoreDoc[i].doc);

            // getBestFragment returns null when the title contains no match;
            // in that case the stored title is kept as is.
            String text = highlighter.getBestFragment(LuceneUtils.getAnalyzer(), "title", doc.get("title"));
            if (text != null) {
                doc.getField("title").setValue(text);
            }

            list.add((Article) LuceneUtils.documentToObject(doc));
        }

        return new QueryResult(list, count);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        // A new searcher is opened for every call, so it has to be released here.
        if (indexSearcher != null) {
            try {
                indexSearcher.close();
            } catch (Exception e) {
                // Best-effort cleanup: the outcome of the query is already decided.
                e.printStackTrace();
            }
        }
    }
}

5.测试dao方法：

/** Indexes twenty articles with ids 0..19 that share the same title and content. */
@Test
public void testSave() {
    final String title = "Lucene搜索的方式";
    final String content = "全文检索是计算机程序通过扫描文章中的每一个词，对每一个词建立一个索引，指明该词在文章中出现的次数和位置。";

    // One instance is reused; each save() converts its current state into a new document.
    Article article = new Article();
    for (int id = 0; id < 20; id++) {
        article.setId(id);
        article.setTitle(title);
        article.setContent(content);
        indexDao.save(article);
    }
}

/** Deletes the indexed article with id 1; only the id has to be set for a delete. */
@Test
public void testDelete() {
    Article toRemove = new Article();
    toRemove.setId(1);

    indexDao.delete(toRemove);
}

/** Replaces the indexed article with id 1 by a version with new content. */
@Test
public void testUpdate() {
    Article updated = new Article();
    updated.setId(1);
    updated.setTitle("Lucene搜索的方式");
    updated.setContent("跟新索引库测试是否正确");

    indexDao.update(updated);
}

/**
 * Searches for "Lucene", requesting the first page of at most 10 hits, and
 * prints the total hit count followed by every returned article.
 */
@Test
@SuppressWarnings("unchecked")
public void testQuery() {
    String queryString = "Lucene";

    // IndexDao exposes its search as query(queryString, first, max).
    QueryResult queryResult = indexDao.query(queryString, 0, 10);

    System.out.println("count---------->" + queryResult.getCount());

    // QueryResult keeps a raw List; query() fills it with Article instances.
    List<Article> list = (List<Article>) queryResult.getList();
    for (Article article : list) {
        System.err.println("list--------->" + article.toString());
    }
}

Lucene搜索引擎例子demo的更多相关文章

传智播客课程——Lucene搜索引擎
Lucene不是一个现成的程序,类似文件搜索程序或web网络爬行器或是一个网站的搜索引擎.Lucene是一个软件库,一个开发工具包,而不是一个具有完整特征的搜索应用程序.它本身只关注文本的索引和搜索. ...
lucene 4.4 demo
ackage com.zxf.demo; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStr ...
基于Lucene的文件检索Demo
通过Lucene实现了简单的文件检索功能的Demo.这个Demo支持基于文件内容的检索,支持中文分词和高亮显示. 下面简单的介绍下核心的类 1)索引相关的类 1.FileIndexBuilder -- ...
Nutch+Lucene搜索引擎开发实践
网络拓扑图 1 网络拓扑图安装Java JDK 首先查看系统是否已经安装了其它版本号的JDK,假设有,先要把其它版本号的JDK卸载. 用root用户登录系统. # rpm-qa|grep gcj ...
Spring aop 小例子demo
由于最近的服务项目提供接口有一个需求,所有操作都必须检查操作的服务可用,所以感觉Aop特别适合实施.完成学习的小例子. 关于spring-Aop原理:http://m.oschina.net/blog ...
学习笔记CB011:lucene搜索引擎库、IKAnalyzer中文切词工具、检索服务、查询索引、导流、word2vec
影视剧字幕聊天语料库特点,把影视剧说话内容一句一句以回车换行罗列三千多万条中国话,相邻第二句很可能是第一句最好回答.一个问句有很多种回答,可以根据相关程度以及历史聊天记录所有回答排序,找到最优,是一个 ...
一个Lucene.Net的Demo
今天突然想来看一下全文检索,于是就了解了一下Lucene.Net,然后把公司目前的产品表拿来练手,写了这么个Demo. 先看一下Demo的代码 public class ProductReposito ...
Lucene的例子
lucene爬数据库中的数据无非也是查询数据.所有我们用lucene搜索数据主要有下面几个步骤:(代码紧供参考) 一 , 从数据库中查数据 ====爬数据 ------------- ...
Lucene搜索引擎入门
一.什么是全文检索? 就是在检索数据,数据的分类: 在计算机当中,比如说存在磁盘的文本文档,HTML页面,Word文档等等...... ...

随机推荐

委托、事件、Observer观察者模式的使用解析二
一.设计模式-Observer观察者模式 Observer设计模式是为了定义对象间的一种一对多的依赖关系,以便于当一个对象的状态改变时,其他依赖于它的对象会被自动告知并更新.Observer模式是一种 ...
史上前端面试最全知识点（附答案）---html & js & css
史上前端面试最全知识点(附答案) 一.html & js & css 1.AMD和CMD是什么?它们的区别有哪些? AMD和CMD是二种模块定义规范.现在都使用模块化编程,AMD,异步 ...
Promise 对象
Promise 对象用于处理异步请求,保存一个异步操作最终完成(或失败)的结果语法 new Promise( /* executor */ function(resolve, reject) {.. ...
memcached讲解
Memcached 最近空闲的是时候研究了一下缓存,根据公司环境对缓存进行了系统的了解,我们使用memcacheed进行缓存,下面结合我的理解,以及网上的相关资料,memecached进行讲解. m ...
Github Page--CSDN新人的第二选择
我也是个CSDN新人,使用的CSDN的初衷应该和众人类似,就是想总结下平时的学习成果,或者一些想法. CSDN好的地方: 书写界面简洁,支持markdown语法人还算多,也比较年轻相对较活跃内容 ...
干货｜人人都是翻译项目的Master
在平时的工作中,我们都会经常查阅一些英文文档来解决平时遇到的问题和拓宽视野.看到好的文章或者书籍有没有想要和小伙伴分享的冲动,那么我们一起来翻译吧- 翻译主张 "信达雅" .& ...
win10 uwp 车表盘径向规
车表盘就是有刻度的圆盘加上针,这个控件可以直观让用户知道当前的速度或其他看名字不知道是什么,我就放一张图使用很简单,在Nuget,Radial Gauge 要使用大神做的,简单,在使用我们需要在N ...
php学习资料
http://medoo.in/轻量级 PHP 连接数据库的类库 http://www.thinkphp.cn/国产 PHP 万金油框架,快速做项目,效率一般,BUG 众多
【NOIP2015提高组】Day1 t1神奇的幻方
一大淼题,直接瞎搞即可,不过一定要仔细看题目给定的条件. #include<iostream> #include<cstdio> #include<cstring> ...
vue-cli + sass 的正确打开方式
关于在vue-cli搭建的项目中怎么配置sass,网上搜到的基本是这种答案: 但是我认为,直接将样式写在每个单文件的<style>里,是十分不明智的做法.且不说node-sass安装过程的 ...

Lucene搜索引擎例子demo

Lucene搜索引擎例子demo的更多相关文章

随机推荐

热门专题