Lucene 搜索方式
Lucene 的搜索方式包括:词项查询(TermQuery) / 布尔查询(BooleanQuery) / 短语查询(PhraseQuery) / 范围查询(TermRangeQuery) / 通配符查询(WildcardQuery) / 模糊查询(FuzzyQuery)
package junit;
import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.regex.RegexQuery;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import com.ljq.entity.Person;
import com.ljq.utils.Consts;
import com.ljq.utils.DateUtils;
import com.ljq.utils.LuceneUtil;
import com.ljq.utils.XMLPropertyConfig;
/**
* Lucene搜索方式大合集<br/><br/>
*
* Lucene搜索种类很多。这里就介绍几个最常用的。其中TermQuery\BooleanQuery\RegexQuery功能最强大,最为常用。
*
*/
public class IndexQueryTest {
/**
* 词条搜索(单个关键字查找)<br/><br/>
*
* 主要对象是TermQuery,调用方式如下:<br/>
* Term term=new Term(字段名, 搜索关键字);<br/>
* Query query=new TermQuery(term);<br/>
* Hits hits=searcher.search(query);<br/>
* @throws Exception
*/
@Test
public void termQuery() throws Exception {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
IndexSearcher searcher = new IndexSearcher(reader);
//Term term=new Term("ids", "1");
//Term term=new Term("ages", "20");
//Term term=new Term("birthdays", "2008-06-12");
//Term term=new Term("name", "张三");
Term term=new Term("city", "厦门");
Query query=new TermQuery(term);
TopDocs topDocs=searcher.search(query, 1000);
System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
System.out.println();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scDoc : scoreDocs) {
Document document = searcher.doc(scDoc.doc);
String id = document.get("id");
String name = document.get("name");
String age = document.get("age");
String city = document.get("city");
String birthday = document.get("birthday");
float score = scDoc.score; //相似度
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
}
searcher.close();
reader.close();
}
/**
* 组合搜索(允许多个关键字组合搜索)<br/><br/>
*
* 主要对象是BooleanQuery,调用方式如下:<br/>
* Term term1=new Term(字段名, 搜索关键字);<br/>
* TermQuery query1=new TermQuery(term1);<br/><br/>
*
* Term term2=new Term(字段名, 搜索关键字);<br/>
* TermQuery query2=new TermQuery(term2);<br/><br/>
*
* BooleanQuery booleanQuery=new BooleanQuery();<br/>
* booleanQuery.add(query1, 参数);<br/>
* booleanQuery.add(query2, 参数);<br/><br/>
*
* Hits hits=searcher.search(booleanquery);<br/>
* 此方法中的核心在BooleanQuery的add方法上,其第二个参数有三个可选值,对应着逻辑上的与或非关系。<br/><br/>
*
* 参数如下:<br/>
* BooleanClause.Occur.MUST:必须包含,类似于逻辑运算的与<br/>
* BooleanClause.Occur.MUST_NOT:必须不包含,类似于逻辑运算的非<br/>
* BooleanClause.Occur.SHOULD:可以包含,类似于逻辑运算的或<br/>
* 这三者组合,妙用无穷。<br/>
* @throws Exception
*/
@Test
public void booleanQuery() throws Exception {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
IndexSearcher searcher = new IndexSearcher(reader);
//组合条件:
//年龄(或):10、20、30、40
//名字(与): 四
//城市(非): 莆田
TermQuery ageQuery10=new TermQuery(new Term("ages", "10"));
TermQuery ageQuery20=new TermQuery(new Term("ages", "20"));
TermQuery ageQuery30=new TermQuery(new Term("ages", "30"));
TermQuery ageQuery40=new TermQuery(new Term("ages", "40"));
TermQuery nameQuery=new TermQuery(new Term("name", "四"));
TermQuery cityQuery=new TermQuery(new Term("city", "莆田"));
BooleanQuery booleanQuery=new BooleanQuery();
booleanQuery.add(ageQuery10, BooleanClause.Occur.SHOULD);
booleanQuery.add(ageQuery20, BooleanClause.Occur.SHOULD);
booleanQuery.add(ageQuery30, BooleanClause.Occur.SHOULD);
booleanQuery.add(ageQuery40, BooleanClause.Occur.SHOULD);
booleanQuery.add(nameQuery, BooleanClause.Occur.MUST);
booleanQuery.add(cityQuery, BooleanClause.Occur.MUST_NOT);
TopDocs topDocs=searcher.search(booleanQuery, 1000);
System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
System.out.println();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scDoc : scoreDocs) {
Document document = searcher.doc(scDoc.doc);
String id = document.get("id");
String name = document.get("name");
String age = document.get("age");
String city = document.get("city");
String birthday = document.get("birthday");
float score = scDoc.score; //相似度
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
}
searcher.close();
reader.close();
}
/**
* 范围搜索(允许搜索指定范围内的关键字结果)<br/><br/>
*
* 主要对象是TermRangeQuery,调用方式如下:<br/>
* TermRangeQuery rangequery=new TermRangeQuery(字段名, 起始值, 终止值, 起始值是否包含边界, 终止值是否包含边界); <br/><br/>
*
* Hits hits=searcher.search(rangequery);<br/>
* 此方法中的参数是Boolean类型的,表示是否包含边界 。<br/>
* true 包含边界<br/>
* false不包含边界<br/>
* @throws Exception
*/
@Test
public void rangeQuery() throws Exception {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
IndexSearcher searcher = new IndexSearcher(reader);
TermRangeQuery idQuery=new TermRangeQuery("ids", "1", "3", true, true);
TermRangeQuery ageQuery=new TermRangeQuery("ages", "10", "30", true, true);
TermRangeQuery timeQuery=new TermRangeQuery("birthdays", "2011-03-09", "2013-01-07", true, true);
TopDocs topDocs=searcher.search(timeQuery, 1000);
System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
System.out.println();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scDoc : scoreDocs) {
Document document = searcher.doc(scDoc.doc);
String id = document.get("id");
String name = document.get("name");
String age = document.get("age");
String city = document.get("city");
String birthday = document.get("birthday");
float score = scDoc.score; //相似度
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
}
searcher.close();
reader.close();
}
/**
* 前缀搜索(搜索起始位置符合要求的结果)<br/><br/>
*
* 主要对象是PrefixQuery,调用方式如下:<br/>
* Term term=new Term(字段名, 搜索关键字);<br/>
* PrefixQuery prefixquery=new PrefixQuery(term);<br/>
* Hits hits=searcher.search(prefixquery);<br/>
*
* @throws Exception
*/
@Test
public void prefixQuery() throws Exception {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
IndexSearcher searcher = new IndexSearcher(reader);
Term term=new Term("name", "王");
PrefixQuery prefixquery=new PrefixQuery(term);
TopDocs topDocs=searcher.search(prefixquery, 1000);
System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
System.out.println();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scDoc : scoreDocs) {
Document document = searcher.doc(scDoc.doc);
String id = document.get("id");
String name = document.get("name");
String age = document.get("age");
String city = document.get("city");
String birthday = document.get("birthday");
float score = scDoc.score; //相似度
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
}
searcher.close();
reader.close();
}
/**
* 短语搜索(根据零碎的短语组合成新的词组进行搜索)<br/><br/>
*
* 主要对象是PhraseQuery,调用方式如下:<br/>
* Term term1=new Term(字段名, 搜索关键字);<br/>
* Term term2=new Term(字段名, 搜索关键字);<br/><br/>
*
* PhraseQuery phrasequery=new PhraseQuery();<br/>
* phrasequery.setSlop(参数);<br/>
* phrasequery.add(term1);<br/>
* phrasequery.add(term2);<br/>
* Hits hits=searcher.search(phrasequery);<br/>
* 其中setSlop的参数是设置两个关键字之间允许间隔的最大值。<br/>
* @throws Exception
*/
@Test
public void phraseQuery() throws Exception {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
IndexSearcher searcher = new IndexSearcher(reader);
Term term1=new Term("name", "林");
Term term2=new Term("name", "钦");
PhraseQuery phrasequery=new PhraseQuery();
phrasequery.setSlop(100);
phrasequery.add(term1);
phrasequery.add(term2);
TopDocs topDocs=searcher.search(phrasequery, 1000);
System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
System.out.println();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scDoc : scoreDocs) {
Document document = searcher.doc(scDoc.doc);
String id = document.get("id");
String name = document.get("name");
String age = document.get("age");
String city = document.get("city");
String birthday = document.get("birthday");
float score = scDoc.score; //相似度
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
}
searcher.close();
reader.close();
}
/**
* 多短语搜索(先指定一个前缀关键字,然后其他的关键字加在此关键字之后,组成词语进行搜索)<br/><br/>
*
* 主要对象是MultiPhraseQuery,调用方式如下:<br/>
*
* Term term=new Term(字段名,前置关键字);<br/>
* Term term1=new Term(字段名,搜索关键字);<br/>
* Term term2=new Term(字段名,搜索关键字);<br/><br/>
*
* MultiPhraseQuery multiPhraseQuery=new MultiPhraseQuery();<br/><br/>
*
* multiPhraseQuery.add(term);<br/>
* multiPhraseQuery.add(new Term[]{term1, term2});<br/><br/>
*
* Hits hits=searcher.search(multiPhraseQuery);<br/>
* @throws Exception
*/
@Test
public void multiPhraseQuery() throws Exception {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
IndexSearcher searcher = new IndexSearcher(reader);
//查询“计张”、“计钦”组合的关键词,先指定一个前缀关键字,然后其他的关键字加在此关键字之后,组成词语进行搜索
Term term=new Term("name", "计"); //前置关键字
Term term1=new Term("name", "张"); //搜索关键字
Term term2=new Term("name", "钦"); //搜索关键字
MultiPhraseQuery multiPhraseQuery=new MultiPhraseQuery();
multiPhraseQuery.add(term);
multiPhraseQuery.add(new Term[]{term1, term2});
TopDocs topDocs=searcher.search(multiPhraseQuery, 1000);
System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
System.out.println();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scDoc : scoreDocs) {
Document document = searcher.doc(scDoc.doc);
String id = document.get("id");
String name = document.get("name");
String age = document.get("age");
String city = document.get("city");
String birthday = document.get("birthday");
float score = scDoc.score; //相似度
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
}
searcher.close();
reader.close();
}
/**
* 模糊搜索(顾名思义)<br/><br/>
*
* 主要对象是FuzzyQuery,调用方式如下:<br/><br/>
*
* Term term=new Term(字段名, 搜索关键字);<br/>
* FuzzyQuery fuzzyquery=new FuzzyQuery(term,参数);<br/>
* Hits hits=searcher.search(fuzzyquery);<br/>
* 此中的参数是表示模糊度,是小于1的浮点小数,比如0.5f
* @throws Exception
*/
@Test
public void fuzzyQuery() throws Exception {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
IndexSearcher searcher = new IndexSearcher(reader);
Term term=new Term("name", "三张");
FuzzyQuery fuzzyquery=new FuzzyQuery(term, 0.5f);
TopDocs topDocs=searcher.search(fuzzyquery, 1000);
System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
System.out.println();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scDoc : scoreDocs) {
Document document = searcher.doc(scDoc.doc);
String id = document.get("id");
String name = document.get("name");
String age = document.get("age");
String city = document.get("city");
String birthday = document.get("birthday");
float score = scDoc.score; //相似度
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
}
searcher.close();
reader.close();
}
/**
* 通配符搜索(顾名思义)<br/><br/>
*
* 主要对象是:WildcardQuery,调用方式如下:<br/><br/>
*
* Term term=new Term(字段名,搜索关键字+通配符);<br/>
* WildcardQuery wildcardquery=new WildcardQuery(term);<br/>
* Hits hits=searcher.search(wildcardquery);<br/><br/>
*
* 其中的通配符分两种,即*和?<br/>
* * 表示任意多的自负<br/>
* ?表示任意一个字符
* @throws Exception
*/
@Test
public void wildcardQuery() throws Exception {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
IndexSearcher searcher = new IndexSearcher(reader);
Term term=new Term("name", "三?");
WildcardQuery wildcardQuery=new WildcardQuery(term);
TopDocs topDocs=searcher.search(wildcardQuery, 1000);
System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
System.out.println();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scDoc : scoreDocs) {
Document document = searcher.doc(scDoc.doc);
String id = document.get("id");
String name = document.get("name");
String age = document.get("age");
String city = document.get("city");
String birthday = document.get("birthday");
float score = scDoc.score; //相似度
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
}
searcher.close();
reader.close();
}
/**
* 正则表达式搜索(顾名思义,这个类引入lucene-queries-3.5.0.jar包)<br/><br/>
*
* 主要对象是:RegexQuery,调用方式如下 <br/>
* String regex = ".*"; <br/>
* Term term = new Term (search_field_name, regex); <br/>
* RegexQuery query = new RegexQuery (term); <br/>
* TopDocs hits = searcher.search (query, 100); <br/>
* @throws Exception
*/
@Test
public void regexQuery() throws Exception {
IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
IndexSearcher searcher = new IndexSearcher(reader);
String regex = "林*";
Term term=new Term("name", regex);
RegexQuery query = new RegexQuery(term);
TopDocs topDocs=searcher.search(query, 1000);
System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
System.out.println();
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scDoc : scoreDocs) {
Document document = searcher.doc(scDoc.doc);
String id = document.get("id");
String name = document.get("name");
String age = document.get("age");
String city = document.get("city");
String birthday = document.get("birthday");
float score = scDoc.score; //相似度
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
}
searcher.close();
reader.close();
}
/**
* 数值范围过滤器,如:int、long、float类型等
*
* @throws Exception
*/
@Test
public void numericFilter() throws Exception{ //CustomScoreQuery
//Filter filter = NumericRangeFilter.newLongRange("id", 1l, 3l, true, true);
Filter filter = NumericRangeFilter.newIntRange("age", 1, 39, true, true);
List<Person> persons=search(filter, new String[]{"name","city"}, "厦门");
for(Person person : persons){
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s.",
person.getId(), person.getName(), person.getAge(), person.getCity(), DateUtils.dateToString(person.getBirthday(), Consts.FORMAT_SHORT)));
}
}
/**
* 时间范围过滤器
* @throws Exception
*/
@Test
public void dateFilter() throws Exception{
//2008-06-12
long min=DateUtils.stringToDate("2008-06-12", Consts.FORMAT_SHORT).getTime();
//2013-01-07
long max=DateUtils.stringToDate("2013-01-07", Consts.FORMAT_SHORT).getTime();
Filter filter = NumericRangeFilter.newLongRange("birthday", min, max, true, true);
List<Person> persons=search(filter, new String[]{"name","city"}, "厦门");
for(Person person : persons){
System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s.",
person.getId(), person.getName(), person.getAge(), person.getCity(), DateUtils.dateToString(person.getBirthday(), Consts.FORMAT_SHORT)));
}
}
/**
* 创建索引
*
* @throws Exception
*/
@Test
public void createIndex() throws Exception {
List<Document> docs = new ArrayList<Document>();
for (Person person : getPersons()) {
Document doc = new Document();
//声明为NumericField的字段,只能用NumericRangeFilter对象范围查询,不能用作关键字查询。
//NumericField不推荐,统一用Field
doc.add(new NumericField("id", Field.Store.YES, true).setLongValue(person.getId()));
doc.add(new NumericField("age", Field.Store.YES, true).setIntValue(person.getAge()));
doc.add(new NumericField("birthday", Field.Store.YES, true).setLongValue(person.getBirthday().getTime()));
doc.add(new Field("ids", person.getId()+"", Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("ages", person.getAge()+"", Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("birthdays", DateUtils.dateToString(person.getBirthday(), Consts.FORMAT_SHORT),
Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("name", person.getName(), Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field("city", person.getCity(), Field.Store.YES, Field.Index.ANALYZED));
docs.add(doc);
}
LuceneUtil.createIndex(docs);
}
private List<Person> search(Filter filter, String[] fields, String keyword) {
List<Person> result = new ArrayList<Person>();
IndexSearcher indexSearcher = null;
TopDocs topDocs = null;
try {
// 创建索引搜索器,且只读
IndexReader indexReader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
indexSearcher = new IndexSearcher(indexReader);
MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35,
fields, new IKAnalyzer());
Query query = queryParser.parse(keyword);
// 返回前number条记录
if(filter == null){
topDocs=indexSearcher.search(query, 100000);
}else {
topDocs=indexSearcher.search(query, filter, 100000);
}
// 信息展示
int totalCount = topDocs.totalHits;
System.out.println("共检索出 " + totalCount + " 条记录");
//高亮显示
Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
QueryScorer fragmentScorer = new QueryScorer(query);
Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
Fragmenter fragmenter = new SimpleFragmenter(100);
highlighter.setTextFragmenter(fragmenter);
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (ScoreDoc scDoc : scoreDocs) {
Document document = indexSearcher.doc(scDoc.doc);
String id = document.get("id");
String name = document.get("name");
String age = document.get("age");
String city = document.get("city");
String birthday = document.get("birthday");
float score = scDoc.score; //相似度
System.out.println("相似度:"+score);
String lighterName = highlighter.getBestFragment(new IKAnalyzer(), "name", name);
if (null == lighterName) {
lighterName = name;
}
String lighterAge = highlighter.getBestFragment(new IKAnalyzer(), "age", age);
if (null == lighterAge) {
lighterAge = age;
}
String lighterCity= highlighter.getBestFragment(new IKAnalyzer(), "city", city);
if (null == lighterCity) {
lighterCity = city;
}
String lighterBirthday = highlighter.getBestFragment(new IKAnalyzer(), "birthday", birthday);
if (null == lighterBirthday) {
lighterBirthday = birthday;
}
Person person = new Person();
person.setId(Long.parseLong(id));
person.setName(lighterName);
person.setAge(NumberUtils.toInt(age));
person.setCity(lighterCity);
person.setBirthday(DateUtils.longToDate(Long.parseLong(lighterBirthday)));
result.add(person);
}
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
indexSearcher.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return result;
}
private List<Person> getPersons() {
try {
List<Person> persons = new ArrayList<Person>();
persons.add(new Person(1l, "张三", 10, "福州", DateUtils.stringToDate("2013-01-07", Consts.FORMAT_SHORT)));
persons.add(new Person(2l, "张四", 20, "莆田", DateUtils.stringToDate("2012-01-08", Consts.FORMAT_SHORT)));
persons.add(new Person(3l, "王五", 30, "泉州", DateUtils.stringToDate("2011-03-09", Consts.FORMAT_SHORT)));
persons.add(new Person(4l, "李四", 40, "厦门", DateUtils.stringToDate("2010-04-10", Consts.FORMAT_SHORT)));
persons.add(new Person(5l, "李白", 50, "漳州", DateUtils.stringToDate("2009-05-11", Consts.FORMAT_SHORT)));
persons.add(new Person(6l, "林计张三张三张三张三张三张三张三张三张三张三张三张三张三计钦", 60, "龙岩", DateUtils.stringToDate("2008-06-12", Consts.FORMAT_SHORT)));
return persons;
} catch (ParseException e) {
e.printStackTrace();
}
return null;
}
}
Lucene 的增删改:
package com.ljq.utils;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene helper for creating, updating and deleting index entries, using the
 * IKAnalyzer Chinese analyzer.<br/><br/>
 *
 * Every write operation opens the shared {@link IndexWriter} on demand and closes it
 * again when the operation finishes. This class makes no attempt at thread-safety.
 */
public class LuceneUtil {
    /** Index directory path, read from the {@code index_path} configuration entry. */
    private static final String indexPath = XMLPropertyConfig.getConfigXML()
            .getString("index_path");
    /** The currently open writer, or {@code null} when none is open. */
    public static IndexWriter indexWriter = null;
    private static final Logger log = Logger.getLogger(LuceneUtil.class);

    /**
     * Returns the shared writer, opening it first if none is currently open.
     *
     * @return the open writer, or {@code null} if it could not be opened (the failure is logged)
     */
    public static IndexWriter getIndexWriter() {
        if (indexWriter == null) {
            try {
                // Create the index directory (including missing parents) if it does not exist.
                File indexFile = new File(indexPath);
                if (!indexFile.exists()) {
                    indexFile.mkdirs();
                }
                Directory fsDirectory = FSDirectory.open(indexFile);
                IndexWriterConfig confIndex = new IndexWriterConfig(Version.LUCENE_35, new IKAnalyzer());
                confIndex.setOpenMode(OpenMode.CREATE_OR_APPEND);
                // NOTE(review): this forcibly removes an existing write lock; that is only
                // safe when no other writer is really using the directory.
                if (IndexWriter.isLocked(fsDirectory)) {
                    IndexWriter.unlock(fsDirectory);
                }
                indexWriter = new IndexWriter(fsDirectory, confIndex);
            } catch (Exception e) {
                log.error("lucene open index writer failure.", e);
            }
        }
        return indexWriter;
    }

    /**
     * Adds a single document to the index.
     *
     * @param doc the document to add
     * @return {@code true} on success, {@code false} if the operation failed (the failure is logged)
     */
    public static boolean createIndex(Document doc) {
        List<Document> docs = new ArrayList<Document>();
        docs.add(doc);
        return createIndex(docs);
    }

    /**
     * Adds the documents to the index, commits, and merges the index down to one segment.
     *
     * @param docs the documents to add
     * @return {@code true} on success, {@code false} if the operation failed (the failure is logged)
     */
    public static boolean createIndex(List<Document> docs) {
        try {
            IndexWriter writer = requireIndexWriter();
            for (Document doc : docs) {
                writer.addDocument(doc);
            }
            writer.commit();
            writer.forceMerge(1); // forceMerge replaces the older optimize()
            log.info("lucene create success.");
            return true;
        } catch (Exception e) {
            log.error("lucene create failure.", e);
            return false;
        } finally {
            closeIndexWriter();
        }
    }

    /**
     * Replaces the document(s) matching a term with the given document.<br/><br/>
     *
     * Example: with {@code field="id"} and {@code value="1234567"}, every document whose
     * id term is 1234567 is removed and {@code doc} is added in their place (so at most
     * one remains); if none matched, {@code doc} is simply added. Unlike a database
     * update, which can change a single column, Lucene always replaces the whole document.
     *
     * @param field name of the field identifying the document(s) to replace
     * @param value term value in that field
     * @param doc   the replacement document
     * @return {@code true} on success, {@code false} if the operation failed (the failure is logged)
     */
    public static boolean updateIndex(String field, String value, Document doc) {
        try {
            IndexWriter writer = requireIndexWriter();
            writer.updateDocument(new Term(field, value), doc);
            // Commit before reporting success so "true" means the change is durable.
            writer.commit();
            log.info("lucene update success.");
            return true;
        } catch (Exception e) {
            log.error("lucene update failure.", e);
            return false;
        } finally {
            closeIndexWriter();
        }
    }

    /**
     * Deletes every document matching a term.
     *
     * @param field name of the field identifying the document(s) to delete
     * @param value term value in that field
     * @return {@code true} on success, {@code false} if the operation failed (the failure is logged)
     */
    public static boolean deleteIndex(String field, String value) {
        try {
            IndexWriter writer = requireIndexWriter();
            writer.deleteDocuments(new Term(field, value));
            // Commit before reporting success so "true" means the change is durable.
            writer.commit();
            log.info("lucene delete success.");
            return true;
        } catch (Exception e) {
            log.error("lucene delete failure.", e);
            return false;
        } finally {
            closeIndexWriter();
        }
    }

    /**
     * Deletes every document in the index.
     *
     * @return {@code true} on success, {@code false} if the operation failed (the failure is logged)
     */
    public static boolean deleteAllIndex() {
        try {
            IndexWriter writer = requireIndexWriter();
            writer.deleteAll();
            // Commit before reporting success so "true" means the change is durable.
            writer.commit();
            log.info("lucene delete all success.");
            return true;
        } catch (Exception e) {
            log.error("lucene delete all failure.", e);
            return false;
        } finally {
            closeIndexWriter();
        }
    }

    /**
     * Tells whether an index has already been created, judged by the presence of the
     * {@code segments.gen} file in the index directory. The directory itself is created
     * if it is missing.
     *
     * @return {@code true} if the index exists, {@code false} otherwise
     * @throws Exception declared for callers' compatibility
     */
    public static boolean existsIndex() throws Exception {
        File file = new File(indexPath);
        if (!file.exists()) {
            file.mkdirs();
        }
        String indexSufix = "/segments.gen";
        File indexFile = new File(indexPath + indexSufix);
        return indexFile.exists();
    }

    /**
     * Returns the shared writer or fails with a descriptive exception, so callers report
     * "writer unavailable" instead of a bare NullPointerException.
     */
    private static IndexWriter requireIndexWriter() {
        IndexWriter writer = getIndexWriter();
        if (writer == null) {
            throw new IllegalStateException("index writer is unavailable for path: " + indexPath);
        }
        return writer;
    }

    /**
     * Closes the shared writer, if one is open, and forgets it so that the next
     * operation opens a fresh writer instead of reusing a closed one.
     */
    private static void closeIndexWriter() {
        IndexWriter writer = indexWriter;
        indexWriter = null;
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                log.error("lucene close index writer failure.", e);
            }
        }
    }
}
原文地址:https://blog.csdn.net/eaphyy/article/details/71195111
Lucene 搜索方式的更多相关文章
- Lucene搜索方式大合集
package junit; import java.io.File; import java.io.IOException; import java.text.ParseException; imp ...
- lucene搜索方式(query类型)
Lucene有多种搜索方式,可以根据需要选择不同的方式. 1.词条搜索(单个关键字查找) 主要对象是TermQuery 调用方式如下: Term term=new Term(字段名,搜索关键字);Qu ...
- lucene 搜索demo
package com.ljq.utils; import java.io.File; import java.util.ArrayList; import java.util.List; impor ...
- Lucene学习笔记: 五,Lucene搜索过程解析
一.Lucene搜索过程总论 搜索的过程总的来说就是将词典及倒排表信息从索引中读出来,根据用户输入的查询语句合并倒排表,得到结果文档集并对文档进行打分的过程. 其可用如下图示: 总共包括以下几个过程: ...
- Lucene学习总结之七:Lucene搜索过程解析
一.Lucene搜索过程总论 搜索的过程总的来说就是将词典及倒排表信息从索引中读出来,根据用户输入的查询语句合并倒排表,得到结果文档集并对文档进行打分的过程. 其可用如下图示: 总共包括以下几个过程: ...
- Lucene核心--构建Lucene搜索(上篇,理论篇)
2.1构建Lucene搜索 2.1.1 Lucene内容模型 一个文档(document)就是Lucene建立索引和搜索的原子单元,它由一个或者多个字段(field)组成,字段才是Lucene的真实内 ...
- Lucene系列六:Lucene搜索详解(Lucene搜索流程详解、搜索核心API详解、基本查询详解、QueryParser详解)
一.搜索流程详解 1. 先看一下Lucene的架构图 由图可知搜索的过程如下: 用户输入搜索的关键字.对关键字进行分词.根据分词结果去索引库里面找到对应的文章id.根据文章id找到对应的文章 2. L ...
- (四)Lucene——搜索和相关度排序
1. 搜索 1.1 创建查询对象的方式 通过Query子类来创建查询对象 Query子类常用的有:TermQuery.NumericRangeQuery.BooleanQuery 特点:不能输入luc ...
- Lucene学习总结之七:Lucene搜索过程解析 2014-06-25 14:23 863人阅读 评论(1) 收藏
一.Lucene搜索过程总论 搜索的过程总的来说就是将词典及倒排表信息从索引中读出来,根据用户输入的查询语句合并倒排表,得到结果文档集并对文档进行打分的过程. 其可用如下图示: 总共包括以下几个过程: ...
随机推荐
- Java-Class-FC:java.time.Duration
ylbtech-Java-Class-FC:java.time.Duration 1.返回顶部 2.返回顶部 3.返回顶部 1. /* * Copyright (c) 2012, 2015, ...
- Openstack贡献者须知 — OpenPGP/SSH/CLA贡献者协议
目录 目录 前言 Openstack基金委员会 Openstack贡献者须知 注册Openstack In Launchpad 生成并上传OpenPGP密钥 生成并上传SSH公钥 Join The O ...
- 在Linux下安装PyEmu
git clone https://github.com/OpenRCE/pydbg.git git clone https://github.com/OpenRCE/paimei.git libda ...
- SDUTOJ 2498 数据结构实验之图论十一:AOE网上的关键路径
题目链接:http://acm.sdut.edu.cn/onlinejudge2/index.php/Home/Index/problemdetail/pid/2498.html 题目大意 略. 分析 ...
- PAT_A1081#Rational Sum
Source: PAT A1081 Rational Sum (20 分) Description: Given N rational numbers in the form numerator/de ...
- PAT_A1139#First Contact
Source: PAT A1139 First Contact (30 分) Description: Unlike in nowadays, the way that boys and girls ...
- Codeforces 1169B Pairs
题目链接:http://codeforces.com/contest/1169/problem/B 题意:给你 m 对数 ,问你能不能在 1 − n 之间找到俩个不相等的 x 和 y 使得 对于前面每 ...
- 类型转换、类型安全以及is和as的使用
class Program { static void Main(string[] args) { //1.类型转换 { //隐式转换:不需要转型,因为new返回一个Employee对象,而Objec ...
- 无法CREATE UNIQUE INDEX;找到重复的关键字
- add a characteristic in enovia PLM
Problem: add a new Char. name D_COI6 that the description is Injected coloration #7 (COI6) in the D_ ...