1. package junit;
  2.  
  3. import java.io.File;
  4. import java.io.IOException;
  5. import java.text.ParseException;
  6. import java.util.ArrayList;
  7. import java.util.List;
  8.  
  9. import org.apache.commons.lang.math.NumberUtils;
  10. import org.apache.lucene.document.Document;
  11. import org.apache.lucene.document.Field;
  12. import org.apache.lucene.document.NumericField;
  13. import org.apache.lucene.index.IndexReader;
  14. import org.apache.lucene.index.Term;
  15. import org.apache.lucene.queryParser.MultiFieldQueryParser;
  16. import org.apache.lucene.search.BooleanClause;
  17. import org.apache.lucene.search.BooleanQuery;
  18. import org.apache.lucene.search.Filter;
  19. import org.apache.lucene.search.FuzzyQuery;
  20. import org.apache.lucene.search.IndexSearcher;
  21. import org.apache.lucene.search.MultiPhraseQuery;
  22. import org.apache.lucene.search.NumericRangeFilter;
  23. import org.apache.lucene.search.PhraseQuery;
  24. import org.apache.lucene.search.PrefixQuery;
  25. import org.apache.lucene.search.Query;
  26. import org.apache.lucene.search.ScoreDoc;
  27. import org.apache.lucene.search.TermQuery;
  28. import org.apache.lucene.search.TermRangeQuery;
  29. import org.apache.lucene.search.TopDocs;
  30. import org.apache.lucene.search.WildcardQuery;
  31. import org.apache.lucene.search.highlight.Formatter;
  32. import org.apache.lucene.search.highlight.Fragmenter;
  33. import org.apache.lucene.search.highlight.Highlighter;
  34. import org.apache.lucene.search.highlight.QueryScorer;
  35. import org.apache.lucene.search.highlight.SimpleFragmenter;
  36. import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
  37. import org.apache.lucene.search.regex.RegexQuery;
  38. import org.apache.lucene.store.FSDirectory;
  39. import org.apache.lucene.util.Version;
  40. import org.junit.Test;
  41. import org.wltea.analyzer.lucene.IKAnalyzer;
  42.  
  43. import com.ljq.entity.Person;
  44. import com.ljq.utils.Consts;
  45. import com.ljq.utils.DateUtils;
  46. import com.ljq.utils.LuceneUtil;
  47. import com.ljq.utils.XMLPropertyConfig;
  48.  
  49. /**
  50. * Lucene搜索方式大合集<br/><br/>
  51. *
  52. * Lucene搜索种类很多。这里就介绍几个最常用的。其中TermQuery\BooleanQuery\RegexQuery功能最强大,最为常用。
  53. *
  54. * @author 林计钦
  55. * @version 1.0 2013-6-7 上午09:34:08
  56. */
  57. public class IndexQueryTest {
  58.  
  59. /**
  60. * 词条搜索(单个关键字查找)<br/><br/>
  61. *
  62. * 主要对象是TermQuery,调用方式如下:<br/>
  63. * Term term=new Term(字段名, 搜索关键字);<br/>
  64. * Query query=new TermQuery(term);<br/>
  65. * Hits hits=searcher.search(query);<br/>
  66. * @throws Exception
  67. */
  68. @Test
  69. public void termQuery() throws Exception {
  70. IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
  71. IndexSearcher searcher = new IndexSearcher(reader);
  72.  
  73. //Term term=new Term("ids", "1");
  74. //Term term=new Term("ages", "20");
  75. //Term term=new Term("birthdays", "2008-06-12");
  76. //Term term=new Term("name", "张三");
  77. Term term=new Term("city", "厦门");
  78.  
  79. Query query=new TermQuery(term);
  80. TopDocs topDocs=searcher.search(query, 1000);
  81. System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
  82. System.out.println();
  83.  
  84. ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  85. for (ScoreDoc scDoc : scoreDocs) {
  86. Document document = searcher.doc(scDoc.doc);
  87. String id = document.get("id");
  88. String name = document.get("name");
  89. String age = document.get("age");
  90. String city = document.get("city");
  91. String birthday = document.get("birthday");
  92. float score = scDoc.score; //相似度
  93.  
  94. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
  95. id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
  96. }
  97.  
  98. searcher.close();
  99. reader.close();
  100. }
  101.  
  102. /**
  103. * 组合搜索(允许多个关键字组合搜索)<br/><br/>
  104. *
  105. * 主要对象是BooleanQuery,调用方式如下:<br/>
  106. * Term term1=new Term(字段名, 搜索关键字);<br/>
  107. * TermQuery query1=new TermQuery(term1);<br/><br/>
  108. *
  109. * Term term2=new Term(字段名, 搜索关键字);<br/>
  110. * TermQuery query2=new TermQuery(term2);<br/><br/>
  111. *
  112. * BooleanQuery booleanQuery=new BooleanQuery();<br/>
  113. * booleanQuery.add(query1, 参数);<br/>
  114. * booleanQuery.add(query2, 参数);<br/><br/>
  115. *
  116. * Hits hits=searcher.search(booleanquery);<br/>
  117. * 此方法中的核心在BooleanQuery的add方法上,其第二个参数有三个可选值,对应着逻辑上的与或非关系。<br/><br/>
  118. *
  119. * 参数如下:<br/>
  120. * BooleanClause.Occur.MUST:必须包含,类似于逻辑运算的与<br/>
  121. * BooleanClause.Occur.MUST_NOT:必须不包含,类似于逻辑运算的非<br/>
  122. * BooleanClause.Occur.SHOULD:可以包含,类似于逻辑运算的或<br/>
  123. * 这三者组合,妙用无穷。<br/>
  124. * @throws Exception
  125. */
  126. @Test
  127. public void booleanQuery() throws Exception {
  128. IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
  129. IndexSearcher searcher = new IndexSearcher(reader);
  130.  
  131. //组合条件:
  132. //年龄(或):10、20、30、40
  133. //名字(与): 四
  134. //城市(非): 莆田
  135. TermQuery ageQuery10=new TermQuery(new Term("ages", "10"));
  136. TermQuery ageQuery20=new TermQuery(new Term("ages", "20"));
  137. TermQuery ageQuery30=new TermQuery(new Term("ages", "30"));
  138. TermQuery ageQuery40=new TermQuery(new Term("ages", "40"));
  139.  
  140. TermQuery nameQuery=new TermQuery(new Term("name", "四"));
  141.  
  142. TermQuery cityQuery=new TermQuery(new Term("city", "莆田"));
  143.  
  144. BooleanQuery booleanQuery=new BooleanQuery();
  145. booleanQuery.add(ageQuery10, BooleanClause.Occur.SHOULD);
  146. booleanQuery.add(ageQuery20, BooleanClause.Occur.SHOULD);
  147. booleanQuery.add(ageQuery30, BooleanClause.Occur.SHOULD);
  148. booleanQuery.add(ageQuery40, BooleanClause.Occur.SHOULD);
  149. booleanQuery.add(nameQuery, BooleanClause.Occur.MUST);
  150. booleanQuery.add(cityQuery, BooleanClause.Occur.MUST_NOT);
  151.  
  152. TopDocs topDocs=searcher.search(booleanQuery, 1000);
  153. System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
  154. System.out.println();
  155.  
  156. ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  157. for (ScoreDoc scDoc : scoreDocs) {
  158. Document document = searcher.doc(scDoc.doc);
  159. String id = document.get("id");
  160. String name = document.get("name");
  161. String age = document.get("age");
  162. String city = document.get("city");
  163. String birthday = document.get("birthday");
  164. float score = scDoc.score; //相似度
  165.  
  166. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
  167. id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
  168. }
  169.  
  170. searcher.close();
  171. reader.close();
  172. }
  173.  
  174. /**
  175. * 范围搜索(允许搜索指定范围内的关键字结果)<br/><br/>
  176. *
  177. * 主要对象是TermRangeQuery,调用方式如下:<br/>
  178. * TermRangeQuery rangequery=new TermRangeQuery(字段名, 起始值, 终止值, 起始值是否包含边界, 终止值是否包含边界); <br/><br/>
  179. *
  180. * Hits hits=searcher.search(rangequery);<br/>
  181. * 此方法中的参数是Boolean类型的,表示是否包含边界 。<br/>
  182. * true 包含边界<br/>
  183. * false不包含边界<br/>
  184. * @throws Exception
  185. */
  186. @Test
  187. public void rangeQuery() throws Exception {
  188. IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
  189. IndexSearcher searcher = new IndexSearcher(reader);
  190.  
  191. TermRangeQuery idQuery=new TermRangeQuery("ids", "1", "3", true, true);
  192. TermRangeQuery ageQuery=new TermRangeQuery("ages", "10", "30", true, true);
  193. TermRangeQuery timeQuery=new TermRangeQuery("birthdays", "2011-03-09", "2013-01-07", true, true);
  194.  
  195. TopDocs topDocs=searcher.search(timeQuery, 1000);
  196. System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
  197. System.out.println();
  198.  
  199. ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  200. for (ScoreDoc scDoc : scoreDocs) {
  201. Document document = searcher.doc(scDoc.doc);
  202. String id = document.get("id");
  203. String name = document.get("name");
  204. String age = document.get("age");
  205. String city = document.get("city");
  206. String birthday = document.get("birthday");
  207. float score = scDoc.score; //相似度
  208.  
  209. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
  210. id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
  211. }
  212.  
  213. searcher.close();
  214. reader.close();
  215. }
  216.  
  217. /**
  218. * 前缀搜索(搜索起始位置符合要求的结果)<br/><br/>
  219. *
  220. * 主要对象是PrefixQuery,调用方式如下:<br/>
  221. * Term term=new Term(字段名, 搜索关键字);<br/>
  222. * PrefixQuery prefixquery=new PrefixQuery(term);<br/>
  223. * Hits hits=searcher.search(prefixquery);<br/>
  224. *
  225. * @throws Exception
  226. */
  227. @Test
  228. public void prefixQuery() throws Exception {
  229. IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
  230. IndexSearcher searcher = new IndexSearcher(reader);
  231.  
  232. Term term=new Term("name", "王");
  233. PrefixQuery prefixquery=new PrefixQuery(term);
  234.  
  235. TopDocs topDocs=searcher.search(prefixquery, 1000);
  236. System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
  237. System.out.println();
  238.  
  239. ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  240. for (ScoreDoc scDoc : scoreDocs) {
  241. Document document = searcher.doc(scDoc.doc);
  242. String id = document.get("id");
  243. String name = document.get("name");
  244. String age = document.get("age");
  245. String city = document.get("city");
  246. String birthday = document.get("birthday");
  247. float score = scDoc.score; //相似度
  248.  
  249. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
  250. id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
  251. }
  252.  
  253. searcher.close();
  254. reader.close();
  255. }
  256.  
  257. /**
  258. * 短语搜索(根据零碎的短语组合成新的词组进行搜索)<br/><br/>
  259. *
  260. * 主要对象是PhraseQuery,调用方式如下:<br/>
  261. * Term term1=new Term(字段名, 搜索关键字);<br/>
  262. * Term term2=new Term(字段名, 搜索关键字);<br/><br/>
  263. *
  264. * PhraseQuery phrasequery=new PhraseQuery();<br/>
  265. * phrasequery.setSlop(参数);<br/>
  266. * phrasequery.add(term1);<br/>
  267. * phrasequery.add(term2);<br/>
  268. * Hits hits=searcher.search(phrasequery);<br/>
  269. * 其中setSlop的参数是设置两个关键字之间允许间隔的最大值。<br/>
  270. * @throws Exception
  271. */
  272. @Test
  273. public void phraseQuery() throws Exception {
  274. IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
  275. IndexSearcher searcher = new IndexSearcher(reader);
  276.  
  277. Term term1=new Term("name", "林");
  278. Term term2=new Term("name", "钦");
  279.  
  280. PhraseQuery phrasequery=new PhraseQuery();
  281. phrasequery.setSlop(100);
  282. phrasequery.add(term1);
  283. phrasequery.add(term2);
  284.  
  285. TopDocs topDocs=searcher.search(phrasequery, 1000);
  286. System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
  287. System.out.println();
  288.  
  289. ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  290. for (ScoreDoc scDoc : scoreDocs) {
  291. Document document = searcher.doc(scDoc.doc);
  292. String id = document.get("id");
  293. String name = document.get("name");
  294. String age = document.get("age");
  295. String city = document.get("city");
  296. String birthday = document.get("birthday");
  297. float score = scDoc.score; //相似度
  298.  
  299. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
  300. id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
  301. }
  302.  
  303. searcher.close();
  304. reader.close();
  305. }
  306.  
  307. /**
  308. * 多短语搜索(先指定一个前缀关键字,然后其他的关键字加在此关键字之后,组成词语进行搜索)<br/><br/>
  309. *
  310. * 主要对象是MultiPhraseQuery,调用方式如下:<br/>
  311. *
  312. * Term term=new Term(字段名,前置关键字);<br/>
  313. * Term term1=new Term(字段名,搜索关键字);<br/>
  314. * Term term2=new Term(字段名,搜索关键字);<br/><br/>
  315. *
  316. * MultiPhraseQuery multiPhraseQuery=new MultiPhraseQuery();<br/><br/>
  317. *
  318. * multiPhraseQuery.add(term);<br/>
  319. * multiPhraseQuery.add(new Term[]{term1, term2});<br/><br/>
  320. *
  321. * Hits hits=searcher.search(multiPhraseQuery);<br/>
  322. * @throws Exception
  323. */
  324. @Test
  325. public void multiPhraseQuery() throws Exception {
  326. IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
  327. IndexSearcher searcher = new IndexSearcher(reader);
  328.  
  329. //查询“计张”、“计钦”组合的关键词,先指定一个前缀关键字,然后其他的关键字加在此关键字之后,组成词语进行搜索
  330. Term term=new Term("name", "计"); //前置关键字
  331. Term term1=new Term("name", "张"); //搜索关键字
  332. Term term2=new Term("name", "钦"); //搜索关键字
  333.  
  334. MultiPhraseQuery multiPhraseQuery=new MultiPhraseQuery();
  335. multiPhraseQuery.add(term);
  336. multiPhraseQuery.add(new Term[]{term1, term2});
  337.  
  338. TopDocs topDocs=searcher.search(multiPhraseQuery, 1000);
  339. System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
  340. System.out.println();
  341.  
  342. ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  343. for (ScoreDoc scDoc : scoreDocs) {
  344. Document document = searcher.doc(scDoc.doc);
  345. String id = document.get("id");
  346. String name = document.get("name");
  347. String age = document.get("age");
  348. String city = document.get("city");
  349. String birthday = document.get("birthday");
  350. float score = scDoc.score; //相似度
  351.  
  352. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
  353. id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
  354. }
  355.  
  356. searcher.close();
  357. reader.close();
  358. }
  359.  
  360. /**
  361. * 模糊搜索(顾名思义)<br/><br/>
  362. *
  363. * 主要对象是FuzzyQuery,调用方式如下:<br/><br/>
  364. *
  365. * Term term=new Term(字段名, 搜索关键字);<br/>
  366. * FuzzyQuery fuzzyquery=new FuzzyQuery(term,参数);<br/>
  367. * Hits hits=searcher.search(fuzzyquery);<br/>
  368. * 此中的参数是表示模糊度,是小于1的浮点小数,比如0.5f
  369. * @throws Exception
  370. */
  371. @Test
  372. public void fuzzyQuery() throws Exception {
  373. IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
  374. IndexSearcher searcher = new IndexSearcher(reader);
  375.  
  376. Term term=new Term("name", "三张");
  377. FuzzyQuery fuzzyquery=new FuzzyQuery(term, 0.5f);
  378.  
  379. TopDocs topDocs=searcher.search(fuzzyquery, 1000);
  380. System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
  381. System.out.println();
  382.  
  383. ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  384. for (ScoreDoc scDoc : scoreDocs) {
  385. Document document = searcher.doc(scDoc.doc);
  386. String id = document.get("id");
  387. String name = document.get("name");
  388. String age = document.get("age");
  389. String city = document.get("city");
  390. String birthday = document.get("birthday");
  391. float score = scDoc.score; //相似度
  392.  
  393. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
  394. id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
  395. }
  396.  
  397. searcher.close();
  398. reader.close();
  399. }
  400.  
  401. /**
  402. * 通配符搜索(顾名思义)<br/><br/>
  403. *
  404. * 主要对象是:WildcardQuery,调用方式如下:<br/><br/>
  405. *
  406. * Term term=new Term(字段名,搜索关键字+通配符);<br/>
  407. * WildcardQuery wildcardquery=new WildcardQuery(term);<br/>
  408. * Hits hits=searcher.search(wildcardquery);<br/><br/>
  409. *
  410. * 其中的通配符分两种,即*和?<br/>
  411. * * 表示任意多的自负<br/>
  412. * ?表示任意一个字符
  413. * @throws Exception
  414. */
  415. @Test
  416. public void wildcardQuery() throws Exception {
  417. IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
  418. IndexSearcher searcher = new IndexSearcher(reader);
  419.  
  420. Term term=new Term("name", "三?");
  421. WildcardQuery wildcardQuery=new WildcardQuery(term);
  422.  
  423. TopDocs topDocs=searcher.search(wildcardQuery, 1000);
  424. System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
  425. System.out.println();
  426.  
  427. ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  428. for (ScoreDoc scDoc : scoreDocs) {
  429. Document document = searcher.doc(scDoc.doc);
  430. String id = document.get("id");
  431. String name = document.get("name");
  432. String age = document.get("age");
  433. String city = document.get("city");
  434. String birthday = document.get("birthday");
  435. float score = scDoc.score; //相似度
  436.  
  437. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
  438. id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
  439. }
  440.  
  441. searcher.close();
  442. reader.close();
  443. }
  444.  
  445. /**
  446. * 正则表达式搜索(顾名思义,这个类引入lucene-queries-3.5.0.jar包)<br/><br/>
  447. *
  448. * 主要对象是:RegexQuery,调用方式如下 <br/>
  449. * String regex = ".*"; <br/>
  450. * Term term = new Term (search_field_name, regex); <br/>
  451. * RegexQuery query = new RegexQuery (term); <br/>
  452. * TopDocs hits = searcher.search (query, 100); <br/>
  453. * @throws Exception
  454. */
  455. @Test
  456. public void regexQuery() throws Exception {
  457. IndexReader reader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
  458. IndexSearcher searcher = new IndexSearcher(reader);
  459.  
  460. String regex = "林*";
  461. Term term=new Term("name", regex);
  462. RegexQuery query = new RegexQuery(term);
  463.  
  464. TopDocs topDocs=searcher.search(query, 1000);
  465. System.out.println("共检索出 " + topDocs.totalHits + " 条记录");
  466. System.out.println();
  467.  
  468. ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  469. for (ScoreDoc scDoc : scoreDocs) {
  470. Document document = searcher.doc(scDoc.doc);
  471. String id = document.get("id");
  472. String name = document.get("name");
  473. String age = document.get("age");
  474. String city = document.get("city");
  475. String birthday = document.get("birthday");
  476. float score = scDoc.score; //相似度
  477.  
  478. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s, 相关度:%s.",
  479. id, name, age, city, DateUtils.longToString(Long.parseLong(birthday), Consts.FORMAT_SHORT), score));
  480. }
  481.  
  482. searcher.close();
  483. reader.close();
  484. }
  485.  
  486. /**
  487. * 数值范围过滤器,如:int、long、float类型等
  488. *
  489. * @throws Exception
  490. */
  491. @Test
  492. public void numericFilter() throws Exception{ //CustomScoreQuery
  493. //Filter filter = NumericRangeFilter.newLongRange("id", 1l, 3l, true, true);
  494. Filter filter = NumericRangeFilter.newIntRange("age", 1, 39, true, true);
  495. List<Person> persons=search(filter, new String[]{"name","city"}, "厦门");
  496. for(Person person : persons){
  497. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s.",
  498. person.getId(), person.getName(), person.getAge(), person.getCity(), DateUtils.dateToString(person.getBirthday(), Consts.FORMAT_SHORT)));
  499. }
  500. }
  501.  
  502. /**
  503. * 时间范围过滤器
  504. * @throws Exception
  505. */
  506. @Test
  507. public void dateFilter() throws Exception{
  508. //2008-06-12
  509. long min=DateUtils.stringToDate("2008-06-12", Consts.FORMAT_SHORT).getTime();
  510. //2013-01-07
  511. long max=DateUtils.stringToDate("2013-01-07", Consts.FORMAT_SHORT).getTime();
  512. Filter filter = NumericRangeFilter.newLongRange("birthday", min, max, true, true);
  513. List<Person> persons=search(filter, new String[]{"name","city"}, "厦门");
  514. for(Person person : persons){
  515. System.out.println(String.format("id:%s, name:%s, age:%s, city:%s, birthday:%s.",
  516. person.getId(), person.getName(), person.getAge(), person.getCity(), DateUtils.dateToString(person.getBirthday(), Consts.FORMAT_SHORT)));
  517. }
  518. }
  519.  
  520. /**
  521. * 创建索引
  522. *
  523. * @throws Exception
  524. */
  525. @Test
  526. public void createIndex() throws Exception {
  527. List<Document> docs = new ArrayList<Document>();
  528. for (Person person : getPersons()) {
  529. Document doc = new Document();
  530. //声明为NumericField的字段,只能用NumericRangeFilter对象范围查询,不能用作关键字查询。
  531. //NumericField不推荐,统一用Field
  532. doc.add(new NumericField("id", Field.Store.YES, true).setLongValue(person.getId()));
  533. doc.add(new NumericField("age", Field.Store.YES, true).setIntValue(person.getAge()));
  534. doc.add(new NumericField("birthday", Field.Store.YES, true).setLongValue(person.getBirthday().getTime()));
  535.  
  536. doc.add(new Field("ids", person.getId()+"", Field.Store.YES, Field.Index.NOT_ANALYZED));
  537. doc.add(new Field("ages", person.getAge()+"", Field.Store.YES, Field.Index.NOT_ANALYZED));
  538. doc.add(new Field("birthdays", DateUtils.dateToString(person.getBirthday(), Consts.FORMAT_SHORT),
  539. Field.Store.YES, Field.Index.NOT_ANALYZED));
  540. doc.add(new Field("name", person.getName(), Field.Store.YES, Field.Index.ANALYZED));
  541. doc.add(new Field("city", person.getCity(), Field.Store.YES, Field.Index.ANALYZED));
  542.  
  543. docs.add(doc);
  544. }
  545. LuceneUtil.createIndex(docs);
  546. }
  547.  
  548. private List<Person> search(Filter filter, String[] fields, String keyword) {
  549. List<Person> result = new ArrayList<Person>();
  550.  
  551. IndexSearcher indexSearcher = null;
  552. TopDocs topDocs = null;
  553. try {
  554. // 创建索引搜索器,且只读
  555. IndexReader indexReader = IndexReader.open(FSDirectory.open(new File(XMLPropertyConfig.getConfigXML().getString("index_path"))), true);
  556. indexSearcher = new IndexSearcher(indexReader);
  557.  
  558. MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35,
  559. fields, new IKAnalyzer());
  560. Query query = queryParser.parse(keyword);
  561.  
  562. // 返回前number条记录
  563. if(filter == null){
  564. topDocs=indexSearcher.search(query, 100000);
  565. }else {
  566. topDocs=indexSearcher.search(query, filter, 100000);
  567. }
  568.  
  569. // 信息展示
  570. int totalCount = topDocs.totalHits;
  571. System.out.println("共检索出 " + totalCount + " 条记录");
  572.  
  573. //高亮显示
  574. Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
  575. QueryScorer fragmentScorer = new QueryScorer(query);
  576. Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
  577. Fragmenter fragmenter = new SimpleFragmenter(100);
  578. highlighter.setTextFragmenter(fragmenter);
  579.  
  580. ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  581.  
  582. for (ScoreDoc scDoc : scoreDocs) {
  583. Document document = indexSearcher.doc(scDoc.doc);
  584. String id = document.get("id");
  585. String name = document.get("name");
  586. String age = document.get("age");
  587. String city = document.get("city");
  588. String birthday = document.get("birthday");
  589. float score = scDoc.score; //相似度
  590. System.out.println("相似度:"+score);
  591.  
  592. String lighterName = highlighter.getBestFragment(new IKAnalyzer(), "name", name);
  593. if (null == lighterName) {
  594. lighterName = name;
  595. }
  596.  
  597. String lighterAge = highlighter.getBestFragment(new IKAnalyzer(), "age", age);
  598. if (null == lighterAge) {
  599. lighterAge = age;
  600. }
  601.  
  602. String lighterCity= highlighter.getBestFragment(new IKAnalyzer(), "city", city);
  603. if (null == lighterCity) {
  604. lighterCity = city;
  605. }
  606.  
  607. String lighterBirthday = highlighter.getBestFragment(new IKAnalyzer(), "birthday", birthday);
  608. if (null == lighterBirthday) {
  609. lighterBirthday = birthday;
  610. }
  611.  
  612. Person person = new Person();
  613. person.setId(Long.parseLong(id));
  614. person.setName(lighterName);
  615. person.setAge(NumberUtils.toInt(age));
  616. person.setCity(lighterCity);
  617. person.setBirthday(DateUtils.longToDate(Long.parseLong(lighterBirthday)));
  618. result.add(person);
  619. }
  620. } catch (Exception e) {
  621. e.printStackTrace();
  622. } finally {
  623. try {
  624. indexSearcher.close();
  625. } catch (IOException e) {
  626. e.printStackTrace();
  627. }
  628. }
  629.  
  630. return result;
  631. }
  632.  
  633. private List<Person> getPersons() {
  634. try {
  635. List<Person> persons = new ArrayList<Person>();
  636. persons.add(new Person(1l, "张三", 10, "福州", DateUtils.stringToDate("2013-01-07", Consts.FORMAT_SHORT)));
  637. persons.add(new Person(2l, "张四", 20, "莆田", DateUtils.stringToDate("2012-01-08", Consts.FORMAT_SHORT)));
  638. persons.add(new Person(3l, "王五", 30, "泉州", DateUtils.stringToDate("2011-03-09", Consts.FORMAT_SHORT)));
  639. persons.add(new Person(4l, "李四", 40, "厦门", DateUtils.stringToDate("2010-04-10", Consts.FORMAT_SHORT)));
  640. persons.add(new Person(5l, "李白", 50, "漳州", DateUtils.stringToDate("2009-05-11", Consts.FORMAT_SHORT)));
  641. persons.add(new Person(6l, "林计张三张三张三张三张三张三张三张三张三张三张三张三张三计钦", 60, "龙岩", DateUtils.stringToDate("2008-06-12", Consts.FORMAT_SHORT)));
  642. return persons;
  643. } catch (ParseException e) {
  644. e.printStackTrace();
  645. }
  646. return null;
  647. }
  648. }
  649.  
2. LuceneUtil类:Lucene增删改工具类
  1. package com.ljq.utils;
  2.  
  3. import java.io.File;
  4. import java.util.ArrayList;
  5. import java.util.List;
  6.  
  7. import org.apache.log4j.Logger;
  8. import org.apache.lucene.document.Document;
  9. import org.apache.lucene.index.IndexWriter;
  10. import org.apache.lucene.index.IndexWriterConfig;
  11. import org.apache.lucene.index.Term;
  12. import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  13. import org.apache.lucene.store.Directory;
  14. import org.apache.lucene.store.FSDirectory;
  15. import org.apache.lucene.util.Version;
  16. import org.wltea.analyzer.lucene.IKAnalyzer;
  17.  
  18. /**
  19. * lucene工具类,采用IKAnalyzer中文分词器
  20. *
  21. * @author 林计钦
  22. * @version 1.0 2013-6-3 下午03:51:29
  23. */
  24. public class LuceneUtil {
  25. /** 索引库路径 */
  26. private static final String indexPath = XMLPropertyConfig.getConfigXML()
  27. .getString("index_path");
  28. public static IndexWriter indexWriter = null;
  29. private static final Logger log=Logger.getLogger(LuceneUtil.class);
  30.  
  31. public static IndexWriter getIndexWriter(){
  32. if(indexWriter == null){
  33. try {
  34. //索引库路径不存在则新建一个
  35. File indexFile=new File(indexPath);
  36. if(!indexFile.exists()) indexFile.mkdir();
  37.  
  38. Directory fsDirectory = FSDirectory.open(indexFile);
  39. IndexWriterConfig confIndex = new IndexWriterConfig(Version.LUCENE_35, new IKAnalyzer());
  40. confIndex.setOpenMode(OpenMode.CREATE_OR_APPEND);
  41. if (IndexWriter.isLocked(fsDirectory)) {
  42. IndexWriter.unlock(fsDirectory);
  43. }
  44. indexWriter =new IndexWriter(fsDirectory, confIndex);
  45. } catch (Exception e) {
  46. e.printStackTrace();
  47. }
  48. }
  49. return indexWriter;
  50. }
  51.  
  52. /**
  53. * 创建索引
  54. *
  55. * @param doc
  56. * @throws Exception
  57. */
  58. public static boolean createIndex(Document doc) {
  59. List<Document> docs = new ArrayList<Document>();
  60. docs.add(doc);
  61. return createIndex(docs);
  62. }
  63.  
  64. /**
  65. * 创建索引
  66. *
  67. * @param docs
  68. * @throws Exception
  69. */
  70. public static boolean createIndex(List<Document> docs) {
  71. try {
  72. for (Document doc : docs) {
  73. getIndexWriter().addDocument(doc);
  74. }
  75. // 优化操作
  76. getIndexWriter().commit();
  77. getIndexWriter().forceMerge(1); // forceMerge代替optimize
  78. log.info("lucene create success.");
  79. return true;
  80. } catch (Exception e) {
  81. log.error("lucene create failure.", e);
  82. return false;
  83. } finally {
  84. if (getIndexWriter() != null) {
  85. try {
  86. getIndexWriter().close();
  87. } catch (Exception e) {
  88. e.printStackTrace();
  89. }
  90. }
  91. }
  92. }
  93.  
  94. /**
  95. * 更新索引
  96. *
  97. * 例如:Term term = new Term("id","1234567");
  98. * 先去索引文件里查找id为1234567的Document,如果有就更新它(如果有多条,最后更新后只有一条),如果没有就新增。
  99. * 数据库更新的时候,我们可以只针对某个列来更新,而lucene只能针对一行数据更新。
  100. *
  101. * @param field Document的Field(类似数据库的字段)
  102. * @param value Field中的一个关键词
  103. * @param doc
  104. * @return
  105. */
  106. public static boolean updateIndex(String field, String value, Document doc) {
  107. try {
  108. getIndexWriter().updateDocument(new Term(field, value), doc);
  109.  
  110. log.info("lucene update success.");
  111. return true;
  112. } catch (Exception e) {
  113. log.error("lucene update failure.", e);
  114. return false;
  115. }finally{
  116. if(getIndexWriter()!=null){
  117. try {
  118. getIndexWriter().close();
  119. } catch (Exception e) {
  120. e.printStackTrace();
  121. }
  122. }
  123. }
  124. }
  125.  
  126. /**
  127. * 删除索引
  128. *
  129. * @param field Document的Field(类似数据库的字段)
  130. * @param value Field中的一个关键词
  131. * @param doc
  132. * @return
  133. */
  134. public static boolean deleteIndex(String field, String value) {
  135. try {
  136. getIndexWriter().deleteDocuments(new Term(field, value));
  137.  
  138. log.info("lucene delete success.");
  139. return true;
  140. } catch (Exception e) {
  141. log.error("lucene delete failure.", e);
  142. return false;
  143. }finally{
  144. if(getIndexWriter()!=null){
  145. try {
  146. getIndexWriter().close();
  147. } catch (Exception e) {
  148. e.printStackTrace();
  149. }
  150. }
  151. }
  152. }
  153.  
  154. /**
  155. * 删除整个索引库
  156. *
  157. * @return
  158. */
  159. public static boolean deleteAllIndex() {
  160. try {
  161. getIndexWriter().deleteAll();
  162. log.info("lucene delete all success.");
  163. return true;
  164. } catch (Exception e) {
  165. log.error("lucene delete all failure.", e);
  166. return false;
  167. }finally{
  168. if(getIndexWriter()!=null){
  169. try {
  170. getIndexWriter().close();
  171. } catch (Exception e) {
  172. e.printStackTrace();
  173. }
  174. }
  175. }
  176. }
  177.  
  178. /**
  179. * 判断索引库是否已创建
  180. *
  181. * @return true:存在,false:不存在
  182. * @throws Exception
  183. */
  184. public static boolean existsIndex() throws Exception {
  185. File file = new File(indexPath);
  186. if (!file.exists()) {
  187. file.mkdirs();
  188. }
  189. String indexSufix = "/segments.gen";
  190. // 根据索引文件segments.gen是否存在判断是否是第一次创建索引
  191. File indexFile = new File(indexPath + indexSufix);
  192. return indexFile.exists();
  193. }
  194.  
  195. }
  1.  

Lucene搜索方式大合集的更多相关文章

  1. Lucene 搜索方式

    Lucene 的搜索方式包括:词项查询(TermQuery) / 布尔查询(BooleanQuery) / 短语查询(PhraseQuery) / 范围查询(RangeQuery) / 百搭查询(Wi ...

  2. [题解+总结]NOIP动态规划大合集

    1.前言 NOIP2003-2014动态规划题目大合集,有简单的也有难的(对于我这种动态规划盲当然存在难的),今天就把这些东西归纳一下,做一个比较全面的总结,方便对动态规划有一个更深的理解. 2.NO ...

  3. python字符串操作实方法大合集

    python字符串操作实方法大合集,包括了几乎所有常用的python字符串操作,如字符串的替换.删除.截取.复制.连接.比较.查找.分割等,需要的朋友可以参考下:   #1.去空格及特殊符号 s.st ...

  4. NOIP动态规划大合集

    1.前言 NOIP2003-2014动态规划题目大合集,有简单的也有难的(对于我这种动态规划盲当然存在难的),今天就把这些东西归纳一下,做一个比较全面的总结,方便对动态规划有一个更深的理解. 2.NO ...

  5. [题解+总结]动态规划大合集II

    1.前言 大合集总共14道题,出自江哥之手(这就没什么好戏了),做得让人花枝乱颤.虽说大部分是NOIP难度,也有简单的几道题目,但是还是做的很辛苦,有几道题几乎没思路,下面一道道边看边分析一下. 2. ...

  6. 直接拿来用!Facebook移动开源项目大合集

    直接拿来用!Facebook移动开源项目大合集 时间:2014-04-22 15:37 作者:唐小引 随着iOS依赖管理工具CocoaPods和大量第三方开源库成熟起来,业界积累了大量的优秀开源项目. ...

  7. 【Oracle教程资源大合集】Oracle数据库免费学习资源汇总

    Oracle的产品非常丰富,各类学习资源也五花八门,本文将介绍Oracle官方的免费教程与风哥整理的Oracle视频教程: 1.Oracle帮助中心 Oracle帮助中心也称为Oracle文档中心,这 ...

  8. HCNP学习笔记之史上最全华为路由器交换机配置命令大合集

    先来一张思科和华为命令的对照表: 史上最全华为路由器交换机配置命令大合集,熟练掌握下面的华为路由器交换机配置知识点,你只需花几分钟的时间就能明白华为路由器交换机配置.交换机的配置命令等等. 华为路由器 ...

  9. 动态规划大合集II

    1.前言 大合集总共14道题,出自江哥之手(这就没什么好戏了),做得让人花枝乱颤.虽说大部分是NOIP难度,也有简单的几道题目,但是还是做的很辛苦,有几道题几乎没思路,下面一道道边看边分析一下. 2. ...

随机推荐

  1. ModelState.IsValid总为false原因

    总结在开发中遇到的一个问题 ModelState.IsValid 一直是false 且在局部变量中,没有发现有问题啊,Model非常正常有木有,可是为什么 ModelState.IsValid 总是f ...

  2. Int,Long比较重使用equal替换==

    首先,==有很多限制,如Integer 类型的值在[-128,127] 期间,Integer 用 “==”是可以的(参考),超过范围则不行,那么使用equal则代替则完全ok public stati ...

  3. 为什么WebSphere好好的,他就不干活了?

    “修理不好用的WebSphere,有时候要看运气.”这个是我接触过很过有历史的运维工程师经常说的一个梗;研发人员也经常说这个程序在我这里运行好好的,怎么到你那就不灵了?问题是你的,你自己解决. 声明一 ...

  4. 1Z0-053 争议题目解析707

    1Z0-053 争议题目解析707 考试科目:1Z0-053 题库版本:V13.02 题库中原题为: 707.Because of a logical corruption in the EMPLOY ...

  5. 如何用easyui+JAVA 实现动态拼凑datagrid表格(续)

    前面一段时间写了一篇文章: 如何用easyui+JAVA 实现动态拼凑datagrid表格 这篇文章的话,效果是可以实现,但是经过我反复试验,还是存在一些问题的. 今天这篇文章就是向大家介绍下如何避免 ...

  6. js实现无限极分类

    转载注明出处!!! 转载注明出处!!! 转载注明出处!!! 因为要实现部门通讯录,后台传来的数据是直接从数据库里拿的部门表,所以没有层级分类,只有parentId表示从属关系,所以分类的事情就交给我来 ...

  7. 无法将类型为“Microsoft.Office.Interop.Word.ApplicationClass”的 COM 对象强制转换为接口类型“Microsoft.Office.Interop.Word._Application”。

    无法将类型为“Microsoft.Office.Interop.Word.ApplicationClass”的 COM 对象强制转换为接口类型“Microsoft.Office.Interop.Wor ...

  8. Go语言异常处理defer\panic\recover

    Go语言追求简洁优雅,所以,Go语言不支持传统的 try…catch…finally 这种异常,因为Go语言的设计者们认为,将异常与控制结构混在一起会很容易使得代码变得混乱.因为开发者很容易滥用异常, ...

  9. 在Visual Studio 2012中使用VMSDK开发领域特定语言(一)

    前言 本专题主要介绍在Visual Studio 2012中使用Visualization & Modeling SDK进行领域特定语言(DSL)的开发,包括两个部分的内容.在第一部分中,将对 ...

  10. padding/border与width的关系

    本文讲述 padding / border 的设置后是否对 width 有影响,width 等于 auto 与 100% 的区别 CSS 框模型 (Box Model) 规定了元素框处理元素内容.内边 ...