【转】一个lucene的官网例子

创建索引：

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.FileNotFoundException;

import java.io.IOException;

import java.io.InputStreamReader;

import java.nio.charset.StandardCharsets;

import java.util.Date;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.LongField;

import org.apache.lucene.document.StringField;

import org.apache.lucene.document.TextField;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.index.Term;

import org.apache.lucene.index.IndexWriterConfig.OpenMode;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

/**
 * Command-line tool that builds a Lucene index from the files found under a directory.
 *
 * <p>Recognised options:
 * <ul>
 *   <li>{@code -index <dir>} - directory the index is written to (defaults to {@code "index"})</li>
 *   <li>{@code -docs <dir>} - directory (or single file) to index; required</li>
 *   <li>{@code -update} - add to / update an existing index instead of recreating it</li>
 * </ul>
 * When started without any arguments, a pair of hard-coded sample paths is used.
 */
public class CreateLuceneIndex {

    public static void main(String[] args) {
        // No arguments at all: fall back to the sample locations.
        if (args == null || args.length <= 0) {
            args = new String[] {
                    "-index",
                    "E:\\00.学习\\03.软件\\JAVA\\lu01\\index",
                    "-docs",
                    "E:\\00.学习\\03.软件\\JAVA\\lu01\\doc" };
        }

        // Same default as SearchFiles, so both tools agree when -index is omitted.
        String indexPath = "index";
        String docsPath = null;
        boolean create = true;

        for (int i = 0; i < args.length; i++) {
            if ("-index".equals(args[i])) {
                indexPath = optionValue(args, i);
                i++;
            } else if ("-docs".equals(args[i])) {
                docsPath = optionValue(args, i);
                i++;
            } else if ("-update".equals(args[i])) {
                create = false;
            }
        }

        if (docsPath == null) {
            System.err.println("资源文件所在目录为空，请指定资源文件所在目录！！！");
            System.exit(1);
        }

        final File docDir = new File(docsPath);
        if (!docDir.exists() || !docDir.canRead()) {
            System.err.println("资源文件目录 '" + docDir.getAbsolutePath()
                    + "' 不存在或不可读，请检查！");
            System.exit(1);
        }

        long startMillis = System.currentTimeMillis();
        try {
            System.out.println("建立索引文件到该目录 '" + indexPath + "'...");

            Analyzer analyzer = new StandardAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(
                    Version.LUCENE_4_10_2, analyzer);
            // CREATE wipes any existing index; CREATE_OR_APPEND keeps it and adds to it.
            iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

            // try-with-resources: the writer (and its write lock) and the directory are
            // released even when indexing fails half-way through.
            try (Directory dir = FSDirectory.open(new File(indexPath));
                    IndexWriter writer = new IndexWriter(dir, iwc)) {
                indexDocs(writer, docDir);
            }

            System.out.println(System.currentTimeMillis() - startMillis
                    + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass()
                    + "\n with message: " + e.getMessage());
        }
    }

    /**
     * Returns the value following the option at {@code args[i]}.
     *
     * <p>Prints an error and terminates the JVM with status 1 when the option is the last
     * argument and therefore has no value.
     */
    private static String optionValue(String[] args, int i) {
        if (i + 1 >= args.length) {
            System.err.println("Missing value for option " + args[i]);
            System.exit(1);
        }
        return args[i + 1];
    }

    /**
     * Indexes the given file, or recursively every file below it when it is a directory.
     *
     * <p>Each regular file becomes one document with three fields: {@code path} (stored,
     * not tokenized), {@code modified} (last-modified timestamp, not stored) and
     * {@code contents} (file text read as UTF-8, tokenized, not stored). Unreadable files
     * and files that cannot be opened are skipped silently.
     *
     * @param writer
     *            writer of the index the documents are added to
     * @param file
     *            file or directory to index
     * @throws IOException
     *             if reading a file or writing to the index fails
     */
    static void indexDocs(IndexWriter writer, File file) throws IOException {
        if (!file.canRead()) {
            return;
        }

        if (file.isDirectory()) {
            String[] files = file.list();
            // list() returns null when an I/O error occurs.
            if (files != null) {
                for (String child : files) {
                    indexDocs(writer, new File(file, child));
                }
            }
            return;
        }

        FileInputStream fis;
        try {
            fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
            // Best effort: a file that vanished or cannot be opened is skipped.
            return;
        }

        try {
            Document doc = new Document();

            Field pathField = new StringField("path", file.getPath(),
                    Field.Store.YES);
            doc.add(pathField);

            doc.add(new LongField("modified", file.lastModified(),
                    Field.Store.NO));

            // Reader-based TextField: the text is tokenized and indexed but not stored.
            doc.add(new TextField("contents", new BufferedReader(
                    new InputStreamReader(fis, StandardCharsets.UTF_8))));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // Fresh index: no older copy of this document can exist.
                System.out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                // Existing index: replace any document with the same path.
                System.out.println("updating " + file);
                writer.updateDocument(new Term("path", file.getPath()), doc);
            }
        } finally {
            fis.close();
        }
    }

}

全文检索：

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStreamReader;

import java.nio.charset.StandardCharsets;

import java.util.Date;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.index.DirectoryReader;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.queryparser.classic.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.FSDirectory;

/**
 * Command-line tool that runs queries against a Lucene index and pages through the hits.
 *
 * <p>Queries come from {@code -query}, from a file given with {@code -queries} (one query
 * per line), or are read interactively from standard input. When started without any
 * arguments, a hard-coded sample index location is used.
 */
public class SearchFiles {

    private static final String USAGE =
            "Usage:\tjava SearchFiles [-index dir] [-field f] [-repeat n]"
            + " [-queries file] [-query string] [-raw] [-paging hitsPerPage]";

    public static void main(String[] args) throws Exception {
        // Only fall back to the sample index when the caller supplied nothing; otherwise
        // the command-line options must be honoured.
        if (args == null || args.length == 0) {
            args = new String[] {
                    "-index",
                    "E:\\00.学习\\03.软件\\JAVA\\lu01\\index"
                };
        }

        if ("-h".equals(args[0]) || "-help".equals(args[0])) {
            System.out.println(USAGE);
            System.exit(0);
        }

        String index = "index";
        String field = "contents";
        String queries = null;
        int repeat = 0;
        boolean raw = false;
        String queryString = null;
        int hitsPerPage = 10;

        for (int i = 0; i < args.length; i++) {
            if ("-index".equals(args[i])) {
                index = optionValue(args, i);
                i++;
            } else if ("-field".equals(args[i])) {
                field = optionValue(args, i);
                i++;
            } else if ("-queries".equals(args[i])) {
                queries = optionValue(args, i);
                i++;
            } else if ("-query".equals(args[i])) {
                queryString = optionValue(args, i);
                i++;
            } else if ("-repeat".equals(args[i])) {
                repeat = Integer.parseInt(optionValue(args, i));
                i++;
            } else if ("-raw".equals(args[i])) {
                raw = true;
            } else if ("-paging".equals(args[i])) {
                hitsPerPage = Integer.parseInt(optionValue(args, i));
                if (hitsPerPage <= 0) {
                    System.err.println("最少每页有1条数据");
                    System.exit(1);
                }
                i++;
            }
        }

        // Prompt and page interactively only when queries are typed in by the user.
        boolean interactive = queries == null && queryString == null;

        Analyzer analyzer = new StandardAnalyzer();
        QueryParser parser = new QueryParser(field, analyzer);

        // try-with-resources: the index reader and its directory are released even when
        // parsing or searching throws.
        try (FSDirectory dir = FSDirectory.open(new File(index));
                IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            BufferedReader in = new BufferedReader(new InputStreamReader(
                    queries != null ? new FileInputStream(queries) : System.in,
                    StandardCharsets.UTF_8));
            try {
                while (true) {
                    if (interactive) { // prompt the user
                        System.out.println("输入查询关键字: ");
                    }

                    String line = queryString != null ? queryString : in.readLine();
                    if (line == null) { // end of input
                        break;
                    }

                    line = line.trim();
                    if (line.length() == 0) {
                        break;
                    }

                    Query query = parser.parse(line);
                    System.out.println("Searching for: " + query.toString(field));

                    if (repeat > 0) { // repeat & time as benchmark
                        long startMillis = System.currentTimeMillis();
                        for (int i = 0; i < repeat; i++) {
                            searcher.search(query, null, 100);
                        }
                        System.out.println("Time: "
                                + (System.currentTimeMillis() - startMillis) + "ms");
                    }

                    doPagingSearch(in, searcher, query, hitsPerPage, raw, interactive);

                    // A query given on the command line is executed exactly once.
                    if (queryString != null) {
                        break;
                    }
                }
            } finally {
                // Close the query file; standard input is left open for the caller.
                if (queries != null) {
                    in.close();
                }
            }
        }
    }

    /**
     * Returns the value following the option at {@code args[i]}.
     *
     * <p>Prints an error plus the usage line and terminates the JVM with status 1 when
     * the option is the last argument and therefore has no value.
     */
    private static String optionValue(String[] args, int i) {
        if (i + 1 >= args.length) {
            System.err.println("Missing value for option " + args[i]);
            System.err.println(USAGE);
            System.exit(1);
        }
        return args[i + 1];
    }

    /**
     * Runs {@code query} and prints the hits one page at a time.
     *
     * <p>Initially enough hits for five pages are collected. In interactive mode the user
     * can move to the previous/next page, jump to a page number or quit, and is asked
     * whether to collect all hits once a page beyond the collected ones is requested. In
     * non-interactive mode only the first page is printed. End of input on {@code in} is
     * treated as "quit".
     *
     * @param in          source of the user's paging commands
     * @param searcher    searcher to execute the query with
     * @param query       query to run
     * @param hitsPerPage number of hits printed per page
     * @param raw         when true, print doc id and score instead of the stored path
     * @param interactive whether to prompt for paging commands
     * @throws IOException if searching or reading a command fails
     */
    public static void doPagingSearch(BufferedReader in,
            IndexSearcher searcher, Query query, int hitsPerPage, boolean raw,
            boolean interactive) throws IOException {

        // Collect enough docs to show 5 pages
        TopDocs results = searcher.search(query, 5 * hitsPerPage);
        ScoreDoc[] hits = results.scoreDocs;

        int numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        int start = 0;
        int end = Math.min(numTotalHits, hitsPerPage);

        while (true) {
            if (end > hits.length) {
                // The requested page lies beyond what was collected so far.
                System.out
                        .println("Only results 1 - " + hits.length + " of "
                                + numTotalHits
                                + " total matching documents collected.");
                System.out.println("Collect more (y/n) ?");
                String line = in.readLine();
                if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                    break;
                }

                hits = searcher.search(query, numTotalHits).scoreDocs;
            }

            end = Math.min(hits.length, start + hitsPerPage);

            for (int i = start; i < end; i++) {
                if (raw) { // output raw format
                    System.out.println("doc=" + hits[i].doc + " score="
                            + hits[i].score);
                    continue;
                }

                Document doc = searcher.doc(hits[i].doc);
                String path = doc.get("path");
                if (path != null) {
                    System.out.println((i + 1) + ". " + path);
                    String title = doc.get("title");
                    if (title != null) {
                        System.out.println("   Title: " + title);
                    }
                } else {
                    System.out.println((i + 1) + ". "
                            + "No path for this document");
                }
            }

            if (!interactive || end == 0) {
                break;
            }

            if (numTotalHits >= end) {
                boolean quit = false;
                while (true) {
                    System.out.print("Press ");
                    if (start - hitsPerPage >= 0) {
                        System.out.print("(p)revious page, ");
                    }
                    if (start + hitsPerPage < numTotalHits) {
                        System.out.print("(n)ext page, ");
                    }
                    System.out
                            .println("(q)uit or enter number to jump to a page.");

                    String line = in.readLine();
                    if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                        quit = true;
                        break;
                    }

                    char command = line.charAt(0);
                    if (command == 'p') {
                        start = Math.max(0, start - hitsPerPage);
                        break;
                    } else if (command == 'n') {
                        if (start + hitsPerPage < numTotalHits) {
                            start += hitsPerPage;
                        }
                        break;
                    } else {
                        int page;
                        try {
                            page = Integer.parseInt(line.trim());
                        } catch (NumberFormatException e) {
                            // Neither a command nor a number: ask again.
                            System.out.println("No such page");
                            continue;
                        }
                        // long arithmetic so a huge page number cannot overflow.
                        long pageStart = (page - 1L) * hitsPerPage;
                        if (page >= 1 && pageStart < numTotalHits) {
                            start = (int) pageStart;
                            break;
                        } else {
                            System.out.println("No such page");
                        }
                    }
                }
                if (quit) {
                    break;
                }
                end = Math.min(numTotalHits, start + hitsPerPage);
            }
        }
    }

}

//************************************************************************************************************************

新增部分基础概念的梳理：参考lucene 实战

1、基本概念

Lucene 可以认为分为两大组件：

1）索引组件

a、内容获取：即将原始的内容材料（可以是数据库、网站（爬虫）、文本）转换为 Lucene 的 Document；
Document 为若干个带值的域（Field）的集合
b、文档分析：利用分词器对文档进行分析；
c、建立索引：根据文档分析的结果建立索引文件；

2）搜索组件

a、建立查询：即将用户请求转换为搜索引擎支持的查询对象格式；
b、执行搜索查询；并反馈Document结果集；

2、官方样例代码分析

**索引组件部分工作
1、创建一个包含某个分词器的IndexWriter
private IndexWriter writer;
writer = new IndexWriter(IndexDir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);

2、遍历被索引的文件（即原始内容）将其转换为Document
Document doc = new Document();
doc.add(new Field("contents", new FileReader(f)));
doc.add(new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));

3、针对该Document创建索引
writer.addDocument(doc);

**搜索组件部分工作
1、打开索引文件
IndexSearcher is = new IndexSearcher(IndexDir);

2、创建一个查询对象
QueryParser parser = new QueryParser(Version.LUCENE_30, "contents", new StandardAnalyzer());
Query query = parser.parse("要找的查询关键字");

3、执行查询并返回结果
TopDocs hits = is.search(query, 10);

【转】一个lucene的官网例子的更多相关文章

OpenLayers 官网例子的中文详解
https://segmentfault.com/a/1190000009679800?utm_source=tag-newest 当你希望实现某种功能的时候,即使你对 openlayers 几乎一窍 ...
针对Openlayer3官网例子的简介
网址:http://openlayers.org/en/latest/examples/ 如果大家想了解ol3能做什么,或者说已提供的API有什么,又闲一个个翻例子跟API累的话,就看看这个吧. 1. ...
Vue组件化应用构建官网例子 Unknown custom element: <todo-item>
[博客园cnblogs笔者m-yb原创,转载请加本文博客链接,笔者github: https://github.com/mayangbo666,公众号aandb7,QQ群927113708] htt ...
apache lucene solr 官网历史版本下载地址
官网上一般只提供最新版本的下载,下面两个链接为所有历史版本的下载地址: lucene地址:archive.apache.org/dist/lucene/java/ solr地址:archive.apa ...
STREAMING HIVE流过滤官网例子注意中间用的py脚本
Simple Example Use Cases MovieLens User Ratings First, create a table with tab-delimited text file f ...
导航条且手机版.html——仿照官网例子
<!doctype html> <html> <head> <meta charset="utf-8"> <title> ...
官网例子，mt-field password获取不到
新尝试了Mint-UI,在使用表单组件Field时, 直接从demo中拷贝了如下代码: <mt-field label="username" placeholder=&quo ...
Java微信扫描支付模式二Demo ,整合官网直接运行版本
概述场景介绍用户使用微信“扫一扫”扫描二维码后,获取商品支付信息,引导用户完成支付. 详细代码下载:http://www.demodashi.com/demo/13880.html 一.相关配置 ...
【如何在mysql 官网下载最新版本mysql 数据库】
方法/步骤打开百度搜索,输入MySQL,第一个是MySQL官网点击第一个链接地址,进入MySQL官方网站,单击“Downloads”下载Tab页,进入下载界面找到Community( ...

随机推荐

Servlet目录
Servlet目录课时1 Servlet概述12:30 课时2 servlet的第一个例子31:08 课时3 servlet的生命周期18:18 课时4 HttpServlet详细讲解31:43 ...
实战录 | Kafka-0.10 Consumer源码解析
<实战录>导语前方高能!请注意本期攻城狮幽默细胞爆表,坐地铁的拉好把手,喝水的就建议暂时先别喝了:)本期分享人为云端卫士大数据工程师韩宝君,将带来Kafka-0.10 Consumer源 ...
移动端HTML5<video>视频播放优化实践[转]
http://blog.csdn.net/u010918416/article/details/52705732 http://www.xuanfengge.com/html5-video-play. ...
运费模版源码(.net)
之前写了一篇关于nop商城系统中运费模版模块相关的随笔,说要把源码贴出来,一直没有贴,现在我把源码贴出来,有任何问题欢迎留言讨论. 源码是在nop上写的,所以文件夹结构和nop的文件夹对应,源码包含的 ...
tab栏切换的特殊效果
在实际的开发过程中,我们可能会遇到这种需求,如下图左边是三个tab栏,右边是显示内容的div,当鼠标滑到坐标的tab上时,给它一个高亮显示,让它对应的内容在右边的div中显示出来,当鼠标移出的时候把 ...
[PL/SQL] 如何规避异常ORA-01403
如果mytable表中不存在 ID = 123 的数据,那么 SELECT Flag INTO flag FROM mytable WHERE ID = 123 将抛出异常ORA-01403 SELE ...
vs2010的“应用程序向导”新建MFC程序报错“当前页面的脚本发送错误”
原创文章,欢迎阅读,禁止转载. 问题现象不知道从什么时候开始,我的vs2010不能新建MFC程序了,报错如图:... 解决方法根据提示排查,发现是应用程序向导的相关html被损坏了.从同事电脑上把 ...
Allegro Out Of Date Shapes原因及解决方法
使用Allegro设计PCB板时,查看Status,经常会遇到out of date shapes的警告信息,具体如下: dynamic shape is still out of data or e ...
搜索引擎 ElasticSearch 之步步为营1 【环境搭建&初识ElasticSearch】
1.下载ElasticSearch a.下载Java环境JDK:http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloa ...
用sql语句清除日志
DUMP TRANSACTION [数据库] WITH NO_LOGBACKUP LOG [数据库] WITH NO_LOGDBCC SHRINKDATABASE([数据库])

【转】一个lucene的官网例子

【转】一个lucene的官网例子的更多相关文章

随机推荐

热门专题