lucene简单使用

Lucene 7 及以上版本最低要求 JDK 1.8

lucene下载地址：

http://archive.apache.org/dist/lucene/java/

<!--
  Dependencies for the Lucene demo.
  Each artifact is declared exactly once: Maven requires groupId:artifactId to be
  unique within a POM and, when duplicates exist, keeps the later declaration,
  which would silently discard the exclusions on ikanalyzer.
  All Lucene modules use the same version, because Lucene modules of different
  releases are not meant to be mixed on one classpath.
-->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>6.0.0</version>
</dependency>

<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>6.0.0</version>
</dependency>

<!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-analyzers-common -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-common</artifactId>
    <version>6.0.0</version>
</dependency>

<!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-memory -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-memory</artifactId>
    <version>6.0.0</version>
</dependency>

<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.9</version>
</dependency>

<!-- http://mvnrepository.com/artifact/org.apache.lucene/lucene-queryparser -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>6.0.0</version>
</dependency>

<dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.6</version>
</dependency>

<!-- Was 7.3.0; aligned with the other Lucene modules (6.0.0). -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>6.0.0</version>
</dependency>

<dependency>
    <groupId>com.janeluo</groupId>
    <artifactId>ikanalyzer</artifactId>
    <version>2012_u6</version>
    <!-- Exclude the old Lucene artifacts that IK Analyzer pulls in, because its
         analyzer and tokenizer are to be re-implemented against the newer Lucene API. -->
    <exclusions>
        <exclusion>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
        </exclusion>
    </exclusions>
</dependency>

package com.ytkj.lucene;

import org.apache.commons.io.FileUtils;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.TokenStream;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.document.TextField;

import org.apache.lucene.index.*;

import org.apache.lucene.search.*;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;

import java.io.IOException;

/**
 * Introductory Lucene example.
 *
 * <p>Shows how to build an index from the files of a directory, how to query that
 * index with a {@link TermQuery}, and how to inspect the tokens produced by the
 * standard analyzer and by the IK Chinese analyzer.
 *
 * <p>Every Lucene resource (directory, writer, reader, analyzer, token stream) is
 * opened in a try-with-resources statement so it is released even when an
 * exception is thrown part-way through.
 */
public class LuceneFrist {

    /** Location on disk where the index is stored. */
    private static final String INDEX_DIR = "E:\\lucene\\lucenetemp";

    /** Directory whose files are read and indexed by {@link #createIndex()}. */
    private static final String RESOURCE_DIR = "E:\\lucene\\luceneresource";

    /**
     * Builds the index: one document per regular file found directly inside
     * {@link #RESOURCE_DIR}. Sub-directories are skipped because their content
     * cannot be read as text.
     *
     * @throws Exception if the source directory cannot be listed, a file cannot
     *                   be read, or the index cannot be written
     */
    public static void createIndex() throws Exception {
        // List the source files first so a missing directory fails before the
        // index (and its write lock) is touched.
        File[] files = new File(RESOURCE_DIR).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("Cannot list source directory: " + RESOURCE_DIR);
        }

        // Resources are closed in reverse order: writer (which commits), analyzer, directory.
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR).toPath());
             Analyzer analyzer = new StandardAnalyzer();
             IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
            for (File file : files) {
                if (!file.isFile()) {
                    continue;
                }
                indexWriter.addDocument(toDocument(file));
            }
        }
    }

    /**
     * Creates the Lucene document for one file, with the stored fields
     * {@code name}, {@code path}, {@code content} and {@code size}.
     *
     * @param file the file to describe; must be a readable regular file
     * @return the document to add to the index
     * @throws IOException if the file content cannot be read
     */
    private static Document toDocument(File file) throws IOException {
        String content = FileUtils.readFileToString(file, "utf-8");
        long size = FileUtils.sizeOf(file);

        // TextField arguments: field name, field value, whether the value is stored.
        Document document = new Document();
        document.add(new TextField("name", file.getName(), Field.Store.YES));
        document.add(new TextField("path", file.getPath(), Field.Store.YES));
        document.add(new TextField("content", content, Field.Store.YES));
        document.add(new TextField("size", size + "", Field.Store.YES));
        return document;
    }

    /**
     * Searches the index for documents whose {@code content} field contains the
     * term {@code spring} and prints the stored fields of up to ten hits.
     *
     * @throws Exception if the index cannot be opened or read
     */
    public static void searchIndex() throws Exception {
        try (Directory directory = FSDirectory.open(new File(INDEX_DIR).toPath());
             IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            Query query = new TermQuery(new Term("content", "spring"));

            // Second argument: maximum number of hits to return.
            TopDocs topDocs = indexSearcher.search(query, 10);

            int totalHits = topDocs.totalHits;
            System.out.println("查询结果的总记录数：" + totalHits);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored document by its internal id.
                Document document = indexSearcher.doc(scoreDoc.doc);
                System.out.println(document.get("name"));
                System.out.println(document.get("path"));
                System.out.println(document.get("content"));
                System.out.println(document.get("size"));
            }
        }
    }

    /**
     * Prints, one per line, the tokens the standard analyzer produces for a
     * sample fully-qualified class name.
     *
     * @throws Exception if tokenization fails
     */
    public static void testTikenStream() throws Exception {
        try (Analyzer analyzer = new StandardAnalyzer();
             TokenStream tokenStream = analyzer.tokenStream(
                     "", "org.springframework.jdbc.datasource.DataSourceTransactionManager")) {
            // The attribute instance is updated in place on every incrementToken() call.
            CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

            // reset() must be called before the first incrementToken().
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                System.out.println(charTermAttribute.toString());
            }
            // end() completes the TokenStream workflow before close().
            tokenStream.end();
        }
    }

    /**
     * Prints the tokens the IK analyzer produces for an English and a Chinese
     * sample sentence, first in fine-grained mode and then in smart mode.
     *
     * <p>NOTE(review): the original author remarks that the
     * {@code Analyzer.createComponents} API changed and that IK's analyzer and
     * tokenizer must be re-implemented (IKAnalyzer4Lucene7 / IKTokenizer4Lucene7).
     * This method still uses the stock {@link IKAnalyzer}; confirm it is
     * compatible with the Lucene version on the classpath.
     *
     * @throws Exception if tokenization fails
     */
    public static void testIKAnalyzer() throws Exception {
        String etext = "Analysis is one of the main causes of slow indexing. Simply put, the more you analyze the slower analyze the indexing (in most cases).";
        String chineseText = "张三说的确实在理。";

        // IKAnalyzer, fine-grained segmentation.
        try (Analyzer ik = new IKAnalyzer()) {
            printIkTokens(ik, "细粒度切分", etext, chineseText);
        }

        // IKAnalyzer, smart segmentation.
        try (Analyzer ik = new IKAnalyzer(true)) {
            printIkTokens(ik, "智能切分", etext, chineseText);
        }
    }

    /**
     * Tokenizes the English and then the Chinese text with the given analyzer,
     * printing a header line followed by the tokens for each.
     *
     * @param analyzer    analyzer to use; not closed by this method
     * @param mode        segmentation mode label inserted into the header lines
     * @param etext       English sample text
     * @param chineseText Chinese sample text
     * @throws IOException if tokenization fails
     */
    private static void printIkTokens(Analyzer analyzer, String mode, String etext, String chineseText)
            throws IOException {
        System.out.println("IKAnalyzer中文分词器 " + mode + "，英文分词效果：");
        doToken(analyzer.tokenStream("content", etext));

        // The previous stream was closed by doToken, so the analyzer can be reused.
        System.out.println("IKAnalyzer中文分词器 " + mode + "，中文分词效果：");
        doToken(analyzer.tokenStream("content", chineseText));
    }

    /**
     * Consumes the token stream, printing every term followed by {@code |} on a
     * single line, then closes the stream.
     *
     * @param ts the stream to consume; always closed by this method
     * @throws IOException if tokenization fails
     */
    private static void doToken(TokenStream ts) throws IOException {
        try (TokenStream stream = ts) {
            // addAttribute returns the existing instance or registers one; it does not throw when absent.
            CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.print(cta.toString() + "|");
            }
            System.out.println();
            stream.end();
        }
    }

    /**
     * Entry point; runs the IK analyzer demo. The other demos can be invoked
     * through {@link #createIndex()}, {@link #searchIndex()} and
     * {@link #testTikenStream()}.
     *
     * @param args unused
     * @throws Exception if the demo fails
     */
    public static void main(String[] args) throws Exception {
        testIKAnalyzer();
    }

}

lucene简单使用的更多相关文章

Lucene 简单API使用
本demo 简单模拟实现一个图书搜索功能. 模拟向数据库添加数据的时候,添加书籍索引. 提供搜索接口,支持按照书名,作者,内容进行搜索. 按默认规则排序返回搜索结果. Jar依赖: <prope ...
Lucene 简单手记http://www.cnblogs.com/hoojo/archive/2012/09/05/2671678.html
什么是全文检索与全文检索系统? 全文检索是指计算机索引程序通过扫描文章中的每一个词,对每一个词建立一个索引,指明该词在文章中出现的次数和位置,当用户查询时,检索程序就根据事先建立的索引进行查找,并将查 ...
Lucene简单介绍
[2016.6.11]以前写的笔记,拿出来放到博客里面~ 相关软件: Solr, IK Analyzer, Luke, Nutch;Tomcat; 1.是什么: Lucene是apache软件基金会j ...
lucene简单搜索demo
方法类 package com.wxf.Test; import com.wxf.pojo.Goods; import org.apache.lucene.analysis.standard.Stan ...
Lucene简单总结
Lucene API Document Document:文档对象,是一条原始数据文档编号文档内容 1 谷歌地图之父跳槽FaceBook 2 谷歌地图之父加盟FaceBook 3 谷歌地图创始人拉 ...
lucene简单使用demo
测试结构目录: 1.索引库.分词器 Configuration.java package com.test.www.web.lucene; import java.io.File; import or ...
Lucene简单了解和使用
一,Lucene简介 1 . Lucene 是什么? Lucene 是一个开放源代码的全文检索引擎工具包,但它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎, ...
lucene 简单搜索步骤
1.创建IndexReader实例: Directory dir = FSDirectory.open(new File(indexDir)); IndexReader reader = Direct ...
Lucene入门的基本知识（四）
刚才在写创建索引和搜索类的时候发现非常多类的概念还不是非常清楚,这里我总结了一下. 1 lucene简单介绍 1.1 什么是lucene Lucene是一个全文搜索框架,而不是应用产品.因此它并不 ...

随机推荐

简单DP入门（二）最长上升子序列及其优化
最长上升子序列解决问题: 有N个数,求出它最长的上升子序列并输出长度. 在题里不会讲的这么直白,这个算法往往会与其他的算法混在一起使用. 在这篇文章中不会出现其他的例题,为了让大家更好的理解,我只会对 ...
bootstrap知识点
首先,声明本次笔记是来自biaoyansu.com表严肃老师的bootstrap课程视频. 1.基本知识:1-1.首先,Html(理解:骨骼).Css(理解:皮肤).Js(理解:神经)分工不同.1-2 ...
Python Challenge 关卡目录及解答过程
第0关:http://www.pythonchallenge.com/pc/def/0.html 线索:试着改变URL的地址-->把图片中得到的数字输入到URL中 2**38 输出: 第1关:h ...
java多态的实现机制
Java提供了编译时多态和运行时多态两种多态机制.前者是通过方法重载实现的,后者是通过方法的覆盖实现的. 在方法覆盖中,子类可以覆盖父类的方法,因此同类的方法会在父类与子类中有着不同的表现形式. 在J ...
HDU 1816 Get Luffy Out *
Get Luffy Out * Time Limit: 6000/3000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others)T ...
简述ArcGIS的空间连接（Spatial Join）与字段映射（Field Map）操作
插个广告,制作ArcGIS的Tool工具学习下面的教程就对了:零基础学习Python制作ArcGIS自定义工具牢骚一下在使用ArcMap进行空间连接操作的时候,往往会有两种特殊需求,其一是连接重叠 ...
【目录】Identityserver 4 老张的哲学
随笔分类 - .IdentityServer4 从壹开始 [ Ids4实战 ] 之四 ║ 用户数据管理 & 前后端授权联调摘要: 前言哈喽~~~ 大家周一好!夏天到了,大家舒服了没有,熟话 ...
Spring学习笔记（10）——方法注入
引用在大部分情况下,容器中的bean都是singleton类型的.如果一个singleton bean要引用另外一个singleton bean,或者一个非singleton bean要引用另外一个 ...
Centos 7安装的一些事项
一.Wifi无法连接 ip addr 显示:unmanaged, plugin missing 先连有线网yum install -y NetworkManager-wifi systemctl re ...
go语言从例子开始之Example13.函数多返回值
Go 内建多返回值支持.这个特性在 Go 语言中经常被用到,例如用来同时返回一个函数的结果和错误信息. Example: package main import "fmt" // ...

lucene简单使用

lucene简单使用的更多相关文章

随机推荐

热门专题