Lucene.Net+盘古分词->开发自己的搜索引擎

//封装类

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using Lucene.Net.Analysis;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using System.Reflection;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
namespace SearchTest
{
 /// <summary>
 /// 盘古分词在lucene.net中的使用帮助类
 /// 调用PanGuLuceneHelper.instance
 /// </summary>
 public class PanGuLuceneHelper
 {
 /// <summary>
 /// Private so that callers cannot construct the helper directly and must go
 /// through the static <c>instance</c> property instead.
 /// </summary>
 private PanGuLuceneHelper()
 {
 }

#region 单一实例
 // Built by the static field initializer instead of lazily inside the getter:
 // the runtime runs type initialization once, so two threads asking for the
 // helper at the same time can no longer each create their own copy.
 private static readonly PanGuLuceneHelper _instance = new PanGuLuceneHelper();

 /// <summary>
 /// Gets the single shared <see cref="PanGuLuceneHelper"/> instance.
 /// </summary>
 public static PanGuLuceneHelper instance
 {
     get { return _instance; }
 }
 #endregion

#region 分词测试
 /// <summary>
 /// Segmentation test: runs <paramref name="keyword"/> through the analyzer and
 /// returns every term it produces, each followed by a '|' separator.
 /// </summary>
 /// <param name="keyword">Text to segment. Null or empty yields an empty string.</param>
 /// <returns>The terms in stream order, each terminated by "|" (for example "a|b|").</returns>
 public string Token(string keyword)
 {
     if (string.IsNullOrEmpty(keyword))
     {
         return "";
     }

     // The analyzer property constructs a new analyzer on every access, so take
     // one reference and use that same object for both tokenizing and closing.
     Analyzer segmenter = analyzer;
     System.Text.StringBuilder terms = new System.Text.StringBuilder();
     try
     {
         // The first argument of TokenStream is Lucene's field-name slot; the
         // text itself is passed there, exactly as this method always did.
         using (System.IO.StringReader reader = new System.IO.StringReader(keyword))
         using (Lucene.Net.Analysis.TokenStream ts = segmenter.TokenStream(keyword, reader))
         {
             while (ts.IncrementToken())
             {
                 Lucene.Net.Analysis.Tokenattributes.ITermAttribute termAttribute =
                     ts.GetAttribute<Lucene.Net.Analysis.Tokenattributes.ITermAttribute>();
                 terms.Append(termAttribute.Term).Append("|");
             }
         }
     }
     finally
     {
         segmenter.Close();
     }
     return terms.ToString();
 }
 #endregion

#region 创建索引
 /// <summary>
 /// Adds every unit in <paramref name="datalist"/> to the on-disk index and then
 /// optimizes the index.
 /// </summary>
 /// <param name="datalist">Units to index; each one is passed to the per-unit overload.</param>
 /// <returns>false when <paramref name="datalist"/> is null; otherwise true once all units are written.</returns>
 public bool CreateIndex(List<MySearchUnit> datalist)
 {
     if (datalist == null)
     {
         return false;
     }

     // This constructor appends to an existing index and creates one when the
     // directory holds none. It replaces "open for append, and on ANY exception
     // reopen with create=true", which could overwrite an index after a failure
     // that had nothing to do with the index being absent.
     IndexWriter writer = new IndexWriter(directory_luce, analyzer, IndexWriter.MaxFieldLength.LIMITED);
     try
     {
         foreach (MySearchUnit data in datalist)
         {
             CreateIndex(writer, data);
         }
         writer.Optimize();
     }
     finally
     {
         // Always release the writer (and its write lock), even when indexing
         // fails part-way through the list.
         writer.Dispose();
     }
     return true;
 }

/// <summary>
/// Builds one Lucene document from the public properties of <paramref name="data"/>
/// (one stored field per property, named after the property) and adds it through
/// <paramref name="writer"/>.
/// </summary>
/// <param name="writer">Open index writer that receives the document.</param>
/// <param name="data">Unit to index; null is ignored.</param>
/// <returns>false when <paramref name="data"/> is null; otherwise true.</returns>
/// <exception cref="ArgumentNullException"><paramref name="writer"/> is null while data is not.</exception>
public bool CreateIndex(IndexWriter writer, MySearchUnit data)
{
    if (data == null)
    {
        return false;
    }
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    Document doc = new Document();
    foreach (PropertyInfo property in data.GetType().GetProperties())
    {
        // Indexers need arguments and write-only properties have no getter;
        // neither can be read with GetValue(data, null), so skip them.
        if (!property.CanRead || property.GetIndexParameters().Length > 0)
        {
            continue;
        }

        string name = property.Name;
        object raw = property.GetValue(data, null);
        string value = raw == null ? "" : raw.ToString();

        // id and flag are indexed as single un-analyzed terms so they can be
        // matched and deleted exactly; every other property is analyzed.
        Field.Index indexMode = (name == "id" || name == "flag")
            ? Field.Index.NOT_ANALYZED
            : Field.Index.ANALYZED;
        doc.Add(new Field(name, value, Field.Store.YES, indexMode));
    }

    // No catch-and-rethrow wrapper here: exceptions propagate unchanged with
    // their original stack trace.
    writer.AddDocument(doc);
    return true;
}
 #endregion

#region 在title和content字段中查询数据
 /// <summary>
 /// Searches the title and content fields for <paramref name="keyword"/> and
 /// returns at most 1000 hits, with title and content replaced by their best
 /// highlighted fragment.
 /// </summary>
 /// <param name="keyword">Query text; it is handed to the Lucene query parser as-is.</param>
 /// <returns>
 /// The matching units, or null when <paramref name="keyword"/> is null or empty
 /// or when nothing matches.
 /// </returns>
 public List<MySearchUnit> Search(string keyword)
 {
     if (string.IsNullOrEmpty(keyword))
     {
         return null;
     }

     string[] fields = { "title", "content" };
     QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
     Query query = parser.Parse(keyword);
     int maxHits = 1000;

     IndexSearcher searcher = new IndexSearcher(directory_luce, true); // true = read-only
     try
     {
         TopDocs docs = searcher.Search(query, (Filter)null, maxHits);
         if (docs == null || docs.TotalHits == 0)
         {
             return null;
         }

         // The highlighter is configured identically for every hit, so build it once.
         // NOTE(review): both formatter tags are empty strings, so no visible markup
         // is added around matches - confirm whether tags were meant to be here.
         PanGu.HighLight.SimpleHTMLFormatter formatter = new PanGu.HighLight.SimpleHTMLFormatter("", "");
         PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(formatter, new PanGu.Segment());
         highlighter.FragmentSize = 50;

         List<MySearchUnit> list = new List<MySearchUnit>();
         foreach (ScoreDoc sd in docs.ScoreDocs)
         {
             try
             {
                 Document doc = searcher.Doc(sd.Doc);
                 string id = doc.Get("id");
                 string title = doc.Get("title");
                 string content = doc.Get("content");
                 string flag = doc.Get("flag");
                 string imageurl = doc.Get("imageurl");
                 string updatetime = doc.Get("updatetime");

                 content = highlighter.GetBestFragment(keyword, content);
                 // Keep the stored title when the highlighter returns nothing for it.
                 string titlehighlight = highlighter.GetBestFragment(keyword, title);
                 if (titlehighlight != "") title = titlehighlight;

                 list.Add(new MySearchUnit(id, title, content, flag, imageurl, updatetime));
             }
             catch (Exception ex)
             {
                 // Best effort: a hit that cannot be loaded or highlighted is
                 // reported and skipped instead of failing the whole search.
                 Console.WriteLine(ex.Message);
             }
         }
         return list;
     }
     finally
     {
         // Release the searcher on every path, including early returns and errors.
         searcher.Dispose();
     }
 }
#endregion

#region 在不同的分类下再根据title和content字段中查询数据(分页)
 /// <summary>
 /// Paged search: optionally restricts hits to one category (flag) and matches
 /// <paramref name="keyword"/> against the title and content fields.
 /// </summary>
 /// <param name="_flag">Category to restrict to; null or empty applies no category restriction.</param>
 /// <param name="keyword">Query text for title/content; null or empty adds no keyword clause.</param>
 /// <param name="PageIndex">1-based page number; values below 1 are treated as 1.</param>
 /// <param name="PageSize">Number of hits per page.</param>
 /// <param name="TotalCount">Receives the total number of hits across all pages.</param>
 /// <returns>The units on the requested page, or null when there are no hits at all.</returns>
 public List<MySearchUnit> Search(string _flag, string keyword, int PageIndex, int PageSize, out int TotalCount)
 {
     if (PageIndex < 1) PageIndex = 1;
     // Treat null like the empty string so a missing argument behaves as "not supplied".
     if (_flag == null) _flag = "";
     if (keyword == null) keyword = "";

     BooleanQuery bq = new BooleanQuery();
     if (_flag != "")
     {
         // NOTE(review): CreateIndex stores "flag" as NOT_ANALYZED, yet the value is
         // parsed here with the analyzer; a flag the analyzer splits or rewrites may
         // never match. A TermQuery would be exact - confirm no caller relies on
         // query syntax in _flag before switching.
         Query qflag = new QueryParser(version, "flag", analyzer).Parse(_flag);
         bq.Add(qflag, Occur.MUST); // AND
     }
     if (keyword != "")
     {
         string[] fields = { "title", "content" };
         QueryParser parser = new MultiFieldQueryParser(version, fields, analyzer);
         bq.Add(parser.Parse(keyword), Occur.MUST); // AND
     }

     // Collect everything up to the end of the requested page, then slice the page out.
     TopScoreDocCollector collector = TopScoreDocCollector.Create(PageIndex * PageSize, false);
     IndexSearcher searcher = new IndexSearcher(directory_luce, true); // true = read-only
     try
     {
         searcher.Search(bq, collector);
         TotalCount = collector.TotalHits;
         if (TotalCount == 0)
         {
             return null;
         }

         int start = PageSize * (PageIndex - 1);
         ScoreDoc[] hits = collector.TopDocs(start, PageSize).ScoreDocs;

         // The highlighter is configured identically for every hit, so build it once.
         PanGu.HighLight.SimpleHTMLFormatter formatter = new PanGu.HighLight.SimpleHTMLFormatter("", "");
         PanGu.HighLight.Highlighter highlighter = new PanGu.HighLight.Highlighter(formatter, new PanGu.Segment());
         highlighter.FragmentSize = 50;

         List<MySearchUnit> list = new List<MySearchUnit>();
         foreach (ScoreDoc sd in hits)
         {
             try
             {
                 Document doc = searcher.Doc(sd.Doc);
                 string id = doc.Get("id");
                 string title = doc.Get("title");
                 string content = doc.Get("content");
                 string flag = doc.Get("flag");
                 string imageurl = doc.Get("imageurl");
                 string updatetime = doc.Get("updatetime");

                 content = highlighter.GetBestFragment(keyword, content);
                 // Keep the stored title when the highlighter returns nothing for it.
                 string titlehighlight = highlighter.GetBestFragment(keyword, title);
                 if (titlehighlight != "") title = titlehighlight;

                 list.Add(new MySearchUnit(id, title, content, flag, imageurl, updatetime));
             }
             catch (Exception ex)
             {
                 // Best effort: a hit that cannot be loaded or highlighted is
                 // reported and skipped instead of failing the whole page.
                 Console.WriteLine(ex.Message);
             }
         }
         return list;
     }
     finally
     {
         // Release the searcher on every path, including early returns and errors.
         searcher.Dispose();
     }
 }
#endregion

#region 删除索引数据（根据id）
 /// <summary>
 /// Deletes every indexed document whose "id" field equals <paramref name="id"/>
 /// and commits the change.
 /// </summary>
 /// <param name="id">Exact id term to delete; null deletes nothing.</param>
 /// <returns>
 /// false when <paramref name="id"/> is null; otherwise the writer's
 /// HasDeletions() value read after the commit.
 /// </returns>
 public bool Delete(string id)
 {
     if (id == null)
     {
         return false;
     }

     IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
     try
     {
         writer.DeleteDocuments(new Term("id", id));
         writer.Commit();
         // NOTE(review): HasDeletions() describes the index as a whole, so this may
         // not say whether this particular id existed - confirm what callers expect.
         return writer.HasDeletions();
     }
     finally
     {
         // Always release the writer (and its write lock), even when the delete
         // or the commit throws.
         writer.Dispose();
     }
 }
        #endregion

#region 删除全部索引数据
 /// <summary>
 /// Deletes every document in the index and commits the change.
 /// </summary>
 /// <returns>true when the delete and commit complete; false when any step throws.</returns>
 public bool DeleteAll()
 {
     try
     {
         IndexWriter writer = new IndexWriter(directory_luce, analyzer, false, IndexWriter.MaxFieldLength.LIMITED);
         try
         {
             writer.DeleteAll();
             writer.Commit();
         }
         finally
         {
             // Release the writer (and its write lock) even when a step above fails.
             writer.Dispose();
         }

         // Success is "nothing threw". HasDeletions() is not a usable signal here:
         // DeleteAll discards the segments outright instead of marking documents
         // deleted, so HasDeletions() is false after a successful wipe.
         return true;
     }
     catch
     {
         // Deliberate best effort: callers get false rather than an exception.
         return false;
     }
 }
 #endregion

#region directory_luce
 // Backing field for directory_luce; filled on first access and then reused.
 private Lucene.Net.Store.Directory _directory_luce = null;

 /// <summary>
 /// Gets the Lucene directory over the on-disk index folder, opening it on
 /// first access.
 /// </summary>
 public Lucene.Net.Store.Directory directory_luce
 {
     get
     {
         if (_directory_luce != null)
         {
             return _directory_luce;
         }

         _directory_luce = Lucene.Net.Store.FSDirectory.Open(directory);
         return _directory_luce;
     }
 }
 #endregion

#region directory
 // Backing field for directory; filled on first access and then reused.
 private System.IO.DirectoryInfo _directory = null;

 /// <summary>
 /// Gets the folder on disk that holds the index: "SearchIndex" under the
 /// application's base directory. The folder is created if it does not exist.
 /// </summary>
 public System.IO.DirectoryInfo directory
 {
     get
     {
         if (_directory == null)
         {
             // Path.Combine inserts the separator when the base directory does not
             // end with one; plain concatenation would yield "...appSearchIndex".
             string dirPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "SearchIndex");

             // CreateDirectory returns the DirectoryInfo whether or not the folder
             // already exists, so no separate Exists check is needed.
             _directory = System.IO.Directory.CreateDirectory(dirPath);
         }
         return _directory;
     }
 }
 #endregion

#region analyzer
 // Holds the analyzer most recently handed out by the analyzer property.
 private Analyzer _analyzer = null;

 /// <summary>
 /// Gets a PanGu analyzer. A new instance is constructed on every access (the
 /// backing field is overwritten each time, not used as a cache), so callers
 /// that need to use and then close one analyzer must keep their own reference.
 /// </summary>
 public Analyzer analyzer
 {
     get
     {
         Analyzer created = new Lucene.Net.Analysis.PanGu.PanGuAnalyzer();
         _analyzer = created;
         return created;
     }
 }
 #endregion

#region version
 // Lucene version constant shared by all helpers; set to LUCENE_30.
 private static Lucene.Net.Util.Version _version = Lucene.Net.Util.Version.LUCENE_30;

 /// <summary>
 /// Gets the Lucene version value that this helper passes to its query parsers.
 /// </summary>
 public Lucene.Net.Util.Version version
 {
     get { return _version; }
 }
 #endregion
 }

#region 索引的一个行单元，相当于数据库中的一行数据
 /// <summary>
 /// One row of the index, comparable to a single row in a database table.
 /// </summary>
 public class MySearchUnit
 {
     /// <summary>
     /// Creates a unit with all six values supplied up front.
     /// </summary>
     /// <param name="_id">Unique id.</param>
     /// <param name="_title">Title.</param>
     /// <param name="_content">Content.</param>
     /// <param name="_flag">Other information (flag).</param>
     /// <param name="_imageurl">Image path.</param>
     /// <param name="_updatetime">Time.</param>
     public MySearchUnit(string _id, string _title, string _content, string _flag, string _imageurl, string _updatetime)
     {
         id = _id;
         title = _title;
         content = _content;
         flag = _flag;
         imageurl = _imageurl;
         updatetime = _updatetime;
     }

     /// <summary>Unique id.</summary>
     public string id { get; set; }

     /// <summary>Title.</summary>
     public string title { get; set; }

     /// <summary>Content.</summary>
     public string content { get; set; }

     /// <summary>Other information.</summary>
     public string flag { get; set; }

     /// <summary>Image path.</summary>
     public string imageurl { get; set; }

     /// <summary>Time.</summary>
     public string updatetime { get; set; }
 }
 #endregion
}

原文地址：http://blog.csdn.net/pukuimin1226/article/details/17558247

Lucene.Net+盘古分词->开发自己的搜索引擎的更多相关文章

【盘古分词】Lucene.Net 盘古分词实现公众号智能自动回复
盘古分词是一个基于 .net framework 的中英文分词组件.主要功能中文未登录词识别盘古分词可以对一些不在字典中的未登录词自动识别词频优先盘古分词可以根据词频来解决分词的歧义问题多元 ...
【原创】Lucene.Net+盘古分词器(详细介绍)
本章阅读概要 1.Lucene.Net简介 2.介绍盘古分词器 3.Lucene.Net实例分析 4.结束语(Demo下载) Lucene.Net简介 Lucene.net是Lucene的.net移 ...
Lucene.Net+盘古分词器(详细介绍)(转)
出处:http://www.cnblogs.com/magicchaiy/archive/2013/06/07/LuceneNet%E7%9B%98%E5%8F%A4%E5%88%86%E8%AF%8 ...
Lucene.Net+盘古分词器(详细介绍)
本章阅读概要1.Lucene.Net简介2.介绍盘古分词器3.Lucene.Net实例分析4.结束语(Demo下载)Lucene.Net简介 Lucene.net是Lucene的.net移植版本,是 ...
Lucene.Net+盘古分词
前言各位朋友,谢谢大家的支持,由于文件过大,有考虑到版权的问题,故没有提供下载,本人已建立一个搜索技术交流群:77570783,源代码已上传至群共享,需要的朋友,请自行下载! 首先自问自答几个问题, ...
完整的站内搜索Demo(Lucene.Net+盘古分词)
前言首先自问自答几个问题,以让各位看官了解写此文的目的什么是站内搜索?与一般搜索的区别? 很多网站都有搜索功能,很多都是用SQL语句的Like实现的,但是Like无法做到模糊匹配(例如我搜索“.n ...
使用Lucene.net+盘古分词实现搜索查询
这里我的Demo的逻辑是这样的:首先我基本的数据是储存在Sql数据库中,然后我把我的必需的数据推送到MongoDB中,这样再去利用Lucene.net+盘古创建索引:其中为什么要这样把数据推送到Mo ...
完整的站内搜索实战应用(Lucene.Net+盘古分词)
首先自问自答几个问题,以让各位看官了解写此文的目的什么是站内搜索?与一般搜索的区别? 很多网站都有搜索功能,很多都是用SQL语句的Like实现的,但是Like无法做到模糊匹配(例如我搜索". ...
Lucene.net 全文检索盘古分词
lucene.net + 盘古分词引用: 1.Lucene.Net.dll 2.PanGu.Lucene.Analyzer.dll 3.PanGu.HighLight.dll 4.PanGu.dll ...

随机推荐

谷歌console开发人员官方文档（注意是谷歌）
https://developers.google.com/web/tools/chrome-devtools/debug/console/console-ui?hl=en#opening-the-c ...
ConvertHelper类
/// <summary> /// 处理数据类型转换,数制转换.编码转换相关的类 /// </summary> public sealed class ConvertHelpe ...
POJ1185 炮兵阵地
题目描述 Description 司令部的将军们打算在N × M的网格地图上部署他们的炮兵部队.一个N × M的地图由N行M列组成,地图的每一格可能是山地(用"H"表示),也可能是 ...
css伪元素研究（::before/::after）
::before/::after和:before/:after实质上效果一样不过,在 CSS3 中为了区别伪元素和伪类为伪元素使用了双冒号,因此如果使用了 display 或者 width 等属性时 ...
CodeForces 37E Trial for Chief
Time Limit: 2000MS Memory Limit: 262144KB 64bit IO Format: %I64d & %I64u Description Having ...
Swift 函数做参数和闭包做参数的一个细节差别
函数作参数,示例为传入一个String和一个添加前缀的函数,返回一个添加完前缀的String: func demo(str:String,addPrefix:(String)->String)- ...
MyEclipse 8.5 注册码生成代码
import java.io.*; public class MyEclipseGen { private static final String LL = "Decompiling thi ...
js实现身份证号码验证
/*根据[中华人民共和国国家标准 GB 11643-1999]中有关公民身份号码的规定,公民身份号码是特征组合码,由十七位数字本体码和一位数字校验码组成.排列顺序从左至右依次为:六位数字地址码,八位数 ...
Docker入门教程（六）另外的15个Docker命令
Docker入门教程(六)另外的15个Docker命令 [编者的话]DockerOne组织翻译了Flux7的Docker入门教程,本文是系列入门教程的第六篇,继续介绍Docker命令.之前的第二篇文章 ...
在不同的python版本中切换
issue description 在一台电脑上同时安装了python2.7和python3.5,怎样在这两个版本中切换调用? solution to the issue 进入python安装文件夹, ...

Lucene.Net+盘古分词->开发自己的搜索引擎

Lucene.Net+盘古分词->开发自己的搜索引擎的更多相关文章

随机推荐

热门专题