接下来看看下面index部分的源码实现:

    data := struct {
Name string
Des string
}{
Name: "hello world this is bone",
Des: "this is a good time",
} // index some data
index.Index("id", data)

其中,

index.Index("id", data)

实现代码:

// Index adds the specified index operation to the
// batch. NOTE: the bleve Index is not updated
// until the batch is executed.
func (b *Batch) Index(id string, data interface{}) error {
if id == "" {
return ErrorEmptyID
}
doc := document.NewDocument(id)
err := b.index.Mapping().MapDocument(doc, data)
if err != nil {
return err
}
b.internal.Update(doc)
return nil
}

根据mapping来映射文档,

 b.index.Mapping().MapDocument(doc, data)

该代码的实现:

func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
docType := im.determineType(data)
docMapping := im.mappingForType(docType)
walkContext := im.newWalkContext(doc, docMapping)
if docMapping.Enabled {
docMapping.walkDocument(data, []string{}, []uint64{}, walkContext) // see if the _all field was disabled
allMapping := docMapping.documentMappingForPath("_all")
if allMapping == nil || (allMapping.Enabled != false) {
field := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, walkContext.excludedFromAll, document.IndexField|document.IncludeTermVectors)
doc.AddField(field)
}
} return nil
}
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
// allow default "json" tag to be overriden
structTagKey := dm.StructTagKey
if structTagKey == "" {
structTagKey = "json"
} val := reflect.ValueOf(data)
typ := val.Type()
switch typ.Kind() {
case reflect.Map:
// FIXME can add support for other map keys in the future
if typ.Key().Kind() == reflect.String {
for _, key := range val.MapKeys() {
fieldName := key.String()
fieldVal := val.MapIndex(key).Interface()
dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
}
}
case reflect.Struct:
for i := ; i < val.NumField(); i++ {
field := typ.Field(i)
fieldName := field.Name
// anonymous fields of type struct can elide the type name
if field.Anonymous && field.Type.Kind() == reflect.Struct {
fieldName = ""
} // if the field has a name under the specified tag, prefer that
tag := field.Tag.Get(structTagKey)
tagFieldName := parseTagName(tag)
if tagFieldName == "-" {
continue
}
// allow tag to set field name to empty, only if anonymous
if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
fieldName = tagFieldName
} if val.Field(i).CanInterface() {
fieldVal := val.Field(i).Interface()
newpath := path
if fieldName != "" {
newpath = append(path, fieldName)
}
dm.processProperty(fieldVal, newpath, indexes, context)
}
}
case reflect.Slice, reflect.Array:
for i := ; i < val.Len(); i++ {
if val.Index(i).CanInterface() {
fieldVal := val.Index(i).Interface()
dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
}
}
case reflect.Ptr:
ptrElem := val.Elem()
if ptrElem.IsValid() && ptrElem.CanInterface() {
dm.processProperty(ptrElem.Interface(), path, indexes, context)
}
case reflect.String:
dm.processProperty(val.String(), path, indexes, context)
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
dm.processProperty(float64(val.Int()), path, indexes, context)
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(val.Uint()), path, indexes, context)
case reflect.Float32, reflect.Float64:
dm.processProperty(float64(val.Float()), path, indexes, context)
case reflect.Bool:
dm.processProperty(val.Bool(), path, indexes, context)
} }
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
pathString := encodePath(path)
// look to see if there is a mapping for this field
subDocMapping := dm.documentMappingForPath(pathString)
closestDocMapping := dm.closestDocMapping(pathString) // check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {
return
} propertyValue := reflect.ValueOf(property)
if !propertyValue.IsValid() {
// cannot do anything with the zero value
return
}
propertyType := propertyValue.Type()
switch propertyType.Kind() {
case reflect.String:
propertyValueString := propertyValue.String()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior // first see if it can be parsed by the default date parser
dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
if dateTimeParser != nil {
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err != nil {
// index as text
fieldMapping := newTextFieldMappingDynamic(context.im)
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
} else {
// index as datetime
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)
}
}
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
return
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
return
case reflect.Float64, reflect.Float32:
propertyValFloat := propertyValue.Float()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newNumericFieldMappingDynamic(context.im)
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
case reflect.Bool:
propertyValBool := propertyValue.Bool()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newBooleanFieldMappingDynamic(context.im)
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
case reflect.Struct:
switch property := property.(type) {
case time.Time:
// don't descend into the time struct
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processTime(property, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(property, pathString, path, indexes, context)
}
default:
dm.walkDocument(property, path, indexes, context)
}
default:
dm.walkDocument(property, path, indexes, context)
}
}

分词的部分终于来了!

func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) {
fieldName := getFieldName(pathString, path, fm)
options := fm.Options()
if fm.Type == "text" {
analyzer := fm.analyzerForField(path, context)
field := document.NewTextFieldCustom(fieldName, indexes, []byte(propertyValueString), options, analyzer)
context.doc.AddField(field) if !fm.IncludeInAll {
context.excludedFromAll = append(context.excludedFromAll, fieldName)
}
} else if fm.Type == "datetime" {
dateTimeFormat := context.im.DefaultDateTimeParser
if fm.DateFormat != "" {
dateTimeFormat = fm.DateFormat
}
dateTimeParser := context.im.DateTimeParserNamed(dateTimeFormat)
if dateTimeParser != nil {
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
if err == nil {
fm.processTime(parsedDateTime, pathString, path, indexes, context)
}
}
}
} func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) {
fieldName := getFieldName(pathString, path, fm)
if fm.Type == "number" {
options := fm.Options()
field := document.NewNumericFieldWithIndexingOptions(fieldName, indexes, propertyValFloat, options)
context.doc.AddField(field) if !fm.IncludeInAll {
context.excludedFromAll = append(context.excludedFromAll, fieldName)
}
}
}

bleve搜索引擎源码分析之索引——mapping真复杂啊的更多相关文章

  1. bleve搜索引擎源码分析之索引——mapping和lucene一样,也有_all

    例子: package main import ( "fmt" "github.com/blevesearch/bleve" ) func main() { / ...

  2. Spark源码分析 – 汇总索引

    http://jerryshao.me/categories.html#architecture-ref http://blog.csdn.net/pelick/article/details/172 ...

  3. wukong引擎源码分析之索引——part 1 倒排列表本质是有序数组存储

    searcher.IndexDocument(0, types.DocumentIndexData{Content: "此次百度收购将成中国互联网最大并购"}) engine.go ...

  4. wukong引擎源码分析之索引——part 3 文档评分 无非就是将docid对应的fields信息存储起来,为搜索结果rank评分用

    之前的文章分析过,接受索引请求处理的代码在segmenter_worker.go里: func (engine *Engine) segmenterWorker() { for { request : ...

  5. lua源码分析 伪索引

    Lua 提供了一个 注册表, 这是一个预定义出来的表, 可以用来保存任何 C 代码想保存的 Lua 值. 这个表可以用有效伪索引 LUA_REGISTRYINDEX 来定位. 任何 C 库都可以在这张 ...

  6. wukong引擎源码分析之索引——part 2 持久化 直接set(key,docID数组)在kv存储里

    前面说过,接收indexerRequest的代码在index_worker.go里: func (engine *Engine) indexerAddDocumentWorker(shard int) ...

  7. 4 weekend110的textinputformat对切片规划的源码分析 + 倒排索引的mr实现 + 多个job在同一个main方法中提交

    好的,现在,来weekend110的textinputformat对切片规划的源码分析, Inputformat默认是textinputformat,一通百通. 这就是今天,weekend110的te ...

  8. 【异常及源码分析】org.mybatis.spring.MyBatisSystemException: nested exception is org.apache.ibatis.type.TypeException: Could not set parameters for mapping: ParameterMapping

    一.异常出现的场景 1)异常出现的SQL @Select("SELECT\n" + " id,discount_type ,min_charge, ${cardFee} ...

  9. Solr4.8.0源码分析(14)之SolrCloud索引深入(1)

    Solr4.8.0源码分析(14) 之 SolrCloud索引深入(1) 上一章节<Solr In Action 笔记(4) 之 SolrCloud分布式索引基础>简要学习了SolrClo ...

随机推荐

  1. 大数据学习——HDFS的shell

    -help 功能:输出这个命令参数手册 -ls 功能:显示目录信息 示例: hadoop fs -ls hdfs://hadoop-server01:9000/ 备注:这些参数中,所有的hdfs路径都 ...

  2. HDU-2509-Be the Winner,博弈题~~水过~~

    Be the Winner Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 32768/32768 K (Java/Others) ht ...

  3. JS Enter键跳转 控件获得焦点

    //回车跳转 jQuery(document).ready(function () { //$(':input:text:first').focus(); jQuery(':input:enabled ...

  4. [luoguP2886] [USACO07NOV]牛继电器Cow Relays(矩阵)

    传送门 矩阵快速幂,本质是floyd 把 * 改成 + 即可 注意初始化 因为只有100条边,所以可以离散化 #include <cstdio> #include <cstring& ...

  5. 【搜索】codeforces C. The Tag Game

    http://codeforces.com/contest/813/problem/C [题意] 给定一棵有n个结点的树,初始时Alice在根结点1,Bob在非根结点x; Alice和Bob轮流走,每 ...

  6. hdu 1879

    #include<stdio.h> #include<stdlib.h> #define N 100 struct node { int x,y,dis; }road[N*N] ...

  7. BZOJ1740: [Usaco2005 mar]Yogurt factory 奶酪工厂

    n<=10000天每天Ci块生产一东西,S块保存一天,每天要交Yi件东西,求最少花多少钱. 这个我都不知道归哪类了.. #include<stdio.h> #include<s ...

  8. python学习之 - re模块

    re模块功能:实现字符串匹配. 元字符 描述\ 将下一个字符标记符.或一个向后引用.或一个八进制转义符.例如,“\\n”匹配\n.“\n”匹配换行符.序列“\\”匹配“\”而“\(”则匹配“(”.即相 ...

  9. kis

    http://5.xp510.com:801/xp2011/%E9%87%91%E8%9D%B6kis%E4%B8%93%E4%B8%9A%E7%89%88.rar

  10. GreenDao数据库的升级

    应用使用了GreenDao数据库,在版本升级的时候需要更改dao的字段,新增.修改.删除字段操作,如果直接删除原来的表的话那用户原来的一些数据就没有了,所以在更新数据库的时候需要做一次封装,把原来的数 ...