bleve搜索引擎源码分析之索引——mapping真复杂啊

接下来看看下面index部分的源码实现：

    // Sample document to index: an anonymous struct with two string fields.
    data := struct {

        Name string

        Des  string

    }{

        Name: "hello world this is bone",

        Des:  "this is a good time",

    }

    // index some data

    // NOTE(review): the Index method shown later in this article returns an
    // error; the return value is discarded here — confirm whether it should
    // be checked.
    index.Index("id", data)

其中，

index.Index("id", data)

实现代码（注意：下面列出的是 Batch 上的 Index 方法，它同样先创建文档、再调用 MapDocument 做映射）：

// Index queues an index operation for the document identified by id.
// The data is first mapped onto a new document using the index mapping,
// and the resulting document is recorded in the batch.
//
// It returns ErrorEmptyID when id is empty, or the error reported by
// MapDocument when the mapping step fails.
//
// NOTE: the bleve Index is not updated until the batch is executed.
func (b *Batch) Index(id string, data interface{}) error {
    if len(id) == 0 {
        return ErrorEmptyID
    }

    doc := document.NewDocument(id)
    if err := b.index.Mapping().MapDocument(doc, data); err != nil {
        return err
    }

    b.internal.Update(doc)
    return nil
}

根据mapping来映射文档，

 b.index.Mapping().MapDocument(doc, data)

该代码的实现：

// MapDocument populates doc with the fields found in data.
//
// The document mapping is chosen from the type determined for data. When
// that mapping is enabled, data is walked and, unless a mapping for "_all"
// exists and is disabled, a composite "_all" field is added to doc. The
// composite field is given the names collected in the walk context's
// excludedFromAll list.
//
// The returned error is always nil.
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
    docMapping := im.mappingForType(im.determineType(data))
    wc := im.newWalkContext(doc, docMapping)
    if !docMapping.Enabled {
        return nil
    }

    docMapping.walkDocument(data, []string{}, []uint64{}, wc)

    // skip the _all field only when it has been explicitly disabled
    if allMapping := docMapping.documentMappingForPath("_all"); allMapping != nil && !allMapping.Enabled {
        return nil
    }

    allField := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, wc.excludedFromAll, document.IndexField|document.IncludeTermVectors)
    doc.AddField(allField)
    return nil
}

func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {

    // allow default "json" tag to be overriden

    structTagKey := dm.StructTagKey

    if structTagKey == "" {

        structTagKey = "json"

    }

    val := reflect.ValueOf(data)

    typ := val.Type()

    switch typ.Kind() {

    case reflect.Map:

        // FIXME can add support for other map keys in the future

        if typ.Key().Kind() == reflect.String {

            for _, key := range val.MapKeys() {

                fieldName := key.String()

                fieldVal := val.MapIndex(key).Interface()

                dm.processProperty(fieldVal, append(path, fieldName), indexes, context)

            }

        }

    case reflect.Struct:

        for i := ; i < val.NumField(); i++ {

            field := typ.Field(i)

            fieldName := field.Name

            // anonymous fields of type struct can elide the type name

            if field.Anonymous && field.Type.Kind() == reflect.Struct {

                fieldName = ""

            }

            // if the field has a name under the specified tag, prefer that

            tag := field.Tag.Get(structTagKey)

            tagFieldName := parseTagName(tag)

            if tagFieldName == "-" {

                continue

            }

            // allow tag to set field name to empty, only if anonymous

            if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {

                fieldName = tagFieldName

            }

            if val.Field(i).CanInterface() {

                fieldVal := val.Field(i).Interface()

                newpath := path

                if fieldName != "" {

                    newpath = append(path, fieldName)

                }

                dm.processProperty(fieldVal, newpath, indexes, context)

            }

        }

    case reflect.Slice, reflect.Array:

        for i := ; i < val.Len(); i++ {

            if val.Index(i).CanInterface() {

                fieldVal := val.Index(i).Interface()

                dm.processProperty(fieldVal, path, append(indexes, uint64(i)), context)

            }

        }

    case reflect.Ptr:

        ptrElem := val.Elem()

        if ptrElem.IsValid() && ptrElem.CanInterface() {

            dm.processProperty(ptrElem.Interface(), path, indexes, context)

        }

    case reflect.String:

        dm.processProperty(val.String(), path, indexes, context)

    case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:

        dm.processProperty(float64(val.Int()), path, indexes, context)

    case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:

        dm.processProperty(float64(val.Uint()), path, indexes, context)

    case reflect.Float32, reflect.Float64:

        dm.processProperty(float64(val.Float()), path, indexes, context)

    case reflect.Bool:

        dm.processProperty(val.Bool(), path, indexes, context)

    }

}

func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {

    pathString := encodePath(path)

    // look to see if there is a mapping for this field

    subDocMapping := dm.documentMappingForPath(pathString)

    closestDocMapping := dm.closestDocMapping(pathString)

    // check to see if we even need to do further processing

    if subDocMapping != nil && !subDocMapping.Enabled {

        return

    }

    propertyValue := reflect.ValueOf(property)

    if !propertyValue.IsValid() {

        // cannot do anything with the zero value

        return

    }

    propertyType := propertyValue.Type()

    switch propertyType.Kind() {

    case reflect.String:

        propertyValueString := propertyValue.String()

        if subDocMapping != nil {

            // index by explicit mapping

            for _, fieldMapping := range subDocMapping.Fields {

                fieldMapping.processString(propertyValueString, pathString, path, indexes, context)

            }

        } else if closestDocMapping.Dynamic {

            // automatic indexing behavior

            // first see if it can be parsed by the default date parser

            dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)

            if dateTimeParser != nil {

                parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)

                if err != nil {

                    // index as text

                    fieldMapping := newTextFieldMappingDynamic(context.im)

                    fieldMapping.processString(propertyValueString, pathString, path, indexes, context)

                } else {

                    // index as datetime

                    fieldMapping := newDateTimeFieldMappingDynamic(context.im)

                    fieldMapping.processTime(parsedDateTime, pathString, path, indexes, context)

                }

            }

    case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:

        dm.processProperty(float64(propertyValue.Int()), path, indexes, context)

        return

    case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:

        dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)

        return

    case reflect.Float64, reflect.Float32:

        propertyValFloat := propertyValue.Float()

        if subDocMapping != nil {

            // index by explicit mapping

            for _, fieldMapping := range subDocMapping.Fields {

                fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)

            }

        } else if closestDocMapping.Dynamic {

            // automatic indexing behavior

            fieldMapping := newNumericFieldMappingDynamic(context.im)

            fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)

        }

    case reflect.Bool:

        propertyValBool := propertyValue.Bool()

        if subDocMapping != nil {

            // index by explicit mapping

            for _, fieldMapping := range subDocMapping.Fields {

                fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)

            }

        } else if closestDocMapping.Dynamic {

            // automatic indexing behavior

            fieldMapping := newBooleanFieldMappingDynamic(context.im)

            fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)

        }

    case reflect.Struct:

        switch property := property.(type) {

        case time.Time:

            // don't descend into the time struct

            if subDocMapping != nil {

                // index by explicit mapping

                for _, fieldMapping := range subDocMapping.Fields {

                    fieldMapping.processTime(property, pathString, path, indexes, context)

                }

            } else if closestDocMapping.Dynamic {

                fieldMapping := newDateTimeFieldMappingDynamic(context.im)

                fieldMapping.processTime(property, pathString, path, indexes, context)

            }

        default:

            dm.walkDocument(property, path, indexes, context)

        }

    default:

        dm.walkDocument(property, path, indexes, context)

    }

}

分词的部分终于来了！

// processString indexes a string value according to this field mapping.
//
// For a "text" mapping, a text field is created with the mapping's options
// and the analyzer resolved for the field, and added to the document; the
// field name is recorded in context.excludedFromAll when IncludeInAll is
// false. For a "datetime" mapping, the string is parsed with the parser
// named by DateFormat (or the index default when DateFormat is empty) and,
// if parsing succeeds, handed to processTime; a missing parser or a parse
// failure is ignored. Any other mapping type does nothing.
func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) {
    fieldName := getFieldName(pathString, path, fm)
    options := fm.Options()

    switch fm.Type {
    case "text":
        analyzer := fm.analyzerForField(path, context)
        textField := document.NewTextFieldCustom(fieldName, indexes, []byte(propertyValueString), options, analyzer)
        context.doc.AddField(textField)

        if fm.IncludeInAll {
            return
        }
        context.excludedFromAll = append(context.excludedFromAll, fieldName)

    case "datetime":
        parserName := context.im.DefaultDateTimeParser
        if fm.DateFormat != "" {
            parserName = fm.DateFormat
        }

        parser := context.im.DateTimeParserNamed(parserName)
        if parser == nil {
            return
        }
        if parsed, err := parser.ParseDateTime(propertyValueString); err == nil {
            fm.processTime(parsed, pathString, path, indexes, context)
        }
    }
}

// processFloat64 indexes a numeric value according to this field mapping.
//
// Only mappings of type "number" do anything: a numeric field carrying the
// mapping's options is added to the document, and the field name is
// recorded in context.excludedFromAll when IncludeInAll is false.
func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) {
    fieldName := getFieldName(pathString, path, fm)
    if fm.Type != "number" {
        return
    }

    numericField := document.NewNumericFieldWithIndexingOptions(fieldName, indexes, propertyValFloat, fm.Options())
    context.doc.AddField(numericField)

    if fm.IncludeInAll {
        return
    }
    context.excludedFromAll = append(context.excludedFromAll, fieldName)
}

bleve搜索引擎源码分析之索引——mapping真复杂啊的更多相关文章

bleve搜索引擎源码分析之索引——mapping和lucene一样，也有_all
例子: package main import ( "fmt" "github.com/blevesearch/bleve" ) func main() { / ...
Spark源码分析 – 汇总索引
http://jerryshao.me/categories.html#architecture-ref http://blog.csdn.net/pelick/article/details/172 ...
wukong引擎源码分析之索引——part 1 倒排列表本质是有序数组存储
searcher.IndexDocument(0, types.DocumentIndexData{Content: "此次百度收购将成中国互联网最大并购"}) engine.go ...
wukong引擎源码分析之索引——part 3 文档评分无非就是将docid对应的fields信息存储起来，为搜索结果rank评分用
之前的文章分析过,接受索引请求处理的代码在segmenter_worker.go里: func (engine *Engine) segmenterWorker() { for { request : ...
lua源码分析伪索引
Lua 提供了一个注册表, 这是一个预定义出来的表, 可以用来保存任何 C 代码想保存的 Lua 值. 这个表可以用有效伪索引 LUA_REGISTRYINDEX 来定位. 任何 C 库都可以在这张 ...
wukong引擎源码分析之索引——part 2 持久化直接set（key，docID数组）在kv存储里
前面说过,接收indexerRequest的代码在index_worker.go里: func (engine *Engine) indexerAddDocumentWorker(shard int) ...
4 weekend110的textinputformat对切片规划的源码分析 + 倒排索引的mr实现 + 多个job在同一个main方法中提交
好的,现在,来weekend110的textinputformat对切片规划的源码分析, Inputformat默认是textinputformat,一通百通. 这就是今天,weekend110的te ...
【异常及源码分析】org.mybatis.spring.MyBatisSystemException: nested exception is org.apache.ibatis.type.TypeException: Could not set parameters for mapping: ParameterMapping
一.异常出现的场景 1)异常出现的SQL @Select("SELECT\n" + " id,discount_type ,min_charge, ${cardFee} ...
Solr4.8.0源码分析(14)之SolrCloud索引深入(1)
Solr4.8.0源码分析(14) 之 SolrCloud索引深入(1) 上一章节<Solr In Action 笔记(4) 之 SolrCloud分布式索引基础>简要学习了SolrClo ...

随机推荐

xtu summer individual 1 D - Round Numbers
D - Round Numbers Time Limit:2000MS Memory Limit:65536KB 64bit IO Format:%I64d & %I64u D ...
Relocation（状压DP）
Description Emma and Eric are moving to their new house they bought after returning from their honey ...
UVA674-Coin Change,用动归思想来递推！
674 - Coin Change 题意:有1分,5分,10分,25分,50分共5种硬币,数量不限.给你一个n求有多少种方法凑齐n,注意:d[0]=1; 思路:推了前几组样例,可以发现直接用当前状态累 ...
C. The Smallest String Concatenation-C++sort排序~~
C. The Smallest String Concatenation time limit per test 3 seconds memory limit per test 256 megabyt ...
NOIP2009T3最优贸易（Dfs + spfa）
洛谷传送门看到这个题,原本想先从后往前dfs,求出能到终点的点,再在这些点里从前往后spfa,用一条边上的两个城市的商品价格的差来作边权,实施过后,发现图中既有负边权,又有回路,以及各种奇奇怪怪的东 ...
BZOJ——2697: 特技飞行
http://www.lydsy.com/JudgeOnline/problem.php?id=2697 Time Limit: 10 Sec Memory Limit: 256 MBSubmit: ...
Codeforces 954 D Fight Against Traffic
Discription Little town Nsk consists of n junctions connected by m bidirectional roads. Each road co ...
CORS:Source.priciple implimentation in Spring
Cors(Cross-origin Resource Sharing)一种跨域访问技术,基本思想是使用自定义的HTTP头部允许浏览器和服务器相互了解对方,从而决定响应成功与否. CORS与JSONP对 ...
手机没Root？你照样可以渗透路由器
和Metasploit差不多,RouterSploit是一个强大的漏洞利用框架,用于快速识别和利用路由器中的普通漏洞,它还有个亮点,就是可以在绝大多数安卓设备上运行. 如果你想在电脑上运行,可以阅读这 ...
【转】nginx 和 php-fpm 通信使用unix socket还是TCP，及其配置
原文: http://blog.csdn.net/pcyph/article/details/46513521 -------------------------------------------- ...

bleve搜索引擎源码分析之索引——mapping真复杂啊

bleve搜索引擎源码分析之索引——mapping真复杂啊的更多相关文章

随机推荐

热门专题