Elasticsearch由浅入深（九）搜索引擎：query DSL、filter与query、query搜索实战

search api的基本语法

语法概要：

GET /_search

{}

GET /index1,index2/type1,type2/_search

{}

GET /_search

{

  "from": 0,

  "size": 10

}

http协议中get是否可以带上request body？

HTTP协议，一般不允许get请求带上request body，但是因为get更加适合描述查询数据的操作，因此还是这么用了。

很多浏览器，或者是服务器，也都支持GET+request body模式

如果遇到不支持的场景，也可以用POST /_search

GET /_search?from=0&size=10

POST /_search

{

  "from": 0,

  "size": 10

}

query DSL

一个例子让你明白什么是query DSL

GET /_search

{

    "query": {

        "match_all": {}

    }

}

Query DSL的基本语法

GET /{index}/{type}/_search

{

    "各种条件"

}

示例：

GET /test_index/test_type/_search

{

  "query": {

    "match": {

      "test_field": "test"

    }

  }

}

{

  "took": 6,

  "timed_out": false,

  "_shards": {

    "total": 5,

    "successful": 5,

    "failed": 0

  },

  "hits": {

    "total": 3,

    "max_score": 0.843298,

    "hits": [

      {

        "_index": "test_index",

        "_type": "test_type",

        "_id": "",

        "_score": 0.843298,

        "_source": {

          "test_field": "test test"

        }

      },

      {

        "_index": "test_index",

        "_type": "test_type",

        "_id": "",

        "_score": 0.43445712,

        "_source": {

          "test_field": "test client 2"

        }

      },

      {

        "_index": "test_index",

        "_type": "test_type",

        "_id": "",

        "_score": 0.25316024,

        "_source": {

          "test_field": "test client 1"

        }

      }

    ]

  }

}

组合多个搜索条件

搜索需求：title必须包含elasticsearch，content可以包含elasticsearch也可以不包含，author_id必须不为111

构造数据：

PUT /website/article/1

{

  "title":"my elasticsearch article",

  "content":"es is very bad",

  "author_id": 110

}

PUT /website/article/2

{

  "title":"my hadoop article",

  "content":"hadoop is very bad",

  "author_id": 111

}

PUT /website/article/3

{

  "title":"my hadoop article",

  "content":"hadoop is very good",

  "author_id": 111

}

组合查询：

GET /website/article/_search

{

  "query": {

    "bool": {

      "must": [

        {

          "match": {

            "title": "elasticsearch"

          }

        }

      ],

      "should": [

        {

          "match": {

            "content": "elasticsearch"

          }

        }

      ],

      "must_not": [

        {

          "match": {

            "author_id": 111

          }

        }

      ]

    }

  }

}

查询结果：

{

  "took": 6,

  "timed_out": false,

  "_shards": {

    "total": 5,

    "successful": 5,

    "failed": 0

  },

  "hits": {

    "total": 1,

    "max_score": 0.25316024,

    "hits": [

      {

        "_index": "website",

        "_type": "article",

        "_id": "1",

        "_score": 0.25316024,

        "_source": {

          "title": "my elasticsearch article",

          "content": "es is very bad",

          "author_id": 110

        }

      }

    ]

  }

}

filter与query

初始化数据：

PUT /company/employee/1

{

  "address": {

    "country": "china",

    "province": "jiangsu",

    "city": "nanjing"

  },

  "name": "tom",

  "age": 30,

  "join_date": "2016-01-01"

}

PUT /company/employee/2

{

  "address": {

    "country": "china",

    "province": "shanxi",

    "city": "xian"

  },

  "name": "marry",

  "age": 35,

  "join_date": "2015-01-01"

}

搜索请求：年龄必须大于等于30，同时join_date必须是2016-01-01

GET /company/employee/_search

{

  "query": {

    "bool": {

      "must": [

        {

          "match": {

            "join_date": "2016-01-01"

          }

        }

      ],

      "filter": {

        "range": {

          "age": {

            "gte": 30

          }

        }

      }

    }

  }

}

filter与query对比大揭秘

filter，仅仅只是按照搜索条件过滤出需要的数据而已，不计算任何相关度分数，对相关度没有任何影响
query，会去计算每个document相对于搜索条件的相关度，并按照相关度进行排序

一般来说，如果你是在进行搜索，需要将最匹配搜索条件的数据先返回，那么用query；如果你只是要根据一些条件筛选出一部分数据，不关注其排序，那么用filter

如果你希望越符合这些搜索条件的document越排在前面返回，那么这些搜索条件要放在query中；如果你不希望一些搜索条件来影响你的document排序，那么就放在filter中即可

filter与query性能

filter，不需要计算相关度分数，不需要按照相关度分数进行排序，同时ES还内置了自动cache机制，会缓存最常使用的filter的结果
query，相反，要计算相关度分数，按照分数进行排序，而且无法cache结果

Elasticsearch 实战各种query搜索

各种query搜索语法

match_all

GET /_search

{

    "query": {

        "match_all": {}

    }

}

match

GET /{index}/_search

{

  "query": {

    "match": {

      "FIELD": "TEXT"

    }

  }

}

multi match

GET /{index}/_search

{

  "query": {

    "multi_match": {

      "query": "",

      "fields": []

    }

  }

}

示例

GET /test_index/test_type/_search

{

  "query": {

    "multi_match": {

      "query": "test",

      "fields": ["test_field", "test_field1"]

    }

  }

}

range query

GET /{index}/_search

{

  "query": {

    "range": {

      "FIELD": {

        "gte": 10,

        "lte": 20

      }

    }

  }

}

示例

GET /company/employee/_search

{

  "query": {

    "range": {

      "age": {

        "gte": 30

      }

    }

  }

}

term query(与match相比不分词)

GET /{index}/_search

{

  "query": {

    "term": {

      "FIELD": {

        "value": "VALUE"

      }

    }

  }

}

示例

GET /test_index/test_type/_search

{

  "query": {

    "term": {

      "test_field": "test hello"

    }

  }

}

terms query

GET /{index}/_search

{

  "query": {

    "terms": {

      "FIELD": [

        "VALUE1",

        "VALUE2"

      ]

    }

  }

}

示例

GET /_search

{

    "query": { "terms": { "tag": [ "search", "full_text", "nosql" ] }}

}

exists query

GET /{index}/_search

{

  "query": {

    "exists": {

       "field": ""

    }

  }

}

多搜索条件组合查询

bool: must, must_not, should, filter

每个子查询都会计算一个document针对它的相关度分数，然后bool综合所有分数，合并为一个分数，当然filter是不会计算分数的。

GET /company/employee/_search

{

  "query": {

    "constant_score": {

      "filter": {

        "range": {

          "age": {

            "gte": 30

          }

        }

      }

    }

  }

}

定位不合法的搜索

一般用在那种特别复杂庞大的搜索下，比如你一下子写了上百行的搜索，这个时候可以先用validate api去验证一下，搜索是否合法

GET /test_index/test_type/_validate/query?explain

{

  "query": {

    "math": {

      "test_field": "test"

    }

  }

}

{

  "valid": false,

  "error": "org.elasticsearch.common.ParsingException: no [query] registered for [math]"

}

正常数据

GET /test_index/test_type/_validate/query?explain

{

  "query":{

    "match":{

      "test_field":"test"

    }

  }

}

{

  "valid": true,

  "_shards": {

    "total": 1,

    "successful": 1,

    "failed": 0

  },

  "explanations": [

    {

      "index": "test_index",

      "valid": true,

      "explanation": "+test_field:test #(#_type:test_type)"

    }

  ]

}

定制搜索结果的排序规则

默认情况下，返回的document是按照_score降序排列的。如果我们想自己定义排序规则怎么办，此时只需要使用sort即可

语法：

# 主要语法

"sort": [

    {

      "FIELD": {

        "order": "desc"

      }

    }

  ]

# 整体位置

GET /{index}/_search

{

  "query": {

    "constant_score": {

      "filter": {

        "exists": {

          "field": ""

        }

      },

      "boost": 1.2

    }

  },

  "sort": [

    {

      "FIELD": {

        "order": "desc"

      }

    }

  ]

}

示例：

GET /company/employee/_search

{

  "query": {

    "constant_score": {

      "filter": {

        "range": {

          "age": {

            "gte": 30

          }

        }

      }

    }

  },

  "sort": [

    {

      "join_date": {

        "order": "asc"

      }

    }

  ]

}

将一个field索引两次来解决字符串排序问题

如果某个字段的类型是text，在创建索引的时候，ES会针对每个document对这个text字段的内容进行分词。此时如果直接对该字段排序，排序依据是分词后的单个term，而不是完整的字符串，结果往往不符合预期；而ES又不允许对已经存在的field的类型进行修改。解决办法是在建立mapping时将该field索引两次：一次分词，用于全文搜索；一次不分词（例如下面的title.raw），用于排序。具体看下面展示的示例。

# 删除原来的索引

DELETE /website

# 手动建立索引

PUT /website

{

  "mappings": {

    "article": {

      "properties": {

        "title":{

          "type": "text",

          "fields": {

            "raw":{

              "type": "keyword"

            }

          },

          "fielddata": true

        },

        "content":{

          "type": "text"

        },

        "post_date":{

          "type": "date"

        },

        "author_id":{

          "type": "long"

        }

      }

    }

  }

}

插入模拟数据

PUT /website/article/1

{

  "title": "second article",

  "content": "this is my second article",

  "post_date": "2017-01-01",

  "author_id": 110

}

PUT /website/article/2

{

  "title": "first article",

  "content": "this is my first article",

  "post_date": "2017-02-01",

  "author_id": 110

}

PUT /website/article/3

{

  "title": "third article",

  "content": "this is my third article",

  "post_date": "2017-03-01",

  "author_id": 110

}

按照不分词排序

GET /website/article/_search

{

  "query": {

    "match_all": {}

  },

  "sort": [

    {

      "title.raw": {

        "order": "desc"

      }

    }

  ]

}