

from datetime import datetime

from elasticsearch_dsl import (
    Boolean,
    Completion,
    Date,
    Document,
    InnerDoc,
    Integer,
    Keyword,
    Nested,
    Text,
    analyzer,
)
from elasticsearch_dsl.analysis import CustomAnalyzer as _CustomAnalyzer
from elasticsearch_dsl.connections import connections

# Configure the Elasticsearch connection (host "localhost") as a side effect
# of importing this module.
connections.create_connection(hosts=["localhost"])

# Disabled code, kept for reference: a CustomAnalyzer subclass whose
# get_analysis_definition() returns an empty dict, and the ``ik_analyzer``
# instance built from it.
#
# class CustomAnalyzer(_CustomAnalyzer):
#     def get_analysis_definition(self):
#         return {}
#
#
# ik_analyzer = CustomAnalyzer("ik_max_word", filter=["lowercase"])
class ArticleType(Document):
    """Elasticsearch document describing an article in the "jobbole" index.

    ``title``, ``tags`` and ``content`` are full-text fields analysed with the
    ``ik_max_word`` analyzer; the URL and image fields are exact-match
    keywords; the three ``*_nums`` counters are integers.
    """

    # Disabled: completion field that relied on the commented-out
    # ``ik_analyzer``.
    # suggest = Completion(analyzer=ik_analyzer)
    title = Text(analyzer="ik_max_word")
    create_date = Date()
    url = Keyword()
    url_object_id = Keyword()
    front_image_url = Keyword()
    front_image_path = Keyword()
    praise_nums = Integer()
    comment_nums = Integer()
    fav_nums = Integer()
    tags = Text(analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word")

    class Index:
        # ``Document`` takes its target index from ``Index.name``; the legacy
        # ``Meta.index`` attribute below is not consulted for that, so without
        # this class the document has no index to be created in or saved to.
        name = "jobbole"

    class Meta:
        # Kept unchanged for backward compatibility with code that still
        # reads the legacy attributes.
        index = "jobbole"
        doc_type = "article"


if __name__ == "__main__":
    # NOTE(review): the body of this guard is missing from the source; it
    # presumably initialised the mapping (e.g. ``ArticleType.init()``) --
    # confirm and restore. ``pass`` keeps the module importable meanwhile.
    pass





from ArticleSpider.models.es_types import ArticleType
from elasticsearch_dsl.connections import connections
# Connect to Elasticsearch; the client is used to generate search suggestions.
# ``create_connection``'s first positional parameter is the connection alias,
# so the Document class must not be passed there; register the client under
# the default alias instead.
es = connections.create_connection(hosts=["localhost"])


def gen_suggests(index, info_tuple):
    """Build weighted search-suggestion entries from analysed text.

    Each non-empty text is tokenised by Elasticsearch with the
    ``ik_max_word`` analyzer. Tokens longer than one character that have not
    already been emitted for an earlier (text, weight) pair are collected
    into one entry carrying that pair's weight.

    Args:
        index: Accepted for interface compatibility but currently unused;
            analysis always runs against the "jobbole" index.
            NOTE(review): decide whether callers should pass an index name
            here and have it honoured.
        info_tuple: Iterable of ``(text, weight)`` pairs, processed in order.
            Pairs whose text is falsy (``""``, ``None``) are skipped.

    Returns:
        A list of ``{"input": [token, ...], "weight": weight}`` dicts, one per
        pair that contributed at least one new token. ``input`` is sorted so
        the result is deterministic.
    """
    used_words = set()
    suggests = []
    for text, weight in info_tuple:
        if not text:
            # Nothing to analyse; also avoids a pointless round-trip to ES.
            continue
        words = es.indices.analyze(
            index="jobbole",
            body={"analyzer": "ik_max_word", "text": "{0}".format(text)},
        )
        analyzed_words = {
            r["token"] for r in words["tokens"] if len(r["token"]) > 1
        }
        new_words = analyzed_words - used_words
        if new_words:
            # Remember what has been emitted so a later pair does not repeat
            # the same token.
            used_words |= new_words
            suggests.append({"input": sorted(new_words), "weight": weight})
    return suggests


 # Store on article.suggest the entries built from the title (weight 10) and
 # the tags (weight 7).
 # NOTE(review): this line is a fragment -- `article` is not defined in the
 # visible source, and the ArticleType class is passed as the `index`
 # argument, which gen_suggests does not use. Confirm against the full file.
 article.suggest = gen_suggests(ArticleType, ((article.title, 10), (article.tags, 7)))



