测试代码1:

  def test(self):
      """Post one sample document to the Solr JSON update handler and print the reply.

      The key "*字段名*" is a placeholder meaning "field name"; replace it with
      a real field of the target schema before using this against a live core.
      """
      endpoint = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
      sample_doc = {"id": "", "*字段名*": u"我是一个大好人"}
      response = requests.post(
          endpoint,
          json={"add": {"doc": sample_doc}},
          params={"boost": 1.0, "overwrite": "true", "commitWithin": 1000},
          headers={"Content-Type": "application/json"},
      )
      print(response.text)
  8.  
  def Index_data(self):
      """Index two hard-coded sample documents through pysolr and print the result."""
      sample_docs = [
          {"id": "doc_1", "title": "A test document"},
          {"id": "doc_2", "title": "The Banana: Tasty or Dangerous?"},
      ]
      client = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
      print(client.add(sample_docs))

测试代码2:

实际数据:

  def Index_Data_FromCSV(self, csvfile):
      """Read rows from a CSV file and post each one to Solr as its own document.

      The first row is treated as the header and is not indexed. Text columns
      are decoded from GB2312 (the rows are expected to be byte strings, as
      produced by the Python 2 ``csv`` module).

      :param csvfile: path of the CSV file, including the directory
      :return: None; the Solr response of every request is printed
      """
      # ReadCSV may return None (e.g. for a missing file); treat that as "no rows".
      rows = CSVOP.ReadCSV(csvfile) or []
      params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
      url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
      headers = {"Content-Type": "application/json"}
      for index, item in enumerate(rows):
          if index > 0:  # row 0 is the header
              try:
                  # A fresh dict per row; item[2] (date) is deliberately not indexed.
                  doc = {
                      'title': item[0].decode('GB2312'),
                      'link': item[1],
                      'source': item[3].decode('GB2312'),
                      'keyword': item[4].decode('GB2312'),
                  }
                  data = {"add": {"doc": doc}}
                  r = requests.post(url, json=data, params=params, headers=headers)
                  print(r.text)
              except Exception as e:
                  # Best effort: report the bad row and continue with the next one.
                  # Print the exception itself; e.message is empty for
                  # UnicodeDecodeError and does not exist on Python 3.
                  print(e)

          print(index)
  29.  
  30. #pysolr客户端代码
  def pysolr_Index_Data_FromCSV(self, csvfile, url='http://127.0.0.1:8983/solr/mycore/'):
      """Read rows from a CSV file and index them into Solr in one pysolr call.

      The first row is treated as the header and is not indexed. Rows that
      cannot be converted (short row, undecodable text) are reported and
      skipped; all remaining rows are sent together.

      :param csvfile: path of the CSV file, including the directory
      :param url: base URL of the Solr core
      :return: None; the result of ``solr.add`` is printed
      """
      # ReadCSV may return None (e.g. for a missing file); treat that as "no rows".
      rows = CSVOP.ReadCSV(csvfile) or []
      listdocs = []
      for item in rows[1:]:  # skip the header row
          try:
              # item[2] (date) is deliberately not indexed.
              listdocs.append({
                  'title': item[0].decode('GB2312'),
                  'link': item[1],
                  'source': item[3].decode('GB2312'),
                  'keyword': item[4].decode('GB2312'),
              })
          except Exception as e:
              # Print the exception itself; e.message is empty for
              # UnicodeDecodeError and does not exist on Python 3.
              print(e)
      solr = pysolr.Solr(url, timeout=10)
      result = solr.add(listdocs)
      print(result)

查询代码:

  def search_data(self, message='视频'):
      """Query Solr over HTTP for titles matching *message* and print the hit count.

      :param message: term to look for in the ``title`` field
      :return: None; the raw JSON reply and "<message>:<numFound>" are printed
      """
      url = 'http://127.0.0.1:8983/solr/mycore/select'
      # Pass the query through ``params`` so requests URL-encodes it; a term
      # containing '&', '#' or spaces would otherwise corrupt the query string.
      # The backslash before the term is kept so the query sent to Solr is
      # unchanged, but it is now written as an explicit escape.
      params = {
          'q': 'title:"\\%s"' % message,
          'wt': 'json',
          'indent': 'true',
      }
      r = requests.get(url, params=params, verify=False)
      print(r.text)
      num_found = r.json()['response']['numFound']
      print(message + ":" + str(num_found))
  7.  
  8. #pysolr客户端
  def search_data(self, where='视频', url='http://127.0.0.1:8983/solr/mycore/'):
      """Search the ``title`` field through pysolr and print the matching documents.

      :param where: term to look for in the ``title`` field
      :param url: base URL of the Solr core
      :return: None; keyword, title, source and link of each hit are printed
      """
      solr = pysolr.Solr(url, timeout=10)
      # start=10 skips the first ten hits; at most 30 documents are returned,
      # restricted to the four fields printed below.
      options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
      # Use the caller's term; the original hard-coded the query and ignored `where`.
      result = solr.search('title:%s' % where, **options)

      for item in result:
          print('keyword: %s' % item['keyword'])
          print('title: %s' % item['title'])
          print('source: %s' % item['source'])
          print('link: %s' % item['link'])
          print(' ')

输出结果:

  1. {
  2. "responseHeader":{
  3. "status":0,
  4. "QTime":0,
  5. "params":{
  6. "q":"title:\"\\视频\"",
  7. "indent":"true",
  8. "wt":"json"}},
  9. "response":{"numFound":123,"start":0,"docs":[
  10. {
  11. "source":"中彩网",
  12. "link":"http://www.zhcw.com/video/kaijiangshipin-3D/11981126.shtml",
  13. "keyword":"视频",
  14. "title":"福彩3D开奖 视频 -中彩 视频",
  15. "id":"2f0a9d21-3771-4efa-a0cc-e0484cc97993",
  16. "_version_":1584214368617234432},
  17. {
  18. "source":"新浪视频",
  19. "link":"http://video.sina.com.cn/news/spj/topvideoes20170707/?opsubject_id=top1",
  20. "keyword":"视频",
  21. "title":"今日热门 视频 汇总20170707",
  22. "id":"c8aae0af-01e9-491f-b999-24b97004a4ba",
  23. "_version_":1584214367507841024},
  24. {
  25. "source":"网易新闻",
  26. "link":"http://news.163.com/17/0707/13/COOCNUIE00018AOR.html",
  27. "keyword":"视频",
  28. "title":"网传\"兰桂坊附近不雅 视频 \" 警方:传播 视频 将追责",
  29. "id":"353de48d-ede7-481b-89d3-bc20ab4b3884",
  30. "_version_":1584214367821365248},
  31. {
  32. "source":"凤凰视频",
  33. "link":"http://v.ifeng.com/video_7480871.shtml",
  34. "keyword":"视频",
  35. "title":"创想动画片:花粉过敏症的痛谁懂-凤凰 视频 -最具媒体品质的综合 视频 ...",
  36. "id":"dc5f19c4-180f-4004-a0db-4499d875a60f",
  37. "_version_":1584214366819975168},
  38. {
  39. "source":"凤凰视频",
  40. "link":"http://v.ifeng.com/video_7805858.shtml",
  41. "keyword":"视频",
  42. "title":"节气说:小暑时节就该这样养生-凤凰 视频 -最具媒体品质的综合 视频 门...",
  43. "id":"5e9eb7a7-48b8-4e41-9514-7712ae619d9a",
  44. "_version_":1584214367516229632},
  45. {
  46. "source":"凤凰视频",
  47. "link":"http://v.ifeng.com/video_7483506.shtml",
  48. "keyword":"视频",
  49. "title":"听导演讲《神奇女侠》的故事 -凤凰 视频 -最具媒体品质的综合 视频 门户-...",
  50. "id":"6b1482f1-c0c9-479f-bef7-7de324fb9372",
  51. "_version_":1584214367647301632},
  52. {
  53. "source":"汽车杂志",
  54. "link":"http://www.jiemian.com/article/1445267.html",
  55. "keyword":"视频",
  56. "title":"【视频】欧宝最近找了一堆穿睡衣的辣妈拍了一段超牛的视频",
  57. "id":"1d327555-a6f3-4513-9a21-43d59418ab82",
  58. "_version_":1584214368157958144},
  59. {
  60. "source":"味觉大师",
  61. "link":"http://www.jiemian.com/article/1453545.html",
  62. "keyword":"视频",
  63. "title":"【视频】大董没有肉的肉味烧茄子",
  64. "id":"7d777870-93cb-4c18-a32b-734af8f133f1",
  65. "_version_":1584213891451191296},
  66. {
  67. "source":"新浪汽车",
  68. "link":"http://auto.sina.com.cn/video/zz/2017-07-07/detail-ifyhwehx5311889.shtml",
  69. "keyword":"视频",
  70. "title":"视频 :两大神车pk!高尔夫思域怎么选?",
  71. "id":"3a50b303-6b54-4da3-aee1-a61c678c752d",
  72. "_version_":1584213892090822656},
  73. {
  74. "source":"味觉大师",
  75. "link":"http://www.jiemian.com/article/1453545.html",
  76. "keyword":"视频",
  77. "title":"【视频】大董没有肉的肉味烧茄子",
  78. "id":"01da8e11-77bc-4c31-ba3a-ba668e846d9d",
  79. "_version_":1584214366191878144}]
  80. }}

完整代码:

  1. #-*- coding: UTF-8 -*-
  2. import csv
  3. import os
  4. import codecs
  5.  
  def ReadCSV(filename):
      """Read a CSV file and return all of its rows.

      :param filename: path of the CSV file
      :return: list of rows (each a list of strings, header row included);
               an empty list when the file does not exist, so callers can
               always iterate over the result
      """
      if not os.path.exists(filename):
          return []
      with open(filename, 'r') as f:
          return list(csv.reader(f))
  14.  
  15. #################################################
  16. #coding=utf-8
  17. import json
  18. import requests
  19.  
  20. import os
  21. import time
  22. from os import walk
  23. import CSVOP
  24. from datetime import datetime
  25. import pysolr
  26. import math
  27.  
  class SolrClientObj:
      """Demo client showing how to index, search and delete Solr documents.

      Some methods talk to the Solr HTTP API directly through ``requests``,
      the others go through the ``pysolr`` client. All of them target a local
      core at http://127.0.0.1:8983/solr/mycore/ unless a URL is passed in.
      """

      def _doc_from_row(self, item):
          """Build a Solr document dict from one CSV row (GB2312 byte strings).

          item[2] (date) is deliberately not indexed. Raises IndexError for
          short rows and UnicodeDecodeError for undecodable text.
          """
          return {
              'title': item[0].decode('GB2312'),
              'link': item[1],
              'source': item[3].decode('GB2312'),
              'keyword': item[4].decode('GB2312'),
          }

      def test(self):
          """Post one sample document to the JSON update handler and print the reply.

          The key "*字段名*" is a placeholder meaning "field name"; replace it
          with a real field of the target schema.
          """
          data = {"add": {"doc": {"id": "", "*字段名*": u"我是一个大好人"}}}
          params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
          url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
          headers = {"Content-Type": "application/json"}
          r = requests.post(url, json=data, params=params, headers=headers)
          print(r.text)

      def pysolr_Index_Data_FromCSV(self, csvfile, url='http://127.0.0.1:8983/solr/mycore/'):
          """Read rows from a CSV file and index them into Solr in one pysolr call.

          The first row is treated as the header. Rows that cannot be converted
          are reported and skipped.

          :param csvfile: path of the CSV file, including the directory
          :param url: base URL of the Solr core
          :return: None; the result of ``solr.add`` is printed
          """
          # ReadCSV may return None (e.g. for a missing file); treat that as "no rows".
          rows = CSVOP.ReadCSV(csvfile) or []
          listdocs = []
          for item in rows[1:]:  # skip the header row
              try:
                  listdocs.append(self._doc_from_row(item))
              except Exception as e:
                  # Print the exception itself; e.message is empty for
                  # UnicodeDecodeError and does not exist on Python 3.
                  print(e)
          solr = pysolr.Solr(url, timeout=10)
          result = solr.add(listdocs)
          print(result)

      def Index_Data_FromCSV(self, csvfile):
          """Read rows from a CSV file and post each one to Solr as its own document.

          The first row is treated as the header and is not indexed.

          :param csvfile: path of the CSV file, including the directory
          :return: None; the Solr response of every request is printed
          """
          # ReadCSV may return None (e.g. for a missing file); treat that as "no rows".
          rows = CSVOP.ReadCSV(csvfile) or []
          params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
          url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
          headers = {"Content-Type": "application/json"}
          for index, item in enumerate(rows):
              if index > 0:  # row 0 is the header
                  try:
                      data = {"add": {"doc": self._doc_from_row(item)}}
                      r = requests.post(url, json=data, params=params, headers=headers)
                      print(r.text)
                  except Exception as e:
                      # Best effort: report the bad row and continue.
                      print(e)

              print(index)

      def Index_data(self):
          """Index two hard-coded sample documents through pysolr and print the result."""
          solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)

          result = solr.add([
              {
                  "id": "doc_1",
                  "title": "A test document",
              },
              {
                  "id": "doc_2",
                  "title": "The Banana: Tasty or Dangerous?",
              },
          ])
          print(result)

      def search_data(self, where='视频', url='http://127.0.0.1:8983/solr/mycore/'):
          """Search the ``title`` field through pysolr and print the matching documents.

          :param where: term to look for in the ``title`` field
          :param url: base URL of the Solr core
          :return: None; keyword, title, source and link of each hit are printed
          """
          solr = pysolr.Solr(url, timeout=10)
          # start=10 skips the first ten hits; at most 30 documents are
          # returned, restricted to the four fields printed below.
          options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
          # Use the caller's term; the original hard-coded the query and ignored `where`.
          result = solr.search('title:%s' % where, **options)

          for item in result:
              print('keyword: %s' % item['keyword'])
              print('title: %s' % item['title'])
              print('source: %s' % item['source'])
              print('link: %s' % item['link'])
              print(' ')

      def delete_index_data(self, where, url='http://127.0.0.1:8983/solr/mycore/'):
          """Delete every document matching a Solr query and print the result.

          :param where: delete query, e.g. '*:*' to remove all documents
          :param url: base URL of the Solr core
          :return: None; the result of ``solr.delete`` is printed
          """
          solr = pysolr.Solr(url, timeout=10)
          # To delete by document id instead, use solr.delete(id=where).
          result = solr.delete(q=where)
          print(result)
  134.  
  # Demo driver: create the client and run one operation; enable the others as needed.
  obj = SolrClientObj()
  # obj.delete_index_data('*:*')  # delete every document in the index
  # obj.Index_data()
  # obj.search_data()
  # obj.delete_index_data('doc_1')
  obj.search_data('视频')
  # csvfile = 'D:/work/Solr/other/exportExcels/2017-07-07_info.csv'
  # obj.pysolr_Index_Data_FromCSV(csvfile)

python 操作solr索引数据的更多相关文章

  1. 使用solrj操作solr索引库

    (solrj)初次使用solr的开发人员总是很郁闷,不知道如何去操作solr索引库,以为只能用<五分钟solr4.5教程(搭建.运行)>中讲到的用xml文件的形式提交数据到索引库,其实没有 ...

  2. 使用solrj操作solr索引库,solr是lucene服务器

    客户端开发 Solrj 客户端开发 Solrj Solr是搭建好的lucene服务器 当然不可能完全满足一般的业务需求 可能 要针对各种的架构和业务调整 这里就需要用到Solrj了 Solrj是Sol ...

  3. [solr] - 索引数据删除

    删除solr索引数据,使用XML有两种写法: 1) <delete><id>1</id></delete> <commit/> 2) < ...

  4. Solr(六)Solr索引数据存放到HDFS下

    Solr索引数据存放到HDFS下 一 新建solr core hdfs 方法:http://www.cnblogs.com/Matchman/p/7287385.html 二 修改solrconfig ...

  5. [转][solr] - 索引数据删除

    删除solr索引数据,使用XML有两种写法: 1) <delete><id>1</id></delete> <commit/> 2) < ...

  6. Solr索引数据

    一般来说,索引是系统地排列文档或(其他实体).索引使用户能够在文档中快速地查找信息. 索引集合,解析和存储文档. 索引是为了在查找所需文档时提高搜索查询的速度和性能. 在Apache Solr中的索引 ...

  7. (二) solr 索引数据导入:xml格式

    xml 是最常用的数据索引格式,不仅可以索引数据,还可以对文档与字段进行增强,从而改变它们的重要程度. 下面就是具体的实现方式: schema.xml的字段配置部分如下: <field name ...

  8. Java solr 索引数据增删改查

    具体代码如下: import java.io.IOException; import java.util.*; import org.apache.solr.client.solrj.SolrClie ...

  9. 企业级搜索引擎Solr 第三章 索引数据(Indexing Data)[1]

    转载:http://quweiprotoss.wap.blog.163.com/ Push data to Solr or have Solr pull it 尽管一个应用通过HTTP方式与Solr通 ...

随机推荐

  1. python3 迭代器,生成器

    一 .什么是迭代 1. 重复 2.下次重复一定是基于上一次的结果而来 while True: cmd=input(':') print(cmd) l=[1,2,3,4] count=0 while c ...

  2. camera驱动框架分析(下)

    sensor的驱动 v4l2_i2c_new_subdev_board先用client = i2c_new_device(adapter, info);创建info对应的i2c_client对象(代表 ...

  3. shell浅谈之三for、while、until循环【转】

    转自:http://blog.csdn.net/taiyang1987912/article/details/38929069 版权声明:本文为博主原创文章,未经博主允许不得转载.   目录(?)[- ...

  4. AOP相关

    静态代理.动态代理与AOP: 简单易懂:http://blog.csdn.net/hejingyuan6/article/details/36203505 补充:http://layznet.itey ...

  5. CHM文件出现的常见错误及解决办法

    一.chm文件内容乱码: 解决方式:将IE浏览器的编码换成自动选择,或者选择UTF-8和GBK看那个能使用.因为chm文件默认的是IE浏览器的内核. 二.easy chm软件编译文件时左侧索引乱码,源 ...

  6. Mac-安装itellij idea

    1.安装解压 Unzip and copy JetbrainsCrack.jar to /Applications/CLion.app/Contents/bin/JetbrainsCrack.jar ...

  7. Docker Ubuntu容器安装ping(zz)

    更新apt-get的软件包信息,然后再安装 sudo docker run ubuntu apt-get update sudo docker run ubuntu apt-get install i ...

  8. opencv inrange 和 mix

    opencv inrange: http://blog.csdn.net/xiaoyufei117122/article/details/53572904 http://blog.csdn.net/w ...

  9. 【互动问答分享】第7期决胜云计算大数据时代Spark亚太研究院公益大讲堂

    “决胜云计算大数据时代” Spark亚太研究院100期公益大讲堂 [第7期互动问答分享] Q1:Spark中的RDD到底是什么? RDD是Spark的核心抽象,可以把RDD看做“分布式函数编程语言”. ...

  10. centos6.5 phpmyadmin 您应升级到 MySQL 5.5.0 或更高版本

    看到自己当初写的,并没有直接的解决问题,而是退而求其次,安装低版本的mysql5.1,然后安装对应版本的phpmyadmin 4.0.10.5 UnicodeDecodeError: 'ascii' ...