json.dumps(i['bd_res'], ensure_ascii=False)
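The ensure_ascii=False flag in the title is what keeps Chinese text readable when the cached Baidu responses are dumped back to JSON. A minimal illustration, with a made-up bd_res payload:

    import json
    i = {'bd_res': {'name': '碧海富通城'}}
    print(json.dumps(i['bd_res']))                      # non-ASCII escaped: {"name": "\u78a7\u6d77\u5bcc\u901a\u57ce"}
    print(json.dumps(i['bd_res'], ensure_ascii=False))  # Chinese kept readable: {"name": "碧海富通城"}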

import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading
import math
import csv
import pprint
import json
from openpyxl import Workbook

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)


def gen_file_data(fodir, fname, sheet_index=0):
    # read an .xlsx sheet or a .csv file and return its rows as lists of strings
    if fname.find('.xlsx') > -1:
        fname_open = '%s\\%s' % (fodir, fname)
        book = xlrd.open_workbook(fname_open, on_demand=True)
        sheet = book.sheet_by_index(sheet_index)
        data = [[str(c.value) for c in sheet.row(i)] for i in range(sheet.nrows)]
        book.release_resources()
        del book
    elif fname.find('.csv') > -1:
        data = []
        fname_open = '%s\\%s' % (fodir, fname)
        with open(fname_open, 'r', encoding='utf-8') as csvfile:
            spamreader = csv.reader(csvfile, delimiter=',')
            for row in spamreader:
                data.append(row)
    return data
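# A quick usage sketch (the call below is hypothetical): every cell comes back
# as a string, whether the source is .xlsx or .csv, so rows unpack uniformly:
# rows = gen_file_data(curPath, 'some_export.csv')
# header, body = rows[0], rows[1:]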
# 3 target cities, 9 place types
request_dic = {}
target_type_list = ['北京市', '上海市', '广州市']
target_type_except_list = ['火车站', '高铁站', '汽车站', '飞机场', '小学', '幼儿园', '中学', '综合医院', '商场']
# ['4s店','餐饮','家电','酒店','咖啡馆','售楼处','专科医院']
# ['住宅小区','写字楼']

fname_source = 'jfinder_public_jmtool_old_data.csv'
data_file = gen_file_data(curPath, fname_source)


def replace_illegal_tag(str_):
    l = [' ', '\n', '\t']
    for i in l:
        str_ = str_.replace(i, '')
    return str_


# 碧海富通城三期(3栋) ok
# =碧海富通城-三期(3栋) ok
replace_to_empty_l = [' ', '|', '\t', '\n', '/', '?', '?', '·', '.']


def gen_bd_query_origin_name(name_):
    # strip noise characters and normalize full-width brackets before querying Baidu
    for i in replace_to_empty_l:
        name_ = name_.replace(i, '')
    return name_.replace('(', '(').replace(')', ')').replace('?', '').replace('?', '')


for l in data_file:
    # db_from, db_id, db_area_code, db_name, db_type_, db_city, db_district, db_address, db_street, db_uid, db_submit_time = l
    # db_from, id, area_code, name, type_, city, district, address, street, uid, submit_time = l
    dbid, area_code, uid, name_, type_en, city, district, address, street, db_from, submit_time, type_ = l
    if db_from == 'db_from':  # skip the header row
        continue
    request_name = gen_bd_query_origin_name(name_)
    if city not in request_dic:
        request_dic[city] = {}
    if district not in request_dic[city]:
        request_dic[city][district] = {}
        request_dic[city][district]['request_name_list'] = []
        request_dic[city][district]['request_uid_list'] = []
        request_dic[city][district]['file_row_list'] = []
    if request_name not in request_dic[city][district]['request_name_list']:
        request_dic[city][district]['request_name_list'].append(request_name)
    uid = uid.replace(' ', '')
    if len(uid) > 0 and uid not in request_dic[city][district]['request_uid_list']:
        request_dic[city][district]['request_uid_list'].append(uid)
    request_dic[city][district]['file_row_list'].append(l)
del data_file


def get_search_word_res_first_uid(jstr):
    # first uid from a cached suggestion response
    uid, dic_ = '', json.loads(jstr)
    if 'result' in dic_:
        if len(dic_['result']) > 0:
            uid = dic_['result'][0]['uid']
    return uid


def get_uid_res_first_uid(jstr):
    # same logic as get_search_word_res_first_uid
    uid, dic_ = '', json.loads(jstr)
    if 'result' in dic_:
        if len(dic_['result']) > 0:
            uid = dic_['result'][0]['uid']
    return uid


def chk_uid_res_5_field(jstr):
    # True only when result carries name, address and a lat/lng location
    r, d = False, json.loads(jstr)
    if 'result' in d:
        dd = d['result']
        if 'location' in dd:
            if 'lng' in dd['location'] and 'lat' in dd['location']:
                if 'name' in dd:
                    if 'address' in dd:
                        r = True
    return r
fname_source = 'updating-百度search_word_ret170909152547.xlsx'
bd_search_word_dic, data_file = {}, gen_file_data(curPath, fname_source)
for l in data_file:
    search_word, jstr = l
    if search_word == 'search_word':  # header row
        continue
    bd_search_word_dic[search_word] = jstr
del data_file

fname_source = 'updating-百度search_uid_ret170909160924.xlsx'
bd_city_district_uid_dic, data_file = {}, gen_file_data(curPath, fname_source)
for l in data_file:
    city_district_uid, jstr = l
    if city_district_uid == 'search_uid':
        continue
    bd_city_district_uid_dic[city_district_uid] = jstr
del data_file

res_dic = {}
for city in request_dic:
    for district in request_dic[city]:
        for l in request_dic[city][district]['file_row_list']:
            # the unpack reuses the loop variables city/district; rows were grouped
            # under exactly these keys, so the values match
            dbid, area_code, uid, name_, type_en, city, district, address, street, db_from, submit_time, type_ = l
            request_name = gen_bd_query_origin_name(name_)
            task_uid, task_uid_name, task_uid_addr, task_uid_lat, task_uid_lng = '', '', '', '', ''
            if len(uid.replace(' ', '')) > 0:
                task_uid = uid
            else:
                if request_name in bd_search_word_dic:
                    jstr = bd_search_word_dic[request_name]
                    task_uid = get_search_word_res_first_uid(jstr)
            if len(task_uid) > 0:
                city_district_uid = '%s%s%s' % (city, district, task_uid)
                if city_district_uid in bd_city_district_uid_dic:
                    jstr = bd_city_district_uid_dic[city_district_uid]
                    if chk_uid_res_5_field(jstr) is True:
                        d = json.loads(jstr)['result']
                        task_uid_name, task_uid_addr = d['name'], d['address']
                        task_uid_lat, task_uid_lng = d['location']['lat'], d['location']['lng']
            ll = (dbid, area_code, uid, name_, type_en, city, district, address, street, db_from,
                  submit_time, type_, task_uid, task_uid_name, task_uid_addr, task_uid_lat, task_uid_lng)
            if city not in res_dic:
                res_dic[city] = {}
            if district not in res_dic[city]:
                res_dic[city][district] = []
            res_dic[city][district].append(ll)

wb = Workbook()
worksheet = wb.active
# one header column per field of ll; the original string listed task_uid twice
file_title_str = 'dbid,area_code,uid,name_,type_en,city,district,address,street,db_from,submit_time,type_,task_uid,task_uid_name,task_uid_addr,task_uid_lat,task_uid_lng'
file_title_l = file_title_str.split(',')
worksheet.append(file_title_l)
for city in res_dic:
    for district in res_dic[city]:
        for l in res_dic[city][district]:
            worksheet.append(l)
localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
fname = '9场景添加task_uid'
file_name = '%s\\%s%s' % (curPath, fname, localtime_)
file_name_save = '%s%s' % (file_name, '.xlsx')
wb.save(file_name_save)
wb.close()
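The three helper parsers above touch only a handful of fields in the cached Baidu responses. A minimal sketch of the shapes they assume (values invented for illustration; the authoritative schema is the Baidu Place API's):

    detail_jstr = '{"result": {"uid": "x1", "name": "碧海富通城三期", "address": "某区某路1号", "location": {"lat": 23.1, "lng": 113.3}}}'
    chk_uid_res_5_field(detail_jstr)             # True: name, address, lat and lng all present
    suggest_jstr = '{"result": [{"uid": "x1", "name": "碧海富通城"}]}'
    get_search_word_res_first_uid(suggest_jstr)  # 'x1'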

  

import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading
import math
import csv

curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST, next_day_tag = 1900, '天配额超限,限制访问', 'DB_KEY_EXHAUST', '000003'

db = 'py_bdspider_status.db'
db = '%s\\%s' % (curPath, db)


def db_chk_one_exist(key):
    conn = sqlite3.connect(db)
    c = conn.cursor()
    sql = 'SELECT key FROM baidu_map_key_used WHERE key="%s"' % (key)
    r = 0
    res = c.execute(sql).fetchone()
    if res is not None:
        r = 1
    conn.close()  # fix: the original `conn.close` referenced the method without calling it
    return r


# def db_init_key_table():
#     conn = sqlite3.connect(db)
#     c = conn.cursor()
#     k_file = '%s\\%s' % (curPath, 'bdmap_key.txt')
#     with open(k_file, 'r', encoding='utf-8') as pf:
#         for i in pf:
#             if len(i) < 4:
#                 continue
#             author, key = i.replace(' ', '').replace('\n', '').replace('\t', '').split(';')
#             r = db_chk_one_exist(key)
#             if r == 0:
#                 localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
#                 sql = 'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) VALUES ("%s","%s","%s",%s) ' % (
#                     author, key, localtime_, 0)
#                 c.execute(sql)
#                 conn.commit()
#     conn.close()
#
#
# db_init_key_table()


def db_recovery_bdkeynum():
    # at 00:00:03 local time, reset each key's daily quota counter
    if time.strftime("%H%M%S", time.localtime()) == next_day_tag:
        conn = sqlite3.connect(db)
        c = conn.cursor()
        localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
        sql = 'UPDATE baidu_map_key_used SET today_used = 0 ,update_time=%s ' % (localtime_)
        c.execute(sql)
        conn.commit()
        conn.close()
    return


def db_get_one_effective():
    # pick the least-used key that still has quota; DB_KEY_EXHAUST when none remain
    db_recovery_bdkeynum()
    conn = sqlite3.connect(db)
    c = conn.cursor()
    sql = 'SELECT key FROM baidu_map_key_used WHERE today_used<=%s ORDER BY today_used ASC' % (MAX_USED_TIMES)
    res, r = c.execute(sql).fetchone(), ''
    if res is None:
        r = DB_KEY_EXHAUST
    else:
        r = res[0]
    conn.close()
    return r


def db_update_one_today_used(key):
    conn = sqlite3.connect(db)
    c = conn.cursor()
    localtime_ = time.strftime("%y%m%d%H%M%S", time.localtime())
    sql = 'UPDATE baidu_map_key_used SET today_used = today_used+1 ,update_time=%s WHERE key="%s" ' % (
        localtime_, key)
    c.execute(sql)
    conn.commit()
    conn.close()
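# The queries above assume a baidu_map_key_used table already exists in
# py_bdspider_status.db; its DDL never appears in the post, so the bootstrap
# below is an inferred sketch (columns taken from the SELECT/UPDATE/INSERT
# statements above):
# conn = sqlite3.connect(db)
# conn.execute('CREATE TABLE IF NOT EXISTS baidu_map_key_used '
#              '(author TEXT, key TEXT PRIMARY KEY, update_time TEXT, today_used INTEGER DEFAULT 0)')
# conn.commit()
# conn.close()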
dir_, dir_exception, requested_file_list = 'baidu_map_uid', 'baidu_map_uid_exception', []
requested_file_dir_str = '%s\\%s\\' % (curPath, dir_)
requested_file_dir_exception_str = '%s\\%s\\' % (curPath, dir_exception)
requested_file_dir = os.listdir(requested_file_dir_str)


def gen_requested_file_list(file_postfix='.html'):
    # remember which inputs already have a result file so re-runs can skip them
    filepath = '%s\\%s' % (curPath, dir_)
    pathDir = os.listdir(filepath)
    for allDir in pathDir:
        requested_file = allDir.split('&')[0].split(file_postfix)[0]
        if requested_file not in requested_file_list:
            requested_file_list.append(requested_file)


def gen_file_data(fodir, fname, sheet_index=0):
    # read an .xlsx sheet or a .csv file and return its rows as lists of strings
    if fname.find('.xlsx') > -1:
        fname_open = '%s\\%s' % (fodir, fname)
        book = xlrd.open_workbook(fname_open, on_demand=True)
        sheet = book.sheet_by_index(sheet_index)
        data = [[str(c.value) for c in sheet.row(i)] for i in range(sheet.nrows)]
        book.release_resources()
        del book
    elif fname.find('.csv') > -1:
        data = []
        fname_open = '%s\\%s' % (fodir, fname)
        with open(fname_open, 'r', encoding='utf-8') as csvfile:
            spamreader = csv.reader(csvfile, delimiter=',')
            for row in spamreader:
                data.append(row)
    return data


# 3 target cities, 9 place types
request_dic = {}
target_type_list = ['北京市', '上海市', '广州市']
target_type_except_list = ['火车站', '高铁站', '汽车站', '飞机场', '小学', '幼儿园', '中学', '综合医院', '商场']
# ['4s店','餐饮','家电','酒店','咖啡馆','售楼处','专科医院']
# ['住宅小区','写字楼']

# file_postfix_l = ['.html', '.txt']
# for i in file_postfix_l:
#     gen_requested_file_list(i)

fname_source = 'jfinder_public_jmtool_old_data.csv'
data_file = gen_file_data(curPath, fname_source)


def replace_illegal_tag(str_):
    l = [' ', '\n', '\t']
    for i in l:
        str_ = str_.replace(i, '')
    return str_


# 碧海富通城三期(3栋) ok
# =碧海富通城-三期(3栋) ok
replace_to_empty_l = [' ', '|', '\t', '\n', '/', '?', '?', '·', '.']


def gen_bd_query_origin_name(name_):
    # strip noise characters and normalize full-width brackets before querying Baidu
    for i in replace_to_empty_l:
        name_ = name_.replace(i, '')
    return name_.replace('(', '(').replace(')', ')').replace('?', '').replace('?', '')


for l in data_file:
    # db_from, db_id, db_area_code, db_name, db_type_, db_city, db_district, db_address, db_street, db_uid, db_submit_time = l
    # db_from, id, area_code, name, type_, city, district, address, street, uid, submit_time = l
    dbid, area_code, uid, name_, type_en, city, district, address, street, db_from, submit_time, type_ = l
    if db_from == 'db_from':  # skip the header row
        continue
    request_name = gen_bd_query_origin_name(name_)
    input_ = '%s%s%s' % (city, district, request_name)
    if input_ in requested_file_list:
        print('requested', input_)
        continue
    if city not in request_dic:
        request_dic[city] = {}
    if district not in request_dic[city]:
        request_dic[city][district] = {}
        request_dic[city][district]['request_name_list'] = []
        request_dic[city][district]['request_uid_list'] = []
        request_dic[city][district]['file_row_list'] = []
    if request_name not in request_dic[city][district]['request_name_list']:
        request_dic[city][district]['request_name_list'].append(request_name)
    uid = uid.replace(' ', '')
    if len(uid) > 0 and uid not in request_dic[city][district]['request_uid_list']:
        request_dic[city][district]['request_uid_list'].append(uid)
    request_dic[city][district]['file_row_list'].append(l)
del data_file

base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
ex_l = ['Proxy Error', 'APP IP校验失败', 'APP不存在,AK有误请检查再重试', 'The requested URL could not be retrieved',
        'Address already in use', '天配额超限,限制访问', 'Parameter Invalid']
write_res_file_dir = '%s\\%s\\' % (curPath, dir_)


def write_res_file(str_, input_, ak, dir_=write_res_file_dir, file_postfix='.txt'):
    # drop responses that are really error pages; persist good ones to disk
    for ex in ex_l:
        if str_.find(ex) > -1:
            print('EXCEPTION-', ex, 'AK-', ak, 'STR-', str_)
            return
    fname = '%s%s%s' % (dir_, input_, file_postfix)
    with open(fname, 'w', encoding='utf-8') as ft:
        ft.write(str_)
    print('ok', threading.get_ident(), input_)
class MyThread(threading.Thread):
    def __init__(self, func, args, name):
        threading.Thread.__init__(self)
        self.name, self.func, self.args = name, func, args

    def run(self):
        self.func(self.args)


def fun_(city):
    # one worker thread per city: request every pending name in that city
    for district in request_dic[city]:
        for request_name in request_dic[city][district]['request_name_list']:
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                break
            else:
                url_ = base_url.replace('R-QUERY', request_name).replace('R-CITY', city).replace('R-AK', ak)
                print(url_)
                input_ = '%s%s%s' % (city, district, request_name)
                bd_res_json_str = requests.get(url_).text
                db_update_one_today_used(ak)
                write_res_file(bd_res_json_str, input_, ak)
                # try:
                #     # gen_requested_file_list()
                #     # gen_requested_file_list('.txt')
                #     # if input_ in requested_file_list:
                #     #     continue
                #     bd_res_json_str = requests.get(url_).text
                #     db_update_one_today_used(ak)
                #     write_res_file(bd_res_json_str, input_)
                # except Exception:
                #     bd_res_json_str = '请求百度-异常'
                #     write_res_file(bd_res_json_str, input_, requested_file_dir_exception_str)
                #     print(bd_res_json_str, input_)


try:
    start_loop, stop_loop = int(sys.argv[1]), int(sys.argv[2])
except Exception:
    start_loop, stop_loop = -1, 200


def main():
    threads_list, nloop = [], 0
    request_dic_city_l = sorted(request_dic, reverse=False)
    for city in request_dic_city_l:
        nloop += 1
        if nloop < start_loop or nloop > stop_loop:
            continue
        thread_instance = MyThread(fun_, (city), fun_.__name__)
        threads_list.append(thread_instance)
    for t in threads_list:
        t.daemon = False  # fix: the original `t.setDaemon = False` overwrote the method instead of calling it
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()
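main() slices the alphabetically sorted city list by position, so several processes can divide the crawl between them; for example (script name hypothetical):

    python bd_suggestion_spider.py 1 50    # only cities 1 through 50
    python bd_suggestion_spider.py         # no args: range -1..200, i.e. every city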

  
