每天迁移MySQL历史数据到历史库Python脚本
#!/usr/bin/env python
# coding:utf-8
# __author__ = 'Logan'
import datetime
import sys
import time

import MySQLdb
class ClassMigrate(object):
    """Migrate one day's rows from a source MySQL table to an archive table.

    The day that is migrated is ``today - time_interval`` days.  The workflow
    (see :meth:`do`) is: create the destination table from a template, copy
    the rows in primary-key ranges of ``step_size``, verify that count/min/max
    of the primary key match on both sides, and finally purge the rows from
    the source either by batched DELETE or by dropping the partition.

    All settings come from ``sys.argv`` (see ``self.usage``).  Table and
    column names are interpolated directly into SQL text because identifiers
    cannot be bound as parameters, so the command line must come from a
    trusted operator.

    Query helpers return ``False`` on failure; callers must test with
    ``is False`` because a legitimate result of ``0`` rows also compares
    equal to ``False``.
    """

    def _usage_error(self):
        """Print the usage text and terminate with exit status 1."""
        print(self.usage)
        sys.exit(1)

    @staticmethod
    def _parse_endpoint(spec):
        """Split ``host:port/db:table/user/password`` into its six parts.

        Returns ``(host, port, db, table, user, password)`` with ``port`` as
        an int.  Raises ``ValueError`` when the specification is malformed.
        The password is everything after the third ``/`` so it may itself
        contain ``/`` characters.
        """
        parts = spec.split('/', 3)
        if len(parts) != 4:
            # Do not echo the spec: it contains the password.
            raise ValueError("expected host:port/db:table/user/password")
        host, port = parts[0].split(':', 1)
        db, table = parts[1].split(':', 1)
        if not (host and db and table and parts[2]):
            raise ValueError("empty host, database, table or user")
        return host, int(port), db, table, parts[2], parts[3]

    @staticmethod
    def _elapsed(delta):
        """Format a ``datetime.timedelta`` as ``<seconds>.<microseconds>``.

        Microseconds are zero-padded to six digits so that 1 s + 5000 us is
        rendered as ``1.005000`` rather than ``1.5000``.
        """
        return "%d.%06d" % (delta.days * 86400 + delta.seconds,
                            delta.microseconds)

    def _date_window(self):
        """Return the SQL predicate selecting the day being migrated.

        The day is ``self.today`` minus ``self.interval`` days, from
        00:00:00 to 23:59:59, compared against ``self.date_col``.
        """
        day_expr = ("DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d')"
                    % (self.today, self.interval))
        return ("%s >= CONCAT(%s, ' 00:00:00') and %s <= CONCAT(%s, ' 23:59:59')"
                % (self.date_col, day_expr, self.date_col, day_expr))

    def _stats_sql(self, table_expr, where=None):
        """Build the count / min(pk) / max(pk) query used for checksums."""
        sql = ("select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s"
               % (self.pk, self.pk, table_expr))
        if where:
            sql += " where " + where
        return sql + " "

    def _batch_where(self, start):
        """Predicate for one primary-key batch ``[start, start + step_size)``."""
        return ("%s >= %d and %s< %d and %s "
                % (self.pk, start, self.pk, start + self.step_size,
                   self._date_window()))

    def _get_argv(self):
        """Parse ``sys.argv`` into instance attributes.

        Sets ``source_*`` / ``dest_*`` connection attributes,
        ``deleteStrategy`` ('delete' or 'drop'), ``pk``, ``date_col`` and
        ``interval`` (a non-negative int).  Prints the usage text and exits
        with status 1 on no arguments, an unknown option, a malformed value
        or a missing required option; exits with status 0 for ``--help``
        or ``-h``.
        """
        self.usage = """
usage():
python daily_migration.py --source=192.168.1.4:3306/db_name:tab_name/proxy/password \\
--dest=192.168.1.150:13301/db_name_archive:tab_name_201601/proxy/password \\
--delete_strategy=delete --primary_key=auto_id --date_col=ut --time_interval=180
"""
        args = sys.argv[1:]
        if not args:
            self._usage_error()
        if args[0] in ('--help', '-h'):
            print(self.usage)
            sys.exit()

        for arg in args:
            # Split on the first '=' only so values (e.g. passwords) may
            # contain '=' themselves.
            option, _, value = arg.partition('=')
            if option == '--source':
                try:
                    (self.source_host, self.source_port, self.source_db,
                     self.source_tab, self.source_user,
                     self.source_password) = self._parse_endpoint(value)
                except ValueError:
                    self._usage_error()
            elif option == '--dest':
                try:
                    (self.dest_host, self.dest_port, self.dest_db,
                     self.dest_tab, self.dest_user,
                     self.dest_password) = self._parse_endpoint(value)
                except ValueError:
                    self._usage_error()
            elif option == '--delete_strategy':
                self.deleteStrategy = value
                if self.deleteStrategy not in ('delete', 'drop'):
                    self._usage_error()
            elif option == '--primary_key':
                if not value:
                    self._usage_error()
                self.pk = value
            elif option == '--date_col':
                if not value:
                    self._usage_error()
                self.date_col = value
            elif option == '--time_interval':
                # The interval is spliced into SQL, so insist on a number.
                try:
                    self.interval = int(value)
                except ValueError:
                    self._usage_error()
                if self.interval < 0:
                    self._usage_error()
            else:
                self._usage_error()

        # Every option is mandatory; fail here instead of with an
        # AttributeError somewhere in the middle of the migration.
        required = ('source_host', 'dest_host', 'deleteStrategy',
                    'pk', 'date_col', 'interval')
        if not all(hasattr(self, name) for name in required):
            self._usage_error()

    def __init__(self):
        """Parse the command line, open both connections, reset state."""
        self._get_argv()
        ## --------------------------------------------------------------------
        self.sourcedb_conn_str = MySQLdb.connect(
            host=self.source_host, port=self.source_port,
            user=self.source_user, passwd=self.source_password,
            db=self.source_db, charset='utf8')
        self.sourcedb_conn_str.autocommit(True)
        self.destdb_conn_str = MySQLdb.connect(
            host=self.dest_host, port=self.dest_port,
            user=self.dest_user, passwd=self.dest_password,
            db=self.dest_db, charset='utf8')
        self.destdb_conn_str.autocommit(True)
        ## --------------------------------------------------------------------
        # Template table (looked up through the destination connection) that
        # the destination table is created LIKE.
        self.template_tab = self.source_tab + '_template'
        # Width of each primary-key range copied / deleted per statement.
        self.step_size = 20000
        ## --------------------------------------------------------------------
        self._migCompleteState = False
        self._deleteCompleteState = False
        ## --------------------------------------------------------------------
        self.source_cnt = ''
        self.source_min_id = ''
        self.source_max_id = ''
        self.source_checksum = ''
        # do() prints dest_cnt even when verification never ran.
        self.dest_cnt = ''
        # Misspelled name used by earlier versions; kept for compatibility.
        self.dest_cn = ''
        ## --------------------------------------------------------------------
        self.today = time.strftime("%Y-%m-%d")

    def sourcedb_query(self, sql, sql_type):
        """Run ``sql`` on the source connection.

        Returns the fetched rows for ``sql_type == 'select'``, the affected
        row count for ``'dml'`` and ``True`` for anything else.  On any error
        the message is printed and ``False`` is returned.
        """
        cr = None
        try:
            cr = self.sourcedb_conn_str.cursor()
            cr.execute(sql)
            if sql_type == 'select':
                return cr.fetchall()
            elif sql_type == 'dml':
                return self.sourcedb_conn_str.affected_rows()
            else:
                return True
        except Exception as e:
            print(str(e) + "<br>")
            return False
        finally:
            # cursor() itself may have failed, leaving nothing to close.
            if cr is not None:
                cr.close()

    def destdb_query(self, sql, sql_type, values=''):
        """Run ``sql`` on the destination connection.

        Returns the fetched rows for ``sql_type == 'select'``, the affected
        row count for ``'insertmany'`` (``values`` is the row sequence passed
        to ``executemany``) and ``True`` for anything else.  On any error the
        message is printed and ``False`` is returned.
        """
        cr = None
        try:
            cr = self.destdb_conn_str.cursor()
            if sql_type == 'select':
                cr.execute(sql)
                return cr.fetchall()
            elif sql_type == 'insertmany':
                cr.executemany(sql, values)
                return self.destdb_conn_str.affected_rows()
            else:
                cr.execute(sql)
                return True
        except Exception as e:
            print(str(e) + "<br>")
            return False
        finally:
            if cr is not None:
                cr.close()

    def create_table_from_source(self):
        """Create the destination table from the source's SHOW CREATE TABLE.

        Because the tab_name table's data has to be migrated into an
        ARCHIVE-engine table, this approach is not suitable for it; the
        method is reserved for other uses.  Returns ``True`` on success and
        ``False`` otherwise.
        """
        try:
            sql = "show create table %s;" % self.source_tab
            shown = self.sourcedb_query(sql, 'select')
            if not shown:
                return False
            # Only the leading keyword pair is rewritten.
            create_str = shown[0][1].replace(
                'CREATE TABLE', 'CREATE TABLE IF NOT EXISTS', 1)
            return bool(self.destdb_query(create_str, 'ddl'))
        except Exception as e:
            print(str(e) + "<br>")
            return False

    def create_table_from_template(self):
        """Create the destination table LIKE the template table.

        Returns ``True`` when the DDL succeeded, ``False`` otherwise.
        """
        try:
            sql = ('CREATE TABLE IF NOT EXISTS %s like %s;'
                   % (self.dest_tab, self.template_tab))
            return bool(self.destdb_query(sql, 'ddl'))
        except Exception as e:
            print(str(e) + "<br>")
            return False

    def get_min_max(self):
        """Collect row count, minimum id and maximum id of the source rows.

        Stores ``source_cnt``, ``source_min_id``, ``source_max_id`` and
        ``source_checksum``.  Returns ``False`` when the query fails or no
        row matches the date window, ``True`` otherwise.
        """
        try:
            print("\nStarting Migrate at -- %s <br>"
                  % (datetime.datetime.now().__str__()))
            sql = self._stats_sql(self.source_tab, self._date_window())
            q = self.sourcedb_query(sql, 'select')
            if not q:
                # The error (if any) was already printed by sourcedb_query.
                return False
            self.source_cnt = q[0][0]
            self.source_min_id = q[0][1]
            self.source_max_id = q[0][2]
            self.source_checksum = (str(self.source_cnt) + '_'
                                    + str(self.source_min_id) + '_'
                                    + str(self.source_max_id))
            if (self.source_cnt == 0 or self.source_min_id == -1
                    or self.source_max_id == -1):
                print("There is 0 record in source table been matched! <br>")
                return False
            return True
        except Exception as e:
            print(str(e) + "<br>")
            return False

    def migrate_2_destdb(self):
        """Copy the matching rows to the destination in primary-key batches.

        Returns ``False`` when there is nothing to migrate or an unexpected
        error occurs.  A failed batch is reported and the copy continues;
        :meth:`verify_total_cnt` is what decides whether the copy is usable.
        """
        try:
            if not self.get_min_max():
                return False

            cols = self.sourcedb_query("desc %s;" % self.source_tab, 'select')
            if not cols:
                return False
            # One "%s" placeholder per source column for the INSERT.
            placeholders = ",".join(["%s"] * len(cols))
            insert_sql = ("insert into " + self.dest_tab
                          + " values (%s)" % placeholders)

            k = self.source_min_id
            while k <= self.source_max_id:
                sql = ("select * from %s where %s"
                       % (self.source_tab, self._batch_where(k)))
                print("\n%s <br>" % sql)
                starttime = datetime.datetime.now()
                results = self.sourcedb_query(sql, 'select')
                if results is False:
                    rows = False
                elif not results:
                    # Gap in the id range: nothing to insert for this batch.
                    rows = 0
                else:
                    rows = self.destdb_query(insert_sql, 'insertmany', results)
                # 0 rows is a valid outcome, so compare by identity.
                if rows is False:
                    print("Insert failed!! <br>")
                else:
                    print("Inserted %s rows. <br>" % rows)
                print("Elapsed :"
                      + self._elapsed(datetime.datetime.now() - starttime)
                      + " seconds <br>")
                k += self.step_size
            print("\nInsert complete at -- %s <br>"
                  % (datetime.datetime.now().__str__()))
            return True
        except Exception as e:
            print(str(e) + "<br>")
            return False

    def verify_total_cnt(self):
        """Compare count/min/max of the destination rows with the source.

        Sets ``_migCompleteState`` to ``True`` when they match; prints a
        failure message and exits with status 77 when they do not.  When the
        destination query itself fails the state is simply left ``False``.
        """
        try:
            sql = self._stats_sql(self.dest_tab, self._date_window())
            dest_result = self.destdb_query(sql, 'select')
            if not dest_result:
                return
            self.dest_cnt = dest_result[0][0]
            dest_checksum = (str(self.dest_cnt) + '_'
                             + str(dest_result[0][1]) + '_'
                             + str(dest_result[0][2]))
            print("source_checksum: %s, dest_checksum: %s <br>"
                  % (self.source_checksum, dest_checksum))
            if (self.source_cnt == self.dest_cnt and self.dest_cnt != 0
                    and self.source_checksum == dest_checksum):
                self._migCompleteState = True
                print("Verify successfully !!<br>")
            else:
                print("Verify failed !!<br>")
                # SystemExit is not an Exception subclass, so it propagates.
                sys.exit(77)
        except Exception as e:
            print(str(e) + "<br>")

    def drop_daily_partition(self):
        """Drop the source partition that holds the migrated day.

        Only acts after a successful verification.  The partition is dropped
        only when its own count/min/max checksum equals the checksum of the
        migrated rows, i.e. it holds nothing but the migrated data; otherwise
        the process exits with status 77.  Sets ``_deleteCompleteState`` on
        success.
        """
        try:
            if not self._migCompleteState:
                return
            # NOTE(review): relies on "explain partitions" exposing the
            # partition list in column 3 - confirm for the server version.
            sql = ("explain partitions select * from %s where %s "
                   % (self.source_tab, self._date_window()))
            plan = self.sourcedb_query(sql, 'select')
            if not plan:
                print("Drop partition failed.. <br>")
                return
            partition_name = plan[0][3]
            if not partition_name:
                print("There is 0 record in source PARTITION been matched! <br>")
                return

            sql = self._stats_sql(
                "%s partition (%s)" % (self.source_tab, partition_name))
            q = self.sourcedb_query(sql, 'select')
            if not q:
                print("Drop partition failed.. <br>")
                return
            source_cnt, source_min_id, source_max_id = q[0][0], q[0][1], q[0][2]
            checksum = (str(source_cnt) + '_' + str(source_min_id)
                        + '_' + str(source_max_id))

            if source_cnt == 0 or source_min_id == -1 or source_max_id == -1:
                print("There is 0 record in source PARTITION been matched! <br>")
                return
            if checksum != self.source_checksum:
                print("The partition %s checksum failed !! Drop failed !!"
                      % partition_name)
                sys.exit(77)

            drop_par_sql = ("alter table %s drop partition %s;"
                            % (self.source_tab, partition_name))
            droped = self.sourcedb_query(drop_par_sql, 'ddl')
            print(drop_par_sql + " <br>")
            if droped:
                print("\nDrop partition complete at -- %s <br>"
                      % (datetime.datetime.now().__str__()))
                self._deleteCompleteState = True
            else:
                print("Drop partition failed.. <br>")
        except Exception as e:
            print(str(e) + "<br>")

    def delete_data(self):
        """Delete the migrated rows from the source in primary-key batches.

        Only acts after a successful verification.  Sleeps one second between
        batches.  ``_deleteCompleteState`` is set only when every batch
        succeeded.
        """
        try:
            if not self._migCompleteState:
                return
            all_ok = True
            k = self.source_min_id
            while k <= self.source_max_id:
                sql = ("delete from %s where %s"
                       % (self.source_tab, self._batch_where(k)))
                print("\n%s <br>" % sql)
                starttime = datetime.datetime.now()
                rows = self.sourcedb_query(sql, 'dml')
                # A batch that matches 0 rows is fine; only False is failure.
                if rows is False:
                    all_ok = False
                    print("Delete failed!! <br>")
                else:
                    print("Deleted %s rows. <br>" % rows)
                print("Elapsed :"
                      + self._elapsed(datetime.datetime.now() - starttime)
                      + " seconds <br>")
                time.sleep(1)
                k += self.step_size
            print("\nDelete complete at -- %s <br>"
                  % (datetime.datetime.now().__str__()))
            if all_ok:
                self._deleteCompleteState = True
        except Exception as e:
            print(str(e) + "<br>")

    def do(self):
        """Run the whole migration and terminate the process.

        Exit status: 88 on success, 254 when migration or purge failed,
        255 when the destination table could not be created (77 may be
        raised earlier by the verification / partition checks).
        """
        if not self.create_table_from_template():
            print("Create table failed ! Exiting. . .")
            sys.exit(255)

        if self.migrate_2_destdb():
            self.verify_total_cnt()
            if self._migCompleteState:
                if self.deleteStrategy == 'drop':
                    self.drop_daily_partition()
                else:
                    self.delete_data()

        print("\n<br>")
        print("=====" * 5 + '<br>')
        print("source_total_cnt: %s <br>" % self.source_cnt)
        print("dest_total_cnt: %s <br>" % self.dest_cnt)
        print("=====" * 5 + '<br>')
        if self._deleteCompleteState:
            print("\nFinal result: Successfully !! <br>")
            sys.exit(88)
        else:
            print("\nFinal result: Failed !! <br>")
            sys.exit(254)
# Script entry point: parse argv, connect to both servers, then run the
# migration workflow (do() always ends the process via sys.exit).
migrator = ClassMigrate()
migrator.do()
每天迁移MySQL历史数据到历史库Python脚本的更多相关文章
- 从零开始学安全(三十五)●mysql 盲注手工自定义python脚本
import requests import string #mysql 手动注入 通用脚本 适用盲注 可以跟具自己的需求更改 def home(): url="url" list ...
- 迁移mysql数据到oracle上
转自:http://www.cnblogs.com/Warmsunshine/p/4651283.html 我是生成的文件里面的master.sql里面的sql,一个一个拷出来的. 迁移mysql数据 ...
- 基于binlog来分析mysql的行记录修改情况(python脚本分析)
最近写完mysql flashback,突然发现还有有这种使用场景:有些情况下,可能会统计在某个时间段内,MySQL修改了多少数据量?发生了多少事务?主要是哪些表格发生变动?变动的数量是怎 ...
- MySQL利用binlog恢复误操作数据(python脚本)
在人工手动进行一些数据库写操作的时候(比方说数据订正),尤其是一些不可控的批量更新或删除,通常都建议备份后操作.不过不怕万一,就怕一万,有备无患总是好的.在线上或者测试环境误操作导致数据被删除或者更新 ...
- mysql更新(三)语句 库的操作 表的操作
04-初始mysql语句 本节课先对mysql的基本语法初体验. 操作文件夹(库) 增 create database db1 charset utf8; 查 # 查看当前创建的数据库 show ...
- HBase——使用Put迁移MySql数据到Hbase
先上code: /** * 功能:迁移mysql上电池历史数据到hbase * Created by liuhuichao on 2016/12/6. */ public class MySqlToH ...
- mysql数据库从删库到跑路之mysql基础
一 数据库是什么 之前所学,数据要永久保存,比如用户注册的用户信息,都是保存于文件中,而文件只能存在于某一台机器上. 如果我们不考虑从文件中读取数据的效率问题,并且假设我们的程序所有的组件都运行在一台 ...
- 使用第三方库连接MySql数据库:PyMysql库和Pandas库
使用PyMysql库和Pandas库链接Mysql 1 系统环境 系统版本:Win10 64位 Mysql版本: 8.0.15 MySQL Community Server - GPL pymysql ...
- 32.修改IK分词器源码来基于mysql热更新词库
主要知识点, 修改IK分词器源码来基于mysql热更新词库 一.IK增加新词的原因 在第32小节中学习到了直接在es的词库中增加词语,来扩充自已的词库,但是这样做有以下缺点: (1)每次添加完 ...
随机推荐
- 纯C语言(C89)实现简单链表
起因 工作很少接触纯C项目,业余写着玩玩,不断雕琢 目标 纯C实现简单链表,提供方便易用泛型接口,避免依赖 实现 完全封装,隐藏结构体细节,不支持栈创建 拷贝存储,轻微性能代价换来易用性 list.h ...
- 记一次jenkins svn构建过程
本文主要参考:maven实战第11章Hudson持续集成 安装Hudson,Hudson插件下载不了,尝试结果未果,转而使用Jenkins 放入tomcat的webapp目录,在bin下点击start ...
- python2 与 python3 依赖包冲突问题
原文链接 https://www.2cto.com/database/201805/749294.html 执行pip的时候取的是/usr/bin这里的pip,查看这里是否存在pip3,没有的话需 ...
- ; 按快捷键`(即波浪号~所在的键盘按键)立即打开随身U盘中的办公专用文件夹
; 按快捷键`(即波浪号~所在的键盘按键)立即打开随身U盘中的办公专用文件夹; WorkFolderHotkey.ahk;; http://www.autoahk.com/; https://www. ...
- Centos8 Tomcat 开机自启配置
第一步:修改catalina.sh文件 # vim 编辑 Tomcat bin/catalina.sh 文件.增加如下内容 CATALINA_PID="$CATALINA_BASE/bin/ ...
- Python 统计列表中重复元素的个数并返回其索引值
需求:统计列表list1中元素3的个数,并返回每个元素的索引 list1 = [3, 3, 8, 9, 2, 10, 6, 2, 8, 3, 4, 5, 5, 4, 1, 5, 9, 7, 10, 2 ...
- Visio操作【未完】
Visio 1.如何操作文档 新建基本框图和空白框图 单击基本框图打开后有模具 空白框图打开之后并没有形状 左下角发现有 更改纸张方向大小 自动调整大小: 如果我们选择形状进入到我们的页面,如果放到边 ...
- Java的几种创建实例方法的性能对比(二)
上一篇里对几种书写方式进行了简单的测试,得出了一些初步的结论.这次简单了解Lambda原理后,对测试做了一些调整,发现得到不一样的结果,而这个调整,明显更契合实际开发的场景. 暂时还没有亲自去验证,主 ...
- Java 在PPT中插入OLE对象
PPT幻灯片中支持将文档作为OLE对象插入到PPT幻灯片指定位置,在幻灯片中可直接点击该对象,打开或编辑等.下面以插入Excel工作簿文档为例,介绍如何来插入到幻灯片. 程序运行环境 编译环境:I ...
- nc基本操作&反弹shell
一.nc简介 nc 被称为瑞士军刀netcat ,所做的就是在两台电脑之间建立链接,并返回两个数据流. 可运行在TCP或者UDP模式,添加参数 -u 则调整为UDP,默认为TCP 即可用在window ...