DataX的安装及使用
DataX的安装及使用
DataX的安装
DataX不需要依赖其他服务,直接上传、解压、安装、配置环境变量即可
也可以直接在windows上解压
DataX的使用
stream2stream
编写配置文件stream2stream.json
# stream2stream.json
{
"job": {
"content": [
{
"reader": {
"name": "streamreader",
"parameter": {
"sliceRecordCount": 10,
"column": [
{
"type": "long",
"value": "10"
},
{
"type": "string",
"value": "hello,你好,世界-DataX"
}
]
}
},
"writer": {
"name": "streamwriter",
"parameter": {
"encoding": "UTF-8",
"print": true
}
}
}
],
"setting": {
"speed": {
"channel": 5
}
}
}
}
执行同步任务
datax.py stream2stream.json
执行结果

mysql2mysql
需要新建student2数据库,并创建student表
编写配置文件mysql2mysql.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"splitPk": "age",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"preSql": [
"truncate student2"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://master:3306/student2?useUnicode=true&characterEncoding=utf8",
"table": [
"student2"
]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}
执行同步任务
datax.py mysql2mysql.json
mysql2hdfs
写hive跟hdfs时一样的
编写配置文件mysql2hdfs.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"splitPk": "age",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"defaultFS": "hdfs://master:9000",
"fileType": "text",
"path": "/user/hive/warehouse/datax.db/students",
"fileName": "student",
"column": [
{
"name": "id",
"type": "bigint"
},
{
"name": "name",
"type": "string"
},
{
"name": "age",
"type": "INT"
},
{
"name": "gender",
"type": "string"
},
{
"name": "clazz",
"type": "string"
},
{
"name": "last_mod",
"type": "string"
}
],
"writeMode": "append",
"fieldDelimiter": ","
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}
hbase2mysql
{
"job": {
"content": [
{
"reader": {
"name": "hbase11xreader",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master:2181"
},
"table": "student",
"encoding": "utf-8",
"mode": "normal",
"column": [
{
"name": "rowkey",
"type": "string"
},
{
"name": "cf1:name",
"type": "string"
},
{
"name": "cf1:age",
"type": "string"
},
{
"name": "cf1:gender",
"type": "string"
},
{
"name": "cf1:clazz",
"type": "string"
}
],
"range": {
"startRowkey": "",
"endRowkey": "",
"isBinaryRowkey": false
}
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz"
],
"preSql": [
"truncate student2"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://master:3306/student2?useUnicode=true&characterEncoding=utf8",
"table": [
"student2"
]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}
mysql2hbase
mysql中的score表需将cource_id改为course_id,并将student_id、course_id设为主键,并将所有字段的类型改为int
hbase需先创建score表:create 'score','cf1'
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"student_id",
"course_id",
"score"
],
"splitPk": "course_id",
"connection": [
{
"table": [
"score"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master:2181"
},
"table": "score",
"mode": "normal",
"rowkeyColumn": [
{
"index":0,
"type":"string"
},
{
"index":-1,
"type":"string",
"value":"_"
},
{
"index":1,
"type":"string"
}
],
"column": [
{
"index":2,
"name": "cf1:score",
"type": "int"
}
],
"encoding": "utf-8"
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}
mysql2Phoenix
在Phoenix中创建STUDENT表
CREATE TABLE IF NOT EXISTS STUDENT (
ID VARCHAR NOT NULL PRIMARY KEY,
NAME VARCHAR,
AGE BIGINT,
GENDER VARCHAR ,
CLAZZ VARCHAR
);
编写配置文件MySQLToPhoenix.json
{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz"
],
"splitPk": "id",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student?useSSL=false"
]
}
]
}
},
"writer": {
"name": "hbase11xsqlwriter",
"parameter": {
"batchSize": "256",
"column": [
"ID",
"NAME",
"AGE",
"GENDER",
"CLAZZ"
],
"hbaseConfig": {
"hbase.zookeeper.quorum": "master,node1,node2",
"zookeeper.znode.parent": "/hbase"
},
"nullMode": "skip",
"table": "STUDENT"
}
}
}
]
}
}
HDFSToHBase
将students.txt数据上传至HDFS的
/data/student1/目录在HBase中创建datax表:
create 'datax','cf1'
{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/data/student1/",
"defaultFS": "hdfs://master:9000",
"column": [
{
"index": 0,
"type": "string"
},
{
"index": 1,
"type": "string"
},
{
"index": 2,
"type": "string"
},
{
"index": 3,
"type": "string"
},
{
"index": 4,
"type": "string"
},
{
"index": 5,
"type": "string"
}
],
"fileType": "text",
"encoding": "UTF-8",
"fieldDelimiter": ","
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master,node1,node2"
},
"table": "datax",
"mode": "normal",
"rowkeyColumn": [
{
"index": 0,
"type": "string"
},
{
"index": -1,
"type": "string",
"value": "_"
},
{
"index": 1,
"type": "string"
}
],
"column": [
{
"index": 2,
"name": "cf1:age",
"type": "string"
},
{
"index": 3,
"name": "cf1:gender",
"type": "string"
},
{
"index": 4,
"name": "cf1:clazz",
"type": "string"
},
{
"index": 5,
"name": "cf1:ts",
"type": "string"
}
],
"versionColumn": {
"index": 5
},
"encoding": "utf-8"
}
}
}
]
}
}
DataX的安装及使用的更多相关文章
- DataX的安装
DataX的安装 1. 可下载tar包 https://github.com/alibaba/DataX/blob/master/userGuid.md 2. 下载源码自己编译 git clone h ...
- [大数据技术]datax的安装以及使用
1.datax简述 DataX 是阿里巴巴集团内被广泛使用的离线数据同步工具/平台,实现包括 MySQL.Oracle.SqlServer.Postgre.HDFS.Hive.ADS.HBase.Ta ...
- dataX windows10安装
按照视频课程,从Github上下载文件:https://github.com/alibaba/DataX 然后将下载的压缩包解压即可,不过需要的前提Python环境是要求python2,于是在pyth ...
- datax的安装和使用(windows)
github官方文档和项目:https://github.com/alibaba/DataX 下载后在windows环境下是可以直接用python编译执行的,但从github上下载的版本只支持pyth ...
- 在datax之前版本中添加filewriter并创建job时出现问题
问题描述:
- DataX的使用——大数据同步技术
准备工作: 1.视频教学http://113.31.104.47/portal/#/course/dashboard/b34d160db64624732ef152a1118af11a 2.DataX的 ...
- Pyhton开源框架(加强版)
info:Djangourl:https://www.oschina.net/p/djangodetail: Django 是 Python 编程语言驱动的一个开源模型-视图-控制器(MVC)风格的 ...
- Python开源框架
info:更多Django信息url:https://www.oschina.net/p/djangodetail: Django 是 Python 编程语言驱动的一个开源模型-视图-控制器(MVC) ...
- DataX的简单编译安装测试
搭建环境: Java > =1.6 Python>=2.6 <3 Ant Rpmbuild G++ 编译DataX: 进入rpm文件夹 ...
随机推荐
- 爱思助手备份 iPhone 时没有设置密码,恢复备份时需要密码的问题
i4.cn 备份时 iPhone 上登陆的 Apple ID 曾经设置过备份密码,这个密码就是恢复备份时需要输入的密码!
- 【XSS】XSS修炼之独孤九剑
题目地址 xcao.vip/test 题目作者给出的解题思路 http://xcao.vip/test/xss/XSS修炼之独孤九剑.pdf 独孤九剑-第一式 题目 过滤了等号 =.小括号 (),要求 ...
- Linux档案权限篇之一
一.查看档案的属性 "ls" 第一列为档案的权限: d:代表是目录 -:代表是文件 l:代表是连接文件(相当于windows里面的快捷方式) b:代表块设备(如硬盘) c:代表字符 ...
- CentOS7系统搭建FTP服务器
创建FTP服务器1.安装FTP服务 yum install -y vsftpd 默认的FTP服务的配置文件路径为/etc/vsftpd cd /etc/vsftpd[root@test924 vsft ...
- C#开源类库SimpleTCP
目录 简介 使用方法 实现客户端 实现服务端 总结 简介 工作中经常遇到需要实现TCP客户端或服务端的时候,如果每次都自己写会很麻烦且无聊,使用SuperSocket库又太大了.这时候就可以使用Sim ...
- tk.mybatis中常用方法的使用(最实用)
一.前言 不知道环境和maven依赖怎么配置的,先看一下这个: SpringBoot整合tk.mybatis 二.方法的介绍和使用 插入方法 int insertSelective(T var1); ...
- python生成时间序列(date_range)
介绍 自己写了一个用python内置模块实现的生成时间序列的函数 支持自动推断字符串到datetime的转换, 但对格式有一定要求, 其它格式可手动指定格式化方式, 格式化方式与python内置格式化 ...
- Vue开发多人聊天室 复盘总结
前言 在上个月初,接到一个需求,要开发一个 聊天通讯 模块 并且 集成到 项目中的多个 入口,实现业务数据的记录追踪. 接到需求后,还挺开心,这是我第一次 搞 通讯 类的需求,之前一直是 B 端 的业 ...
- pymysql基础教程
pymysql基础教程 1.下载pymysql 在命令框输入指令即可 pip install pymysql 2.连接pymysql 连接数据库: import pymysql conn = pymy ...
- 学习PDO中的错误与错误处理模式
在 PDO 的学习过程中,我们经常会在使用事务的时候加上 try...catch 来进行事务的回滚操作,但是大家有没有注意到默认情况下 PDO 是如何处理错误语句导致的数据库操作失败问题呢?今天,我们 ...