DataX的安装及使用

DataX的安装

DataX不需要依赖其他服务,直接上传、解压、安装、配置环境变量即可

也可以直接在windows上解压

DataX的使用

stream2stream

编写配置文件stream2stream.json
# stream2stream.json
{
"job": {
"content": [
{
"reader": {
"name": "streamreader",
"parameter": {
"sliceRecordCount": 10,
"column": [
{
"type": "long",
"value": "10"
},
{
"type": "string",
"value": "hello,你好,世界-DataX"
}
]
}
},
"writer": {
"name": "streamwriter",
"parameter": {
"encoding": "UTF-8",
"print": true
}
}
}
],
"setting": {
"speed": {
"channel": 5
}
}
}
}
执行同步任务
datax.py stream2stream.json
执行结果

mysql2mysql

需要新建student2数据库,并在其中创建与student表结构相同的student2表(下方writer配置写入的表名为student2)

编写配置文件mysql2mysql.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"splitPk": "age",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"preSql": [
"truncate student2"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://master:3306/student2?useUnicode=true&characterEncoding=utf8",
"table": [
"student2"
]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}
执行同步任务
datax.py mysql2mysql.json

mysql2hdfs

写Hive跟写HDFS是一样的:只需将writer的path指向Hive表在HDFS上的数据目录即可

编写配置文件mysql2hdfs.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"splitPk": "age",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"defaultFS": "hdfs://master:9000",
"fileType": "text",
"path": "/user/hive/warehouse/datax.db/students",
"fileName": "student",
"column": [
{
"name": "id",
"type": "bigint"
},
{
"name": "name",
"type": "string"
},
{
"name": "age",
"type": "INT"
},
{
"name": "gender",
"type": "string"
},
{
"name": "clazz",
"type": "string"
},
{
"name": "last_mod",
"type": "string"
}
],
"writeMode": "append",
"fieldDelimiter": ","
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}

hbase2mysql

{
"job": {
"content": [
{
"reader": {
"name": "hbase11xreader",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master:2181"
},
"table": "student",
"encoding": "utf-8",
"mode": "normal",
"column": [
{
"name": "rowkey",
"type": "string"
},
{
"name": "cf1:name",
"type": "string"
},
{
"name": "cf1:age",
"type": "string"
},
{
"name": "cf1:gender",
"type": "string"
},
{
"name": "cf1:clazz",
"type": "string"
}
],
"range": {
"startRowkey": "",
"endRowkey": "",
"isBinaryRowkey": false
}
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz"
],
"preSql": [
"truncate student2"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://master:3306/student2?useUnicode=true&characterEncoding=utf8",
"table": [
"student2"
]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}

mysql2hbase

mysql中的score表需将cource_id改为course_id,并将student_id、course_id设为主键,并将所有字段的类型改为int

hbase需先创建score表:create 'score','cf1'

{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"student_id",
"course_id",
"score"
],
"splitPk": "course_id",
"connection": [
{
"table": [
"score"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master:2181"
},
"table": "score",
"mode": "normal",
"rowkeyColumn": [
{
"index":0,
"type":"string"
},
{
"index":-1,
"type":"string",
"value":"_"
},
{
"index":1,
"type":"string"
}
],
"column": [
{
"index":2,
"name": "cf1:score",
"type": "int"
}
],
"encoding": "utf-8"
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}

mysql2Phoenix

在Phoenix中创建STUDENT表
-- Phoenix target table for the MySQL -> Phoenix sync job.
-- Column names mirror the writer's column list in MySQLToPhoenix.json
-- (ID, NAME, AGE, GENDER, CLAZZ).
CREATE TABLE IF NOT EXISTS STUDENT (
    ID     VARCHAR NOT NULL PRIMARY KEY,
    NAME   VARCHAR,
    AGE    BIGINT,
    GENDER VARCHAR,
    CLAZZ  VARCHAR
);
编写配置文件MySQLToPhoenix.json
{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz"
],
"splitPk": "id",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student?useSSL=false"
]
}
]
}
},
"writer": {
"name": "hbase11xsqlwriter",
"parameter": {
"batchSize": "256",
"column": [
"ID",
"NAME",
"AGE",
"GENDER",
"CLAZZ"
],
"hbaseConfig": {
"hbase.zookeeper.quorum": "master,node1,node2",
"zookeeper.znode.parent": "/hbase"
},
"nullMode": "skip",
"table": "STUDENT"
}
}
}
]
}
}

HDFSToHBase

将students.txt数据上传至HDFS的/data/student1/目录

在HBase中创建datax表:create 'datax','cf1'

{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/data/student1/",
"defaultFS": "hdfs://master:9000",
"column": [
{
"index": 0,
"type": "string"
},
{
"index": 1,
"type": "string"
},
{
"index": 2,
"type": "string"
},
{
"index": 3,
"type": "string"
},
{
"index": 4,
"type": "string"
},
{
"index": 5,
"type": "string"
}
],
"fileType": "text",
"encoding": "UTF-8",
"fieldDelimiter": ","
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master,node1,node2"
},
"table": "datax",
"mode": "normal",
"rowkeyColumn": [
{
"index": 0,
"type": "string"
},
{
"index": -1,
"type": "string",
"value": "_"
},
{
"index": 1,
"type": "string"
}
],
"column": [
{
"index": 2,
"name": "cf1:age",
"type": "string"
},
{
"index": 3,
"name": "cf1:gender",
"type": "string"
},
{
"index": 4,
"name": "cf1:clazz",
"type": "string"
},
{
"index": 5,
"name": "cf1:ts",
"type": "string"
}
],
"versionColumn": {
"index": 5
},
"encoding": "utf-8"
}
}
}
]
}
}

DataX的安装及使用的更多相关文章

  1. DataX的安装

    DataX的安装 1. 可下载tar包 https://github.com/alibaba/DataX/blob/master/userGuid.md 2. 下载源码自己编译 git clone h ...

  2. [大数据技术]datax的安装以及使用

    1.datax简述 DataX 是阿里巴巴集团内被广泛使用的离线数据同步工具/平台,实现包括 MySQL.Oracle.SqlServer.Postgre.HDFS.Hive.ADS.HBase.Ta ...

  3. dataX windows10安装

    按照视频课程,从Github上下载文件:https://github.com/alibaba/DataX 然后将下载的压缩包解压即可,不过需要的前提Python环境是要求python2,于是在pyth ...

  4. datax的安装和使用(windows)

    github官方文档和项目:https://github.com/alibaba/DataX 下载后在windows环境下是可以直接用python编译执行的,但从github上下载的版本只支持pyth ...

  5. 在datax之前版本中添加filewriter并创建job时出现问题

    问题描述:

  6. DataX的使用——大数据同步技术

    准备工作: 1.视频教学http://113.31.104.47/portal/#/course/dashboard/b34d160db64624732ef152a1118af11a 2.DataX的 ...

  7. Pyhton开源框架(加强版)

    info:Djangourl:https://www.oschina.net/p/djangodetail: Django 是 Python 编程语言驱动的一个开源模型-视图-控制器(MVC)风格的 ...

  8. Python开源框架

    info:更多Django信息url:https://www.oschina.net/p/djangodetail: Django 是 Python 编程语言驱动的一个开源模型-视图-控制器(MVC) ...

  9. DataX的简单编译安装测试

    搭建环境:     Java > =1.6     Python>=2.6 <3     Ant     Rpmbuild     G++     编译DataX: 进入rpm文件夹 ...

随机推荐

  1. Linux系统的ssh与sshd服务

    当主机中开启openssh服务,那么就对外开放了远程连接的接口 ssh为openssh服务的客户端,sshd为openssh服务的服务端 远程管理工具ssh具有数据加密传输.网络开销小以及应用平台范围 ...

  2. 机械硬盘换到SSD后系统引导报错代码0xc000000e

    由于机械硬盘IO不够用,系统使用起来非常的缓慢,特意购买了新的SSD进行了替换.机械硬盘的IO在70左右,SSD的IO在1000-4000左右指普通消费SSD. 由于不想安装系统,就直接把机械硬盘的数 ...

  3. Walker

      emmm.......随机化.   好吧,我们不熟.   考虑随机选取两组数据高斯消元消除结果后带入检验,能有超过1/2正确就输出.   其实方程就四个,手动解都没问题.   只是要注意看sin与 ...

  4. Java并发知识总结,超详细!

    首先给大家分享一个github仓库,上面放了200多本经典的计算机书籍,包括C语言.C++.Java.Python.前端.数据库.操作系统.计算机网络.数据结构和算法.机器学习.编程人生等,可以sta ...

  5. Golang入门学习(二):控制分支

    文章目录 @[TOC] 1. 控制分支 1.1 if-else分支 1.2 switch分支 1.4 while 和do...while循环结构 1.5 多种循环结构 1.6 break 1.7 co ...

  6. Apache网页优化

    目录: 一.Apache网页优化概述 二.网页压缩 三.网页缓存 四.隐藏版本信息 五.Apache防盗链 一.Apache网页优化概述 在企业中,部署Apache后只采用默认的配置参数,会引发网站很 ...

  7. SQL:1999基本语法

    SQL:1999基本语法 SELECT [DISTINCT] * | 列名称 [AS]别名,........ FROM 表名称1 [别名1][CROSS JOIN表名称2 别名2]| [NATURAL ...

  8. PTA——c++2017Final 圆周率山

    为了参加学校的社团风采展,怡山小学数学组的同学们决定画一座圆周率山,以宣传圆周率. 已知圆周率为:3. 1415926535 8979323846 2643383279 5028841971 6939 ...

  9. C++课后习题

    一.设计一个类people,有保护数据成员:age(年龄,整型),name(姓名,string),行为成员:两个构造函数(一个默认,另一个带参数):析构函数:void setValue(int m, ...

  10. PHP中的垃圾回收相关函数

    之前我们已经学习过 PHP 中的引用计数以及垃圾回收机制的概念.这些内容非常偏理论,也是非常常见的面试内容.而今天介绍的则是具体的关于垃圾回收的一些功能函数.关于之前的两篇介绍文章,大家可以到文章底部 ...