DataX的安装及使用

DataX的安装

DataX不需要依赖其他服务,直接上传、解压、安装、配置环境变量即可

也可以直接在windows上解压

DataX的使用

stream2stream

编写配置文件stream2stream.json
# stream2stream.json
{
"job": {
"content": [
{
"reader": {
"name": "streamreader",
"parameter": {
"sliceRecordCount": 10,
"column": [
{
"type": "long",
"value": "10"
},
{
"type": "string",
"value": "hello,你好,世界-DataX"
}
]
}
},
"writer": {
"name": "streamwriter",
"parameter": {
"encoding": "UTF-8",
"print": true
}
}
}
],
"setting": {
"speed": {
"channel": 5
}
}
}
}
执行同步任务
datax.py stream2stream.json
执行结果

mysql2mysql

需要新建student2数据库,并在其中创建student2表(表结构与源库student中的student表一致)

编写配置文件mysql2mysql.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"splitPk": "age",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"preSql": [
"truncate student2"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://master:3306/student2?useUnicode=true&characterEncoding=utf8",
"table": [
"student2"
]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}
执行同步任务
datax.py mysql2mysql.json

mysql2hdfs

写hive跟写hdfs是一样的

编写配置文件mysql2hdfs.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"splitPk": "age",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"defaultFS": "hdfs://master:9000",
"fileType": "text",
"path": "/user/hive/warehouse/datax.db/students",
"fileName": "student",
"column": [
{
"name": "id",
"type": "bigint"
},
{
"name": "name",
"type": "string"
},
{
"name": "age",
"type": "INT"
},
{
"name": "gender",
"type": "string"
},
{
"name": "clazz",
"type": "string"
},
{
"name": "last_mod",
"type": "string"
}
],
"writeMode": "append",
"fieldDelimiter": ","
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}

hbase2mysql

{
"job": {
"content": [
{
"reader": {
"name": "hbase11xreader",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master:2181"
},
"table": "student",
"encoding": "utf-8",
"mode": "normal",
"column": [
{
"name": "rowkey",
"type": "string"
},
{
"name": "cf1:name",
"type": "string"
},
{
"name": "cf1:age",
"type": "string"
},
{
"name": "cf1:gender",
"type": "string"
},
{
"name": "cf1:clazz",
"type": "string"
}
],
"range": {
"startRowkey": "",
"endRowkey": "",
"isBinaryRowkey": false
}
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"writeMode": "insert",
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz"
],
"preSql": [
"truncate student2"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://master:3306/student2?useUnicode=true&characterEncoding=utf8",
"table": [
"student2"
]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}

mysql2hbase

mysql中的score表需将cource_id改为course_id,并将student_id、course_id设为主键,并将所有字段的类型改为int

hbase需先创建score表:create 'score','cf1'

{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"student_id",
"course_id",
"score"
],
"splitPk": "course_id",
"connection": [
{
"table": [
"score"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student"
]
}
]
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master:2181"
},
"table": "score",
"mode": "normal",
"rowkeyColumn": [
{
"index":0,
"type":"string"
},
{
"index":-1,
"type":"string",
"value":"_"
},
{
"index":1,
"type":"string"
}
],
"column": [
{
"index":2,
"name": "cf1:score",
"type": "int"
}
],
"encoding": "utf-8"
}
}
}
],
"setting": {
"speed": {
"channel": 6
}
}
}
}

mysql2Phoenix

在Phoenix中创建STUDENT表
-- Target table for the MySQL -> Phoenix job below; its columns match the
-- writer's "column" list (ID, NAME, AGE, GENDER, CLAZZ).
CREATE TABLE IF NOT EXISTS STUDENT (
    ID     VARCHAR NOT NULL PRIMARY KEY,
    NAME   VARCHAR,
    AGE    BIGINT,
    GENDER VARCHAR,
    CLAZZ  VARCHAR
);
编写配置文件MySQLToPhoenix.json
{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "123456",
"column": [
"id",
"name",
"age",
"gender",
"clazz"
],
"splitPk": "id",
"connection": [
{
"table": [
"student"
],
"jdbcUrl": [
"jdbc:mysql://master:3306/student?useSSL=false"
]
}
]
}
},
"writer": {
"name": "hbase11xsqlwriter",
"parameter": {
"batchSize": "256",
"column": [
"ID",
"NAME",
"AGE",
"GENDER",
"CLAZZ"
],
"hbaseConfig": {
"hbase.zookeeper.quorum": "master,node1,node2",
"zookeeper.znode.parent": "/hbase"
},
"nullMode": "skip",
"table": "STUDENT"
}
}
}
]
}
}

HDFSToHBase

将students.txt数据上传至HDFS的/data/student1/目录

在HBase中创建datax表:create 'datax','cf1'

{
"job": {
"setting": {
"speed": {
"channel": 3
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/data/student1/",
"defaultFS": "hdfs://master:9000",
"column": [
{
"index": 0,
"type": "string"
},
{
"index": 1,
"type": "string"
},
{
"index": 2,
"type": "string"
},
{
"index": 3,
"type": "string"
},
{
"index": 4,
"type": "string"
},
{
"index": 5,
"type": "string"
}
],
"fileType": "text",
"encoding": "UTF-8",
"fieldDelimiter": ","
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"hbaseConfig": {
"hbase.zookeeper.quorum": "master,node1,node2"
},
"table": "datax",
"mode": "normal",
"rowkeyColumn": [
{
"index": 0,
"type": "string"
},
{
"index": -1,
"type": "string",
"value": "_"
},
{
"index": 1,
"type": "string"
}
],
"column": [
{
"index": 2,
"name": "cf1:age",
"type": "string"
},
{
"index": 3,
"name": "cf1:gender",
"type": "string"
},
{
"index": 4,
"name": "cf1:clazz",
"type": "string"
},
{
"index": 5,
"name": "cf1:ts",
"type": "string"
}
],
"versionColumn": {
"index": 5
},
"encoding": "utf-8"
}
}
}
]
}
}

DataX的安装及使用的更多相关文章

  1. DataX的安装

    DataX的安装 1. 可下载tar包 https://github.com/alibaba/DataX/blob/master/userGuid.md 2. 下载源码自己编译 git clone h ...

  2. [大数据技术]datax的安装以及使用

    1.datax简述 DataX 是阿里巴巴集团内被广泛使用的离线数据同步工具/平台,实现包括 MySQL.Oracle.SqlServer.Postgre.HDFS.Hive.ADS.HBase.Ta ...

  3. dataX windows10安装

    按照视频课程,从Github上下载文件:https://github.com/alibaba/DataX 然后将下载的压缩包解压即可,不过需要的前提Python环境是要求python2,于是在pyth ...

  4. datax的安装和使用(windows)

    github官方文档和项目:https://github.com/alibaba/DataX 下载后在windows环境下是可以直接用python编译执行的,但从github上下载的版本只支持pyth ...

  5. 在datax之前版本中添加filewriter并创建job时出现问题

    问题描述:

  6. DataX的使用——大数据同步技术

    准备工作: 1.视频教学http://113.31.104.47/portal/#/course/dashboard/b34d160db64624732ef152a1118af11a 2.DataX的 ...

  7. Python开源框架(加强版)

    info:Djangourl:https://www.oschina.net/p/djangodetail: Django 是 Python 编程语言驱动的一个开源模型-视图-控制器(MVC)风格的 ...

  8. Python开源框架

    info:更多Django信息url:https://www.oschina.net/p/djangodetail: Django 是 Python 编程语言驱动的一个开源模型-视图-控制器(MVC) ...

  9. DataX的简单编译安装测试

    搭建环境:     Java > =1.6     Python>=2.6 <3     Ant     Rpmbuild     G++     编译DataX: 进入rpm文件夹 ...

随机推荐

  1. android http get

    Executors.newSingleThreadExecutor().execute{ val uri = "https://www.cnblogs.com/hangj" val ...

  2. Python - 面向对象编程 - 魔术方法(双下划线方法)

    什么是魔术方法 在Python中,所有以 __ 双下划线包起来的方法,都统称为 Magic Method 魔术方法,也叫双下划线方法 有哪些重要的魔术方法? __new__ https://www.c ...

  3. Linux常用命令 - nl命令详解

    21篇测试必备的Linux常用命令,每天敲一篇,每次敲三遍,每月一循环,全都可记住!! https://www.cnblogs.com/poloyy/category/1672457.html 显示行 ...

  4. TreeView和ListView数据库查询数据联动操作

    好久不用了,重新整理下放这里以备需要使用,功能见图 数据库表结构 定义TreeView addObject中data存储的记录集 type PNode = ^TNode; TNode = record ...

  5. 在C#中使用RSA进行加密和解密

    这篇文章向您展示了如何在c#.net Windows窗体应用程序中使用RSA算法对字符串进行加密和解密.RSA是由Ron Rivest,Adi Shamir和Leonard Adleman开发的非对称 ...

  6. Filter案例之登录验证

    一.登录验证,权限控制 1.需求分析 其中,登录有关的资源被访问时要直接放行,不然会死循环: 2.代码实现

  7. python库--pandas--Series.str--字符串处理

    原数据 import pandas as pd a = pd.Series(['aSd', 'asd', 'dfd fsAsf sfs']) b = pd.Series([None, 'asd', ' ...

  8. ms sql 带自增列 带外键约束 数据导入导出

    1,生成建表脚本 选中要导的表,点右键-编写表脚本为-create到  ,生成建表脚本 2,建表(在新库),但不建外键关系 不要选中生成外键的那部分代码,只选择建表的代码 3,导数据,用SQL STU ...

  9. Lua io.lines()

    前言# 从文章的题目可以看出,今天的内容是和文件的行相关的,其实这个函可以看成是一个文件读取函数,只不过文件读取的形式固定了,就是只能一行一行的读,接下来我们就一起来看看这个函数究竟要怎么使用. 内容 ...

  10. 动态查看及加载PHP扩展

    在编译并完成 php.ini 的配置之后,我们就成功的安装了一个 PHP 的扩展.不过, PHP 也为我们提供了两个在动态运行期间可以查看扩展状态以及加载未在 php.ini 中进行配置的扩展的函数. ...