hadoop生态搭建(3节点)-17.sqoop配置_单节点
# ================================================================== Install Sqoop
tar -zxvf ~/sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
mv ~/sqoop-1.4.7.bin__hadoop-2.6.0 /usr/local/sqoop-1.4.7
# Environment variables
# ================================================================== node1
# Edit /etc/profile (vi /etc/profile) and add the exports below underneath
# the line "export PATH USER LOGNAME MAIL HOSTNAME HISTSIZE HISTCONTROL".
# NOTE(review): in the original scrape several exports were fused onto one
# line; they are listed one per line here so the file sources cleanly.
export JAVA_HOME=/usr/java/jdk1.8.0_111
export JRE_HOME=/usr/java/jdk1.8.0_111/jre
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.12
export HADOOP_HOME=/usr/local/hadoop-2.7.6
export MYSQL_HOME=/usr/local/mysql
export HBASE_HOME=/usr/local/hbase-1.2.4
export HIVE_HOME=/usr/local/hive-2.1.1
export SCALA_HOME=/usr/local/scala-2.12.4
export KAFKA_HOME=/usr/local/kafka_2.12-0.10.2.1
export FLUME_HOME=/usr/local/flume-1.8.0
export SPARK_HOME=/usr/local/spark-2.3.0
export STORM_HOME=/usr/local/storm-1.1.0
export REDIS_HOME=/usr/local/redis-4.0.2
export ERLANG_HOME=/usr/local/erlang
export RABBITMQ_HOME=/usr/local/rabbitmq_server-3.7.5
export MONGODB_HOME=/usr/local/mongodb-3.4.5
export NGINX_HOME=/usr/local/nginx
export CATALINA_BASE=/usr/local/tomcat
export CATALINA_HOME=/usr/local/tomcat
export TOMCAT_HOME=/usr/local/tomcat
export KEEPALIVED_HOME=/usr/local/keepalived
export ELASTICSEARCH_HOME=/usr/local/elasticsearch-6.2.4
export LOGSTASH_HOME=/usr/local/logstash-6.2.4
export KIBANA_HOME=/usr/local/kibana-6.2.4
export SQOOP_HOME=/usr/local/sqoop-1.4.7
export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$ZOOKEEPER_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$MYSQL_HOME/bin:$HBASE_HOME/bin:$HIVE_HOME/bin:$SCALA_HOME/bin:$KAFKA_HOME/bin:$FLUME_HOME/bin:$SPARK_HOME/bin:$STORM_HOME/bin:$REDIS_HOME/bin:$ERLANG_HOME/bin:$RABBITMQ_HOME/ebin:$RABBITMQ_HOME/sbin:$MONGODB_HOME/bin:$NGINX_HOME/sbin:$CATALINA_HOME/bin:$KEEPALIVED_HOME/sbin:$ELASTICSEARCH_HOME/bin:$LOGSTASH_HOME/bin:$KIBANA_HOME/bin:$SQOOP_HOME/bin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native

# Make the environment variables take effect
source /etc/profile
# Verify the result
echo "$SQOOP_HOME"
# Create sqoop-env.sh from the bundled template
cp -a "$SQOOP_HOME/conf/sqoop-env-template.sh" "$SQOOP_HOME/conf/sqoop-env.sh"
# Edit it (vi $SQOOP_HOME/conf/sqoop-env.sh) and set the following:

# Set path to where bin/hadoop is available
export HADOOP_COMMON_HOME=${HADOOP_HOME}
# Set path to where hadoop-*-core.jar is available
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
# Set the path to where bin/hbase is available
#export HBASE_HOME=${HBASE_HOME}
# Set the path to where bin/hive is available
export HIVE_HOME=${HIVE_HOME}
# Set the path for where zookeper config dir is
#export ZOOCFGDIR=

# Copy the MySQL JDBC driver into Sqoop's lib directory.
# NOTE(review): in the original this cp was fused onto the commented-out
# ZOOCFGDIR line above, so it would never have executed.
cp -a ~/mysql-connector-java-5.1.46.jar "$SQOOP_HOME/lib/"
# Sqoop's Hive import needs hive-exec on its classpath
cp -a "$HIVE_HOME/lib/hive-exec-2.1.1.jar" "$SQOOP_HOME/lib/"
# Verify the installation
sqoop-version
# Create the test database and user (run as MySQL root).
# NOTE(review): the original scrape pasted the interactive "mysql>" prompts
# ("> create ...") directly onto shell lines, where ">" would act as a file
# redirection; the statements are fed through heredocs instead.
mysql -u root -p <<'SQL'
create database sqooptest character set utf8;
create user 'sqoop'@'%' identified by 'Sqoop-123';
grant all privileges on *.* to 'sqoop'@'%';
flush privileges;
show databases;
SQL

# Log in to MySQL as user sqoop (password: Sqoop-123) and create test data
mysql -u sqoop -p <<'SQL'
use sqooptest;
create table emp(id INT NOT NULL PRIMARY KEY, name VARCHAR(20), age INT);
insert into emp(id, name, age)values(1, 'zhangsan', 11);
insert into emp(id, name, age)values(2, 'lisi', 12);
insert into emp(id, name, age)values(3, '王五', 13);
create table emp_add(id INT NOT NULL PRIMARY KEY, name VARCHAR(20), age INT, sex VARCHAR(20));
insert into emp_add(id, name, age, sex)values(1, 'zhangsan', 11, '男');
insert into emp_add(id, name, age, sex)values(2, 'lisi', 12, '男');
insert into emp_add(id, name, age, sex)values(3, '王五', 13, '女');
insert into emp_add(id, name, age, sex)values(4,'liuliu', 11, '男');
show tables;
select * from emp;
select * from emp_add;
SQL
# Start Hadoop
# ================================================================== node1 node2 node3
zkServer.sh start

# ================================================================== node1
# Start all Hadoop daemons
$HADOOP_HOME/sbin/start-all.sh
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc

# ================================================================== node2
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc

# Check HDFS safe-mode state
hadoop dfsadmin -safemode get
# Force-leave safe mode if necessary:
# hadoop dfsadmin -safemode leave

# Web UIs:
# http://node1:50070?user.name=hadoop
# http://node2:50070?user.name=hadoop
# http://node1:8088?user.name=hadoop/cluster/nodes

# Import a MySQL table into HDFS
# ================================================================== node1
sqoop import \
--connect jdbc:mysql://node1:3306/sqooptest \
--username sqoop \
--password Sqoop-123 \
--table emp \
--m 1

# If the import fails with the error below, adjust
# $HADOOP_HOME/etc/hadoop/yarn-site.xml:
# ERROR tool.ImportTool: Import failed: java.io.IOException: org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException:
# Invalid resource request, requested memory < 0, or requested memory > max configured, requestedMemory=1536, maxMemory=1024
# ================================================================== node1 node2 node3
# Edit yarn-site.xml (vi $HADOOP_HOME/etc/hadoop/yarn-site.xml) and add:
#   <property>
#     <name>yarn.nodemanager.resource.memory-mb</name>
#     <value>2048</value>
#   </property>
#   <property>
#     <name>yarn.scheduler.maximum-allocation-mb</name>
#     <value>2048</value>
#   </property>

# Restart YARN
# ================================================================== node1
$HADOOP_HOME/sbin/start-yarn.sh
# Import the table into HDFS (default target: /user/<user>/<table>)
sqoop import \
--connect jdbc:mysql://node1:3306/sqooptest \
--username sqoop \
--password Sqoop-123 \
--table emp \
--m 1
# Inspect the imported data
hadoop fs -ls /user/root/emp
hadoop fs -cat /user/root/emp/part-m-00000

# Import a table into a specific HDFS directory
sqoop import --connect jdbc:mysql://node1:3306/sqooptest \
--username sqoop --password Sqoop-123 \
--target-dir /sqooptest/table_emp/queryresult \
--table emp --num-mappers 1
# Inspect the imported data
hadoop fs -ls /sqooptest/table_emp/queryresult
hadoop fs -cat /sqooptest/table_emp/queryresult/part-m-00000

# Import a relational table into Hive
sqoop import --connect jdbc:mysql://node1:3306/sqooptest \
--username sqoop --password Sqoop-123 \
--table emp --hive-import \
--num-mappers 1

# If the error below occurs, delete /user/root/emp first:
# ERROR tool.ImportTool: Import failed: org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory hdfs://appcluster/user/root/emp already exists
hadoop fs -rmr /user/root/emp
# Re-run the Hive import
sqoop import --connect jdbc:mysql://node1:3306/sqooptest \
--username sqoop --password Sqoop-123 \
--table emp --hive-import \
--num-mappers 1

# Start hive and verify the imported data:
hive
# hive> show tables;
# hive> select * from emp;

# Import a table into a specific Hive database/table.
# The Hive database must already exist, otherwise the import fails with:
# FAILED: SemanticException [Error 10072]: Database does not exist: sqooptest
# Create it first in the hive shell:
# hive> create database sqooptest;
# hive> show databases;
sqoop import --connect jdbc:mysql://node1:3306/sqooptest \
--username sqoop --password Sqoop-123 \
--table emp \
--delete-target-dir \
--fields-terminated-by '\t' \
--hive-import \
--hive-database sqooptest \
--hive-table hive_emp \
--num-mappers 1
# Verify in the hive shell:
# hive> use sqooptest;
# hive> show tables;
# hive> select * from hive_emp;
# Import a subset of a table (WHERE clause)
sqoop import --connect jdbc:mysql://node1:3306/sqooptest \
--username sqoop --password Sqoop-123 \
--table emp_add \
--where "age =11" \
--target-dir /sqooptest/table_emp/queryresult2 \
--num-mappers 1
# Inspect the imported data
hadoop fs -ls /sqooptest/table_emp/queryresult2
hadoop fs -cat /sqooptest/table_emp/queryresult2/part-m-00000

# Free-form query import ($CONDITIONS placeholder is required by Sqoop)
sqoop import --connect jdbc:mysql://node1:3306/sqooptest \
--username sqoop --password Sqoop-123 \
--query 'select id,name,age from emp WHERE id>=2 and $CONDITIONS' \
--split-by id \
--fields-terminated-by '\t' \
--target-dir /sqooptest/table_emp/queryresult3 \
--num-mappers 1
# Inspect the imported data
hadoop fs -cat /sqooptest/table_emp/queryresult3/part-m-00000

# Incremental import (append rows with id > 2)
sqoop import --connect jdbc:mysql://node1:3306/sqooptest \
--username sqoop --password Sqoop-123 \
--table emp \
--incremental append \
--check-column id \
--last-value 2 \
--fields-terminated-by '\t' \
--target-dir /sqooptest/table_emp/queryresult4 \
--num-mappers 1
# Inspect the imported data
hadoop fs -ls /sqooptest/table_emp/queryresult4/
hadoop fs -cat /sqooptest/table_emp/queryresult4/part-m-00000

# Export data from HDFS back to MySQL
# Look at the HDFS data first
hadoop fs -ls /sqooptest/table_emp/queryresult
hadoop fs -cat /sqooptest/table_emp/queryresult/part-m-00000

# 1. The MySQL target table must be created manually first
#    (log in as sqoop, password: Sqoop-123)
mysql -u sqoop -p <<'SQL'
use sqooptest;
CREATE TABLE employee(id INT NOT NULL PRIMARY KEY,name VARCHAR(20),age INT);
show tables;
select * from employee;
SQL

# 2. Run the export
sqoop export \
--connect jdbc:mysql://node1:3306/sqooptest \
--username sqoop --password Sqoop-123 \
--table employee \
--export-dir /sqooptest/table_emp/queryresult/
# Verify in the mysql client:
# mysql> select * from employee;

shutdown -h now
# sqoop
hadoop生态搭建(3节点)-17.sqoop配置_单节点的更多相关文章
- hadoop生态搭建(3节点)-05.mysql配置_单节点
# ==================================================================node1 # ======================== ...
- hadoop生态搭建(3节点)
软件:CentOS-7 VMware12 SSHSecureShellClient shell工具:Xshell 规划 vm网络配置 01.基础配置 02.ssh配置 03.zookeep ...
- js 节点 document html css 表单节点操作
js 节点 document html css 表单节点操作 节点操作:访问.属性.创建 (1)节点的访问:firstChild.lastChild.childNodes.parentChild(父子 ...
- hadoop生态搭建(3节点)-13.mongodb配置
# 13.mongodb配置_副本集_认证授权# ==================================================================安装 mongod ...
- hadoop生态搭建(3节点)-12.rabbitmq配置
# 安装 需要相关包# ==================================================================node1 node2 node3 yum ...
- hadoop生态搭建(3节点)-08.kafka配置
如果之前没有安装jdk和zookeeper,安装了的请直接跳过 # https://www.oracle.com/technetwork/java/javase/downloads/java-arch ...
- hadoop生态搭建(3节点)-04.hadoop配置
如果之前没有安装jdk和zookeeper,安装了的请直接跳过 # https://www.oracle.com/technetwork/java/javase/downloads/java-arch ...
- hadoop生态搭建(3节点)-10.spark配置
# https://www.scala-lang.org/download/2.12.4.html# ================================================= ...
- hadoop生态搭建(3节点)-15.Nginx_Keepalived_Tomcat配置
# Nginx+Tomcat搭建高可用服务器名称 预装软件 IP地址Nginx服务器 Nginx1 192.168.6.131Nginx服务器 Nginx2 192.168.6.132 # ===== ...
随机推荐
- 使用pdb调试python脚本
pdb 是 python 自带的一个包,为 python 程序提供了一种交互的源代码调试功能,主要特性包括设置断点.单步调试.进入函数调试.查看当前代码.查看栈片段.动态改变变量的值等.pdb 提供了 ...
- easyui datagrid行编辑中数据联动
easyui的datagrid中行内编辑使用数据联动.即:当编辑产品编号时,该行的产品名称自动根据产品编号显示出来. 在编辑中获取当前行的索引 function getRowIndex(target) ...
- 爬虫入门之Scrapy框架基础框架结构及腾讯爬取(十)
Scrapy终端是一个交互终端,我们可以在未启动spider的情况下尝试及调试代码,也可以用来测试XPath或CSS表达式,查看他们的工作方式,方便我们爬取的网页中提取的数据. 如果安装了 IPyth ...
- Kibana基本使用
现在你已经安装了Kibana,现在你一步步通过本教程快速获取Kibana核心功能的实践经验.学习完本教程,你将: 1.加载案例数据到你安装的Elasticsearch中 2. 定义至少一个索引匹配模式 ...
- How to import .sql script
How to import .sql script 1.Export .sql from pl/sql developer you can reference to other document in ...
- Struts学习-Hibernate
Hibernate (开放源代码的对象关系映射框架) http://www.cnblogs.com/wenwen123/p/5658625.html 一. 1.新建 2.配置 <!-- hibe ...
- 如何给Docker hub用户上传头像
我第一次使用Docker hub时,觉得很奇怪,这个网站上面没有允许用户上传头像的地方. 后来经过研究才发现,需要用在Docker hub上注册用户的同一个邮箱到Gravatar这个网站上再注册一个账 ...
- Service Fabric eShop On Containers
Service Fabric承载eShop On Containers 从模块化到微服务化 从Pet Shop 到eShop on Container都是Microsoft在技术演进的路径上给开发者展 ...
- dynamic_cast动态转换
我们都知道dynamic_cast会在运行时进行类型检查,比较安全,static_cast静态转换,不安全 dynamic_cast转换的类型必须是个类,且这个类中必须有虚函数,为什么呢? 虚函数对于 ...
- HDU 5677 ztr loves substring(Manacher+dp+二进制分解)
题目链接:HDU 5677 ztr loves substring 题意:有n个字符串,任选k个回文子串,问其长度之和能否等于L. 题解:用manacher算法求出所有回文子串的长度,并记录各长度回文 ...