Key configuration files, based on Hadoop 2.6.0-cdh5.4.0, Hive 1.1.0, and HBase 1.0.0-cdh5.4.0
core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://gagcluster</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data1/hadoop_tmp/tmp</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>master.hadoop:2181,whoami:2181,slave1.hadoop:2181,slave2.hadoop:2181,slave3.hadoop:2181</value>
</property>
</configuration>
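With fs.defaultFS pointing at the HA nameservice instead of a single host, clients should go through the logical name. A quick sanity check once HDFS is running (a sketch, assuming the hdfs command is on the PATH):
hdfs getconf -confKey fs.defaultFS    # should print hdfs://gagcluster
hdfs dfs -ls hdfs://gagcluster/       # list the root through the logical nameservice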
hdfs-site.xml
<configuration>
<property>
<name>dfs.nameservices</name>
<value>gagcluster</value>
</property>
<property>
<name>dfs.ha.namenodes.gagcluster</name>
<value>master.hadoop,whoami</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/data1/hadoop_tmp/hdfs/datanode</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.gagcluster.master.hadoop</name>
<value>master.hadoop:9000</value>
</property>
<property>
<name>dfs.namenode.rpc-address.gagcluster.whoami</name>
<value>whoami:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.gagcluster.master.hadoop</name>
<value>master.hadoop:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.gagcluster.whoami</name>
<value>whoami:50070</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/data1/hadoop_tmp/hdfs/namenode</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master.hadoop:8485;whoami:8485;slave1.hadoop:8485;slave2.hadoop:8485;slave3.hadoop:8485/gagcluster</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data1/hadoop/tmp/journal</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.gagcluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence(hdfs)
shell(/bin/true)</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
</configuration>
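For a first-time bring-up of this QJM-based HA setup, a commonly used order is: JournalNodes first, then format and start the active NameNode, bootstrap the standby, format the ZKFC znode, and finally start everything. A hedged sketch matching the host names above:
hadoop-daemon.sh start journalnode           # on each of the five JournalNode hosts
hdfs namenode -format                        # on master.hadoop, first time only
hadoop-daemon.sh start namenode              # on master.hadoop
hdfs namenode -bootstrapStandby              # on whoami (the standby NameNode)
hdfs zkfc -formatZK                          # create the failover znode in ZooKeeper
start-dfs.sh                                 # NameNodes, DataNodes and ZKFCs
hdfs haadmin -getServiceState master.hadoop  # one should report active ...
hdfs haadmin -getServiceState whoami         # ... and the other standby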
mapred-site.xml
<configuration>
<!--
<property>
<name>mapred.job.tracker</name>
<value>master.hadoop:9001</value>
</property>
-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!--<property>
<name>mapreduce.cluster.local.dir</name>
<value>/data1/hadoop_tmp/tmp/map</value>
</property>
-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>0.0.0.0:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>0.0.0.0:19888</value>
</property>
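<!-- Note: dfs.permissions is an HDFS setting and normally belongs in hdfs-site.xml;
     the NameNode does not read mapred-site.xml. -->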
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
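The JobHistory addresses above only take effect once the history server is started, typically on the master node:
mr-jobhistory-daemon.sh start historyserver
# the web UI is then reachable at http://<historyserver-host>:19888/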
yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rm-cluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.recover.enabled</name>
<value>true</value>
</property>
<!-- The two ResourceManagers (rm1/rm2) are named below -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>master.hadoop</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>whoami</value>
</property>
<property>
<name>yarn.resourcemanager.ha.id</name>
<value>rm1</value>
<description>If we want to launch more than one RM in single node, we need this configuration</description>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>master.hadoop:2181,whoami:2181,slave1.hadoop:2181,slave2.hadoop:2181,slave3.hadoop:2181</value>
</property>
<!-- Automatic recovery is enabled above; the per-RM service addresses follow -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>master.hadoop:8030</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>whoami:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>master.hadoop:8031</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>whoami:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>master.hadoop:8032</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>whoami:8032</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>master.hadoop:8033</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>whoami:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>master.hadoop:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>whoami:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>2000</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>12288</value>
</property>
</configuration>
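One caveat: yarn.resourcemanager.ha.id is pinned to rm1 here, so the copy of this file deployed on the second ResourceManager (whoami) should set it to rm2 (or drop the property and let each RM match its own hostname). To start and check the RM pair — a sketch; start-yarn.sh only starts the ResourceManager on the node it is run from:
start-yarn.sh                           # on master.hadoop (rm1), also starts NodeManagers
yarn-daemon.sh start resourcemanager    # on whoami (rm2), the standby RM
yarn rmadmin -getServiceState rm1       # one of these should report active,
yarn rmadmin -getServiceState rm2       # the other standby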
hadoop-env.sh
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME. All others are
# optional. When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.
export JAVA_HOME=/usr/local/java/jdk1.7.0_65

# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}

export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements. Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options. Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"
export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol. This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored. $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER
export HADOOP_LOG_DIR=/data1/logs/

# Where log files are stored in the secure data environment.
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
#       the user that will run the hadoop daemons. Otherwise there is the
#       potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
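HADOOP_LOG_DIR is moved to /data1/logs/, so that directory must exist and be writable by the account running the daemons before anything is started. A small sketch (the hadoop user and group are assumptions):
mkdir -p /data1/logs
chown -R hadoop:hadoop /data1/logs    # adjust to the actual daemon user
hadoop version                        # quick check that JAVA_HOME and the scripts resolve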
hive-env.sh
export HIVE_HOME=/usr/local/hive
export JAVA_HOME=/usr/local/java/jdk1.7.0_65
export HADOOP_HOME=/usr/local/hadoop
export HIVE_CONF_DIR=/usr/local/hive/conf
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hive and Hadoop environment variables here. These variables can be used
# to control the execution of Hive. It should be used by admins to configure
# the Hive installation (so that users do not have to set environment variables
# or set command line parameters to get correct behavior).
#
# The hive service being invoked (CLI/HWI etc.) is available via the environment
# variable SERVICE

# Hive Client memory usage can be an issue if a large number of clients
# are running at the same time. The flags below have been useful in
# reducing memory usage:
#
# if [ "$SERVICE" = "cli" ]; then
#   if [ -z "$DEBUG" ]; then
#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
#   else
#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit"
#   fi
# fi

# The heap size of the jvm stared by hive shell script can be controlled via:
#
export HADOOP_HEAPSIZE=512
#
# Larger heap size may be required when running queries over large number of files or partitions.
# By default hive shell scripts use a heap size of 256 (MB). Larger heap size would also be
# appropriate for hive server (hwi etc).

# Set HADOOP_HOME to point to a specific hadoop install directory
# HADOOP_HOME=${bin}/../../hadoop
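HADOOP_HEAPSIZE=512 gives the Hive CLI and the services launched from these scripts a 512 MB heap. A quick check that the environment is wired up (paths are the ones set above):
hive --version                     # should report Hive 1.1.0
$HADOOP_HOME/bin/hadoop version    # confirms HADOOP_HOME points at the CDH 5.4.0 build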
hive-site.xml
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://127.0.0.1:3306/hive</value>
<description>JDBC connection URL for the metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>Metastore database username</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>buzhidao</value>
<description>Metastore database password</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/data1/cloud/hive</value>
<description>Warehouse data path (on HDFS)</description>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://192.168.0.103:9083</value>
<description>Host and port of the Hive metastore service</description>
</property>
<property>
<name>hive.hwi.war.file</name>
<value>lib/hive-hwi-1.2.0.war</value>
<description>Path to the HWI war file</description>
</property>
<property>
<name>mapred.input.dir.recursive</name>
<value>true</value>
</property>
<property>
<name>hive.mapred.supports.subdirectories</name>
<value>true</value>
</property>
<property>
<name>hive.exec.mode.local.auto</name>
<value>true</value>
</property>
<property>
<name>hive.exec.parallel</name>
<value>true</value>
</property>
<property>
<name>hive.limit.optimize.enable</name>
<value>true</value>
</property>
<property>
<name>mapred.job.reuse.jvm.num.tasks</name>
<value>10</value>
</property>
</configuration>
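Before the metastore service on port 9083 can come up, the MySQL database and user referenced above have to exist and the MySQL JDBC driver jar needs to be in $HIVE_HOME/lib. A hedged sketch (the root credentials and grant statement are assumptions; schematool ships with Hive 1.1):
mysql -uroot -p -e "CREATE DATABASE hive; GRANT ALL PRIVILEGES ON hive.* TO 'hive'@'localhost' IDENTIFIED BY 'buzhidao'; FLUSH PRIVILEGES;"
schematool -dbType mysql -initSchema       # creates the metastore tables
hive --service metastore &                 # thrift service, default port 9083
hive -e "show databases;"                  # end-to-end check through the metastore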
hbase-site.xml
<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://gagcluster/hbase</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/data1/cloud/zookeeper/data</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.master</name>
<value>60000</value>
</property>
<property>
<name>hbase.master.info.port</name>
<value>60010</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master.hadoop,slave1.hadoop,slave2.hadoop,slave3.hadoop,whoami</value>
</property>
<property>
<name>hbase.regionserver.handler.count</name>
<value>100</value>
</property>
<property>
<name>hfile.block.cache.size</name>
<value>0.4</value>
</property>
<property>
<name>hbase.hregion.memstore.flush.size</name>
<value>266666</value>
</property>
<property>
<name>hbase.hregion.max.filesize</name>
<value>1000000</value>
</property>
<property>
<name>hbase.hstore.compactionThreshold</name>
<value>2</value>
</property>
<property>
<name>hbase.client.write.buffer</name>
<value>20971520</value>
</property>
<property>
<name>hbase.client.scanner.caching</name>
<value>500</value>
</property>
</configuration>
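Because hbase.rootdir points at the logical nameservice gagcluster rather than a concrete NameNode, HBase needs Hadoop's core-site.xml and hdfs-site.xml on its classpath (commonly symlinked into the HBase conf directory), and with an external ZooKeeper ensemble hbase-env.sh normally sets HBASE_MANAGES_ZK=false. A sketch, where /usr/local/hbase is an assumed install path:
ln -s $HADOOP_CONF_DIR/core-site.xml /usr/local/hbase/conf/core-site.xml
ln -s $HADOOP_CONF_DIR/hdfs-site.xml /usr/local/hbase/conf/hdfs-site.xml
echo "export HBASE_MANAGES_ZK=false" >> /usr/local/hbase/conf/hbase-env.sh
start-hbase.sh
echo "status 'simple'" | hbase shell       # quick health check of master and regionservers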
zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/data1/cloud/zookeeper/data
dataLogDir=/data1/cloud/zookeeper/logs
# the port at which the clients will connect
clientPort=2181
server.0=master.hadoop:2888:3888
server.1=whoami:2888:3888
server.2=slave1.hadoop:2888:3888
server.3=slave2.hadoop:2888:3888
server.4=slave3.hadoop:2888:3888
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
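Each server.N line above must be matched by a myid file in dataDir on the corresponding host, containing just that number. A sketch for this five-node ensemble:
echo 0 > /data1/cloud/zookeeper/data/myid    # on master.hadoop (server.0)
echo 1 > /data1/cloud/zookeeper/data/myid    # on whoami (server.1)
echo 2 > /data1/cloud/zookeeper/data/myid    # on slave1.hadoop; use 3 and 4 on slave2/slave3
zkServer.sh start                            # on every node in the ensemble
zkServer.sh status                           # one node should report Mode: leader, the rest follower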