core-site.xml

<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://gagcluster</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data1/hadoop_tmp/tmp</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>master.hadoop:2181,whoami:2181,slave1.hadoop:2181,slave2.hadoop:2181,slave3.hadoop:2181</value>
</property>
</configuration>
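
With HA enabled, fs.defaultFS points at the logical nameservice gagcluster rather than a single NameNode host. A quick sanity check from any node (assuming the Hadoop client is on the PATH and HADOOP_CONF_DIR points at these files):

hdfs getconf -confKey fs.defaultFS   # should print hdfs://gagcluster
hdfs dfs -ls /                       # resolves gagcluster through the HA failover proxy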

hdfs-site.xml

<configuration>
<property>
<name>dfs.nameservices</name>
<value>gagcluster</value>
</property>
<property>
<name>dfs.ha.namenodes.gagcluster</name>
<value>master.hadoop,whoami</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/data1/hadoop_tmp/hdfs/datanode</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.gagcluster.master.hadoop</name>
<value>master.hadoop:9000</value>
</property>
<property>
<name>dfs.namenode.rpc-address.gagcluster.whoami</name>
<value>whoami:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.gagcluster.master.hadoop</name>
<value>master.hadoop:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.gagcluster.whoami</name>
<value>whoami:50070</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/data1/hadoop_tmp/hdfs/namenode</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master.hadoop:8485;whoami:8485;slave1.hadoop:8485;slave2.hadoop:8485;slave3.hadoop:8485/gagcluster</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data1/hadoop/tmp/journal</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.gagcluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence(hdfs)
shell(/bin/true)</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.datanode.max.transfer.threads</name>
<value>8192</value>
</property>
</configuration>
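
For a first-time HA setup the bootstrap order matters. A sketch of the sequence, assuming the ZooKeeper quorum above is already running and the commands are executed as the hadoop user (all of these are standard Hadoop 2.x commands):

hadoop-daemon.sh start journalnode          # on every host listed in dfs.namenode.shared.edits.dir
hdfs zkfc -formatZK                         # once, creates the HA znode in ZooKeeper
hdfs namenode -format                       # only on the first NameNode (master.hadoop)
hadoop-daemon.sh start namenode             # on master.hadoop
hdfs namenode -bootstrapStandby             # on whoami, copies the freshly formatted metadata
start-dfs.sh                                # then verify:
hdfs haadmin -getServiceState master.hadoop
hdfs haadmin -getServiceState whoami        # one should be active, the other standby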

mapred-site.xml

<configuration>
<!--
<property>
<name>mapred.job.tracker</name>
<value>master.hadoop:9001</value>
</property>
-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!--<property>
<name>mapreduce.cluster.local.dir</name>
<value>/data1/hadoop_tmp/tmp/map</value>
</property>
-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>0.0.0.0:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>0.0.0.0:19888</value>
</property>
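<!-- Note: dfs.permissions is an HDFS setting; the NameNode reads it from hdfs-site.xml, so it normally belongs there rather than in mapred-site.xml -->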
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
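
The two jobhistory addresses only take effect once the history server is actually running; it is not started by start-yarn.sh. A minimal check, assuming the standard Hadoop 2.x sbin scripts:

mr-jobhistory-daemon.sh start historyserver
# RPC should answer on port 10020, the web UI on port 19888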

yarn-site.xml

<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rm-cluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.recover.enabled</name>
<value>true</value>
</property>
<!-- Specify the RM IDs -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>master.hadoop</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>whoami</value>
</property>
<property>
<name>yarn.resourcemanager.ha.id</name>
<value>rm1</value>
<description>If we want to launch more than one RM in single node, we need this configuration</description>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>master.hadoop:2181,whoami:2181,slave1.hadoop:2181,slave2.hadoop:2181,slave3.hadoop:2181</value>
</property>
<!-- Enable automatic recovery -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>master.hadoop:8030</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>whoami:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>master.hadoop:8031</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>whoami:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>master.hadoop:8032</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>whoami:8032</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>master.hadoop:8033</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>whoami:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>master.hadoop:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>whoami:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>2000</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>12288</value>
</property>
</configuration>
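
Note that yarn.resourcemanager.ha.id is node-specific: the copy of this file on master.hadoop keeps rm1, while the copy on whoami must be changed to rm2, otherwise both ResourceManagers claim the same identity. Once both are started, the failover wiring can be checked with the standard yarn CLI:

yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2   # one active, one standby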

hadoop-env.sh

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hadoop-specific environment variables here.

# The only required environment variable is JAVA_HOME.  All others are
# optional.  When running a distributed configuration it is best to
# set JAVA_HOME in this file, so that it is correctly defined on
# remote nodes.

# The java implementation to use.
export JAVA_HOME=/usr/local/java/jdk1.7.0_65

# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}

export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}

# Extra Java CLASSPATH elements.  Automatically insert capacity-scheduler.
for f in $HADOOP_HOME/contrib/capacity-scheduler/*.jar; do
  if [ "$HADOOP_CLASSPATH" ]; then
    export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$f
  else
    export HADOOP_CLASSPATH=$f
  fi
done

# The maximum amount of heap to use, in MB. Default is 1000.
#export HADOOP_HEAPSIZE=
#export HADOOP_NAMENODE_INIT_HEAPSIZE=""

# Extra Java runtime options.  Empty by default.
export HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

# Command specific options appended to HADOOP_OPTS when specified
export HADOOP_NAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_NAMENODE_OPTS"
export HADOOP_DATANODE_OPTS="-Dhadoop.security.logger=ERROR,RFAS $HADOOP_DATANODE_OPTS"
export HADOOP_SECONDARYNAMENODE_OPTS="-Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,RFAS} -Dhdfs.audit.logger=${HDFS_AUDIT_LOGGER:-INFO,NullAppender} $HADOOP_SECONDARYNAMENODE_OPTS"

export HADOOP_NFS3_OPTS="$HADOOP_NFS3_OPTS"
export HADOOP_PORTMAP_OPTS="-Xmx512m $HADOOP_PORTMAP_OPTS"

# The following applies to multiple commands (fs, dfs, fsck, distcp etc)
export HADOOP_CLIENT_OPTS="-Xmx512m $HADOOP_CLIENT_OPTS"
#HADOOP_JAVA_PLATFORM_OPTS="-XX:-UsePerfData $HADOOP_JAVA_PLATFORM_OPTS"

# On secure datanodes, user to run the datanode as after dropping privileges.
# This **MUST** be uncommented to enable secure HDFS if using privileged ports
# to provide authentication of data transfer protocol.  This **MUST NOT** be
# defined if SASL is configured for authentication of data transfer protocol
# using non-privileged ports.
export HADOOP_SECURE_DN_USER=${HADOOP_SECURE_DN_USER}

# Where log files are stored.  $HADOOP_HOME/logs by default.
#export HADOOP_LOG_DIR=${HADOOP_LOG_DIR}/$USER

# Where log files are stored in the secure data environment.
export HADOOP_LOG_DIR=/data1/logs/
export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/${HADOOP_HDFS_USER}

###
# HDFS Mover specific parameters
###
# Specify the JVM options to be used when starting the HDFS Mover.
# These options will be appended to the options specified as HADOOP_OPTS
# and therefore may override any similar flags set in HADOOP_OPTS
#
# export HADOOP_MOVER_OPTS=""

###
# Advanced Users Only!
###

# The directory where pid files are stored. /tmp by default.
# NOTE: this should be set to a directory that can only be written to by
#       the user that will run the hadoop daemons.  Otherwise there is the
#       potential for a symlink attack.
export HADOOP_PID_DIR=${HADOOP_PID_DIR}
export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}

# A string representing this instance of hadoop. $USER by default.
export HADOOP_IDENT_STRING=$USER
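
Since JAVA_HOME is hard-coded above, the same JDK path has to exist on every node. A quick check, assuming passwordless SSH between the nodes (hostnames taken from the configs above):

for h in master.hadoop whoami slave1.hadoop slave2.hadoop slave3.hadoop; do
  ssh "$h" /usr/local/java/jdk1.7.0_65/bin/java -version
done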

hive-env.sh

export HIVE_HOME=/usr/local/hive
export JAVA_HOME=/usr/local/java/jdk1.7.0_65
export HADOOP_HOME=/usr/local/hadoop
export HIVE_CONF_DIR=/usr/local/hive/conf
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set Hive and Hadoop environment variables here. These variables can be used
# to control the execution of Hive. It should be used by admins to configure
# the Hive installation (so that users do not have to set environment variables
# or set command line parameters to get correct behavior).
#
# The hive service being invoked (CLI/HWI etc.) is available via the environment
# variable SERVICE

# Hive Client memory usage can be an issue if a large number of clients
# are running at the same time. The flags below have been useful in
# reducing memory usage:
#
# if [ "$SERVICE" = "cli" ]; then
# if [ -z "$DEBUG" ]; then
# export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
# else
# export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit"
# fi
# fi

# The heap size of the jvm started by hive shell script can be controlled via:
#
export HADOOP_HEAPSIZE=512
#
# Larger heap size may be required when running queries over large number of files or partitions.
# By default hive shell scripts use a heap size of 256 (MB). Larger heap size would also be
# appropriate for hive server (hwi etc).

# Set HADOOP_HOME to point to a specific hadoop install directory
# HADOOP_HOME=${bin}/../../hadoop

hive-site.xml

<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://127.0.0.1:3306/hive</value>
<description>JDBC connection URL for the metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
<description>User name for the metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>buzhidao</value>
<description>Password for the metastore database</description>
</property>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/data1/cloud/hive</value>
<description>Warehouse directory (path on HDFS)</description>
</property>
<property>
<name>hive.metastore.uris</name>
<value>thrift://192.168.0.103:9083</value>
<description>Host and port of the machine running the Hive metastore</description>
</property>
<property>
<name>hive.hwi.war.file</name>
<value>lib/hive-hwi-1.2.0.war</value>
<description>Path to the HWI war file</description>
</property>
<property>
<name>mapred.input.dir.recursive</name>
<value>true</value>
</property>
<property>
<name>hive.mapred.supports.subdirectories</name>
<value>true</value>
</property>
<property>
<name>hive.exec.mode.local.auto</name>
<value>true</value>
</property>
<property>
<name>hive.exec.parallel</name>
<value>true</value>
</property>
<property>
<name>hive.limit.optimize.enable</name>
<value>true</value>
</property>
<property>
<name>mapred.job.reuse.jvm.num.tasks</name>
<value>10</value>
</property>
</configuration>
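
With hive.metastore.uris set, Hive clients expect a standalone metastore service listening on 192.168.0.103:9083. A sketch of bringing it up, assuming the MySQL database and hive user from the JDBC settings above already exist and the MySQL JDBC driver jar has been copied into $HIVE_HOME/lib (the log path below is chosen only for illustration):

nohup hive --service metastore > /data1/logs/hive-metastore.log 2>&1 &
hive -e "show databases;"   # should connect through the thrift metastore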

hbase-site.xml

<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://gagcluster/hbase</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/data1/cloud/zookeeper/data</value>
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
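<!-- Note: hbase.master historically takes host:port; to configure only the RPC port, hbase.master.port is the usual property -->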
<property>
<name>hbase.master</name>
<value>60000</value>
</property>
<property>
<name>hbase.master.info.port</name>
<value>60010</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>master.hadoop,slave1.hadoop,slave2.hadoop,slave3.hadoop,whoami</value>
</property>
<property>
<name>hbase.regionserver.handler.count</name>
<value>100</value>
</property>
<property>
<name>hfile.block.cache.size</name>
<value>0.4</value>
</property>
<property>
<name>hbase.hregion.memstore.flush.size</name>
<value>266666</value>
</property>
<property>
<name>hbase.hregion.max.filesize</name>
<value>1000000</value>
</property>
<property>
<name>hbase.hstore.compactionThreshold</name>
<value>2</value>
</property>
<property>
<name>hbase.client.write.buffer</name>
<value>20971520</value>
</property>
<property>
<name>hbase.client.scanner.caching</name>
<value>500</value>
</property>
</configuration>
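
Because hbase.rootdir refers to the HDFS nameservice gagcluster rather than a host:port, HBase must be able to resolve that logical name. A common way to do that is to put the HDFS client configuration on HBase's classpath, sketched here assuming HBase is installed under /usr/local/hbase (a path not stated in these files) and Hadoop under /usr/local/hadoop as in hive-env.sh:

cp /usr/local/hadoop/etc/hadoop/core-site.xml /usr/local/hbase/conf/
cp /usr/local/hadoop/etc/hadoop/hdfs-site.xml /usr/local/hbase/conf/
# alternatively, point HBASE_CLASSPATH in hbase-env.sh at the Hadoop conf directory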

zoo.cfg

# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/data1/cloud/zookeeper/data
dataLogDir=/data1/cloud/zookeeper/logs
# the port at which the clients will connect
clientPort=2181
server.0=master.hadoop:2888:3888
server.1=whoami:2888:3888
server.2=slave1.hadoop:2888:3888
server.3=slave2.hadoop:2888:3888
server.4=slave3.hadoop:2888:3888
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
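
Each server.N line above needs a matching myid file in dataDir on that host, containing only the number N. A sketch, assuming the same dataDir on every node:

echo 0 > /data1/cloud/zookeeper/data/myid   # on master.hadoop
echo 1 > /data1/cloud/zookeeper/data/myid   # on whoami
# ...and 2, 3, 4 on slave1.hadoop, slave2.hadoop, slave3.hadoop respectively
zkServer.sh start && zkServer.sh status     # expect one leader, the rest followers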
