Components to install:
hadoop
hive
scala
spark
I. Environment variable configuration
Append the following to ~/.bash_profile:
PATH=$PATH:$HOME/bin

export PATH

export JAVA_HOME=/usr/local/jdk
export SCALA_HOME=/usr/local/scala
export SPARK_HOME=/usr/local/spark
export PATH=.:$JAVA_HOME/bin:$SCALA_HOME/bin:$PATH

HADOOP_HOME=/usr/local/hadoop
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HDFS_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export HADOOP_HOME PATH

HIVE_HOME=/usr/local/hive
PATH=$HIVE_HOME/bin:$PATH
export HIVE_HOME PATH

II. Hadoop installation and setup
1. Configure passwordless SSH (mutual trust)
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
chmod 700 ~/.ssh
chmod 600 ~/.ssh/authorized_keys

2. Change the hostname to yul32: edit /etc/hosts (vi /etc/hosts) and /etc/sysconfig/network (vi /etc/sysconfig/network)
3. Edit hadoop-env.sh:
export JAVA_HOME=/usr/local/jdk

4. Edit core-site.xml:
<property>
<name>fs.defaultFS</name>
<value>hdfs://yul32:9000</value>
</property>

5. Edit hdfs-site.xml (in /usr/hadoop-2.3.0/etc/hadoop):
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/usr/local/hadoop/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>

6. Edit mapred-site.xml (if only mapred-site.xml.template exists, copy it to mapred-site.xml first; in /usr/hadoop-2.3.0/etc/hadoop):
<property>
<name>mapreduce.cluster.temp.dir</name>
<value></value>
<description>No description</description>
<final>true</final>
</property>
<property>
<name>mapreduce.cluster.local.dir</name>
<value></value>
<description>No description</description>
<final>true</final>
</property>

7. Edit yarn-site.xml (/usr/hadoop-2.3.0/etc/hadoop); note that mapreduce.framework.name is conventionally set in mapred-site.xml:
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>

8. Edit the slaves file (etc/hadoop/slaves):
yul32
9. Format the NameNode
Run: hadoop namenode -format (the newer form is hdfs namenode -format)
10. Start Hadoop
cd $HADOOP_HOME/sbin && ./start-all.sh

(Network interface commands: ifup brings an interface up, ifdown takes it down.)

III. Spark setup
(Config files are in /usr/spark-1.1.0-bin-hadoop2.3/conf; if vi reports the file as read-only, force the write with :w! or fix the file permissions.)
1. Edit conf/slaves:
yul32

2. Edit spark-env.sh (in /usr/spark-1.1.0-bin-hadoop2.3/conf):
export SCALA_HOME=/usr/local/scala
export JAVA_HOME=/usr/local/jdk
export SPARK_MASTER_IP=yul32
export SPARK_WORKER_CORES=1
export SPARK_WORKER_INSTANCES=1
export SPARK_MASTER_PORT=7077
export SPARK_WORKER_MEMORY=1g
export MASTER=spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}

3. Start Spark
./sbin/start-all.sh
4. Run a Spark example
./bin/run-example org.apache.spark.examples.JavaSparkPi 2
5. Run the Scala shell (spark-shell)
./bin/spark-shell --master local[2]
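Once the shell is up, a quick smoke test (a minimal sketch; sc is the SparkContext that spark-shell predefines):
// run inside spark-shell
val nums = sc.parallelize(1 to 1000)
println(nums.filter(_ % 2 == 0).count())   // should print 500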
6. Run the Python shell (pyspark)
./bin/pyspark --master local[2]
7. Start Spark SQL (Thrift server)
./sbin/start-thriftserver.sh (or on YARN: ./sbin/start-thriftserver.sh --master yarn)
To run it in the background: nohup ./sbin/start-thriftserver.sh --master yarn &
To list background jobs: jobs -l
After startup, jps should include a SparkSubmit process.
8. Connect a Spark SQL client
./bin/beeline -u jdbc:hive2://yul32:10000 -n spark -p spark
Here -n is the username and -p is the password.
Alternatively, run ./bin/beeline and then:
beeline> !connect jdbc:hive2://yul32:10000
(enter the username)
(enter the password)

Upload a file and create tables:
1.hadoop fs -ls /user/ocdc/coc
hadoop fs -put /home/ocdc/CI_CUSER_20141104112305197.csv /user/ocdc/coc
2.shark> create table CI_CUSER_20141104112305196 (PRODUCT_NO string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n';
shark> load data inpath '/user/ocdc/coc/CI_CUSER_20141104112305197.csv' into table CI_CUSER_20141104112305196;
shark> create table CI_CUSER_20141104112305197 (PRODUCT_NO string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n' stored as rcfile;
shark> insert into table CI_CUSER_20141104112305197 select * from CI_CUSER_20141104112305196;
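The same table can also be queried from spark-shell through HiveContext (a minimal sketch, assuming a Spark build that includes Hive support and a hive-site.xml visible on the classpath; sc is the SparkContext that spark-shell predefines):
// run inside spark-shell
import org.apache.spark.sql.hive.HiveContext
val hiveContext = new HiveContext(sc)
// count the rows loaded into the RCFile-backed table above
hiveContext.sql("select count(*) from CI_CUSER_20141104112305197").collect().foreach(println)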

IV. Hive installation and configuration (optional)
1. Edit hive-env.sh:
export HADOOP_HOME=/usr/local/hadoop
export HIVE_CONF_DIR=/usr/local/hive/conf
2. Start the Hive remote service (port 10000):
hive --service hiveserver &
Hive JDBC connection URL: jdbc:hive://ip:10000/default (default Hive port: 10000; default database: default)
Location of the Hive data warehouse:
hive/conf/hive-site.xml
hive.metastore.warehouse.dir sets the warehouse location; the default is /user/hive/warehouse:
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive/warehouse</value>
<description>location of default database for the warehouse</description>
</property>

Shark JDBC connection
1. Check whether SharkServer is running:
[ocdc@oc98 conf]$ jps
7983 Kafka
8803 SharkCliDriver
7377 ResourceManager
16894 SharkServer
6925 JournalNode
12601 CoarseGrainedExecutorBackend
17056 CoarseGrainedExecutorBackend
18424 Jps
14486 Master
4108 QuorumPeerMain
23408 HRegionServer
17655 RunJar
6727 DataNode
7132 DFSZKFailoverController
7510 NodeManager
12553 WorkerLauncher
6614 NameNode
23268 HMaster
12415 SharkCliDriver
2. Check the SharkServer port:
[ocdc@oc98 conf]$ netstat -apn | grep 16894
tcp 0 0 ::ffff:10.1.251.98:57902 :::* LISTEN 16894/java
tcp 0 0 :::52309 :::* LISTEN 16894/java
tcp 0 0 :::9977 :::* LISTEN 16894/java
tcp 0 0 :::41222 :::* LISTEN 16894/java
tcp 0 0 :::4040 :::* LISTEN 16894/java
tcp 0 0 :::45192 :::* LISTEN 16894/java
tcp 0 0 ::ffff:10.1.251.98:35289 ::ffff:10.1.251.98:3306 ESTABLISHED 16894/java
tcp 0 0 ::ffff:10.1.251.98:57902 ::ffff:10.1.251.104:41877 ESTABLISHED 16894/java
tcp 0 0 ::ffff:10.1.251.98:57902 ::ffff:10.1.251.98:53176 ESTABLISHED 16894/java
tcp 0 0 ::ffff:10.1.251.98:9977 ::ffff:10.1.48.20:60586 ESTABLISHED 16894/java
tcp 1 0 ::ffff:10.1.251.98:57320 ::ffff:10.1.251.98:50012 CLOSE_WAIT 16894/java
tcp 0 0 ::ffff:10.1.251.98:9977 ::ffff:10.1.48.20:59756 ESTABLISHED 16894/java
tcp 0 0 ::ffff:10.1.251.98:57902 ::ffff:10.1.251.101:50160 ESTABLISHED 16894/java
tcp 0 0 ::ffff:10.1.251.98:57902 ::ffff:10.1.251.98:53172 ESTABLISHED 16894/java
tcp 0 0 ::ffff:10.1.251.98:57902 ::ffff:10.1.251.101:50159 ESTABLISHED 16894/java
unix 2 [ ] STREAM CONNECTED 8889813 16894/java
unix 2 [ ] STREAM CONNECTED 8889793 16894/java
Port 9977 is the port the Shark server listens on; it was started with: nohup ./bin/shark --service sharkserver -p 9977 &

3. JDBC connection:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class SharkTest {
    // HiveServer1-style JDBC driver used by SharkServer
    private static String driverName = "org.apache.hadoop.hive.jdbc.HiveDriver";

    public static void main(String[] args) throws SQLException {
        try {
            Class.forName(driverName);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
            System.exit(1);
        }
        // connect to the SharkServer started on port 9977 above
        Connection con = DriverManager.getConnection(
                "jdbc:hive://10.1.251.98:9977/default", "ocdc", "asiainfo");
        Statement stmt = con.createStatement();
        ResultSet res = stmt.executeQuery("select * from src");
        if (res.next()) { // print the first row as a quick check
            System.out.println(res.getString(1) + " " + res.getString(2));
        }
        res.close();
        stmt.close();
        con.close();
    }
}

Spark SQL server start command:
./sbin/start-thriftserver.sh --master yarn
Client connection:
./bin/beeline -u jdbc:hive2://10.1.251.98:10000 -n ocdc -p asiainfo
To make profile changes take effect immediately:
source /etc/profile

Dependency jars:
hive-common-0.8.1.jar
hive-exec-0.8.1.jar
hive-jdbc-0.8.1.jar
hive-metastore-0.8.1.jar
hive-service-0.8.1.jar
libfb303.jar
slf4j-api-1.4.3.jar
slf4j-log4j12-1.4.3.jar
httpclient-4.2.5.jar
hadoop-common-2.3.0.jar

vi basics: :wq saves and exits
i enters insert (edit) mode
:q! force-quits without saving

(Granting permissions)
1. cd into the directory that contains the file you want to change.
2. chmod 777 slaves (grant full permissions on that file).
3. Give the user ysy write access by changing ownership: chown -R ysy132:ysy132 dfs

To switch users: su - ysy

(Hadoop error logs are located in /usr/hadoop-2.3.0/logs.)
View the last 500 lines of a log: tail -500 hadoop-root-namenode-ysy0915.log

(Starting Hadoop)
In the hadoop-2.3.0 directory, run ./sbin/start-dfs.sh
To stop: ./sbin/stop-dfs.sh

Check the running daemons with jps.
(Go back to the previous directory.)

Example: Spark SQL code generation
For the query select a+b from table, the hand-written row evaluation is:
val a:Int = inputRow.getInt(0)
val b:Int = inputRow.getInt(1)
val result:Int = a + b
resultRow.setInt(0,result)

// Sketch of Catalyst-style code generation with Scala quasiquotes;
// Expression, Attribute and Add are simplified expression-tree nodes.
import scala.reflect.runtime.universe._

def generateCode(e: Expression): Tree = e match {
  case Attribute(ordinal) =>
    // read the ordinal-th column of the input row
    q"inputRow.getInt($ordinal)"
  case Add(left, right) =>
    // recursively generate code for both operands and add the results
    q"""
    {
      val leftResult = ${generateCode(left)}
      val rightResult = ${generateCode(right)}
      leftResult + rightResult
    }
    """
}
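As an illustration (Expression, Attribute, and Add here are simplified stand-in case classes, not the actual Catalyst API), calling generateCode on the tree for a + b splices the two column lookups into one block:
// hypothetical tree for "select a+b from table": column a at ordinal 0, b at ordinal 1
val tree = generateCode(Add(Attribute(0), Attribute(1)))
// the generated tree is equivalent to:
//   { val leftResult = inputRow.getInt(0)
//     val rightResult = inputRow.getInt(1)
//     leftResult + rightResult }
// i.e. the same logic as the hand-written version above, produced automatically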
