hadoop-hive学习笔记

-- Managed table whose rows are stored as comma-delimited text files.
create table hive_1(id string,name string ,gender string)
row format delimited fields terminated by ','
stored as TEXTFILE;

-- "local" makes Hive read the file from the client's local filesystem rather than from HDFS.
load data local inpath '/luozt/hive_001.txt' into table hive_1 ;

-- External table: dropping it removes only the metadata, not the underlying data files.
create EXTERNAL table hive_2(id string,name string ,gender string)
row format delimited fields terminated by ','
stored as TEXTFILE;

-- Without "local" the path refers to the cluster filesystem (HDFS); the file is moved into the table's location.
load data inpath '/luo/hive_001.txt' into table hive_2 ;
-- Count the rows of a table.
-- (`count 'hive_1'` is HBase shell syntax and is not valid HiveQL.)
select count(*) from hive_1;
-- Remove every row but keep the table definition.
truncate table log_struct;

-- Drop the table entirely.
drop table log_struct;

-- Table partitioned by dt and dept. Partition columns are declared separately
-- from the regular columns and their values are supplied when loading.
create table partition_table
(name string ,salary float,gender string,level string)
partitioned by(dt string,dept string)
row format delimited fields terminated by ','
stored as TEXTFILE;

desc partition_table;

-- List the partitions that currently exist for the table.
show partitions partition_table;

-- Load a local file into one specific partition.
load data local inpath '/luozt/par.txt' into table partition_table partition(dt='2014-04-01',dept='yonyu');

-- Add a partition, pointing it at an explicit directory:
alter table partition_table add partition(dt='2014-04-03',dept='yonyou3') location '/user/hive/warehouse/luo.db/partition_table/dt=2014-04-03/dept=yonyou3';

-- Drop a partition (the same one that was added by the previous statement):
alter table partition_table drop partition(dt='2014-04-03',dept='yonyou3');

select * from partition_table where salary>7600;

-- Nested query using Hive's FROM-first syntax; the subquery is aliased as e.
from (select name,salary from partition_table)e select e.name,e.salary where e.salary>7600;

-- IN with a literal list

select * from partition_table where salary in(7000,6700);

-- CASE expression: classify each salary into a level.
-- The ranges are contiguous (< 6800, 6800 to < 8000, >= 8000), so a salary of
-- exactly 6800 or one between 8000 and 8100 no longer falls through to 'L0'.
-- Only a NULL salary reaches the ELSE branch.
select name,salary,
case
when salary<6800 then 'L1'
when salary<8000 then 'L2'
when salary>=8000 then 'L3'
else 'L0'
end as salary_level
from partition_table;

-- GROUP BY aggregation: total salary per gender.

select gender,sum(salary) from partition_table group by gender;

-- HAVING filters groups after aggregation (the threshold is only an example value).
select gender,sum(salary) from partition_table group by gender having sum(salary)>10000;

-- Join practice tables.
-- `user` is a reserved keyword in Hive 1.2+ and therefore has to be back-quoted.
create table group1 (`user` string,score int)
row format delimited fields terminated by ','
stored as TEXTFILE;

create table group_join (`user` string,class string)
row format delimited fields terminated by ','
stored as TEXTFILE;

-- Plain join
select b.class,a.score from group1 a join group_join b on (a.`user`=b.`user`);
-- When one table is small, the MAPJOIN(b) hint names b as the small table to be joined map-side.
select /*+MAPJOIN(b)*/ b.class,a.score from group1 a join group_join b on (a.`user`=b.`user`);
-- LEFT SEMI JOIN: returns rows of a that have a match in b; only columns of a may be selected.
select a.`user`,a.score from group1 a left semi join group_join b on (a.`user`=b.`user`);

-- Grouping: every non-aggregated column in the select list must also appear in GROUP BY.
select `user` ,sum(score) from group1 group by `user`;
-- Optimization: enable map-side partial aggregation.
set hive.map.aggr=true;

-- ORDER BY
-- `user` is a reserved keyword in Hive 1.2+ and therefore has to be back-quoted.
create table orderby_test (`user` string,class string,math int,english int)
row format delimited fields terminated by ','
stored as TEXTFILE;

-- Ascending by default; append DESC for descending order.
-- With set hive.mapred.mode=strict; an ORDER BY must be accompanied by a LIMIT.
select * from orderby_test order by math;

-- SORT BY is not affected by set hive.mapred.mode=strict, and the number of reducers
-- can be chosen with set mapred.reduce.tasks=<number>. SORT BY only sorts within each
-- reducer, so every reducer's output is ordered; this makes a later global sort cheaper.

-- UNION ALL: Hive versions before 0.13.0 do not support a top-level UNION; the union
-- has to be wrapped in a subquery, and that subquery must be given an alias.
select * from (select count(*) from group1 union all select count(*) from orderby_test) temp;

-- Pattern worth remembering: tag each branch with a constant so the origin of every row is known.
-- Both branches must produce the same number of columns (name, height, mark).
select name,height,mark
from
(select name,height,'0' as mark from a
union all
select name,height,'1' as mark from b) t;

-- Indexes (note: CREATE INDEX support was removed in Hive 3.0).
create table index_test(id int,name string) partitioned by (dt string) row format delimited fields terminated by ',' stored as TEXTFILE;
-- Create a staging table first.
create table temp(id int,name string ,dt string ) row format delimited fields terminated by ',' stored as TEXTFILE;
-- Dynamic partitioning: the partition value is taken from the last selected column (dt).
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.dynamic.partition=true;

insert overwrite table index_test partition(dt) select id,name,dt from temp;

-- Author's note: the index could only be created once the table had partitions.
-- WITH DEFERRED REBUILD leaves the index empty until ALTER INDEX ... REBUILD is run.
create index index1 on table index_test(id) AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler' with DEFERRED REBUILD;

alter index index1 on index_test rebuild;

show index on index_test;
show partitions index_test;

-- Buckets: rows are distributed into a fixed number of buckets by the clustering
-- column, which is what makes TABLESAMPLE on that column efficient.

create table tb_tmp(id int,age int,name string,timeflag bigint) row format delimited fields terminated by ',' ;
create table tb_stu(id int,age int,name string,timeflag bigint) clustered by(id) sorted by (age) into 5 buckets row format delimited fields terminated by ',';

-- NOTE: on Hive versions before 2.0, run `set hive.enforce.bucketing=true;` before this
-- insert, otherwise the data is not written into the 5 declared buckets.
insert into table tb_stu select id,age,name,timeflag from tb_tmp;

-- Query a single bucket (1 of 5) of the table.
select * from tb_stu tablesample(bucket 1 out of 5 on id);

-- Storage formats and complex data types.
-- An RCFile table cannot be filled with LOAD DATA directly; load into a staging
-- table first and then INSERT ... SELECT into the RCFile table.

-- array
create table log_array(ip string,uid array<bigint>) partitioned by (dt string) row format delimited fields terminated by ',' collection items terminated by '|' stored AS TEXTFILE;

-- dt is a string partition column, so its value is written as a string literal.
load data local inpath '/luo/log_array.txt' into table log_array partition(dt='20150902');
-- Read values from the array (indexes are zero-based).
select uid[1] from log_array;
select ip,size(uid) from log_array where dt='20150902';
-- The searched value must have the array's element type (bigint), hence the L suffix.
select ip from log_array where dt='20150902' and array_contains(uid,4732974L);

-- map: fields are separated by '#', map entries by '&', and each key from its value by '='.
create table log_map(ts string,ip string,type string,logtype string,request Map<string,string>,response Map<string,string>)
row format delimited fields terminated by '#' collection items terminated by '&' Map keys terminated by '=' stored as TEXTFILE;

-- Look up a map value by key.
select request['src'] from log_map;

-- struct
-- `user` is a reserved keyword in Hive 1.2+ and therefore has to be back-quoted.
create table log_struct(ip string,`user` struct<name:string,age:int>)
row format delimited fields terminated by ','
collection items terminated by '#'
stored as TEXTFILE;

-- Sample data (one record per line; '#' separates the struct members):
-- 192.168.1.1,wow#23
-- 192.168.1.1,wow#23
-- 192.168.1.1,wow#23
-- 192.168.1.1,wow#23
-- 192.168.1.1,wow#23
-- 192.168.1.1,wow#23

-- Access a struct member with dot notation.
select `user`.name from log_struct;

hadoop-hive学习笔记的更多相关文章

hive学习笔记之一：基本数据类型
欢迎访问我的GitHub https://github.com/zq2599/blog_demos 内容:所有原创文章分类汇总及配套源码,涉及Java.Docker.Kubernetes.DevOPS ...
hive学习笔记之三：内部表和外部表
欢迎访问我的GitHub https://github.com/zq2599/blog_demos 内容:所有原创文章分类汇总及配套源码,涉及Java.Docker.Kubernetes.DevOPS ...
hive学习笔记之四：分区表
欢迎访问我的GitHub https://github.com/zq2599/blog_demos 内容:所有原创文章分类汇总及配套源码,涉及Java.Docker.Kubernetes.DevOPS ...
hive学习笔记之五：分桶
欢迎访问我的GitHub https://github.com/zq2599/blog_demos 内容:所有原创文章分类汇总及配套源码,涉及Java.Docker.Kubernetes.DevOPS ...
hive学习笔记之六：HiveQL基础
欢迎访问我的GitHub https://github.com/zq2599/blog_demos 内容:所有原创文章分类汇总及配套源码,涉及Java.Docker.Kubernetes.DevOPS ...
hive学习笔记之七：内置函数
欢迎访问我的GitHub https://github.com/zq2599/blog_demos 内容:所有原创文章分类汇总及配套源码,涉及Java.Docker.Kubernetes.DevOPS ...
hive学习笔记之九：基础UDF
欢迎访问我的GitHub https://github.com/zq2599/blog_demos 内容:所有原创文章分类汇总及配套源码,涉及Java.Docker.Kubernetes.DevOPS ...
hive学习笔记之十：用户自定义聚合函数(UDAF)
欢迎访问我的GitHub 这里分类和汇总了欣宸的全部原创(含配套源码):https://github.com/zq2599/blog_demos 本篇概览本文是<hive学习笔记>的第十 ...
hive学习笔记之十一：UDTF
欢迎访问我的GitHub https://github.com/zq2599/blog_demos 内容:所有原创文章分类汇总及配套源码,涉及Java.Docker.Kubernetes.DevOPS ...
Hadoop入门学习笔记---part4
紧接着<Hadoop入门学习笔记---part3>中的继续了解如何用java在程序中操作HDFS. 众所周知,对文件的操作无非是创建,查看,下载,删除.下面我们就开始应用java程序进行操 ...

随机推荐

HashTable的使用，扑克牌发牌游戏
l 场景主要实现以下功能: 1. 首先给扑克牌中每张牌设定一个编号,下面算法实现的编号规则如下: 红桃按照从小到大依次为:1-13 方块按照从小到大依次为:14-26 黑桃按 ...
springboot-整合freemarker
freemarker是一个页面模板引擎.用springboot整合freemarker的方式如以下步骤: 1.在创建springboot的项目的时候,选择freemarker的组件,或者自己手动在ma ...
yuv转opencv中的IplImage
http://www.2cto.com/kf/201208/145559.html http://www.opencv.org.cn/forum.php?mod=viewthread&tid= ...
EG:nginx反向代理两台web服务器,实现负载均衡所有的web服务共享一台nfs的存储
step1: 三台web服务器环境配置:iptables -F; setenforce 0 关闭防火墙:关闭setlinux step2:三台web服务器装软件 step3: 主机修改配置文件:vi ...
快乐学习 Ionic Framework+PhoneGap 手册1-4 {登录页面}
编程的快乐和乐趣,来自于能成功运行程序并运用到项目中有了面板然后加个登录页面,请看效果图和代码 Index HTML Code <!DOCTYPE html> <html ng-a ...
2018.7.12训练赛 -G
第二道水题前边说了很多话,但就最后两段有用. 就是给你一个序列,然后你判断一下这个序列是不是递增的,是就输出yes,否则输出no. 所以以后不管题目看起来多长.多复杂,都要读一遍. 代码就不贴了.
CentOs linux安装SVN服务
SVN服务器有2种运行方式:1.独立服务器(例如:svn://xxx.com/xxx):2.借助apache (例如:http://svn.xxx.com/xxx):为了不依赖apache,我选择 ...
未能将网站配置为使用ASP.NET4.X 解决方法
WIN 10系统安装Visual Studio 2012新建ASP.NET MVC 4 WEB 应用程序出错有些图片是网上截取而来,之前光顾着处理问题而忘记截图了,提示的ASP.net 版本有些不同 ...
【codevs3012+codevs3037】线段覆盖4+线段覆盖5(DP)
线段覆盖4网址:http://codevs.cn/problem/3012/ 线段覆盖5网址:http://codevs.cn/problem/3037/ 题目大意:给出一条直线上的一坨线段,每条线段 ...
hdoj1012--u Calculate e
Problem Description A simple mathematical formula for e is where n is allowed to go to infinity. Thi ...

hadoop-hive学习笔记

hadoop-hive学习笔记的更多相关文章

随机推荐

热门专题