-- Show the current database name in the Hive CLI prompt.
set hive.cli.print.current.db=true;
-- The next two statements assign the same property, so only the last value
-- (nonstrict) stays in effect. The strict line shows the alternative value.
-- NOTE(review): strict mode is documented to reject risky queries such as
-- scans of a partitioned table without a partition filter. Confirm the exact
-- checks for the Hive version in use.
set hive.mapred.mode=strict;
set hive.mapred.mode=nonstrict;

-- Dynamic partition insert.
-- Hive matches the trailing SELECT expressions to the PARTITION columns by
-- position, not by name: the second-to-last expression feeds country and the
-- last expression feeds state.
-- Both partition columns are dynamic here, which Hive only accepts when
-- dynamic partitioning is enabled and its mode is nonstrict.
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

-- NOTE(review): the original fragment had lost its INSERT line and the
-- expression for the state partition. The target table (employees) and the
-- source column (se.st) are restored on the assumption that this is the usual
-- employees / staged_employees example. Confirm both names.
-- The "..." is a placeholder for the non-partition columns and must be
-- replaced with a real column list before the statement can run.
INSERT OVERWRITE TABLE employees
PARTITION (country, state)
SELECT ..., se.cnty, se.st
FROM staged_employees se;


-- Nested SELECT: the outer query reads the subquery result directly, so no
-- temporary table has to be created for the intermediate rows.
-- The subquery computes each salary after federal taxes and the outer query
-- keeps the rows where that amount exceeds 70000.
-- upper(name) is aliased as name because an unaliased expression gets a
-- generated column name and could not be referenced as e.name.
-- NOTE(review): the first outer column was missing in the original fragment.
-- e.name is restored as the most likely intent. Confirm.
FROM (
  SELECT upper(name) AS name,
         salary,
         deductions["Federal Taxes"] AS fed_taxes,
         round(salary * (1 - deductions["Federal Taxes"])) AS salary_minus_fed_taxes
  FROM employees
) e
SELECT e.name, e.salary_minus_fed_taxes
WHERE e.salary_minus_fed_taxes > 70000;

--When Hive Can Avoid MapReduce


--Hive supports the classic SQL JOIN statement, but only equi-joins are supported.
--Hive also assumes that the last table in the query is the largest.
--It attempts to buffer the other tables and then stream the last table through,
--so you should structure your join queries so the largest table is last.

-- Daily closing price and dividend for AAPL, matched on date and symbol.
-- The STREAMTABLE hint names stocks (alias s) as the table to stream through
-- the join instead of relying on table order.
SELECT /*+ STREAMTABLE(s) */
    s.ymd,
    s.symbol,
    s.price_close,
    d.dividend
FROM stocks s
JOIN dividends d
    ON s.ymd = d.ymd
   AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';

-- Map-side join: the small table is loaded in the map phase so no reduce step
-- is needed for the join.
-- A table smaller than this threshold (in bytes) may be used as the small side.
-- The assignment needs the set keyword to be a valid statement.
set hive.mapjoin.smalltable.filesize=25000000;
-- NOTE(review): newer Hive releases may ignore the MAPJOIN hint unless
-- hive.ignore.mapjoin.hint is false. Confirm for the version in use.
SELECT /*+ MAPJOIN(d) */ s.ymd, s.symbol, s.price_close, d.dividend
FROM stocks s JOIN dividends d ON s.ymd = d.ymd AND s.symbol = d.symbol
WHERE s.symbol = 'AAPL';

-- Turn on the bucket map join optimization and its sorted-merge variant.
-- NOTE(review): these presumably only help when the joined tables are bucketed
-- (and, for the sorted-merge variant, sorted) on the join keys. Confirm the
-- table layout and whether hive.input.format must also be changed.
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;

--Using DISTRIBUTE BY ... SORT BY, or the shorthand CLUSTER BY clause, is a way to exploit
--the parallelism of SORT BY, yet achieve a total ordering across the output files.
--This method is better than using ORDER BY, which runs on just one reducer.

-- Queries that sample data.

-- Bucket sampling: rows are assigned to 10 buckets by rand() and bucket 3 is
-- returned, so the selected rows can differ from run to run.
SELECT *
FROM numbers TABLESAMPLE(BUCKET 3 OUT OF 10 ON rand()) s;

-- Block sampling: read roughly 0.1 percent of the input.
SELECT *
FROM numbersflat TABLESAMPLE(0.1 PERCENT) s;


-- Compact index on the country column of employees.
-- WITH DEFERRED REBUILD is required by Hive when creating an index. The index
-- starts empty and is populated later by an ALTER INDEX ... REBUILD statement.
-- The terminating semicolon keeps the statement from running into the text
-- that follows it.
-- NOTE(review): index support was removed in Hive 3.0. Confirm the target
-- version still accepts CREATE INDEX.
CREATE INDEX employees_index
ON TABLE employees (country)
AS 'org.apache.hadoop.hive.ql.index.compact.CompactIndexHandler'
WITH DEFERRED REBUILD;

hiveql basic的更多相关文章

  1. Atitit HTTP 认证机制基本验证 (Basic Authentication) 和摘要验证 (Digest Authentication)attilax总结

    Atitit HTTP认证机制基本验证 (Basic Authentication) 和摘要验证 (Digest Authentication)attilax总结 1.1. 最广泛使用的是基本验证 ( ...

  2. Basic Tutorials of Redis(9) -First Edition RedisHelper

    After learning the basic operations of Redis, we should take some time to summarize the usage. And I w ...

  3. Basic Tutorials of Redis(8) -Transaction

    Data play an important part in our project,how can we ensure correctness of the data and prevent the ...

  4. Basic Tutorials of Redis(7) -Publish and Subscribe

    This post is mainly about publishing and subscribing in Redis. I think you may subscribe some o ...

  5. Basic Tutorials of Redis(6) - List

    Redis's List is different from C#'s List,but similar with C#'s LinkedList.Sometimes I confuse with t ...

  6. Basic Tutorials of Redis(5) - Sorted Set

    The last post is mainly about the unsorted set,in this post I will show you the sorted set playing a ...

  7. Basic Tutorials of Redis(4) -Set

    This post will introduce you to some usages of Set in Redis. The Set is an unordered set, which means that ...

  8. Basic Tutorials of Redis(3) -Hash

    When you first saw the name of Hash,what do you think?HashSet,HashTable or other data structs of C#? ...

  9. Basic Tutorials of Redis(2) - String

    This post is mainly about how to use the commands to handle the Strings of Redis.And I will show you ...


  1. C#组态控件Iocomp应用案例

    Iocomp组件需要在vs2010环境下使用,目前用到的是4.04版本.在两个项目中用到了它,一个是锅炉监控系统,另一个是绝缘靴检测系统. 锅炉监测系统 这个节目基本都是使用Iocomp控件完成. 出 ...

  2. WPF 程序自删除(自毁)|卸载程序删除

    一般是在MainWindow_Closed 事件中调用批处理命令删除. 在借鉴别人的想法的基础上的算是改进. 自删除步骤: 1.删除文件 2.删除存放文件夹. 实现代码: private static ...

  3. MVC中视图View向控制器传值的方法

    MVC中视图View向控制器传值的方法步骤如下: 1.index页面: 页面中只需要一个触发事件的按钮

  4. 不可或缺 Windows Native (2) - C 语言: 常量,变量,基本数据类型

    [源码下载] 不可或缺 Windows Native (2) - C 语言: 常量,变量,基本数据类型 作者:webabcd 介绍不可或缺 Windows Native 之 C 语言 常量 变量 基本 ...

  5. JSChart_页面图形报表

    首先在页头的"head"中加上: $(document).ready(function() { //myData与colors变量  是做演示用的,可以直接赋值给myChart就可 ...

  6. Error generating Swagger server (Python Flask) from Swagger editor

    1down votefavorite ...

  7. OAUTH 协议介绍

    OAUTH 产生背景 随着互联网的深入发展,一些互联网巨头积累了海量的用户和数据.对于平台级软件厂商来说,用户的需求多种多样,变化万千 以一己之力予以充分满足,难免疲于本命.因此将数据以接口的形式开放 ...

  8. Step by step configuration of Outgoing Emails from SharePoint to Microsoft Online

    First of all your SharePoint server should be added to Microsoft online safe sender list, so that Sh ...

  9. This task is currently locked by a running workflow and cannot be edited

    转自: 转自: ...

  10. R语言学习笔记:因子

    R语言中的因子就是factor,用来表示分类变量(categorical variables),这类变量不能用来计算而只能用来分类或者计数. 可以排序的因子称为有序因子(ordered factor) ...