转:http://blog.itpub.net/30089851/viewspace-2122586/

一.Hive订单数据仓库构建

1. 创建事实表并插入数据

-- Fact table of the order star schema.
-- Each row carries the keys of the day, product, salesperson and customer
-- dimensions plus three additive measures (quantity, order amount, cost).
-- time_key holds the order date as a yyyy-MM-dd string.
DROP TABLE IF EXISTS default.fact_order;

CREATE TABLE default.fact_order (
  time_key         STRING,
  product_key      STRING,
  salesperson_key  STRING,
  custom_key       STRING,
  quantity_ordered BIGINT,
  order_dollars    BIGINT,
  cost_dollars     BIGINT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- OVERWRITE replaces the current table contents, so this script can be re-run.
LOAD DATA LOCAL INPATH '/root/kylinsample/fact_order.txt' OVERWRITE INTO TABLE default.fact_order;

-- Append variant (keeps the existing rows instead of replacing them):
-- LOAD DATA LOCAL INPATH '/root/kylinsample/fact_order.txt' INTO TABLE default.fact_order;

fact_order.txt

2016-05-01,pd001,sp001,ct001,100,2000,1000
2016-05-01,pd001,sp002,ct002,100,2000,1000
2016-05-01,pd001,sp003,ct002,100,2000,1000
2016-05-01,pd002,sp002,ct002,100,2000,1000
2016-05-01,pd003,sp003,ct001,100,2000,1000
2016-05-01,pd001,sp003,ct001,100,2000,1000
2016-05-01,pd001,sp002,ct001,100,2000,1000
2016-05-01,pd001,sp003,ct002,100,2000,1000
2016-05-01,pd002,sp001,ct001,100,2000,1000
2016-05-01,pd003,sp001,ct001,100,2000,1000
2016-05-01,pd004,sp001,ct001,50,1000,600
2016-05-02,pd001,sp001,ct001,50,1000,600
2016-05-02,pd001,sp002,ct002,100,2000,1000
2016-05-02,pd001,sp003,ct002,100,2000,1000
2016-05-02,pd002,sp001,ct001,50,1000,600
2016-05-02,pd003,sp001,ct001,50,1000,600
2016-05-02,pd004,sp001,ct001,50,1000,600
2016-05-03,pd001,sp001,ct001,50,1000,600
2016-05-03,pd001,sp002,ct002,100,2000,1000
2016-05-03,pd001,sp003,ct002,100,2000,1000
2016-05-04,pd002,sp001,ct001,700,14000,10000
2016-05-04,pd003,sp001,ct001,700,14000,10000
2016-05-04,pd004,sp001,ct001,100,2000,1000
2016-05-05,pd001,sp001,ct001,100,2000,1000
2016-05-05,pd001,sp002,ct002,700,14000,10000
2016-05-05,pd001,sp003,ct002,700,14000,10000
2016-05-05,pd002,sp001,ct001,100,2000,1000
2016-05-05,pd003,sp001,ct001,100,2000,1000
2016-05-05,pd004,sp001,ct001,100,2000,1000
2016-05-06,pd001,sp001,ct001,100,2000,1000
2016-05-06,pd001,sp002,ct002,100,2000,1000
2016-05-06,pd001,sp003,ct002,100,2000,1000
2016-05-07,pd002,sp001,ct001,100,2000,1000
2016-05-07,pd003,sp001,ct001,100,2000,1000
2016-05-07,pd004,sp001,ct001,50,1000,600
2016-05-07,pd002,sp001,ct001,100,2000,1000
2016-05-07,pd003,sp001,ct001,100,2000,1000
2016-05-07,pd004,sp001,ct001,50,1000,600
2016-05-08,pd001,sp001,ct001,50,1000,600
2016-05-08,pd001,sp002,ct002,100,2000,1000
2016-05-08,pd001,sp003,ct002,100,2000,1000
2016-05-08,pd001,sp001,ct001,50,1000,600
2016-05-08,pd001,sp002,ct002,100,2000,1000
2016-05-08,pd001,sp003,ct002,100,2000,1000
2016-05-08,pd001,sp001,ct001,50,1000,600
2016-05-08,pd001,sp002,ct002,100,2000,1000
2016-05-08,pd001,sp003,ct002,100,2000,1000
2016-05-09,pd002,sp001,ct001,50,1000,600
2016-05-09,pd003,sp001,ct001,50,1000,600
2016-05-09,pd004,sp001,ct001,50,1000,600
2016-05-09,pd001,sp001,ct001,50,1000,600
2016-05-09,pd002,sp001,ct001,50,1000,600
2016-05-09,pd003,sp001,ct001,50,1000,600
2016-05-09,pd004,sp001,ct001,50,1000,600
2016-05-09,pd001,sp001,ct001,50,1000,600
2016-05-09,pd001,sp002,ct002,100,2000,1000
2016-05-09,pd004,sp003,ct002,100,2000,1000
2016-05-09,pd002,sp001,ct001,700,14000,10000
2016-05-09,pd003,sp003,ct001,700,14000,10000
2016-05-09,pd004,sp003,ct001,100,2000,1000
2016-05-10,pd001,sp001,ct001,100,2000,1000
2016-05-10,pd001,sp002,ct002,700,14000,10000
2016-05-10,pd001,sp003,ct002,700,14000,10000
2016-05-10,pd002,sp001,ct001,100,2000,1000
2016-05-11,pd003,sp003,ct001,100,2000,1000
2016-05-11,pd004,sp001,ct001,100,2000,1000
2016-05-12,pd001,sp001,ct001,100,2000,1000
2016-05-12,pd004,sp002,ct002,100,2000,1000
2016-05-12,pd001,sp003,ct002,100,2000,1000
2016-05-12,pd001,sp001,ct001,100,2000,1000
2016-05-12,pd004,sp002,ct002,100,2000,1000
2016-05-12,pd001,sp003,ct002,100,2000,1000
2016-05-13,pd002,sp001,ct001,100,2000,1000
2016-05-13,pd003,sp001,ct001,100,2000,1000
2016-05-13,pd004,sp001,ct001,50,1000,600
2016-05-14,pd001,sp001,ct001,50,1000,600
2016-05-14,pd001,sp002,ct002,100,2000,1000
2016-05-14,pd001,sp003,ct002,100,2000,1000
2016-05-15,pd002,sp001,ct001,50,1000,600
2016-05-15,pd003,sp001,ct001,50,1000,600
2016-05-15,pd004,sp001,ct001,50,1000,600
2016-05-15,pd002,sp001,ct001,50,1000,600
2016-05-15,pd003,sp001,ct001,50,1000,600
2016-05-15,pd004,sp001,ct001,50,1000,600
2016-05-15,pd002,sp001,ct001,50,1000,600
2016-05-15,pd003,sp001,ct001,50,1000,600
2016-05-15,pd004,sp001,ct001,50,1000,600
2016-05-16,pd001,sp001,ct001,50,1000,600
2016-05-16,pd001,sp002,ct002,100,2000,1000
2016-05-16,pd001,sp003,ct002,100,2000,1000
2016-05-16,pd001,sp001,ct001,50,1000,600
2016-05-16,pd001,sp002,ct002,100,2000,1000
2016-05-16,pd001,sp003,ct002,100,2000,1000
2016-05-17,pd002,sp001,ct001,700,14000,10000
2016-05-17,pd003,sp001,ct001,700,14000,10000
2016-05-17,pd004,sp001,ct001,100,2000,1000
2016-05-17,pd002,sp001,ct001,700,14000,10000
2016-05-17,pd003,sp001,ct001,700,14000,10000
2016-05-17,pd004,sp001,ct001,100,2000,1000
2016-05-18,pd001,sp001,ct001,100,2000,1000
2016-05-18,pd003,sp002,ct001,700,14000,10000
2016-05-18,pd001,sp003,ct002,700,14000,10000
2016-05-19,pd002,sp001,ct001,100,2000,1000
2016-05-19,pd003,sp001,ct002,100,2000,1000
2016-05-20,pd001,sp001,ct001,100,2000,1000
2016-05-20,pd002,sp002,ct002,100,2000,1000
2016-05-20,pd003,sp003,ct001,100,2000,1000
2016-05-20,pd004,sp001,ct001,100,2000,1000
2016-05-20,pd001,sp002,ct002,100,2000,1000
2016-05-20,pd002,sp001,ct002,100,2000,1000

2. 创建天维度表dim_day

-- Day dimension table: day key, full date, month, quarter and year labels.
-- All columns are stored as comma-delimited text.
DROP TABLE IF EXISTS default.dim_day;

CREATE TABLE default.dim_day (
  day_key    STRING,
  full_day   STRING,
  month_name STRING,
  quarter    STRING,
  year       STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- OVERWRITE replaces any rows already present in the table.
LOAD DATA LOCAL INPATH '/root/kylinsample/dim_day.txt'
OVERWRITE INTO TABLE default.dim_day;

dim_day.txt
  
2016-05-01,2016-05-01,201605,2016q2,2016
2016-05-02,2016-05-02,201605,2016q2,2016
2016-05-03,2016-05-03,201605,2016q2,2016
2016-05-04,2016-05-04,201605,2016q2,2016
2016-05-05,2016-05-05,201605,2016q2,2016
2016-05-06,2016-05-06,201605,2016q2,2016
2016-05-07,2016-05-07,201605,2016q2,2016
2016-05-08,2016-05-08,201605,2016q2,2016
2016-05-09,2016-05-09,201605,2016q2,2016
2016-05-10,2016-05-10,201605,2016q2,2016
2016-05-11,2016-05-11,201605,2016q2,2016
2016-05-12,2016-05-12,201605,2016q2,2016
2016-05-13,2016-05-13,201605,2016q2,2016
2016-05-14,2016-05-14,201605,2016q2,2016
2016-05-15,2016-05-15,201605,2016q2,2016
2016-05-16,2016-05-16,201605,2016q2,2016
2016-05-17,2016-05-17,201605,2016q2,2016
2016-05-18,2016-05-18,201605,2016q2,2016
2016-05-19,2016-05-19,201605,2016q2,2016
2016-05-20,2016-05-20,201605,2016q2,2016

3. 创建售卖员维度表 dim_salesperson
 
-- Salesperson dimension table: key, name, id and sales region of each salesperson.
-- All columns are stored as comma-delimited text.
DROP TABLE IF EXISTS default.dim_salesperson;

CREATE TABLE default.dim_salesperson (
  salesperson_key STRING,
  salesperson     STRING,
  salesperson_id  STRING,
  region          STRING,
  region_code     STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- OVERWRITE replaces any rows already present in the table.
LOAD DATA LOCAL INPATH '/root/kylinsample/dim_salesperson.txt'
OVERWRITE INTO TABLE default.dim_salesperson;
  
dim_salesperson.txt
  
sp001,hongbin,sp001,beijing,10086
sp002,hongming,sp002,beijing,10086
sp003,hongmei,sp003,beijing,10086

4. 创建客户维度表 dim_custom

-- Customer dimension table: key, name, id, billing details and industry.
-- All columns are stored as comma-delimited text.
-- NOTE(review): custorm_id looks like a misspelling of custom_id. It is kept
-- unchanged here because renaming the column would change the table schema.
DROP TABLE IF EXISTS default.dim_custom;

CREATE TABLE default.dim_custom (
  custom_key         STRING,
  custom_name        STRING,
  custorm_id         STRING,
  headquarter_states STRING,
  billing_address    STRING,
  billing_city       STRING,
  billing_state      STRING,
  industry_name      STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- OVERWRITE replaces any rows already present in the table.
LOAD DATA LOCAL INPATH '/root/kylinsample/dim_custom.txt'
OVERWRITE INTO TABLE default.dim_custom;

dim_custom.txt
  
ct001,custom_john,ct001,beijing,zgx-beijing,beijing,beijing,internet                    
ct002,custom_herry,ct002,henan,shlinjie,shangdang,henan,internet     
 
 
5. 创建产品维度表并插入数据
 
-- Product dimension table: key, name, id, description, SKU, brand and category.
-- All columns are stored as comma-delimited text.
DROP TABLE IF EXISTS default.dim_product;

CREATE TABLE default.dim_product (
  product_key   STRING,
  product_name  STRING,
  product_id    STRING,
  product_desc  STRING,
  sku           STRING,
  brand         STRING,
  brand_code    STRING,
  brand_manager STRING,
  category      STRING,
  category_code STRING
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- OVERWRITE replaces any rows already present in the table.
LOAD DATA LOCAL INPATH '/root/kylinsample/dim_product.txt'
OVERWRITE INTO TABLE default.dim_product;
dim_product.txt
  
pd001,Box-Large,pd001,Box-Large-des,large1.0,brand001,brandcode001,brandmanager001,Packing,cate001
pd002,Box-Medium,pd002,Box-Medium-des,medium1.0,brand001,brandcode001,brandmanager001,Packing,cate001
pd003,Box-small,pd003,Box-small-des,small1.0,brand001,brandcode001,brandmanager001,Packing,cate001
pd004,Evelope,pd004,Evelope_des,large3.0,brand001,brandcode001,brandmanager001,Pens,cate002

这样一个星型的结构表在hive中创建完毕, 实际上一个离线的数据仓库已经完成, 它包含一个主题, 即商品订单.

三.Kylin的Project创建与数据同步
  1.单击"Manage Project" 
  2.单击"New Project"
  3.输入"Project Name", WareHouse_01
  4.Submit

  1.选择WareHouse_01,选择"Data Source" tab页
  2.单击"Load Hive Table"
  3.输入需要同步的表
      "DEFAULT.FACT_ORDER,DEFAULT.DIM_DAY,DEFAULT.DIM_PRODUCT,DEFAULT.DIM_SALESPERSON,DEFAULT.DIM_CUSTOM"
  4.Sync

四.Kylin的Model创建
  1.选择"Models" tab页,单击"New Model"
  2."Model Name"输入,WareHouse_01_Model
  3.选择"Fact Table"为 DEFAULT.FACT_ORDER;再 添加Lookup Table;
  4.选取每张表的哪些列字段作为Dimensions
     ID Table Name           Columns
     1.DEFAULT.FACT_ORDER  TIME_KEY PRODUCT_KEY SALESPERSON_KEY CUSTOM_KEY
     2.DEFAULT.DIM_DAY          FULL_DAY
     3.DEFAULT.DIM_PRODUCT  PRODUCT_NAME
     4.DEFAULT.DIM_SALESPERSON  SALESPERSON
     5.DEFAULT.DIM_CUSTOM  CUSTOM_NAME

  5.选取DEFAULT.FACT_ORDER表的哪些列字段作为measures
        QUANTITY_ORDERED ORDER_DOLLARS COST_DOLLARS

  6.a.选取 "Partition Date Column"为DEFAULT.FACT_ORDER.TIME_KEY,格式 yyyy-MM-dd
    b.对于"Filter"条件,由于没有要过滤的条件,故不填写

  7.Save

五.Kylin的Cube创建

  1.选择"Models" tab页,单击"New Cube"

  2.Cube Info:
        "Model Name"选择,WareHouse_01_Model
        "Cube Name"输入,cube01

  3.Dimensions:
        单击"Auto Generator",依据情况选择维度的列,全选

  4.Measures:
    a.单击"+Measure",添加要聚合计算的度量,比如 sum(QUANTITY_ORDERED)
    b.Expression: SUM/MIN/MAX/COUNT/COUNT_DISTINCT/TOP_N/RAW
  5.Refresh Setting:
    a.Auto Merge Thresholds,自动合并阈值,7~28 days
    b.Retention Threshold,保留天数,60
    c.Partition Start Date,非常重要,是后面build cube的开始日期

  6.Advanced Setting:
    --Aggregation Groups:
    a.Includes: TIME_KEY ,PRODUCT_KEY ,SALESPERSON_KEY , CUSTOM_KEY
    b.Mandatory Dimensions: TIME_KEY
    c.Hierarchy Dimensions: PRODUCT_KEY ,SALESPERSON_KEY ,CUSTOM_KEY
    d.Joint Dimensions: 无
    
    --Rowkeys:
    TIME_KEY ,PRODUCT_KEY ,SALESPERSON_KEY ,CUSTOM_KEY 4个字段为dict字典编码
 
  7.Configuration Overwrites: 无

  8.Overview:
    保存cube

六.Cube Build

  1.选择 cube01,单击"Action",选择Build

  2.填写End Date,Submit

  3.单击"Monitor",观察Job

  4.等待Progress达到100%(如有报错,查看Job的错误信息)

  5.返回cube01,查看 cube size 和 Source Records等字段更新

七.Hive 与 Kylin 查询对比

  点击(此处)折叠或打开

  -- Query 1: daily order quantity, order amount and order cost
  -- for 2016-05-01 through 2016-05-15.

  -- Hive version (reported run time: 65.816 s)
  SELECT
      fact.time_key,
      SUM(fact.quantity_ordered),
      SUM(fact.order_dollars),
      SUM(fact.cost_dollars)
  FROM fact_order AS fact
  WHERE fact.time_key >= "2016-05-01"
    AND fact.time_key <= "2016-05-15"
  GROUP BY fact.time_key
  ORDER BY fact.time_key;

  -- Kylin version (reported run time: 0.32s-->0.27s)
  -- Same aggregation, with single-quoted literals and no trailing semicolon,
  -- as in the original article.
  SELECT
      fact.time_key,
      SUM(fact.quantity_ordered),
      SUM(fact.order_dollars),
      SUM(fact.cost_dollars)
  FROM fact_order AS fact
  WHERE fact.time_key BETWEEN '2016-05-01' AND '2016-05-15'
  GROUP BY fact.time_key
  ORDER BY fact.time_key

  点击(此处)折叠或打开

  -- Query 2: quantity ordered per product per day
  -- for 2016-05-01 through 2016-05-15.

  -- Hive version (reported run time: 100.336s)
  SELECT
      dday.full_day,
      dp.product_name,
      SUM(fact.quantity_ordered)
  FROM fact_order AS fact
  INNER JOIN dim_day AS dday
      ON fact.time_key = dday.day_key
  INNER JOIN dim_product AS dp
      ON fact.product_key = dp.product_key
  WHERE dday.full_day >= "2016-05-01"
    AND dday.full_day <= "2016-05-15"
  GROUP BY dday.full_day, dp.product_name
  ORDER BY dday.full_day, dp.product_name;

  -- Kylin version (reported run time: 0.93s-->0.39s)
  -- Same joins and aggregation, with single-quoted literals and no trailing
  -- semicolon, as in the original article.
  SELECT
      dday.full_day,
      dp.product_name,
      SUM(fact.quantity_ordered)
  FROM fact_order AS fact
  INNER JOIN dim_day AS dday
      ON fact.time_key = dday.day_key
  INNER JOIN dim_product AS dp
      ON fact.product_key = dp.product_key
  WHERE dday.full_day >= '2016-05-01'
    AND dday.full_day <= '2016-05-15'
  GROUP BY dday.full_day, dp.product_name
  ORDER BY dday.full_day, dp.product_name

 

Apache Kylin1.5.2.1之订单案例详细构建流程的更多相关文章

  1. kylin2.4.1订单案例详细构建流程

    一.Hive订单数据仓库构建: hive表创建可以在命令行中直接完成,也可以在Hue中完成,本文在Hue中的完成,如下图: 下文的样例文本文件下载地址:https://files-cdn.cnblog ...

  2. Apache Hadoop 2.9.2 的归档案例剖析

    Apache Hadoop 2.9.2 的归档案例剖析 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任.   能看到这篇文章说明你对NameNode 工作原理是有深入的理解啦!我们知道 ...

  3. Httpd服务进阶知识-基于Apache Modele的LAMP架构之WordPress案例

    Httpd服务进阶知识-基于Apache Modele的LAMP架构之WordPress案例 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任. 一.安装依赖包及数据库授权 博主推荐阅读 ...

  4. Httpd服务进阶知识-基于Apache Modele的LAMP架构之PhpMyAdmin案例

    Httpd服务进阶知识-基于Apache Modele的LAMP架构之PhpMyAdmin案例 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任. 一.常见LAMP应用 PhpMyAdm ...

  5. JavaWeb完整案例详细步骤

    JavaWeb完整案例详细步骤 废话少说,展示完整案例 代码的业务逻辑图 主要实现功能 基本的CURD.分页查询.条件查询.批量删除 所使用的技术 前端:Vue+Ajax+Elememt-ui 后端: ...

  6. Robot Framework--05 案例设计之流程与数据分离

    转自:http://blog.csdn.net/tulituqi/article/details/7651049 这一讲主要说一下案例设计了.还记得我们前面做的case么?先打开浏览器访问百度,输入关 ...

  7. 第3章 文件I/O(8)_贯穿案例:构建标准IO函数库

    9. 贯穿案例:构建标准IO函数库 //mstdio.h #ifndef __MSTDIO_H__ #define __MSTDIO_H__ #include <unistd.h> #de ...

  8. 全网最详细Apache Kylin1.5安装(单节点)和测试案例

    转:http://blog.itpub.net/30089851/viewspace-2121221/ 微视频链接: Apache Kylin初识      1.版本(当前实验版本组合,版本一定要兼容 ...

  9. Httpd服务进阶知识-基于Apache Modele的LAMP架构之Discuz!案例

    Httpd服务进阶知识-基于Apache Modele的LAMP架构之Discuz!论坛案例 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任. 一.安装依赖包及数据库  博主推荐阅读: ...

随机推荐

  1. 【JMeter】如何优雅的写脚本

    cc给发的视频链接: http://v.youku.com/v_show/id_XMzA4Mjg1ODA0MA==.html?spm=a2h3j.8428770.3416059.1 ————————— ...

  2. Java学习之路-Burlap学习

    今天我们来学一下Burlap. Burlap是一种基于XML远程调用技术,但与其他基于XML的远程技术(例如SOAP或者XML-RPC)不同,Burlap的消息结构尽可能的简单,不需要额外的外部定义语 ...

  3. js-jquery-对象与JSON字符串互相转换

    1:jQuery插件支持的转换方式 代码如下: String→Object$.parseJSON( jsonstr ); //jQuery.parseJSON(jsonstr),可以将json字符串转 ...

  4. 你知道Windows和WordPress上帝模式吗?

    一.Windows 上帝模式 这个玩意出来很久很久了,估计不用多说,知道的同学还是挺多的,不知道的也只要百度一下,你就知道了. 方法很简单,在 Windows 系统任何地方新建一个文件夹,如下命名即可 ...

  5. mysql数据库的初始化及相关配置

    接着上篇文章我们继续探讨在安装完mysq数据库之后的一些相关配置: 一.mysql数据库的初始化 我们在安装完mysql数据库以后,会发现会多出一个mysqld的服务,这个就是咱们的数据库服务,我们通 ...

  6. 微信小程序----团购或秒杀的批量倒计时实现

    效果图 实现思路微信小程序实现倒计时,可以将倒计时的时间进行每一秒的计算和渲染! JS模拟商品列表数据 goodsList:在 onLoad 周期函数中对活动结束时间进行提取:建立时间格式化函数 ti ...

  7. 尝试.Net Core—使用.Net Core + Entity FrameWork Core构建WebAPI(一)

    想尝试.Net Core很久了,一直没有时间,今天回家,抛开一切,先搭建一个.Net Core的Demo出来玩玩. 废话少说,咱直奔主题: 一.开发环境 VS2015 Update3 Microsof ...

  8. [LeetCode] 114. Flatten Binary Tree to Linked List_Medium tag: DFS

    Given a binary tree, flatten it to a linked list in-place. For example, given the following tree: 1 ...

  9. [LeetCode] 1. Two Sum_Easy tag: Hash Table

    Given an array of integers, return indices of the two numbers such that they add up to a specific ta ...

  10. [LeetCode] 717. 1-bit and 2-bit Characters_Easy

    We have two special characters. The first character can be represented by one bit 0. The second char ...