测试数据位于:/home/hadoop/luogankun/workspace/sync_data/pig

dept和emp表来自Oracle数据库自带的示例表

dept.txt

10      ACCOUNTING      NEW YORK
20 RESEARCH DALLAS
30 SALES CHICAGO
40 OPERATIONS BOSTON

emp.txt

7369    SMITH   CLERK   7902    1980-12-17      800.00          20
7499 ALLEN SALESMAN 7698 1981-2-20 1600.00 300.00 30
7521 WARD SALESMAN 7698 1981-2-22 1250.00 500.00 30
7566 JONES MANAGER 7839 1981-4-2 2975.00 20
7654 MARTIN SALESMAN 7698 1981-9-28 1250.00 1400.00 30
7698 BLAKE MANAGER 7839 1981-5-1 2850.00 30
7782 CLARK MANAGER 7839 1981-6-9 2450.00 10
7788 SCOTT ANALYST 7566 1987-4-19 3000.00 20
7839 KING PRESIDENT 1981-11-17 5000.00 10
7844 TURNER SALESMAN 7698 1981-9-8 1500.00 0.00 30
7876 ADAMS CLERK 7788 1987-5-23 1100.00 20
7900 JAMES CLERK 7698 1981-12-3 950.00 30
7902 FORD ANALYST 7566 1981-12-3 3000.00 20
7934 MILLER CLERK 7782 1982-1-23 1300.00 10

上传数据到HDFS系统中

cd /home/hadoop/luogankun/workspace/sync_data/pig
hadoop fs -put dept.txt input/pig/dept.txt
hadoop fs -put emp.txt input/pig/emp.txt

导入

CREATE TABLE TMP_TABLE(USER VARCHAR(32),AGE INT,IS_MALE BOOLEAN);

dept= LOAD 'input/pig/dept.txt' USING PigStorage('\t') AS (deptno:int,dname:chararray,loc:chararray);
emp = LOAD 'input/pig/emp.txt' USING PigStorage('\t') AS (empno:int,ename:chararray, job:chararray, mgr:int, hiredate:chararray,sal:double,comm:double,dept:int);

查看表结构

desc TMP_TABLE;
describe dept
dept: {deptno: int,dname: chararray,loc: chararray}
describe emp
emp: {empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double,comm: double,dept: int}

查询整张表

SELECT * FROM TMP_TABLE;
DUMP dept
(10,ACCOUNTING,NEW YORK)
(20,RESEARCH,DALLAS)
(30,SALES,CHICAGO)
(40,OPERATIONS,BOSTON)
DUMP emp
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)

查询前N条

SELECT * FROM TMP_TABLE LIMIT 10;
emp_table_limit = LIMIT emp 10;
DUMP emp_table_limit;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)

查询表中的某些列

SELECT USER FROM TMP_TABLE;
emp_table_empno_ename = FOREACH emp GENERATE empno, ename;
describe emp_table_empno_ename
emp_table_empno_ename: {empno: int,ename: chararray}
DUMP emp_table_empno_ename;
(7369,SMITH)
(7499,ALLEN)
(7521,WARD)
(7566,JONES)
(7654,MARTIN)
(7698,BLAKE)
(7782,CLARK)
(7788,SCOTT)
(7839,KING)
(7844,TURNER)
(7876,ADAMS)
(7900,JAMES)
(7902,FORD)
(7934,MILLER)

给列取别名

SELECT USER AS USER_NAME,AGE AS USER_AGE FROM TMP_TABLE;
emp_table_column_alias = FOREACH emp GENERATE empno AS id,ename AS name;
describe emp_table_column_alias
emp_table_column_alias: {id: int,name: chararray}
DUMP emp_table_column_alias
(7369,SMITH)
(7499,ALLEN)
(7521,WARD)
(7566,JONES)
(7654,MARTIN)
(7698,BLAKE)
(7782,CLARK)
(7788,SCOTT)
(7839,KING)
(7844,TURNER)
(7876,ADAMS)
(7900,JAMES)
(7902,FORD)
(7934,MILLER)

排序

SELECT * FROM TMP_TABLE ORDER BY AGE;
emp_table_order = ORDER emp BY empno ASC;
DUMP emp_table_order;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)

条件查询

SELECT * FROM TMP_TABLE WHERE AGE>20;
emp_table_where = FILTER emp by sal > 1500;
DUMP emp_table_where;
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20)

内连接Inner Join

SELECT * FROM TMP_TABLE A JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_inner_join = JOIN emp BY dept,dept BY deptno;
describe emp_table_inner_join
emp_table_inner_join:
{emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_inner_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)

左连接Left Join

SELECT * FROM TMP_TABLE A LEFT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_left_join = JOIN emp BY dept LEFT OUTER,dept BY deptno;
describe emp_table_left_join
emp_table_left_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_left_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)

右连接Right Join

SELECT * FROM TMP_TABLE A RIGHT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;
emp_table_right_join = JOIN emp BY dept RIGHT OUTER,dept BY deptno;
describe emp_table_right_join
emp_table_right_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_right_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(,,,,,,,,40,OPERATIONS,BOSTON)

全连接Full Join

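MySQL不支持FULL JOIN,通常用LEFT JOIN与RIGHT JOIN的UNION来模拟:
SELECT * FROM TMP_TABLE A LEFT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE UNION SELECT * FROM TMP_TABLE A RIGHT JOIN TMP_TABLE_2 B ON A.AGE=B.AGE;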
emp_table_full_join = JOIN emp BY dept FULL OUTER,dept BY deptno;
describe emp_table_full_join
emp_table_full_join: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_full_join;
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(,,,,,,,,40,OPERATIONS,BOSTON)

同时对多张表交叉查询

SELECT * FROM TMP_TABLE,TMP_TABLE_2;
emp_table_cross = CROSS emp,dept;
describe emp_table_cross
emp_table_cross: {emp::empno: int,emp::ename: chararray,emp::job: chararray,emp::mgr: int,
emp::hiredate: chararray,emp::sal: double,emp::comm: double,emp::dept: int,
dept::deptno: int,dept::dname: chararray,dept::loc: chararray}
DUMP emp_table_cross;
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,10,ACCOUNTING,NEW YORK)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,20,RESEARCH,DALLAS)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,30,SALES,CHICAGO)
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20,40,OPERATIONS,BOSTON)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,10,ACCOUNTING,NEW YORK)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,20,RESEARCH,DALLAS)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,30,SALES,CHICAGO)
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30,40,OPERATIONS,BOSTON)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,10,ACCOUNTING,NEW YORK)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,20,RESEARCH,DALLAS)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,30,SALES,CHICAGO)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30,40,OPERATIONS,BOSTON)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,10,ACCOUNTING,NEW YORK)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,20,RESEARCH,DALLAS)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,30,SALES,CHICAGO)
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20,40,OPERATIONS,BOSTON)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,10,ACCOUNTING,NEW YORK)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,20,RESEARCH,DALLAS)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,30,SALES,CHICAGO)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30,40,OPERATIONS,BOSTON)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,10,ACCOUNTING,NEW YORK)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,20,RESEARCH,DALLAS)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,30,SALES,CHICAGO)
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30,40,OPERATIONS,BOSTON)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,10,ACCOUNTING,NEW YORK)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,20,RESEARCH,DALLAS)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,30,SALES,CHICAGO)
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10,40,OPERATIONS,BOSTON)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,10,ACCOUNTING,NEW YORK)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,20,RESEARCH,DALLAS)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,30,SALES,CHICAGO)
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20,40,OPERATIONS,BOSTON)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,10,ACCOUNTING,NEW YORK)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,20,RESEARCH,DALLAS)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,30,SALES,CHICAGO)
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10,40,OPERATIONS,BOSTON)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,10,ACCOUNTING,NEW YORK)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,20,RESEARCH,DALLAS)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,30,SALES,CHICAGO)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30,40,OPERATIONS,BOSTON)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,10,ACCOUNTING,NEW YORK)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,20,RESEARCH,DALLAS)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,30,SALES,CHICAGO)
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20,40,OPERATIONS,BOSTON)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,10,ACCOUNTING,NEW YORK)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,20,RESEARCH,DALLAS)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,30,SALES,CHICAGO)
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30,40,OPERATIONS,BOSTON)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,10,ACCOUNTING,NEW YORK)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,20,RESEARCH,DALLAS)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,30,SALES,CHICAGO)
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20,40,OPERATIONS,BOSTON)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,10,ACCOUNTING,NEW YORK)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,20,RESEARCH,DALLAS)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,30,SALES,CHICAGO)
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10,40,OPERATIONS,BOSTON)

分组GROUP BY

SELECT * FROM TMP_TABLE GROUP BY IS_MALE;
emp_table_group = GROUP emp BY dept;
describe emp_table_group
emp_table_group: {
group: int,
emp: {
(empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double, comm: double,dept: int)
}
}
DUMP emp_table_group;
(10,{
(7839,KING,PRESIDENT,,1981-11-17,5000.0,,10),
(7782,CLARK,MANAGER,7839,1981-6-9,2450.0,,10),
(7934,MILLER,CLERK,7782,1982-1-23,1300.0,,10)})
(20,{
(7369,SMITH,CLERK,7902,1980-12-17,800.0,,20),
(7566,JONES,MANAGER,7839,1981-4-2,2975.0,,20),
(7876,ADAMS,CLERK,7788,1987-5-23,1100.0,,20),
(7902,FORD,ANALYST,7566,1981-12-3,3000.0,,20),
(7788,SCOTT,ANALYST,7566,1987-4-19,3000.0,,20)})
(30,{
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30),
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30),
(7900,JAMES,CLERK,7698,1981-12-3,950.0,,30),
(7698,BLAKE,MANAGER,7839,1981-5-1,2850.0,,30),
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30),
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)})

分组并统计

SELECT IS_MALE,COUNT(*) FROM TMP_TABLE GROUP BY IS_MALE;
emp_table_group_count = GROUP emp BY dept; describe emp_table_group_count
emp_table_group_count: {
group: int,
emp: {
(empno: int,ename: chararray,job: chararray,mgr: int,hiredate: chararray,sal: double,comm: double,dept: int
)
}
}
emp_table_group_count = FOREACH emp_table_group_count GENERATE group,COUNT($1);
describe emp_table_group_count
DUMP emp_table_group_count;
(10,3)
(20,5)
(30,6)
emp_table_group_count = FOREACH emp_table_group_count GENERATE emp.dept,COUNT($1);
describe emp_table_group_count
emp_table_group_count: {{(dept: int)},long}
DUMP emp_table_group_count;
({(10),(10),(10)},3)
({(20),(20),(20),(20),(20)},5)
({(30),(30),(30),(30),(30),(30)},6)

去重DISTINCT

SELECT DISTINCT IS_MALE FROM TMP_TABLE;
emp_table_distinct = FOREACH emp GENERATE dept;
describe emp_table_distinct
emp_table_distinct: {dept: int}
emp_table_distinct = DISTINCT emp_table_distinct;
describe emp_table_distinct
emp_table_distinct: {dept: int}
DUMP emp_table_distinct;
(10)
(20)
(30)

pig判空

select * from emp where comm is not null;
emp_table_where_null = FILTER emp by comm is not null;
DUMP emp_table_where_null;
(7499,ALLEN,SALESMAN,7698,1981-2-20,1600.0,300.0,30)
(7521,WARD,SALESMAN,7698,1981-2-22,1250.0,500.0,30)
(7654,MARTIN,SALESMAN,7698,1981-9-28,1250.0,1400.0,30)
(7844,TURNER,SALESMAN,7698,1981-9-8,1500.0,0.0,30)

pig和mysql脚本对比的更多相关文章

  1. [原] KVM 环境下MySQL性能对比

    KVM 环境下MySQL性能对比 标签(空格分隔): Cloud2.0 [TOC] 测试目的 对比MySQL在物理机和KVM环境下性能情况 压测标准 压测遵循单一变量原则,所有的对比都是只改变一个变量 ...

  2. MongoDB(五)mongo语法和mysql语法对比学习

    我们总是在对比中看到自己的优点和缺点,对于mongodb来说也是一样,对比学习让我们尽快的掌握关于mongodb的基础知识. mongodb与MySQL命令对比 关系型数据库一般是由数据库(datab ...

  3. mongodb与mysql命令对比

    mongodb与mysql命令对比 传统的关系数据库一般由数据库(database).表(table).记录(record)三个层次概念组成,MongoDB是由数据库(database).集合(col ...

  4. cmd执行mssql脚本或者执行mysql脚本

    private static int ExecuteMSSql(DbInfo db, string sqlPath) { Console.WriteLine("=============== ...

  5. 聚集索引、非聚集索引、聚集索引组织表、堆组织表、Mysql/PostgreSQL对比、联合主键/自增长、InnoDB/MyISAM(引擎方面另开一篇)

    参考了多篇文章,分别记录,如下. 下面是第一篇的总结 http://www.jb51.net/article/76007.htm: 在MySQL中,InnoDB引擎表是(聚集)索引组织表(cluste ...

  6. LoadRunner利用ODBC编写MySql脚本

    最近做了几周的LoadRunner测试,有一些心得,记录下来,以便以后查找. LoadRunner测试数据库是模拟客户端去连接数据库服务器,因此,需要协议(或者说驱动的支持).LoadRunner本身 ...

  7. 数据市中心全省中国mysql脚本

    1.查尔斯省 watermark/2/text/aHR0cDovL2Jsb2cuY3Nkbi5uZXQvd2h6aGFvY2hhbw==/font/5a6L5L2T/fontsize/400/fill ...

  8. Rehat一键安装mysql脚本和备份数据库脚本

    Rehat一键安装mysql脚本 ##说明:适用,Rehat 5 6 7 1.运行状态,运行成功输出mysql临时密码 2.代码如下 #!/bin/bash #获取系统信息 sudo cat /etc ...

  9. MongoDB批量操作及与MySQL效率对比

    本文主要通过批量与非批量对比操作的方式介绍MongoDB的bulkWrite()方法的使用.顺带与关系型数据库MySQL进行对比,比较这两种不同类型数据库的效率.如果只是想学习bulkWrite()的 ...

随机推荐

  1. putc,fputc,和putchar

    putc()功能和fputc()差不多,一个是宏,一个是函数 putc(int ch,FILE *fp),即将字符ch输出到fp所指的文件中:putchar(char ch),即将字符ch输出到标准输 ...

  2. JAVA正则表达式-捕获组与非捕获组

    Java捕获组与非捕获组的问题 先看例子: import java.util.regex.Matcher; import java.util.regex.Pattern; public class P ...

  3. HDU 1251:统计难题

    题目链接:http://acm.hdu.edu.cn/showproblem.php?pid=1251 题意不难理解,就是先输入不知道多少个字符串,然后用一个空行结束这个输入,然后接下来不知道多少行再 ...

  4. <frameset>框架集中不同<frame>之间的调用【js代码中】

    top:永远指分割窗口最高层次的浏览器窗口;parent:包含当前分割窗口的父窗口,本文将围绕js中top.parent.frame进行讲述及他们的应用案例 引用方法top: 该变量永远指分割窗口最高 ...

  5. 偶尔用得上的MySQL操作

    数据库编码 查看数据库编码 use xxx show variables like 'character_set_database'; 切换数据库编码 alter database xxx CHARA ...

  6. Math类的学习 java 类库 API 文档学习

  7. C经典案例

    1. C中可变参数函数作为函数参数: void media_debug_set_handler(struct media_device *media, void (*debug_handler)(vo ...

  8. FastAdmin 插件的 Git 开发流程(简明)

    FastAdmin 插件的 Git 开发流程(简明) cms zip 安装 包安装 删除 addons 里的 cms 使用 mklink 软链接到 cms 插件 Git 仓库 修改 cms 插件 gi ...

  9. Windows nginx php cgi-fcgi 配置 xdebug

    之前使用的是 Apache + PHP,不用怎么配置就可以. 由于服务器用的是 nginx,为了和服务器一致,所以本地开发也改为 nginx. 开始只是简单的开启 xdebug, 发现并不行. 找了一 ...

  10. saltops 安装及相关环境安装

    本次布署测试环境 阿里云 Centos 7.3 1.安装nginx,这里采用yum 安装方式 A.yum install nginx B.创建开机启动 systemctl enable nginx.s ...