KingbaseES 的行列转换
电子表格 Excel 中的数据透视表,能够快速汇总列表中的数据,把很多行的流水数据表格变成二维汇总表格;之后使用 PowerQuery,还可以再把汇总表格变回流水数据表格。
在 KingbaseES 数据库中,使用 SQL 查询语句,同样可以实现数据在流水样式与汇总样式之间的转换。
-- Sample data in "long" form: one row per (year, student, subject) score.
-- Some (student, subject) pairs are absent on purpose: 李四 has no 数学 rows
-- and 王五 has no 化学 rows, so pivoted results contain empty cells.
drop view if exists testscore_view;
create or replace view testscore_view (year, student, subject, testscore) as
values
    -- 2020
    (2020, '张三', '语文', 76),
    (2020, '张三', '化学', 83),
    (2020, '张三', '物理', 69),
    (2020, '张三', '数学', 49),
    (2020, '张三', '英语', 63),
    (2020, '李四', '语文', 48),
    (2020, '李四', '化学', 48),
    (2020, '李四', '物理', 77),
    (2020, '李四', '英语', 38),
    (2020, '王五', '语文', 88),
    (2020, '王五', '物理', 45),
    (2020, '王五', '数学', 92),
    (2020, '王五', '英语', 56),
    -- 2021
    (2021, '张三', '语文', 55),
    (2021, '张三', '化学', 73),
    (2021, '张三', '物理', 87),
    (2021, '张三', '数学', 41),
    (2021, '张三', '英语', 52),
    (2021, '李四', '语文', 87),
    (2021, '李四', '化学', 91),
    (2021, '李四', '物理', 39),
    (2021, '李四', '英语', 54),
    (2021, '王五', '语文', 80),
    (2021, '王五', '物理', 70),
    (2021, '王五', '数学', 46),
    (2021, '王五', '英语', 29);
-- Lookup view of the three students; sn is a sequence number per student.
drop view if exists student_view;
create view student_view as
select sn, student
from (values (1, '张三'),
             (2, '李四'),
             (3, '王五')) as student_rows (sn, student);
-- Lookup view of the five subjects; sn is a sequence number per subject
-- (语文 = 1, 化学 = 2, 物理 = 3, 数学 = 4, 英语 = 5).
drop view if exists subject_view;
create view subject_view as
select sn, subject
from (values (1, '语文'),
             (2, '化学'),
             (3, '物理'),
             (4, '数学'),
             (5, '英语')) as subject_rows (sn, subject);
-- Lookup view of years; it includes 2022, for which testscore_view has no rows.
drop view if exists year_view;
create view year_view as
select year
from (values (2020),
             (2021),
             (2022)) as year_rows (year);
通常在SQL的select list中,使用CASE语句,建立二维报表。
-- Rows-to-columns with conditional aggregation: one output row per student,
-- one column per subject. Each CASE yields NULL for rows of other subjects,
-- and avg() ignores NULLs, so every column averages only its own subject.
-- round() is applied because the sample output for this query shows whole
-- numbers (e.g. 65.5 is displayed as 66).
select student,
       round(avg(case when subject = '语文' then testscore end)) as "语文",
       round(avg(case when subject = '化学' then testscore end)) as "化学",
       round(avg(case when subject = '物理' then testscore end)) as "物理",
       round(avg(case when subject = '数学' then testscore end)) as "数学",
       round(avg(case when subject = '英语' then testscore end)) as "英语"
from testscore_view
group by student;
student | 语文 | 化学 | 物理 | 数学 | 英语
张三 | 66 | 78 | 78 | 45 | 58
王五 | 84 | | 58 | 69 | 43
李四 | 68 | 70 | 58 | | 46
(3 行记录)
- 字符分割
-- Rows-to-columns via string packing:
--   1. innermost query: average score for every (student, subject) pair; the
--      cross join + left join keeps pairs that have no score (val is NULL);
--   2. middle query: pack each student's pairs into 'subject:val,...',
--      ordered by subject_view.sn so position N always holds subject N;
--   3. outer query: cut element N out with split_part and keep the value part.
-- coalesce() keeps a placeholder for missing scores: a NULL element would be
-- skipped by string_agg and shift every later position. nullif() turns the
-- resulting empty string back into NULL.
select student,
       nullif(split_part(split_part(subscr_spl, ',', 1), ':', 2), '') as "语文",
       nullif(split_part(split_part(subscr_spl, ',', 2), ':', 2), '') as "化学",
       nullif(split_part(split_part(subscr_spl, ',', 3), ':', 2), '') as "物理",
       nullif(split_part(split_part(subscr_spl, ',', 4), ':', 2), '') as "数学",
       nullif(split_part(split_part(subscr_spl, ',', 5), ':', 2), '') as "英语"
from (select student,
             min(student_sn) as student_sn,
             string_agg(subject || ':' || coalesce(val::text, ''), ','
                        order by subject_sn) as subscr_spl
      from (select student_view.student,
                   subject_view.subject,
                   min(student_view.sn) as student_sn,
                   min(subject_view.sn) as subject_sn,
                   avg(testscore)::numeric(10, 2) as val
            from (student_view cross join subject_view)
                     left join testscore_view using (student, subject)
            group by student_view.student, subject_view.subject) std
      group by student
     ) as t
order by student_sn;
- 数组
-- Rows-to-columns via arrays: collect each student's per-subject averages into
-- an array and read the columns back by subscript.
-- The cross join + left join yields an element for every subject (NULL when
-- the student has no score), and array_agg is ordered by subject_view.sn, so
-- subscript N always refers to the subject whose sn is N
-- (1 = 语文, 2 = 化学, 3 = 物理, 4 = 数学, 5 = 英语).
select student,
       subscr_arr[1] as "语文",
       subscr_arr[2] as "化学",
       subscr_arr[3] as "物理",
       subscr_arr[4] as "数学",
       subscr_arr[5] as "英语"
from (select student,
             min(student_sn) as student_sn,
             array_agg(val order by subject_sn) as subscr_arr
      from (select student_view.student,
                   subject_view.subject,
                   min(student_view.sn) as student_sn,
                   min(subject_view.sn) as subject_sn,
                   avg(testscore)::numeric(10, 2) as val
            from (student_view cross join subject_view)
                     left join testscore_view using (student, subject)
            group by student_view.student, subject_view.subject) std
      group by student
     ) as t
order by student_sn;
- JSON数据格式
-- Rows-to-columns via JSON: build one {subject: average} object per student,
-- then extract each subject by key. ->> returns the value as text and yields
-- NULL when the key is absent (a subject the student has no score for).
with subject_avg as (
    -- average score per (student, subject)
    select student,
           subject,
           avg(testscore)::numeric(10, 2) as val
    from testscore_view
    group by student, subject
),
student_json as (
    -- one JSON object per student, keyed by subject
    select student,
           json_object_agg(subject, val) as subscr_json
    from subject_avg
    group by student
)
select student,
       subscr_json ->> '语文' as "语文",
       subscr_json ->> '化学' as "化学",
       subscr_json ->> '物理' as "物理",
       subscr_json ->> '数学' as "数学",
       subscr_json ->> '英语' as "英语"
from student_json;
扩展 tablefunc 中的 crosstab 函数,用来生成pivot 展示,即通过横向而不是下拉展示。
-- crosstab() comes from the tablefunc extension.
create extension if not exists tablefunc;

-- Two-argument crosstab(source_sql, category_sql):
--   * source_sql returns (row name, category, value) and must be ordered so
--     that all rows of one student are consecutive;
--   * category_sql returns the categories in output-column order; it is
--     ordered by sn so the result matches the column list declared in ct(...).
select *
from crosstab(
         $query$
             select student, subject, avg(testscore) as value
             from testscore_view
             group by student, subject
             order by student, subject
         $query$,
         $column$ select subject from subject_view order by sn $column$
     ) as ct (student text,
              "语文" numeric(10, 2),
              "化学" numeric(10, 2),
              "物理" numeric(10, 2),
              "数学" numeric(10, 2),
              "英语" numeric(10, 2));
PIVOT 通过一种新的操作符以交叉表格式显示任何查询,可以满足纵向多列的表格样式。
- pivot(聚合函数 for 列名 in (类型)),其中 in ('') 中可以指定列名,还可以指定子查询
- pivot(任一聚合函数 for 需转为列的值所在列名 in (需转为列名的值))
-- NOTE(review): this extension is created right before the PIVOT examples,
-- presumably because the PIVOT operator depends on it — confirm.
create extension kdb_utils_function;
-- Rows-to-columns with the PIVOT operator: avg(testscore) is computed per
-- student, and each subject value listed in IN (...) becomes an output column
-- named by its alias.
-- The source subquery lists its columns explicitly and carries no alias.
select *
from (select student, subject, testscore from testscore_view)
pivot (
avg(testscore) for subject in (
'语文' as "语文",
'化学' as "化学",
'物理' as "物理",
'数学' as "数学",
'英语' as "英语" )) ;
student | 语文 | 化学 | 物理 | 数学 | 英语
李四 | 67.5000000000000000 | 69.5000000000000000 | 58.0000000000000000 | | 46.0000000000000000
王五 | 84.0000000000000000 | | 57.5000000000000000 | 69.0000000000000000 | 42.5000000000000000
张三 | 65.5000000000000000 | 78.0000000000000000 | 78.0000000000000000 | 45.0000000000000000 | 57.5000000000000000
(3 行记录)
-- Same PIVOT as the per-student query, but the source subquery also selects
-- year; the sample output then has one row per (year, student) combination.
select *
from (select year, student, subject, testscore from testscore_view)
pivot (
avg(testscore) for subject in (
'语文' as "语文",
'化学' as "化学",
'物理' as "物理",
'数学' as "数学",
'英语' as "英语" )) ;
year | student | 语文 | 化学 | 物理 | 数学 | 英语
2020 | 李四 | 48.0000000000000000 | 48.0000000000000000 | 77.0000000000000000 | | 38.0000000000000000
2020 | 王五 | 88.0000000000000000 | | 45.0000000000000000 | 92.0000000000000000 | 56.0000000000000000
2020 | 张三 | 76.0000000000000000 | 83.0000000000000000 | 69.0000000000000000 | 49.0000000000000000 | 63.0000000000000000
2021 | 李四 | 87.0000000000000000 | 91.0000000000000000 | 39.0000000000000000 | | 54.0000000000000000
2021 | 王五 | 80.0000000000000000 | | 70.0000000000000000 | 46.0000000000000000 | 29.0000000000000000
2021 | 张三 | 55.0000000000000000 | 73.0000000000000000 | 87.0000000000000000 | 41.0000000000000000 | 52.0000000000000000
(6 行记录)
PIVOT 操作符的限制
PIVOT 操作符是根据明确的数据集合类型进行运算,需要避免以下使用方式
- FROM子句的子查询,包含 * 号。
select *
from ( select * from testscore_view)
pivot ( ...
- FROM子句的子查询,定义别名
select *
from (select student, subject, testscore from testscore_view) as stdsrc
pivot ( ...
- 使用 CTE 代替FROM子句的子查询
with stdsrc as (select student, subject, testscore from testscore_view)
select *
from stdsrc
pivot ( ...
- FROM子句的子查询,在select子句和from子句中,包含函数或子查询等表达式
select *
from (select student, subject, SQRT(testscore)*10 testscore from testscore_view )
pivot ( ... ;
select *
from (select student, subject, testscore from (select student, subject, testscore from testscore_view ))
pivot ( ... ;
上述被限制使用的查询,可以创建成视图,便可以进行 PIVOT 操作符运算。
工具 ksql 的元命令 \crosstabview
\crosstabview [ colV [ colH [ colD [ sortcolH ] ] ] ]
-- Average score per (student, subject) in long form. The query is not ended
-- with ';' because the \crosstabview meta-command sends the query buffer
-- itself and displays the result as a grid: subject as the vertical header,
-- student as the horizontal header, testscore as the cell value.
select student,
       subject,
       cast(avg(testscore) as numeric(10, 2)) as testscore
from testscore_view
group by student, subject
\crosstabview subject student testscore
subject | 李四 | 王五 | 张三
语文 | 67.50 | 84.00 | 65.50
物理 | 58.00 | 57.50 | 78.00
英语 | 46.00 | 42.50 | 57.50
数学 | | 69.00 | 45.00
化学 | 69.50 | | 78.00
(5 行记录)
-- Sample data in "wide" form: one row per (year, student), one column per
-- subject; NULL marks a subject the student has no score for.
-- The AS keyword between the column list and VALUES is required.
create or replace view stdscore_view
    (year, student, "语文", "化学", "物理", "数学", "英语") as
values (2020, '李四', 48, 48, 77, null, 38),
       (2020, '王五', 88, null, 45, 92, 56),
       (2020, '张三', 76, 83, 69, 49, 63),
       (2021, '李四', 87, 91, 39, null, 54),
       (2021, '王五', 80, null, 70, 46, 29),
       (2021, '张三', 55, 73, 87, 41, 52);
- 使用 union all,把每个科目列分别查询成行后再合并
-- Columns-to-rows with UNION ALL: each branch turns one subject column of
-- stdscore_view into (year, student, subject, testscore) rows, with the
-- subject name supplied as a literal. UNION ALL is used because the branches
-- are disjoint, so no de-duplication is needed.
-- The final filter drops the NULL cells of the wide table.
with stdscore (year, student, subject, testscore) as (
    select year, student, '语文', "语文"
    from stdscore_view
    union all
    select year, student, '化学', "化学"
    from stdscore_view
    union all
    select year, student, '物理', "物理"
    from stdscore_view
    union all
    select year, student, '数学', "数学"
    from stdscore_view
    union all
    select year, student, '英语', "英语"
    from stdscore_view
)
select *
from stdscore
where testscore is not null;
-- Columns-to-rows with CASE: the cross join repeats every wide row once per
-- subject in subject_view, and the CASE picks the column that matches that
-- subject.
-- The CASE is computed in a subquery because a WHERE clause cannot reference
-- a select-list alias (testscore) defined at the same query level.
select year, student, subject, testscore
from (select year,
             student,
             subject,
             case subject
                 when '语文' then "语文"
                 when '化学' then "化学"
                 when '物理' then "物理"
                 when '数学' then "数学"
                 when '英语' then "英语"
             end as testscore
      from stdscore_view
               cross join subject_view) as unpivoted
where testscore is not null;
UNPIVOT 操作符可以看作 PIVOT 操作符的反向运算:把多个列合并为一个新的维度列,各列的值则作为新的数据行合并到指定的值列中。
-- Columns-to-rows with the UNPIVOT operator: the five subject columns of
-- stdscore_view are folded into a subject column plus a testscore column.
-- The select list names the resulting columns in full.
-- NOTE(review): the IN list is kept exactly as written; confirm against the
-- KingbaseES UNPIVOT reference whether each entry should instead name the
-- source column first, i.e. "语文" as '语文' — TODO confirm.
select year, student, subject, testscore
from stdscore_view
unpivot (testscore for subject in
( '语文' as "语文", '化学' as "化学", '物理' as "物理", '数学' as "数学", '英语' as "英语" ));
当数据需要变形时,Pivot 为 SQL 语言增添了一个非常重要且实用的功能。您可以使用 pivot 操作针对任何关系表创建一个交叉表报表,而不必编写包含大量 decode 函数的令人费解的、不直观的代码。同样,您可以使用 unpivot 操作转换任何交叉表报表,以常规关系表的形式对其进行存储。
