-- gp sql

-- appendonly

-- drop table if exists test_appendonly;

-- create table test_appendonly with(appendonly=true, compresslevel=5) as

-- select generate_series(0, 1000) a, 'helloworld'::varchar(50) b

-- distributed by(a);

-- Look up the OID of the sample table test_appendonly.
select oid from pg_class where relname='test_appendonly';

-- List the sample table plus every relation whose name contains 212266
-- (the OID that is hard-coded into the pg_aoseg queries below).
select oid, oid::regclass from pg_class where relname='test_appendonly' or relname like '%212266%';

-- \d pg_aoseg.pg_aoseg_212266

-- Compression ratio as reported by get_ao_compression_ratio().
select * from get_ao_compression_ratio('test_appendonly');

-- The same ratio computed by hand: sum(eofuncompressed) / sum(eof) over pg_aoseg_212266,
-- read through gp_dist_random().
select sum(eofuncompressed)/sum(eof) as compression_ratio from gp_dist_random('pg_aoseg.pg_aoseg_212266');

-- Per-segment distribution as reported by get_ao_distribution().
select * from get_ao_distribution('test_appendonly') order by segmentid;

-- Per-segment tuple counts read directly from pg_aoseg_212266.
select gp_segment_id, tupcount from gp_dist_random('pg_aoseg.pg_aoseg_212266') order by gp_segment_id;

-- (Re)create table_info, whose row type is what get_table_info() returns.
-- The drop uses cascade, so objects depending on the table are dropped as well.
drop table if exists public.table_info cascade;

create table public.table_info (

tablename text, -- table name

subparname text, -- partition name

tablecount bigint, -- number of rows in the table

tablesize bigint, -- table size

prettysize text, -- human-readable size

max_div_avg float, -- skew: largest per-node data volume / average per-node data volume

compression_ratio text -- compression ratio

);

-- Report row count, stored size, skew and compression ratio of an append-only table.
-- The argument must be written as "schema.table". A partitioned table yields one
-- '###ALL###' summary row followed by one row per partition; any other table yields one row.
-- The queries call regclass2text(), so that function must exist when this one is executed.
create or replace function public.get_table_info(tablename text) returns setof table_info as $$
import re


def sql_literal(value):
    # Double embedded single quotes so the value is safe inside a '...' SQL literal.
    return str(value).replace("'", "''")


def version_key(text):
    # '4.3.8.1' -> (4, 3, 8, 1): compare versions numerically rather than as strings
    # (a plain string comparison would rank '10.0' below '3.4.0').
    return tuple([int(part) for part in re.findall(r'\d+', text or '')])


def one_table_info(plpy, tablename, subparname, aosegname, privilege):
    # Build one result row from a single append-only segment relation (aosegname).
    # privilege is '1' when the current user may select from aosegname; any other
    # value produces a placeholder row marked 'permission denied'.
    if privilege == '1':
        # prettysize is derived from the stored byte count (eof), the same quantity
        # that tablesize reports.
        aosegsql = '''
            select '%s' tablename, '%s' subparname,
                coalesce(sum(tupcount)::bigint, 0) tablecount,
                coalesce(sum(eof)::bigint, 0) tablesize,
                pg_size_pretty(coalesce(sum(eof)::bigint, 0)) prettysize,
                coalesce(max(tupcount)::bigint, 1)/(case when coalesce(avg(tupcount), 1.0) = 0 then 1
                    else coalesce(avg(tupcount), 1.0) end) max_div_avg,
                coalesce(sum(eofuncompressed), 1)/(case when coalesce(sum(eof), 1.0) = 0 then 1
                    else coalesce(sum(eof), 1.0) end) compression_ratio
            from gp_dist_random('%s');
        ''' % (sql_literal(tablename), sql_literal(subparname), sql_literal(aosegname))
    else:
        aosegsql = '''
            select '%s' tablename, '%s' subparname,
                0 tablecount, 0 tablesize, 'permission denied' prettysize,
                0 max_div_avg, 0 compression_ratio;
        ''' % (sql_literal(tablename), sql_literal(subparname))
    plpy.info(aosegsql)
    return plpy.execute(aosegsql)[0]


# Only the "schema.table" form is accepted.
if '.' not in tablename:
    plpy.error('Please in put "tableschema.table_name"')

relation_literal = sql_literal(tablename)
not_found_message = 'Did not find any relation named "' + tablename + '".'

# Check the database version: releases after 3.4.0 are queried through pg_appendonly,
# older ones through pg_class.relaosegrelid.
rv = plpy.execute("""
    select substring(version(), 'Database (.*) build') as version;
""")
uses_pg_appendonly = version_key(rv[0]['version']) > (3, 4, 0)

# NOTE(review): this setting is not restored before the function returns.
plpy.execute("set enable_seqscan=off")

# Look up the table's oid, storage type and append-only segment relation.
if uses_pg_appendonly:
    get_table_oid = """
        select a.oid, reloptions, b.segrelid, regclass2text(b.segrelid::regclass) aosegname, relstorage,
            case has_table_privilege(user, b.segrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_class a left join pg_appendonly b on a.oid=b.relid where a.oid='%s'::regclass;
    """ % (relation_literal)
else:
    get_table_oid = """
        select oid, reloptions, relaosegrelid, regclass2text(relaosegrelid::regclass) aosegname, relstorage,
            case has_table_privilege(user, relaosegrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_class where oid='%s'::regclass;
    """ % (relation_literal)

try:
    rv_oid = plpy.execute(get_table_oid, 5)
except Exception:
    # A failed lookup is reported with the same message as an empty result.
    plpy.error(not_found_message)
if not rv_oid:
    plpy.error(not_found_message)

table_oid = rv_oid[0]['oid']
if rv_oid[0]['relstorage'] != 'a':
    plpy.error(tablename + ' is not appendonly table, this function only support appendonly talbe')

# The table is handled as partitioned when pg_partition holds exactly one row for it.
check_par_table = "select count(*) from pg_partition where parrelid=%s" % (table_oid)

if uses_pg_appendonly:
    tablecount_sql = """
        select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pa.segrelid,
            regclass2text(pa.segrelid::regclass) aosegname,
            case has_table_privilege(user, pa.segrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_partition pp, pg_partition_rule prl, pg_appendonly pa
        where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pa.relid=prl.parchildrelid
        order by prl.parruleord;
    """ % (table_oid)
else:
    tablecount_sql = """
        select regclass2text(pp.parrelid::regclass) tabname, prl.parname, parruleord, pc.relaosegrelid,
            regclass2text(pc.relaosegrelid::regclass) aosegname,
            case has_table_privilege(user, pc.relaosegrelid, 'select') when 't' then '1' else '0' end privilege
        from pg_partition pp, pg_partition_rule prl, pg_class pc
        where pp.paristemplate=false and pp.parrelid=%s and prl.paroid=pp.oid and pc.oid=prl.parchildrelid
        and relaosegrelid <> 0 order by prl.parruleord;
    """ % (table_oid)

rv = plpy.execute(check_par_table)

if rv[0]['count'] == 1:
    # Partitioned table: collect one row per partition and accumulate the totals.
    partition_rows = []
    totalcount = 0
    totalsize = 0
    unzipsize = 0
    for part in plpy.execute(tablecount_sql):
        rv_ao = one_table_info(plpy, tablename, part['parname'], part['aosegname'], str(part['privilege']))
        partition_rows.append(rv_ao)
        totalsize = totalsize + rv_ao['tablesize']
        totalcount = totalcount + rv_ao['tablecount']
        # stored bytes * ratio gives back the uncompressed bytes of this partition
        unzipsize = unzipsize + rv_ao['tablesize'] * rv_ao['compression_ratio']

    # With nothing stored the overall ratio is reported as 1.
    if totalsize == 0:
        compression_ratio = 1
    else:
        compression_ratio = unzipsize/totalsize

    total_count_sql = """
        select '%s' as tablename, '###ALL###' as subparname, %d as tablecount, %d as tablesize,
            pg_size_pretty(%d::bigint) prettysize, null as max_div_avg, %f as compression_ratio;
    """ % (relation_literal, totalcount, totalsize, totalsize, compression_ratio)
    summary = plpy.execute(total_count_sql)
    plpy.info('===' + total_count_sql)

    # Summary row first, then the partitions in parruleord order.
    return [summary[0]] + partition_rows
else:
    # Any other table: a single row read from its own segment relation.
    return [one_table_info(plpy, tablename, '', rv_oid[0]['aosegname'], str(rv_oid[0]['privilege']))]

$$ language plpythonu;

-- Example: call get_table_info() as a table function (one output column per table_info field).
select * from get_table_info('public.test_appendonly');

-- Example: the same call in the select list (each row comes back as a single composite value).
select get_table_info('public.test_appendonly');

-- hostname

-- create language plpythonu ;

create or replace function public.hostname() returns text as $$
# Return the host name, as reported by the socket module, of the machine running this call.
from socket import gethostname

return gethostname()
$$ language plpythonu;

create or replace function public.reverse(str text) returns text as $$
# Return the argument with its characters in reverse order; a NULL argument gives NULL.
# NOTE(review): under Python 2 the slice works on bytes, so multi-byte characters
# would presumably be scrambled - confirm the inputs are single-byte text.
if str is None:
    return None
return str[::-1]
$$ language plpythonu;

create or replace function public.json_parse(data text) returns text as $$
# Parse a JSON document shaped like {"people": [{"firstName": ..., "lastName": ...}, ...]}
# and return the list of "firstName lastName" strings.
# Any failure - invalid JSON or an unexpected structure - returns ['Parse json error'].
# NOTE(review): a Python list is returned although the declared return type is text;
# confirm the resulting text format is what callers expect.
import json

try:
    mydata = json.loads(data)
except Exception:
    return ['Parse json error']

returndata = []
try:
    for people in mydata['people']:
        returndata.append(people['firstName'] + ' ' + people['lastName'])
except Exception:
    return ['Parse json error']

return returndata
$$ language plpythonu;

-- Host name where this call itself is executed.
select hostname();

-- hostname() evaluated through gp_dist_random('gp_id')
-- (presumably once per segment - confirm on the target cluster).
select hostname() from gp_dist_random('gp_id');

-- Number of pg_class rows per gp_segment_id.
select gp_segment_id, count(1) from gp_dist_random('pg_class') group by 1 order by 1;

-- Cluster configuration catalogs.
select * from gp_configuration;

select * from gp_segment_configuration;

select * from pg_filespace_entry;

select * from gp_configuration_history;

-- Check whether a table is a partitioned table (a non-zero count means pg_partition has rows for it)

select count(1) from pg_partition where parrelid='public.tb_partition_list_yyyymmdd'::regclass;

-- Dump all partition rules.
select * from pg_partition_rule;

-- One row per partition rule of every non-template partitioning:
-- parent table oid, child relation, partition name, kind, upper bound and position.
-- "if exists" lets the script run on a database where the view was never created.
drop view if exists public.v_pg_partitions;

create view public.v_pg_partitions as
select
    pp.parrelid as tableoid,
    prl.parchildrelid,
    prl.parname as partitionname,
    case
        when pp.parkind = 'h'::"char" then 'hash'::text
        when pp.parkind = 'r'::"char" then 'range'::text
        when pp.parkind = 'l'::"char" then 'list'::text
        else NULL::text
    end as partitiontype,
    -- the range end when there is one, otherwise the list values
    case
        when pg_get_expr(prl.parrangeend, prl.parchildrelid) = ''
            then pg_get_expr(prl.parlistvalues, prl.parchildrelid)
        else pg_get_expr(prl.parrangeend, prl.parchildrelid)
    end as HIGH_VALUE,
    pg_get_partition_rule_def(prl.oid, true) as partitionboundary,
    prl.parruleord as partitionposition
from pg_partition pp
inner join pg_partition_rule prl
    on prl.paroid = pp.oid
where pp.paristemplate = false;

-- Example: partitions of one table in rule order.
select * from public.v_pg_partitions where tableoid='tb_partition_list_yyyymmdd'::regclass order by partitionposition;

-- partition

-- Sample append-only table (compresslevel 5), distributed by id and range-partitioned
-- on the date column yyyymmdd, created with a single one-day partition p20120811.
drop table if exists public.tb_partition_range_yyyymmdd cascade;

create table public.tb_partition_range_yyyymmdd (

id numeric,

yyyymmdd date

) with(appendonly=true, compresslevel=5)

distributed by(id)

partition by range(yyyymmdd)

(

partition p20120811 start ('2012-08-11'::date) end ('2012-08-12'::date)

-- partition p20120812 start ('2012-08-12'::date) end ('2012-08-13'::date)

);

-- Sample append-only table (compresslevel 5), distributed by id and list-partitioned
-- on the varchar column yyyymmdd, created with two partitions holding one 'YYYYMMDD' value each.
drop table if exists public.tb_partition_list_yyyymmdd cascade;

create table public.tb_partition_list_yyyymmdd (

id numeric,

yyyymmdd varchar(128)

) with(appendonly=true, compresslevel=5)

distributed by(id)

partition by list(yyyymmdd)

(

partition p20120811 values('20120811'),

partition p20120812 values('20120812')

);

-- Helper view read by add_partition_info(): one row per partition rule of every
-- non-template partitioning, with the list value and range bounds reduced to compact strings.
-- Each translate(..., <characters>, '') call strips every character listed in its second
-- argument (quote, dash, colon, newline, space and the letters spelling the type names),
-- which for the date-like values used in this file leaves the digits; the range bounds
-- are then cut to their first 8 characters.
-- listtype / rangetype are the value's type, taken from the "consttype <oid>" text
-- found in parlistvalues / parrangeend.
-- NOTE: the multi-line string literals below are part of the expression and must not be re-wrapped.
drop view if exists public.v_pg_add_partitions cascade;

create view public.v_pg_add_partitions as

select pp.parrelid tableoid, prl.parchildrelid, prl.parname as partitionname,

case

when pp.parkind='h'::"char" then 'hash'::text

when pp.parkind='r'::"char" then 'range'::text

when pp.parkind='l'::"char" then 'list'::text

else NULL::text

end as partitiontype,

translate(pg_get_expr(prl.parlistvalues, prl.parchildrelid), '-''::date

character varying bpchar numeric double precision timestamp without time zone', '') as partitionlistvalue,

substring(translate(pg_get_expr(prl.parrangestart, prl.parchildrelid), '-''::date

character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangestart,

substring(translate(pg_get_expr(prl.parrangeend, prl.parchildrelid), '-''::date

character varying bpchar numeric double precision timestamp without time zone', ''), 1, 8) as partitionrangeend,

prl.parruleord as partitionposition,

substring(parlistvalues, 'consttype ([0-9]+)')::integer::regtype listtype,

substring(parrangeend, 'consttype ([0-9]+)')::integer::regtype rangetype

from pg_partition pp, pg_partition_rule prl where pp.paristemplate=false and prl.paroid=pp.oid;

create or replace function public.add_partition_info(tableoid oid, days_from_now integer) returns setof text as $$
# Add date-named partitions ('pYYYYMM' or 'pYYYYMMDD') to a range- or list-partitioned table,
# starting at today's date and continuing until today + days_from_now is reached.
# The step between partitions is taken from the table's last existing partition.
# Every ALTER TABLE statement is executed and also returned, one text row per statement.
# Returns no rows when the table has no partition named 'p20...' or when the last
# partition's name is neither 7 nor 9 characters long.
# NOTE(review): generation always starts at today's full date (YYYYMMDD), also for monthly
# partitions, and does not check whether a partition already exists - confirm this is intended.
import datetime

DAY_FORMAT = '%Y%m%d'


def now():
    # Today's date as 'YYYYMMDD'.
    return datetime.datetime.now().strftime(DAY_FORMAT)


def add_day(d, n):
    # Shift a 'YYYYMMDD' string by n days.
    shifted = datetime.datetime.strptime(d, DAY_FORMAT) + datetime.timedelta(days=n)
    return shifted.strftime(DAY_FORMAT)


def add_month(d, n):
    # Shift by n months, approximated as 31 * n days.
    # 'YYYYMMDD' input is snapped to the first of the resulting month; 'YYYYMM' input stays 'YYYYMM'.
    in_format = DAY_FORMAT
    out_format = '%Y%m01'
    if len(d) == 6:
        in_format = '%Y%m'
        out_format = '%Y%m'
    shifted = datetime.datetime.strptime(d, in_format) + datetime.timedelta(days=31 * n)
    return shifted.strftime(out_format)


def parse_ymd(value):
    # Accept 'YYYYMM' as well as 'YYYYMMDD'.
    if len(value) == 6:
        return datetime.datetime.strptime(value, '%Y%m')
    return datetime.datetime.strptime(value, DAY_FORMAT)


def step_between(earlier, later, monthly):
    # Distance between two partition bounds, in months or in days.
    # Uses calendar arithmetic, because subtracting the YYYYMMDD integers gives wrong
    # results across month and year boundaries (20120901 - 20120831 = 70).
    # A missing bound (e.g. a table with a single list partition) falls back to 1, and the
    # result is never below 1 so the generation loops always advance.
    if not earlier or not later:
        return 1
    first = parse_ymd(earlier)
    last = parse_ymd(later)
    if monthly:
        step = (last.year - first.year) * 12 + (last.month - first.month)
    else:
        step = (last - first).days
    if step < 1:
        step = 1
    return step


relist = []

# pre_value is the previous partition's value; it is used mainly for list partitions.
# "order by rn" guarantees that row 0 is the partition with the highest position.
sql = """select *, tableoid::regclass tablename, lead(case when partitionrangeend <>  '' then partitionrangeend
 else partitionlistvalue end) over(partition by tableoid order by partitionposition desc) as pre_value,
 row_number() over(partition by tableoid order by partitionposition desc) rn
 from v_pg_add_partitions where substr(partitionname, 1, 3) = 'p20' and tableoid=%s order by rn;""" % (tableoid)
rv = plpy.execute(sql)

# Nothing to extend when the table has no date-named partitions.
if rv.nrows() == 0:
    return []

sql_relation = "select array_to_string(reloptions, ',') reloptions from pg_class where oid=%s" % (tableoid)
reloptions = plpy.execute(sql_relation)[0]['reloptions']

last_partition = rv[0]
tablename = last_partition['tablename']
partitiontype = last_partition['partitiontype']
partitionname = last_partition['partitionname']
pre_value = last_partition['pre_value']
now_add_7days = add_day(now(), days_from_now)

# The length of the partition name decides the granularity.
if len(partitionname) == 7:
    # monthly partitions ('pYYYYMM')
    func_add = add_month
    monthly = True
elif len(partitionname) == 9:
    # daily partitions ('pYYYYMMDD')
    func_add = add_day
    monthly = False
else:
    # non-standard partition name: do nothing
    return []

# New partitions carry the same storage options as the parent table.
if reloptions != None and reloptions != '':
    statement_tail = 'with(%s);' % (reloptions)
else:
    statement_tail = ';'

# Range partitions.
if partitiontype == 'range':
    rangetype = last_partition['rangetype']
    interval = step_between(last_partition['partitionrangestart'], last_partition['partitionrangeend'], monthly)
    partitionrangestart = now()
    while partitionrangestart < now_add_7days:
        partitionrangeend = func_add(partitionrangestart, interval)
        partitionname = 'p' + partitionrangestart
        add_sql = "alter table %s add partition %s start ('%s'::%s) end ('%s'::%s)" % (tablename, partitionname, partitionrangestart, rangetype, partitionrangeend, rangetype)
        add_sql += statement_tail
        plpy.execute(add_sql)
        relist.append(add_sql)
        # the next partition starts where this one ends
        partitionrangestart = partitionrangeend

# List partitions.
if partitiontype == 'list':
    listtype = last_partition['listtype']
    interval = step_between(pre_value, last_partition['partitionlistvalue'], monthly)
    partitionlistvalue = now()
    while partitionlistvalue < now_add_7days:
        partitionname = 'p' + partitionlistvalue
        add_sql = "alter table %s add partition %s values('%s'::%s)" % (tablename, partitionname, partitionlistvalue, listtype)
        add_sql += statement_tail
        plpy.execute(add_sql)
        relist.append(add_sql)
        partitionlistvalue = func_add(partitionlistvalue, interval)

return relist
$$ language plpythonu;

-- Example: extend the sample list-partitioned table up to one day from now.
select add_partition_info('tb_partition_list_yyyymmdd'::regclass, 1);

-- Generate "grant select ... to gpadmin" statements for ordinary tables (relkind 'r')
-- that gpadmin cannot select from yet, skipping names matching %_1_prt%,
-- the pg_catalog / information_schema schemas and schemas matching %pg_tmp%.
select 'grant select on ' || nspname || '.' || relname || ' to gpadmin;' from pg_class a, pg_namespace b

where relname not like '%_1_prt%' and relkind='r' and has_table_privilege('gpadmin', a.oid, 'select')='f'

and a.relnamespace=b.oid and nspname not in ('pg_catalog', 'information_schema') and nspname not like '%pg_tmp%';

-- view

-- 1. Get a table's column information: table names are in pg_class, schemas in pg_namespace, column details in pg_attribute

select a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type from pg_catalog.pg_attribute a,

(

select c.oid from pg_catalog.pg_class c left join pg_catalog.pg_namespace n on n.oid=c.relnamespace

where c.relname='pg_class' and n.nspname='pg_catalog'

) b

where a.attrelid=b.oid and a.attnum > 0 and not a.attisdropped order by a.attnum;

-- 1.1 Get a table's column information (same result, resolving the table with a regclass cast)

select a.attname, pg_catalog.format_type(a.atttypid, a.atttypmod) as data_type from pg_catalog.pg_attribute a

where a.attrelid='pg_catalog.pg_class'::regclass and a.attnum > 0 and not a.attisdropped order by a.attnum;

-- 2. Get a table's distribution key from gp_distribution_policy

-- 2.1 localoid joins to pg_class.oid; attrnums is an array of column attnum values that join to pg_attribute.attnum

drop table if exists public.cxfa2 cascade;

create table public.cxfa2(a int, b int, c int, d int) distributed by(c, a);

select * from gp_distribution_policy where localoid='cxfa2'::regclass;

-- Expand attrnums into one row per key column: generate_series(1, 10) supplies the array
-- subscripts (so at most 10 key columns are covered) and unused subscripts are filtered out.
select * from gp_distribution_policy a, (select generate_series(1, 10)) i (i), pg_attribute b

where a.attrnums[i.i] is not null and a.localoid=b.attrelid and a.attrnums[i.i]=b.attnum

and a.localoid='public.cxfa2'::regclass order by i.i;

-- 3. Get a table's creation time

select * from pg_stat_file('pg_hba.conf');

-- Composite type returned by get_file_stat(): file size plus the access,
-- modification and change timestamps, each at second precision.
drop type if exists public.stat_file cascade;

create type public.stat_file as (

size bigint,

access timestamp(0),

modification timestamp(0),

change timestamp(0)

);

create or replace function public.get_file_stat(filename text) returns stat_file as $$
# Return [size, access, modification, change] for filename, taken from os.stat().
# The three timestamps are formatted as 'YYYY-MM-DD HH:MM:SS' in local time.
# Best effort: any field that could not be determined (for example because the
# file cannot be stat'ed) is left as NULL.
import os, time


def stamp(epoch_seconds):
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(epoch_seconds))


size = access = modification = change = None
try:
    info = os.stat(filename)
    size = int(info.st_size)
    access = stamp(info.st_atime)
    modification = stamp(info.st_mtime)
    change = stamp(info.st_ctime)
except Exception:
    # deliberately ignored: the caller gets NULLs for whatever was not filled in
    pass

return [size, access, modification, change]
$$ language plpythonu;

select * from get_file_stat('pg_hba.conf');

-- Access / modification / change times of the data file behind each ordinary table
-- (relkind 'r') with relstorage 'a' or 'h' in the current database.
-- The file path is assembled from the filespace location, the tablespace directory
-- ('global', 'base/<database oid>' or '<tablespace oid>/<database oid>') and relfilenode,
-- and is then handed to get_file_stat().
-- Only filespace entries with fsedbid = 1 are used (presumably the master - confirm).
drop view if exists public.v_table_modify_time cascade;

create view public.v_table_modify_time as
select
    tab_oid,
    schemaname,
    tablename,
    (filestat).access,
    (filestat).modification,
    (filestat).change
from (
    select
        a.oid as tab_oid,
        e.nspname as schemaname,
        a.relname as tablename,
        get_file_stat(fselocation || '/' ||
            case when reltablespace=1664 then 'global'
                 when reltablespace=0 then 'base' || '/' || d.oid
                 else reltablespace || '/' || d.oid
            end
            || '/' || relfilenode) as filestat
    from pg_class a
    inner join pg_namespace e
        on e.oid = a.relnamespace
    -- reltablespace 0 is looked up as tablespace oid 1663
    inner join pg_tablespace b
        on b.oid = (case when a.reltablespace = 0 then 1663 else a.reltablespace end)
    inner join pg_filespace_entry c
        on c.fsefsoid = b.spcfsoid
    inner join pg_database d
        on d.datname = current_database()
    where c.fsedbid = 1
        and a.relstorage in ('a', 'h')
        and a.relkind = 'r'
) t;

-- Generate "drop table ... cascade" statements for tables in schema public whose data file
-- was last accessed more than one day ago, skipping names matching %_1_prt_p%.
-- The statements are only produced as text here, oldest access first.
select 'drop table ' || schemaname || '.' || tablename || ' cascade;'

from v_table_modify_time where access < now() - '1 days'::interval and tablename not like '%_1_prt_p%'

and schemaname='public' order by access;

-- Custom type cast

-- Show the function behind the text -> date cast registered in pg_cast.
select castfunc::regprocedure
from pg_cast
where castsource = 'text'::regtype
    and casttarget = 'date'::regtype;

-- select '20180526'::date;

-- select date('20180526');

-- select date('2018-05-26');

-- select cast('2018-05-26' as date);

-- Cast function for regclass -> text: returns its argument unchanged and leaves the
-- conversion to the declared text return type.
create or replace function public.regclass2text(a regclass) returns text as $$
return a
$$ language plpythonu;

-- Register the function as the regclass -> text cast, replacing any existing one.
drop cast if exists (regclass as text) cascade;

create cast(regclass as text) with function regclass2text(a regclass);

-- Example use of the new cast.
select 57377::regclass::text;

-- Content id of every segment configuration entry that has a filespace entry
-- in the filespace named 'pg_system'.
create view v_gp_configuration as
select a.content
from gp_segment_configuration a
inner join pg_filespace_entry b
    on b.fsedbid = a.dbid
-- the alias must be "c": the reserved word "create" cannot be used as an alias,
-- and the join condition and filter refer to c
inner join pg_filespace c
    on c.oid = b.fsefsoid
where c.fsname = 'pg_system';

https://www.cnblogs.com/someblue/p/4225694.html

https://blog.csdn.net/menggudaoke/article/details/78843749

gp sql的更多相关文章

GP SQL 优化
1.收集统计信息vacuum full analyze ZCXT.ZCOT_PS_PROJECT; 2.检查表的数据量分布select gp_segment_id,count(*) from fact ...
GP数据库常用SQL语句
GP数据库常用SQL语句 --1,查看列名以及类型 select upper(column_name) ,data_type from information_schema.columns wher ...
使用sql查询mysql/oracle/sql server/gp数据库中指定表的字段信息(字段名/字段类型/字段长度/是否是主键/是否为空)
1,根据数据库类型拼接不同URL /** * 根据类型不同拼接连接的URL * @param dbType 1:mysql.2:oracle.3:sql server.4:gp * @param ip ...
GP查询表状态常用SQL
年终巨献史上最全 ——LINQ to SQL语句
LINQ to SQL语句(1)之Where 适用场景:实现过滤,查询等功能. 说明:与SQL命令中的Where作用相似,都是起到范围限定也就是过滤作用的,而判断条件就是它后面所接的子句.Where操 ...
Linq to SQL 语法查询(链接查询，子查询 & in操作 & join，分组统计等)
Linq to SQL 语法查询(链接查询,子查询 & in操作 & join,分组统计等) 子查询描述:查询订单数超过5的顾客信息查询句法: var 子查询 = from c i ...
LINQ to SQL语句(6)之Group By/Having
适用场景:分组数据,为我们查找数据缩小范围. 说明:分配并返回对传入参数进行分组操作后的可枚举对象.分组:延迟 1.简单形式: var q = from p in db.Products group ...
你必须知道的Microsoft SQL Server一
不知道为什么我Win10环境下安装的Sqlserver2012,智能提示的功能基本上没有用,没办法,我还是选择安装插件SQL Prompt 5吧.下载地址:http://www.uzzf.com/so ...
Oracle学习笔记十使用PL/SQL
PL/SQL 简介 PL/SQL 是过程语言(Procedural Language)与结构化查询语言(SQL)结合而成的编程语言,是对 SQL 的扩展,它支持多种数据类型,如大对象和集合类型,可使用 ...

随机推荐

Leetcode Excel Sheet Column Number （C++） && Excel Sheet Column Title （ Python）
Given a column title as appear in an Excel sheet, return its corresponding column number. For exampl ...
【MySQL案例】ERROR 1786 (HY000)
1.1.1. ERROR 1786 (HY000) [环境描写叙述] msyql5.6.14 [报错信息] 运行create table ... select的时候遇到报错: db1 [test] [ ...
linux-一篇文章完成lnmp环境的编译安装
lnmp环境搭建前置条件操作系统安装:CentOS 6.8 64位最小化安装. 配置好IP.DNS.网关.主机名配置防火墙,开启80.3306端口关闭访问墙 service iptables ...
DFS&&BFS
DFS DFS搜索是按照深度的方向搜索,它类似于树的先根遍历,是树的先根遍历的推广. 1.从图的某个顶点v0出发,首先访问v0, 2.找出刚访问过的顶点的第一个未被访问过的邻接点,然后访问该结点,以该 ...
hadoop之安全模式及SafeModeException
问题: hadoop启动的时候报错 HTTP ERROR 500 Problem accessing /nn_browsedfscontent.jsp. Reason: Cannot issue de ...
C语言字符串处理函数转自 http://blog.chinaunix.net/uid-25885064-id-3175049.html
C字符串处理函数 2012-04-13 18:14:16 分类: C/C++ void *memccpy (void *dest, const void *src, int c, size_t n) ...
sqlite性能优化
1.数据库性能上 1.1 批量事务插入,提升数据插入的性能由于sqlite默认每次插入都是事务,需要对文件进行读写,那么减少事务次数就能简书磁盘读写次数从而获得性能提升. 1.2 单条sql优于多条 ...
HDU 4004 The Frog's Games(二分+小思维+用到了lower_bound)
The Frog's Games Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 65768/65768 K (Java/Others) ...
KMP算法解释
给定两个字符串A,B,判断T是否为S的子串(变式:寻找子串B在串A中的位置). 要求一个O(|A|+|B|)的做法. 通常称A为目标串(或主串),B为模式串. 算法过程: 我们假设串A的长度为n,串B ...
ORA-00257:archiver error.Connect internal only, until freed的问题（转）
删除归档日志_ORA-00257:archiver error.Connect internal only, until freed的问题 ORA-00257: archiver error. C ...

gp sql

appendonly

hostname

partition

view

gp sql的更多相关文章

随机推荐

热门专题