【CDN+】一些常用的Linux命令，crontab+VI+Hive（持续更新）

前言

本文主要是记录下工作中可能用到的一些linux指令，当作字典查用

Crontab 基本命令

# 安装
　　yum -y install vixie-cron crontabs
#查看状态
　　service crond status
#启动
　　service crond start
#关闭
　　service crond stop
#重启
　　service crond restart

crontab [-u user] file
crontab [-u user] [ -e | -l | -r ]

-e 编辑 -l 是列表 -r 是删除
在 vi 中输入 :q! 可以不保存退出（ctrl+z 只是把编辑器挂起到后台，并没有真正退出）

进入crontab文件后，默认使用的是VI编辑器，下面是VI编辑器的常用命令（蓝色部分的够用了）

VI编辑器基本命令

模式切换的方法
a	在当前光标位置之后插入内容。
A	在光标所在行的末尾（行尾）插入内容。
i	在当前光标位置之前插入内容。
I	在光标所在行的开头（行首）插入内容。
o	在光标所在行的后面插入一个新行。
O	在光标所在行的前面插入一个新行。

指令行的储存、离开等指令
:w	保存文件（不退出vi编辑器）
:w /root/XXX	将当前编辑的文件另存到/root目录下，文件名为XXX。
:q	退出编辑器。
:q!	不保存并退出。
:wq 或 :x	保存并退出。
:set nu	显示行号，设定之后，会在每一行的前缀显示该行的行号
:set nonu	与 set nu 相反，为取消行号！

移动光标方法
光标方向的移动	使用键盘中的四个方向键↑、↓、←、→完成相应的光标移动。
	+ 光标移动到非空格符的下一行
	- 光标移动到非空格符的上一行
	H 光标移动到这个屏幕的最上方那一行的第一个字符
	M 光标移动到这个屏幕的中央那一行的第一个字符
	L 光标移动到这个屏幕的最下方那一行的第一个字符
	n<Enter> n 为数字。光标向下移动 n 行(常用)
翻页移动	使用Page Down键或Ctrl+F组合键向下翻动一整页内容。
	使用Page Up键或Ctrl+B组合键向上翻动一整页内容。
	使用Page Down键和Page Up键同样适用于vi的输入模式。
行内快速跳转	按Home键或^键、数字0键将光标快速跳转到本行的行首。
	按End键或$键将光标快速跳转到本行的行尾。
行间快速跳转	使用按键命令1G或者gg可跳转到文件内容的第1行。
	使用按键命令G可跳转到文件的最后一行。
	使用按键命令#G可跳转到文件中的第#行（其中“#”号用具体数字替换）。
复制	yy 复制光标所在的那一行。
	nyy n 为数字。复制光标所在的向下 n 行。
	yG 复制光标所在行到最后一行的所有数据。
	y1G 复制光标所在行到第一行的所有数据。
	y0 复制光标所在的那个字符到该行行首的所有数据。
	y$ 复制光标所在的那个字符到该行行尾的所有数据。
粘贴	按p键即可将缓冲区中的内容粘贴到光标位置处之后。
	按P键则会粘贴到光标位置处以前。
删除	使用x键或Del按键删除光标处的单个字符。
	使用dd删除当前光标所在行。
	使用#dd删除从光标处开始的#行内容。
	使用d^删除当前光标之前到行首的所有字符。
	使用d$删除从当前光标处到行尾的所有字符。
查找文件内容	在命令模式中，按/键后可以输入指定的字符串，从当前光标处开始向后进行查找（如果按?键则向前查找）。完成查找后可以按n、N键在不同的查找结果中进行选择。

Hive 数据库操作

hive为每一个数据库创建一个目录，这个数据库中的表将会以子目录的形式放在这个数据库目录下

创建数据库

create database foo;
create database if not exists foo;

创建数据库时指定位置，这个位置一般是在hdfs上的位置：

create database foo location '/db/foo';

查看已经创建的数据库：

show databases ; show databases like 'foo.*';

查看创建数据库的语句：

show create database foo ;

给数据库添加描述信息：

create database dbname comment 'dbname描述信息';

# 删除数据库，这种删除，需要将数据库中的表全部删除，才能删除数据库

drop database dbname;

drop database if exists dbname;

# 强制删除数据库

drop database dbname cascade;
desc database dbname; 输出了 db_name | comment | location | owner_name | owner_type | parameters

使用hive数据库：

use database名称;
show tables;
show tables like 'tb_*';
show tables 'tb_*';

显示表的分区

show partitions tb_test;

显示表的详细信息

desc tb_name;

简单的表创建

create table tb_test(name string, age int);

典型的表创建：

create [external] table [if not exists] table_name (

col_name data_type [comment '字段描述信息'],

col_name data_type [comment '字段描述信息'])

[comment '表的描述信息']

[partitioned by (col_name data_type,...)]

[clustered by (col_name,col_name,...)

[sorted by (col_name [asc|desc],...)] into num_buckets buckets]

[row format row_format]

[location '指定表的路径（location_path）']

指定字段分隔符

create table tb_test(name string,age int) row format delimited fields terminated by ',';

创建外部表

create external table tb_test(name string,age int)row format delimited fields terminated by ',';

创建带桶的表

create table student(id int,name string,age int)

partitioned by (sex string)

clustered by(id)

sorted by (age) into 2 buckets

row format delimited fields terminated by ',';

drop table tb_name;
drop table if exists tb_name;

# 按照sex='male'，sex='female'进行分区

alter table student add partition(sex='male') partition(sex='female');

删除分区

alter table student drop partition(sex='male');

增加列（add columns）；replace columns 则会用新的列定义替换表中原有的全部列

alter table student add columns (rank string);
alter table student replace columns (height string);

Hive中可以使用-f文件名方式执行指定文件中的一个或者多个查询语句，一般这些查询文件保存为.q和.hql后缀名的文件

$hive -f /path/to/file/withqueries.hql

shutdown -h now --立即关机
shutdown -h 10:53 --到10:53关机，如果该时间小于当前时间，则到隔天
shutdown -h +10 --10分钟后自动关机
shutdown -r now --立即重启
shutdown -r +30 'The System Will Reboot in 30 Mins' --30分钟后重启并发送通知给其它在线用户

Hive 基本的DDL

// 查看数据库

show databases;

// 使用数据库

use srm;

// 显示所有的函数

show functions;

// 查看函数用法

describe function substr;

// 查看当前数据库下

show tables;

// 查看表结构

desc invoice_lines;

// 查看某个表的分区情况

show partitions invoice_lines;

// 创建表

CREATE TABLE IF NOT EXISTS srm.invoice_lines_temp2(

SOURCE_SYS_KEY string comment '' ,

LEGAL_COMPANY string comment '' ,

VENDOR_NAME string comment '' ,

INVOICE_UNIT_PRICE double comment '' ,

PREPAY_UNAPPLIED double comment '' ,

GR_NON_VALUATED string comment ''

)partitioned by(jobid string) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

// LOCATION 用于指定表的数据文件路径

# LOCATION 'hdfs://cdh5/tmp/invoice/'; 

// 根据某张表，创建一张结构一样的表

create table invoice_lines_temp2 like invoice_lines;

// 创建外部表

CREATE EXTERNAL TABLE tinvoice_lines(id STRING, name STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '/data/test/test_table';

// 删除表，如果是外部表，只会删除元数据(表结构)，不会删除外部文件中的数据

drop table invoice_lines;

// 删除表的某个分区

alter table srm.invoice_lines_temp2 drop partition(jobid='JOBID');

// 删除外部表数据文件以及目录

DFS -rm -r /data/test/test_table;

// 更新表

ALTER TABLE invoice_lines RENAME TO invoice_lines2;

ALTER TABLE invoice_lines ADD COLUMNS (new_col2 INT COMMENT '内容');

// 清空表，比delete快很多，在mysql中会连索引记录都清空。delete会记录日志，truncate 不会记录日志？

truncate table invoice_lines;

// 删除记录

delete from invoice [where xxx = yyy]

Hive 数据导入

-----------------------------------------有关于数据导入------------------------------------------

// 导入本地文件数据到Hive表

load data local inpath '/apps/data/test1.txt'  into table invoice_lines;

// 导入HDFS文件数据到Hive表

load data inpath '/hdfs/app/data/test.txt'  into table invoice_lines;

// 从别的表中查询出相应的数据并导入到Hive表中，注意列数目一定要相同

insert into table invoice_lines select * from invoice_lines_temp2;

// 导入到指定分区表，注意列数目一定要相同

insert into table invoice_lines partition(jobid='') select xx1,xx2,xx3 from invoice_lines_temp2 where jobid='';

// 导入到指定分区表，采用动态分区的方式，注意列数目一定要相同

insert into table invoice_lines partition(jobid) select * from invoice_lines_temp2;

// Hive还支持多表插入，即把FROM 写到前面

FROM invoice insert into table invoice_temp1 select xx,xx2 insert into table invoice_temp2 select xx4,xx6;

// 项目上用到的一些写法

INSERT OVERWRITE TABLE srm.invoice_lines_temp2 PARTITION(jobid) SELECT sour_t.* FROM srm.invoice_lines_temp2 sour_t WHERE jobid = '';

INSERT INTO TABLE srm.invoice_lines SELECT * FROM srm.invoice_lines_temp2 WHERE jobid = '';

INSERT OVERWRITE TABLE srm.invoice_lines_temp2 PARTITION(jobid) SELECT * FROM srm.invoice_lines_temp2 WHERE jobid='' AND 1 = 1;

-- Overwrites the target partition(s) of srm.invoice_lines_temp2 with the rows
-- selected below: rows with jobid = '' whose (source_sys_key, legal_company)
-- pair has exactly one distinct invoice_line_type + invoice_head_id combination.
-- PARTITION(jobid) carries no value, so the partition comes from the query output.
INSERT OVERWRITE TABLE srm.invoice_lines_temp2 PARTITION(jobid)

SELECT temp.* FROM srm.invoice_lines_temp2 temp JOIN

(

-- t0: the (source_sys_key, legal_company) pairs that qualify.
SELECT

    source_sys_key,

    legal_company,

    -- NOTE(review): with an empty separator, different (line_type, head_id)
    -- pairs could concatenate to the same string - confirm this is acceptable.
    count( DISTINCT concat_ws( '', concat( invoice_line_type ), concat( invoice_head_id ) ) )

FROM

    srm.invoice_lines_temp2

WHERE jobid = ''

GROUP BY

    source_sys_key,

    legal_company

HAVING

    -- Keep only groups with a single distinct combination.
    count( DISTINCT concat_ws( '', concat( invoice_line_type ), concat( invoice_head_id ) ) ) = 1

) t0 ON (temp.source_sys_key = t0.source_sys_key AND temp.legal_company = t0.legal_company )

-- Restrict the outer side to the same jobid as the subquery.
where temp.jobid = '';

// 在创建表的时候通过从别的表中查询出相应的记录并插入到所创建的表中

create table invoice_temp1 AS select xx1,xx2,xx3 from invoice;

-----------------------------------------有关于数据导入------------------------------------------

// 删除表中数据，但要保持表的结构定义

dfs -rmr /user/hive/warehouse/srm/invoice_lines;

// 创建外部表

CREATE EXTERNAL TABLE tinvoice_lines(id STRING, name STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '/data/test/test_table';

// 导入数据到表中(文件会被移动到仓库目录/data/test/test_table)

load data inpath '/test_tmp_data.txt' INTO TABLE tinvoice_lines;

hive -e "load data local inpath '${SOURCE_PATH}/${SourceFileNameNochar}' overwrite into table srm.invoice_lines_temp1 partition(jobid='${JOBID}');"

Sqoop 的导入导出

// 测试数据库连接

# Smoke-test the JDBC connection by running a trivial statement through Sqoop.
# Fixes: a space was missing between "root" and "--password", and eval needs a statement to run.
# NOTE: --password on the command line ends up in shell history; prefer -P (prompt) outside demos.
sqoop eval --connect jdbc:mysql://192.168.180.11/angel --username root --password root --query "SELECT 1"

// MySQL导入到Hive

sqoop import --connect jdbc:mysql://localhost:3306/test --username root --password 123456 --table person -m 1 --hive-import

// 导出某Hive表指定分区（jobid=106）的数据到MySQL

sqoop export --connect jdbc:mysql://192.168.11.172:16408/ztsrm  --username srm --password handhand  --table invoice_lines  --export-dir /apps/hive/warehouse/srm.db/invoice_lines_temp2/jobid=106 --input-fields-terminated-by ','  --input-null-string "\\\\N" --input-null-non-string "\\\\N"

// 导出某Hive表所有数据到MySQL

sqoop export --connect jdbc:mysql://192.168.11.172:16408/ztsrm  --username srm --password handhand  --table invoice_lines  --export-dir /apps/hive/warehouse/srm.db/invoice_lines_temp2 --input-fields-terminated-by ','  --input-null-string "\\\\N" --input-null-non-string "\\\\N"

插入表

INSERT INTO TABLE srm.invoice_lines_temp2 PARTITION (jobid = '') (

source_sys_key,

status,

material_group

)

SELECT

    '${GROUP_NAME}' source_sys_key,

    (

    CASE

        WHEN column28 IN ( '', 'P', 'V' ) THEN 'VERIFIED'

        ELSE  column28

    END

    ) status,

    IF(column30 IS NULL, '', regexp_replace ( column30, '"', '' ) ) material_group

FROM

    srm.invoice_lines_temp1 WHERE jobid = '';

动态分区表

-- The dynamic-partition settings must be in effect before the INSERT is submitted,
-- so they are issued first.

-- Enable dynamic partitioning for this session.
SET hive.exec.dynamic.partition=true;

-- nonstrict: every partition column may be dynamic.
-- strict would require at least one static partition column.
SET hive.exec.dynamic.partition.mode=nonstrict;

-- Overwrite srm.invoice_lines_temp2 from its own rows whose jobid is ''.
-- PARTITION(jobid) has no value, so the partition is taken from the query output.
INSERT OVERWRITE TABLE srm.invoice_lines_temp2 PARTITION(jobid)
SELECT
    sour_t.*
FROM
    srm.invoice_lines_temp2 sour_t
WHERE jobid = '';

Python 脚本创建Hive 表

import  pymysql

import codecs

def _hive_type_for(mysql_type):
    """Map a MySQL column type such as ``int(11) unsigned`` to a Hive type name."""
    # Compare only the leading type keyword. Substring matching used to turn
    # ``point`` or ``enum('print', ...)`` into ``int`` because they contain "int".
    words = mysql_type.split('(')[0].split()
    base = words[0].lower() if words else ''
    if base == 'bigint':
        return 'bigint'
    if base in ('int', 'tinyint', 'smallint', 'mediumint', 'integer'):
        return 'int'
    if base in ('double', 'float', 'decimal'):
        return 'double'
    return 'string'


def _hive_quote(text):
    """Escape *text* for use inside a single-quoted Hive string literal."""
    if text is None:
        return ''
    # Backslashes first, so the backslash added for a quote is not doubled again.
    return str(text).replace('\\', '\\\\').replace("'", "\\'")


def getSingleSQL(table, schema='srm', ispartition=False):
    """Build the Hive ``CREATE TABLE`` statement for one MySQL table.

    The column list is read from MySQL with ``SHOW FULL FIELDS`` and every
    column is mapped to ``bigint``, ``int``, ``double`` or ``string``.

    Args:
        table: Table name; the same name is used in MySQL and in Hive.
        schema: Hive database the table is created in.
        ispartition: Accepted for compatibility; currently not used, the
            generated DDL never contains a partition clause.

    Returns:
        The DDL text, or an empty string when no column could be read
        (so callers never append a malformed statement).
    """
    create_head = 'CREATE TABLE IF NOT EXISTS {0}.{1}('.format(schema, table) + '\n'
    create_tail = 'ROW FORMAT DELIMITED FIELDS TERMINATED BY \',\' ; \n\n'
    column_defs = []

    # NOTE(review): connection settings and credentials are hard-coded here.
    connection = pymysql.connect(host='192.168.11.172', port=16408, user='srm', password='handhand', db='srm', charset='utf8')
    try:
        with connection.cursor(cursor=pymysql.cursors.DictCursor) as cursor:
            # Backtick-quote the identifier so reserved words / odd names work.
            quoted_table = '`{0}`'.format(str(table).replace('`', '``'))
            cursor.execute('SHOW FULL FIELDS FROM  {0}'.format(quoted_table))
            try:
                for row in cursor:
                    column_defs.append("{0} {1} comment '{2}' ".format(
                        row['Field'],
                        _hive_type_for(row['Type']),
                        _hive_quote(row['Comment']),
                    ))
            except Exception as exc:
                # Best effort, as before: keep the columns read so far,
                # but report which table failed and why.
                print('程序异常!', table, exc)
    finally:
        connection.close()

    if not column_defs:
        return ''
    # Joining avoids the old ``[:-2]`` slice, which removed the opening
    # parenthesis whenever the column list was empty.
    return create_head + ',\n'.join(column_defs) + '\n' + ')' + create_tail

def getTotalSQL():
    """Append the Hive DDL of every base table in the MySQL schema to a file.

    Table names are read from ``INFORMATION_SCHEMA.TABLES`` (schema ``SRM``,
    base tables only). For each one the statement returned by
    ``getSingleSQL`` is appended to ``create_hive_table.sql`` in the current
    working directory.
    """
    # NOTE(review): connection settings and credentials are hard-coded here.
    connection = pymysql.connect(host='192.168.11.172', port=16408, user='srm', password='handhand', db='srm', charset='utf8')
    try:
        with connection.cursor(cursor=pymysql.cursors.DictCursor) as cursor:
            sql = 'SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA=\'SRM\' AND TABLE_TYPE=\'BASE TABLE\' '
            cursor.execute(sql)
            try:
                for row in cursor:
                    print(row)
                    tableName = row['TABLE_NAME']
                    singleSQL = getSingleSQL(tableName)
                    # ``with`` closes the handle after each table; the old code
                    # opened a new handle per table and never closed any.
                    with open('create_hive_table.sql', 'a', encoding='utf-8') as f:
                        f.write(singleSQL)
            except Exception as exc:
                # Same best-effort behaviour as before (stop at the first
                # failing table), but the cause is now reported.
                print('程序异常了哦!', exc)
    finally:
        connection.close()

# Script entry point: executing this file runs the full DDL generation immediately.
getTotalSQL()