Pandas之DataFrame——Part 2

'''

【课程2.】  时间模块：datetime

datetime模块，主要掌握：datetime.date(), datetime.datetime(), datetime.timedelta()

日期解析方法：parser.parse

'''

# datetime.date：date对象

import datetime  # 也可以写 from datetime import date

today = datetime.date.today()

print(today,type(today))

print(str(today),type(str(today)))

# datetime.date.today 返回今日

# 输出格式为 date类

t = datetime.date(,,)

print(t)

# (年，月，日) → 直接得到当时日期

　　输出：

-- <class 'datetime.date'>

-- <class 'str'>

--

# datetime.datetime：datetime对象

now = datetime.datetime.now()

print(now,type(now))

print(str(now),type(str(now)))

# .now()方法，输出当前时间

# 输出格式为 datetime类

# 可通过str()转化为字符串

t1 = datetime.datetime(,,)

t2 = datetime.datetime(,,,,,)

print(t1,t2)

# (年，月，日，时，分，秒)，至少输入年月日

t2-t1

# 相减得到时间差 —— timedelta

　　输出：

-- ::04.769222 <class 'datetime.datetime'>

-- ::04.769222 <class 'str'>

-- :: -- ::

# datetime.timedelta：时间差

today = datetime.datetime.today()  # datetime.datetime也有today()方法

yesterday = today - datetime.timedelta(1)  # 减去1天的时间差，得到昨天

print(today)

print(yesterday)

print(today - datetime.timedelta())

# 时间差主要用作时间的加减法，相当于可被识别的时间“差值”

　　输出：

-- ::06.484283

-- ::06.484283

-- ::06.484283

# parser.parse：日期字符串转换

from dateutil.parser import parse

date = '12-21-2017'

t = parse(date)

print(t,type(t))

# 直接将str转化成datetime.datetime

print(parse('2000-1-1'),'\n',

     parse('5/1/2014'),'\n',

     parse('5/1/2014', dayfirst = True),'\n',  # 国际通用格式中，日在月之前，可以通过dayfirst来设置

     parse('22/1/2014'),'\n',

     parse('Jan 31, 1997 10:45 PM'))

# 各种格式可以解析，但无法支持中文

　　输出：

-- :: <class 'datetime.datetime'>

-- ::

 -- ::

 -- ::

 -- ::

 -- ::

# pd.Timestamp()

import numpy as np

import pandas as pd

date1 = datetime.datetime(,,,,,)  # 创建一个datetime.datetime

date2 = '2017-12-21'  # 创建一个字符串

t1 = pd.Timestamp(date1)

t2 = pd.Timestamp(date2)

print(t1,type(t1))

print(t2)

print(pd.Timestamp('2017-12-21 15:00:22'))

# 直接生成pandas的时刻数据 → 时间戳

# 数据类型为 pandas的Timestamp

　　输出：

-- :: <class 'pandas.tslib.Timestamp'>

-- ::

-- ::

# pd.to_datetime

from datetime import datetime

date1 = datetime(,,,,,)

date2 = '2017-12-21'

t1 = pd.to_datetime(date1)

t2 = pd.to_datetime(date2)

print(t1,type(t1))

print(t2,type(t2))

# pd.to_datetime()：如果是单个时间数据，转换成pandas的时刻数据，数据类型为Timestamp

lst_date = [ '2017-12-21', '2017-12-22', '2017-12-23']

t3 = pd.to_datetime(lst_date)

print(t3,type(t3))

# 多个时间数据，将会转换为pandas的DatetimeIndex

　　输出：

-- :: <class 'pandas.tslib.Timestamp'>

-- :: <class 'pandas.tslib.Timestamp'>

DatetimeIndex(['2017-12-21', '2017-12-22', '2017-12-23'], dtype='datetime64[ns]', freq=None) <class 'pandas.tseries.index.DatetimeIndex'>

# pd.to_datetime → 多个时间数据转换时间戳索引

date1 = [datetime(,,),datetime(,,),datetime(,,),datetime(,,),datetime(,,)]

date2 = ['2017-2-1','2017-2-2','2017-2-3','2017-2-4','2017-2-5','2017-2-6']

print(date1)

print(date2)

t1 = pd.to_datetime(date2)

t2 = pd.to_datetime(date2)

print(t1)

print(t2)

# 多个时间数据转换为 DatetimeIndex

date3 = ['2017-2-1','2017-2-2','2017-2-3','hello world!','2017-2-5','2017-2-6']

t3 = pd.to_datetime(date3, errors = 'ignore')

print(t3,type(t3))

# 当一组时间序列中夹杂其他格式数据，可用errors参数返回

# errors = 'ignore':不可解析时返回原始输入，这里就是直接生成一般数组

t4 = pd.to_datetime(date3, errors = 'coerce')

print(t4,type(t4))

# errors = 'coerce':不可解析的值返回NaT（Not a Time），结果仍为DatetimeIndex

　　输出：

[datetime.datetime(, , , , ), datetime.datetime(, , , , ), datetime.datetime(, , , , ), datetime.datetime(, , , , ), datetime.datetime(, , , , )]

['2017-2-1', '2017-2-2', '2017-2-3', '2017-2-4', '2017-2-5', '2017-2-6']

DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', '2017-02-04',

               '2017-02-05', '2017-02-06'],

              dtype='datetime64[ns]', freq=None)

DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', '2017-02-04',

               '2017-02-05', '2017-02-06'],

              dtype='datetime64[ns]', freq=None)

['2017-2-1' '2017-2-2' '2017-2-3' 'hello world!' '2017-2-5' '2017-2-6'] <class 'numpy.ndarray'>

DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', 'NaT', '2017-02-05',

               '2017-02-06'],

              dtype='datetime64[ns]', freq=None) <class 'pandas.tseries.index.DatetimeIndex'>

'''

【课程2.】  Pandas时间戳索引：DatetimeIndex

核心：pd.date_range()

'''

# pd.DatetimeIndex()与TimeSeries时间序列

rng = pd.DatetimeIndex(['12/1/2017','12/2/2017','12/3/2017','12/4/2017','12/5/2017'])

print(rng,type(rng))

print(rng[],type(rng[]))

# 直接生成时间戳索引，支持str、datetime.datetime

# 单个时间戳为Timestamp，多个时间戳为DatetimeIndex

st = pd.Series(np.random.rand(len(rng)), index = rng)

print(st,type(st))

print(st.index)

# 以DatetimeIndex为index的Series，即为TimeSeries（时间序列）

　　输出：

DatetimeIndex(['2017-12-01', '2017-12-02', '2017-12-03', '2017-12-04',

               '2017-12-05'],

              dtype='datetime64[ns]', freq=None) <class 'pandas.tseries.index.DatetimeIndex'>

-- :: <class 'pandas.tslib.Timestamp'>

--    0.837612

--    0.539392

--    0.100238

--    0.285519

--    0.939607

dtype: float64 <class 'pandas.core.series.Series'>

DatetimeIndex(['2017-12-01', '2017-12-02', '2017-12-03', '2017-12-04',

               '2017-12-05'],

              dtype='datetime64[ns]', freq=None)

# pd.date_range()-日期范围：生成日期范围

# 2种生成方式：①start + end； ②start/end + periods

# 默认频率：day

rng1 = pd.date_range('1/1/2017','1/10/2017', normalize=True)

rng2 = pd.date_range(start = '1/1/2017', periods = 10)

rng3 = pd.date_range(end = '1/30/2017 15:00:00', periods = 10)  # 增加了时、分、秒

print(rng1,type(rng1))

print(rng2)

print(rng3)

print('-------')

# 直接生成DatetimeIndex

# pd.date_range(start=None, end=None, periods=None, freq='D', tz=None, normalize=False, name=None, closed=None, **kwargs)

# start：开始时间

# end：结束时间

# periods：要生成的时间点个数（与start或end搭配使用）

# freq：频率，默认天，pd.date_range()默认频率为日历日，pd.bdate_range()默认频率为工作日

# tz：时区

rng4 = pd.date_range(start = '1/1/2017 15:30', periods = 10, name = 'hello world!', normalize = True)

print(rng4)

print('-------')

# normalize：将时间参数值规范化到午夜时间戳（这里最后就直接变成0:00:00，并不是15:30:00）

# name：索引对象名称

print(pd.date_range('20170101','20170104'))  # 20170101也可读取

print(pd.date_range('20170101','20170104',closed = 'right'))

print(pd.date_range('20170101','20170104',closed = 'left'))

print('-------')

# closed：默认为None的情况下，左闭右闭，left则左闭右开，right则左开右闭

print(pd.bdate_range('',''))

# pd.bdate_range()默认频率为工作日

print(list(pd.date_range(start = '1/1/2017', periods = 10)))

# 直接转化为list，元素为Timestamp

　　输出：

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',

               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',

               '2017-01-09', '2017-01-10'],

              dtype='datetime64[ns]', freq='D') <class 'pandas.tseries.index.DatetimeIndex'>

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',

               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',

               '2017-01-09', '2017-01-10'],

              dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2017-01-21 15:00:00', '2017-01-22 15:00:00',

               '2017-01-23 15:00:00', '2017-01-24 15:00:00',

               '2017-01-25 15:00:00', '2017-01-26 15:00:00',

               '2017-01-27 15:00:00', '2017-01-28 15:00:00',

               '2017-01-29 15:00:00', '2017-01-30 15:00:00'],

              dtype='datetime64[ns]', freq='D')

-------

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',

               '2017-01-05', '2017-01-06', '2017-01-07', '2017-01-08',

               '2017-01-09', '2017-01-10'],

              dtype='datetime64[ns]', name='hello world!', freq='D')

-------

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq='D')

-------

DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04', '2017-01-05',

               '2017-01-06'],

              dtype='datetime64[ns]', freq='B')

[Timestamp('2017-01-01 00:00:00', offset='D'), Timestamp('2017-01-02 00:00:00', offset='D'), Timestamp('2017-01-03 00:00:00', offset='D'), Timestamp('2017-01-04 00:00:00', offset='D'), Timestamp('2017-01-05 00:00:00', offset='D'), Timestamp('2017-01-06 00:00:00', offset='D'), Timestamp('2017-01-07 00:00:00', offset='D'), Timestamp('2017-01-08 00:00:00', offset='D'), Timestamp('2017-01-09 00:00:00', offset='D'), Timestamp('2017-01-10 00:00:00', offset='D')]

# pd.date_range()-日期范围：频率()

print(pd.date_range('2017/1/1','2017/1/4'))  # 默认freq = 'D'：每日历日

print(pd.date_range('2017/1/1','2017/1/4', freq = 'B'))  # B：每工作日

print(pd.date_range('2017/1/1','2017/1/2', freq = 'H'))  # H：每小时

print(pd.date_range('2017/1/1 12:00','2017/1/1 12:10', freq = 'T'))  # T/MIN：每分

print(pd.date_range('2017/1/1 12:00:00','2017/1/1 12:00:10', freq = 'S'))  # S：每秒

print(pd.date_range('2017/1/1 12:00:00','2017/1/1 12:00:10', freq = 'L'))  # L：每毫秒（千分之一秒）

print(pd.date_range('2017/1/1 12:00:00','2017/1/1 12:00:10', freq = 'U'))  # U：每微秒（百万分之一秒）

print(pd.date_range('2017/1/1','2017/2/1', freq = 'W-MON'))

# W-MON：从指定星期几开始算起，每周

# 星期几缩写：MON/TUE/WED/THU/FRI/SAT/SUN

print(pd.date_range('2017/1/1','2017/5/1', freq = 'WOM-2MON'))

# WOM-2MON：每月的第几个星期几开始算，这里是每月第二个星期一

　　输出：

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D')

DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='B')

DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 01:00:00',

               '2017-01-01 02:00:00', '2017-01-01 03:00:00',

               '2017-01-01 04:00:00', '2017-01-01 05:00:00',

               '2017-01-01 06:00:00', '2017-01-01 07:00:00',

               '2017-01-01 08:00:00', '2017-01-01 09:00:00',

               '2017-01-01 10:00:00', '2017-01-01 11:00:00',

               '2017-01-01 12:00:00', '2017-01-01 13:00:00',

               '2017-01-01 14:00:00', '2017-01-01 15:00:00',

               '2017-01-01 16:00:00', '2017-01-01 17:00:00',

               '2017-01-01 18:00:00', '2017-01-01 19:00:00',

               '2017-01-01 20:00:00', '2017-01-01 21:00:00',

               '2017-01-01 22:00:00', '2017-01-01 23:00:00',

               '2017-01-02 00:00:00'],

              dtype='datetime64[ns]', freq='H')

DatetimeIndex(['2017-01-01 12:00:00', '2017-01-01 12:01:00',

               '2017-01-01 12:02:00', '2017-01-01 12:03:00',

               '2017-01-01 12:04:00', '2017-01-01 12:05:00',

               '2017-01-01 12:06:00', '2017-01-01 12:07:00',

               '2017-01-01 12:08:00', '2017-01-01 12:09:00',

               '2017-01-01 12:10:00'],

              dtype='datetime64[ns]', freq='T')

DatetimeIndex(['2017-01-01 12:00:00', '2017-01-01 12:00:01',

               '2017-01-01 12:00:02', '2017-01-01 12:00:03',

               '2017-01-01 12:00:04', '2017-01-01 12:00:05',

               '2017-01-01 12:00:06', '2017-01-01 12:00:07',

               '2017-01-01 12:00:08', '2017-01-01 12:00:09',

               '2017-01-01 12:00:10'],

              dtype='datetime64[ns]', freq='S')

DatetimeIndex([       '2017-01-01 12:00:00', '2017-01-01 12:00:00.001000',

               '2017-01-01 12:00:00.002000', '2017-01-01 12:00:00.003000',

               '2017-01-01 12:00:00.004000', '2017-01-01 12:00:00.005000',

               '2017-01-01 12:00:00.006000', '2017-01-01 12:00:00.007000',

               '2017-01-01 12:00:00.008000', '2017-01-01 12:00:00.009000',

               ...

               '2017-01-01 12:00:09.991000', '2017-01-01 12:00:09.992000',

               '2017-01-01 12:00:09.993000', '2017-01-01 12:00:09.994000',

               '2017-01-01 12:00:09.995000', '2017-01-01 12:00:09.996000',

               '2017-01-01 12:00:09.997000', '2017-01-01 12:00:09.998000',

               '2017-01-01 12:00:09.999000',        '2017-01-01 12:00:10'],

              dtype='datetime64[ns]', length=, freq='L')

DatetimeIndex([       '2017-01-01 12:00:00', '2017-01-01 12:00:00.000001',

               '2017-01-01 12:00:00.000002', '2017-01-01 12:00:00.000003',

               '2017-01-01 12:00:00.000004', '2017-01-01 12:00:00.000005',

               '2017-01-01 12:00:00.000006', '2017-01-01 12:00:00.000007',

               '2017-01-01 12:00:00.000008', '2017-01-01 12:00:00.000009',

               ...

               '2017-01-01 12:00:09.999991', '2017-01-01 12:00:09.999992',

               '2017-01-01 12:00:09.999993', '2017-01-01 12:00:09.999994',

               '2017-01-01 12:00:09.999995', '2017-01-01 12:00:09.999996',

               '2017-01-01 12:00:09.999997', '2017-01-01 12:00:09.999998',

               '2017-01-01 12:00:09.999999',        '2017-01-01 12:00:10'],

              dtype='datetime64[ns]', length=, freq='U')

DatetimeIndex(['2017-01-02', '2017-01-09', '2017-01-16', '2017-01-23',

               '2017-01-30'],

              dtype='datetime64[ns]', freq='W-MON')

DatetimeIndex(['2017-01-09', '2017-02-13', '2017-03-13', '2017-04-10'], dtype='datetime64[ns]', freq='WOM-2MON')

# pd.date_range()-日期范围：频率()

print(pd.date_range('2017','2018', freq = 'M'))

print(pd.date_range('2017','2020', freq = 'Q-DEC'))

print(pd.date_range('2017','2020', freq = 'A-DEC'))

print('------')

# M：每月最后一个日历日

# Q-月：指定月为季度末，每个季度末最后一月的最后一个日历日

# A-月：每年指定月份的最后一个日历日

# 月缩写：JAN/FEB/MAR/APR/MAY/JUN/JUL/AUG/SEP/OCT/NOV/DEC

# 所以Q-月只有三种情况：1-4-7-10,2-5-8-11,3-6-9-12

print(pd.date_range('2017','2018', freq = 'BM'))

print(pd.date_range('2017','2020', freq = 'BQ-DEC'))

print(pd.date_range('2017','2020', freq = 'BA-DEC'))

print('------')

# BM：每月最后一个工作日

# BQ-月：指定月为季度末，每个季度末最后一月的最后一个工作日

# BA-月：每年指定月份的最后一个工作日

print(pd.date_range('2017','2018', freq = 'MS'))

print(pd.date_range('2017','2020', freq = 'QS-DEC'))

print(pd.date_range('2017','2020', freq = 'AS-DEC'))

print('------')

# MS：每月第一个日历日

# QS-月：指定月为季度末，每个季度末最后一月的第一个日历日

# AS-月：每年指定月份的第一个日历日

print(pd.date_range('2017','2018', freq = 'BMS'))

print(pd.date_range('2017','2020', freq = 'BQS-DEC'))

print(pd.date_range('2017','2020', freq = 'BAS-DEC'))

print('------')

# BMS：每月第一个工作日

# BQS-月：指定月为季度末，每个季度末最后一月的第一个工作日

# BAS-月：每年指定月份的第一个工作日

　　输出：

DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-30',

               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',

               '2017-09-30', '2017-10-31', '2017-11-30', '2017-12-31'],

              dtype='datetime64[ns]', freq='M')

DatetimeIndex(['2017-03-31', '2017-06-30', '2017-09-30', '2017-12-31',

               '2018-03-31', '2018-06-30', '2018-09-30', '2018-12-31',

               '2019-03-31', '2019-06-30', '2019-09-30', '2019-12-31'],

              dtype='datetime64[ns]', freq='Q-DEC')

DatetimeIndex(['2017-12-31', '2018-12-31', '2019-12-31'], dtype='datetime64[ns]', freq='A-DEC')

------

DatetimeIndex(['2017-01-31', '2017-02-28', '2017-03-31', '2017-04-28',

               '2017-05-31', '2017-06-30', '2017-07-31', '2017-08-31',

               '2017-09-29', '2017-10-31', '2017-11-30', '2017-12-29'],

              dtype='datetime64[ns]', freq='BM')

DatetimeIndex(['2017-03-31', '2017-06-30', '2017-09-29', '2017-12-29',

               '2018-03-30', '2018-06-29', '2018-09-28', '2018-12-31',

               '2019-03-29', '2019-06-28', '2019-09-30', '2019-12-31'],

              dtype='datetime64[ns]', freq='BQ-DEC')

DatetimeIndex(['2017-12-29', '2018-12-31', '2019-12-31'], dtype='datetime64[ns]', freq='BA-DEC')

------

DatetimeIndex(['2017-01-01', '2017-02-01', '2017-03-01', '2017-04-01',

               '2017-05-01', '2017-06-01', '2017-07-01', '2017-08-01',

               '2017-09-01', '2017-10-01', '2017-11-01', '2017-12-01',

               '2018-01-01'],

              dtype='datetime64[ns]', freq='MS')

DatetimeIndex(['2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01',

               '2018-03-01', '2018-06-01', '2018-09-01', '2018-12-01',

               '2019-03-01', '2019-06-01', '2019-09-01', '2019-12-01'],

              dtype='datetime64[ns]', freq='QS-DEC')

DatetimeIndex(['2017-12-01', '2018-12-01', '2019-12-01'], dtype='datetime64[ns]', freq='AS-DEC')

------

DatetimeIndex(['2017-01-02', '2017-02-01', '2017-03-01', '2017-04-03',

               '2017-05-01', '2017-06-01', '2017-07-03', '2017-08-01',

               '2017-09-01', '2017-10-02', '2017-11-01', '2017-12-01',

               '2018-01-01'],

              dtype='datetime64[ns]', freq='BMS')

DatetimeIndex(['2017-03-01', '2017-06-01', '2017-09-01', '2017-12-01',

               '2018-03-01', '2018-06-01', '2018-09-03', '2018-12-03',

               '2019-03-01', '2019-06-03', '2019-09-02', '2019-12-02'],

              dtype='datetime64[ns]', freq='BQS-DEC')

DatetimeIndex(['2017-12-01', '2018-12-03', '2019-12-02'], dtype='datetime64[ns]', freq='BAS-DEC')

# pd.date_range()-日期范围：复合频率

print(pd.date_range('2017/1/1','2017/2/1', freq = '7D'))  # 7天

print(pd.date_range('2017/1/1','2017/1/2', freq = '2h30min'))  # 2小时30分钟

print(pd.date_range('2017','2018', freq = '2M'))  # 每2个月，取当月最后一个日历日

　　输出：

DatetimeIndex(['2017-01-01', '2017-01-08', '2017-01-15', '2017-01-22',

               '2017-01-29'],

              dtype='datetime64[ns]', freq='7D')

DatetimeIndex(['2017-01-01 00:00:00', '2017-01-01 02:30:00',

               '2017-01-01 05:00:00', '2017-01-01 07:30:00',

               '2017-01-01 10:00:00', '2017-01-01 12:30:00',

               '2017-01-01 15:00:00', '2017-01-01 17:30:00',

               '2017-01-01 20:00:00', '2017-01-01 22:30:00'],

              dtype='datetime64[ns]', freq='150T')

DatetimeIndex(['2017-01-31', '2017-03-31', '2017-05-31', '2017-07-31',

               '2017-09-30', '2017-11-30'],

              dtype='datetime64[ns]', freq='2M')

# asfreq：时期频率转换

ts = pd.Series(np.random.rand(),

              index = pd.date_range('',''))

print(ts)

print(ts.asfreq('4H',method = 'ffill'))

# 改变频率，这里是D改为4H

# method：插值模式，None不插值，ffill用之前值填充，bfill用之后值填充

　　输出：

--    0.945391

--    0.656020

--    0.295795

--    0.318078

Freq: D, dtype: float64

-- ::    0.945391

-- ::    0.945391

-- ::    0.945391

-- ::    0.945391

-- ::    0.945391

-- ::    0.945391

-- ::    0.656020

-- ::    0.656020

-- ::    0.656020

-- ::    0.656020

-- ::    0.656020

-- ::    0.656020

-- ::    0.295795

-- ::    0.295795

-- ::    0.295795

-- ::    0.295795

-- ::    0.295795

-- ::    0.295795

-- ::    0.318078

Freq: 4H, dtype: float64

# pd.date_range()-日期范围：超前/滞后数据

ts = pd.Series(np.random.rand(),

              index = pd.date_range('',''))

print(ts)

print(ts.shift(2))

print(ts.shift(-2))

print('------')

# 正数：数值后移（滞后）；负数：数值前移（超前）

per = ts/ts.shift(1) - 1

print(per)

print('------')

# 计算变化百分比，这里计算：该时间戳与上一个时间戳相比，变化百分比

print(ts.shift(, freq = 'D'))

print(ts.shift(, freq = 'T'))

# 加上freq参数：对时间戳进行位移，而不是对数值进行位移

　　输出：

--    0.967312

--    0.945871

--    0.555347

--    0.872889

Freq: D, dtype: float64

--         NaN

--         NaN

--    0.967312

--    0.945871

Freq: D, dtype: float64

--    0.555347

--    0.872889

--         NaN

--         NaN

Freq: D, dtype: float64

------

--         NaN

--   -0.022166

--   -0.412872

--    0.571790

Freq: D, dtype: float64

------

--    0.967312

--    0.945871

--    0.555347

--    0.872889

Freq: D, dtype: float64

-- ::    0.967312

-- ::    0.945871

-- ::    0.555347

-- ::    0.872889

Freq: D, dtype: float64

'''

【课程2.】  Pandas时期：Period

核心：pd.Period()

'''

# pd.Period()创建时期

p = pd.Period('2017', freq = 'M')

print(p, type(p))

# 生成一个以2017-01开始，月为频率的时间构造器

# pd.Period()参数：一个时间戳 + freq 参数 → freq 用于指明该 period 的长度，时间戳则说明该 period 在时间轴上的位置

print(p + )

print(p - )

print(pd.Period('', freq = 'A-DEC') - )

# 通过加减整数，将周期整体移动

# 这里是按照 月、年 移动

　　输出：

- <class 'pandas._period.Period'>

-

-

# pd.period_range()创建时期范围

prng = pd.period_range('1/1/2011', '1/1/2012', freq='M')

print(prng,type(prng))

print(prng[],type(prng[]))

# 数据格式为PeriodIndex，单个数值为Period

ts = pd.Series(np.random.rand(len(prng)), index = prng)

print(ts,type(ts))

print(ts.index)

# 时间序列

# Period('', freq = 'A-DEC')可以看成多个时间期的时间段中的游标

# Timestamp表示一个时间戳，是一个时间截面；Period是一个时期，是一个时间段！！但两者作为index时区别不大

　　输出：

PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06',

             '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12',

             '2012-01'],

            dtype='int64', freq='M') <class 'pandas.tseries.period.PeriodIndex'>

- <class 'pandas._period.Period'>

-    0.342571

-    0.826151

-    0.370505

-    0.137151

-    0.679976

-    0.265928

-    0.416502

-    0.874078

-    0.112801

-    0.112504

-    0.448408

-    0.851046

-    0.370605

Freq: M, dtype: float64 <class 'pandas.core.series.Series'>

PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04', '2011-05', '2011-06',

             '2011-07', '2011-08', '2011-09', '2011-10', '2011-11', '2011-12',

             '2012-01'],

            dtype='int64', freq='M')

# asfreq：频率转换

p = pd.Period('','A-DEC')

print(p)

print(p.asfreq('M', how = 'start'))  # 也可写 how = 's'

print(p.asfreq('D', how = 'end'))  # 也可写 how = 'e'

# 通过.asfreq(freq, method=None, how=None)方法转换成别的频率

prng = pd.period_range('','',freq = 'M')

ts1 = pd.Series(np.random.rand(len(prng)), index = prng)

ts2 = pd.Series(np.random.rand(len(prng)), index = prng.asfreq('D', how = 'start'))

print(ts1.head(),len(ts1))

print(ts2.head(),len(ts2))

# asfreq也可以转换TimeSeries的index

　　输出：

-

--

-    0.060797

-    0.441994

-    0.971933

-    0.000334

-    0.545191

Freq: M, dtype: float64

--    0.447614

--    0.679438

--    0.891729

--    0.949993

--    0.942548

Freq: D, dtype: float64

# 时间戳与时期之间的转换：pd.to_period()、pd.to_timestamp()

rng = pd.date_range('2017/1/1', periods = , freq = 'M')

prng = pd.period_range('','', freq = 'M')

ts1 = pd.Series(np.random.rand(len(rng)), index = rng)

print(ts1.head())

print(ts1.to_period().head())

# 每月最后一日，转化为每月

ts2 = pd.Series(np.random.rand(len(prng)), index = prng)

print(ts2.head())

print(ts2.to_timestamp().head())

# 每月，转化为每月第一天

　　输出：

--    0.125288

--    0.497174

--    0.573114

--    0.665665

--    0.263561

Freq: M, dtype: float64

-    0.125288

-    0.497174

-    0.573114

-    0.665665

-    0.263561

Freq: M, dtype: float64

-    0.748661

-    0.095891

-    0.280341

-    0.569813

-    0.067677

Freq: M, dtype: float64

--    0.748661

--    0.095891

--    0.280341

--    0.569813

--    0.067677

Freq: MS, dtype: float64

'''

【课程2.】  时间序列 - 索引及切片

TimeSeries是Series的一个子类，所以Series索引及数据选取方面的方法基本一样

同时TimeSeries通过时间序列有更便捷的方法做索引和切片

'''

# 索引

from datetime import datetime

rng = pd.date_range('2017/1','2017/3')

ts = pd.Series(np.random.rand(len(rng)), index = rng)

print(ts.head())

print(ts[])

print(ts[:])

print('-----')

# 基本下标位置索引

print(ts['2017/1/2'])

print(ts[''])

print(ts['1/10/2017'])

print(ts[datetime(,,)])

print('-----')

# 时间序列标签索引，支持各种时间字符串，以及datetime.datetime

# 时间序列由于按照时间先后排序，故不用考虑顺序问题

# 索引方法同样适用于Dataframe

　　输出：

--    0.107736

--    0.887981

--    0.712862

--    0.920021

--    0.317863

Freq: D, dtype: float64

0.107735945027

--    0.107736

--    0.887981

Freq: D, dtype: float64

-----

0.887980757812

0.712861778966

0.788336674948

0.93070380011

-----

# 切片

rng = pd.date_range('2017/1','2017/3',freq = '12H')

ts = pd.Series(np.random.rand(len(rng)), index = rng)

print(ts['2017/1/5':'2017/1/10'])

print('-----')

# 和Series按照index索引原理一样，也是末端包含

print(ts['2017/2'].head())

# 传入月，直接得到一个切片

　　输出：

-- ::    0.462085

-- ::    0.778637

-- ::    0.356306

-- ::    0.667964

-- ::    0.246857

-- ::    0.386956

-- ::    0.328203

-- ::    0.260853

-- ::    0.224920

-- ::    0.397457

-- ::    0.158729

-- ::    0.501266

Freq: 12H, dtype: float64

-----

-- ::    0.243932

-- ::    0.220830

-- ::    0.896107

-- ::    0.476584

-- ::    0.515817

Freq: 12H, dtype: float64

# 重复索引的时间序列

dates = pd.DatetimeIndex(['1/1/2015','1/2/2015','1/3/2015','1/4/2015','1/1/2015','1/2/2015'])

ts = pd.Series(np.random.rand(6), index = dates)

print(ts)

print(ts.is_unique,ts.index.is_unique)

print('-----')

# index有重复，is_unique检查 → values唯一，index不唯一

print(ts[''],type(ts['']))

print(ts[''],type(ts['']))

print('-----')

# index有重复的将返回多个值

print(ts.groupby(level = 0).mean())

# 通过groupby做分组，重复的值这里用平均值处理

　　输出：

--    0.300286

--    0.603865

--    0.017949

--    0.026621

--    0.791441

--    0.526622

dtype: float64

True False

-----

--    0.300286

--    0.791441

dtype: float64 <class 'pandas.core.series.Series'>

--    0.026621

dtype: float64 <class 'pandas.core.series.Series'>

-----

--    0.545863

--    0.565244

--    0.017949

--    0.026621

dtype: float64

'''

【课程2.】  时间序列 - 重采样

将时间序列从一个频率转换为另一个频率的过程，且会有数据的结合

降采样：高频数据 → 低频数据，eg.以天为频率的数据转为以月为频率的数据

升采样：低频数据 → 高频数据，eg.以年为频率的数据转为以月为频率的数据

'''

# 重采样：.resample()

# 创建一个以天为频率的TimeSeries，重采样为按5天为频率

rng = pd.date_range('20170101', periods = 12)

ts = pd.Series(np.arange(12), index = rng)

print(ts)

ts_re = ts.resample('5D')

ts_re2 = ts.resample('5D').sum()

print(ts_re, type(ts_re))

print(ts_re2, type(ts_re2))

print('-----')

# ts.resample('5D')：得到一个重采样构建器，频率改为5天

# ts.resample('5D').sum():得到一个新的聚合后的Series，聚合方式为求和

# freq：重采样频率 → ts.resample('5D')

# .sum()：聚合方法

print(ts.resample('5D').mean(),'→ 求平均值\n')

print(ts.resample('5D').max(),'→ 求最大值\n')

print(ts.resample('5D').min(),'→ 求最小值\n')

print(ts.resample('5D').median(),'→ 求中值\n')

print(ts.resample('5D').first(),'→ 返回第一个值\n')

print(ts.resample('5D').last(),'→ 返回最后一个值\n')

print(ts.resample('5D').ohlc(),'→ OHLC重采样\n')

# OHLC:金融领域的时间序列聚合方式 → open开盘、high最大值、low最小值、close收盘

　　输出：

--

--

--

--

--

--

--

--

--

--

--

--

Freq: D, dtype: int32

DatetimeIndexResampler [freq=< * Days>, axis=, closed=left, label=left, convention=start, base=] <class 'pandas.tseries.resample.DatetimeIndexResampler'>

--

--

--

Freq: 5D, dtype: int32 <class 'pandas.core.series.Series'>

-----

--     2.0

--     7.0

--    10.5

Freq: 5D, dtype: float64 → 求平均值

--

--

--

Freq: 5D, dtype: int32 → 求最大值

--

--

--

Freq: 5D, dtype: int32 → 求最小值

--     2.0

--     7.0

--    10.5

Freq: 5D, dtype: float64 → 求中值

--

--

--

Freq: 5D, dtype: int32 → 返回第一个值

--

--

--

Freq: 5D, dtype: int32 → 返回最后一个值

            open  high  low  close

--

--

--                 → OHLC重采样

# 降采样

rng = pd.date_range('', periods = )

ts = pd.Series(np.arange(,), index = rng)

print(ts)

print(ts.resample('5D').sum(),'→ 默认\n')

print(ts.resample('5D', closed = 'left').sum(),'→ left\n')

print(ts.resample('5D', closed = 'right').sum(),'→ right\n')

print('-----')

# closed：各时间段哪一端是闭合（即包含）的，按天等频率重采样时默认为left，即左闭右开

# 详解：这里values为0-11，按照5D重采样 → [0,1,2,3,4],[5,6,7,8,9],[10,11]

# left指定间隔左边为闭合端 → [0,1,2,3,4],[5,6,7,8,9],[10,11]

# right指定间隔右边为闭合端 → [0],[1,2,3,4,5],[6,7,8,9,10],[11]

print(ts.resample('5D', label = 'left').sum(),'→ leftlabel\n')

print(ts.resample('5D', label = 'right').sum(),'→ rightlabel\n')

# label：聚合值的index，默认为取左

# 值的采样方式仍为默认（这里closed取默认值）

　　输出：

--

--

--

--

--

--

--

--

--

--

--

--

Freq: D, dtype: int32

--

--

--

Freq: 5D, dtype: int32 → 默认

--

--

--

Freq: 5D, dtype: int32 → left

--

--

--

--

Freq: 5D, dtype: int32 → right

-----

--

--

--

Freq: 5D, dtype: int32 → leftlabel

--

--

--

Freq: 5D, dtype: int32 → rightlabel

# 升采样及插值

rng = pd.date_range('2017/1/1 0:0:0', periods = 5, freq = 'H')

ts = pd.DataFrame(np.arange(15).reshape(5,3),

                  index = rng,

                  columns = ['a','b','c'])

print(ts)

print(ts.resample('15T').asfreq())

print(ts.resample('15T').ffill())

print(ts.resample('15T').bfill())

# 低频转高频，主要是如何插值

# .asfreq()：不做填充，返回NaN

# .ffill()：向前填充（用前一个有效值填充）

# .bfill()：向后填充（用后一个有效值填充）

　　输出：

                     a   b   c

-- ::

-- ::

-- ::

-- ::

-- ::

                        a     b     c

-- ::   0.0   1.0   2.0

-- ::   NaN   NaN   NaN

-- ::   NaN   NaN   NaN

-- ::   NaN   NaN   NaN

-- ::   3.0   4.0   5.0

-- ::   NaN   NaN   NaN

-- ::   NaN   NaN   NaN

-- ::   NaN   NaN   NaN

-- ::   6.0   7.0   8.0

-- ::   NaN   NaN   NaN

-- ::   NaN   NaN   NaN

-- ::   NaN   NaN   NaN

-- ::   9.0  10.0  11.0

-- ::   NaN   NaN   NaN

-- ::   NaN   NaN   NaN

-- ::   NaN   NaN   NaN

-- ::  12.0  13.0  14.0

                      a   b   c

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

                      a   b   c

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

-- ::

# 时期重采样 - Period

prng = pd.period_range('','',freq = 'M')

ts = pd.Series(np.arange(len(prng)), index = prng)

print(ts)

print(ts.resample('3M').sum())  # 降采样

print(ts.resample('15D').ffill())  # 升采样

　　输出：

-

-

-

-

-

-

-

-

-

-

-

-

-

Freq: M, dtype: int32

--

--

--

--

--

Freq: 3M, dtype: int32

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

--

Freq: 15D, dtype: int32

Pandas之DataFrame——Part 2的更多相关文章

python 数据处理学习pandas之DataFrame
请原谅没有一次写完,本文是自己学习过程中的记录,完善pandas的学习知识,对于现有网上资料的缺少和利用python进行数据分析这本书部分知识的过时,只好以记录的形势来写这篇文章.最如果后续工作定下来 ...
Pandas之Dataframe叠加，排序，统计，重新设置索引
Pandas之Dataframe索引,排序,统计,重新设置索引一:叠加 import pandas as pd a_list = [df1,df2,df3] add_data = pd.concat ...
pandas中DataFrame对象to_csv()方法中的encoding参数
当使用pd.read_csv()方法读取csv格式文件的时候,常常会因为csv文件中带有中文字符而产生字符编码错误,造成读取文件错误,在这个时候,我们可以尝试将pd.read_csv()函数的enco ...
pandas（DataFrame）
DataFrame是二维数据结构,即数据以行和列的表格方式排列!特点:潜在的列是不同的类型,大小可变,标记行和列,可以对列和行执行算数运算. 其中Name,Age即为对应的Columns,序号0,1, ...
Python3 Pandas的DataFrame数据的增、删、改、查
Python3 Pandas的DataFrame数据的增.删.改.查一.DataFrame数据准备增.删.改.查的方法有很多很多种,这里只展示出常用的几种. 参数inplace默认为False,只 ...
Python3 Pandas的DataFrame格式数据写入excle文件、json、html、剪贴板、数据库
Python3 Pandas的DataFrame格式数据写入excle文件.json.html.剪贴板.数据库一.DataFrame格式数据 Pandas是Python下一个开源数据分析的库,它提供 ...
python. pandas(series,dataframe,index) method test
python. pandas(series,dataframe,index,reindex,csv file read and write) method test import pandas as ...
pandas取dataframe特定行/列
1. 按列取.按索引/行取.按特定行列取 import numpy as np from pandas import DataFrame import pandas as pd df=DataFram ...
Pandas中DataFrame修改列名
Pandas中DataFrame修改列名:使用 rename df = pd.read_csv('I:/Papers/consumer/codeandpaper/TmallData/result01- ...
Spark与Pandas中DataFrame对比
Pandas Spark 工作方式单机single machine tool,没有并行机制parallelism不支持Hadoop,处理大量数据有瓶颈分布式并行计算框架,内建并行机制paral ...

随机推荐

Java OOP——第七章多线程
1.进程:是指运行中的应用程序,每个进程都有自己独立的地址空间(内存空间): Eg:用户点击桌面的IE浏览器,就启动了一个进程,操作系统就会为该进程分配独立的地址空间.当用户再次点击左面的IE浏览器, ...
Linux系统Mini版配置相关
一:修改ip 编辑:vi /etc/sysconfig/network-sc/ifcfg-eth0 配置如下图:
python中的文件操作小结2
''' #-----------文件修改---------- f=open("test_1",'r',encoding="utf-8") f2=open(&qu ...
C# 打开帮助文档，打开电脑中其他应用或者文件
打开帮助文档 System.Diagnostics.Process.Start(Directory.GetCurrentDirectory() + "\\" + "hel ...
删除警告的方法 python
import warningswarnings.filterwarnings('ignore')
oracle11g导出表时空表导不出解决方案
oracle11g用exp命令导出数据库表时,有时会发现只导出了一部分表时而且不会报错,原因是有空表没有进行导出,之前一直没有找到方法于是用最笨的方法重新建这些空表,当然在我们实际当中表的数量大时我们 ...
Hive 压缩技术Data Compression
Mapreducwe 执行流程 :input > map > shuffle > reduce > output 压缩执行时间,map 之后,压缩,数据存储在本地磁盘,减少磁盘 ...
python的列表生成式和生成器
1.列表生成式是Python受欢迎的语法之一,通过一句简洁的语法就可以对一组元素进行过滤,还可以对得到的元素进行转换处理,语法格式为: [exp for val in collection if co ...
jquery跨域解决方案JSONP
1.在互联网中我们的计算机是通过IP来定位的,但是IP比较难记忆,因此通过domain name(域名)来取代IP 2.什么是跨域? (1)默认浏览器为了安全问题,禁止了xmlhttprequest跨 ...
webstorm git提交不成功的
git pull git pull origin master git pull origin master --allow-unrelated-histories

Pandas之DataFrame——Part 2

Pandas之DataFrame——Part 2的更多相关文章

随机推荐

热门专题