pandas功能使用rename, reindex, set

pandas rename 功能

在使用 pandas 的过程中经常会用到修改列名称的问题，会用到 rename 或者 reindex 等功能，每次都需要去查文档
当然经常也可以使用 df.columns重新赋值为某个列表
用 rename 则可以轻松应对 pandas 中修改列名的问题

导入常用的数据包

import pandas as pd

import numpy as np

构建一个含有multiIndex的 Series

arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]

tuples = list(zip(*arrays))

index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])

s = pd.Series(np.random.randn(8), index=index)

s.index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],

           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],

           names=['first', 'second'])

查看 s

first  second

bar    one      -0.073094

       two      -0.449141

baz    one       0.109093

       two      -0.033135

foo    one       1.315809

       two      -0.887890

qux    one       2.255328

       two      -0.778246

dtype: float64

使用set_names可以将 index 中的名称进行更改

s.index.set_names(['L1', 'L2'], inplace=True)

L1   L2

bar  one    0.037524

     two   -0.178425

baz  one   -0.778211

     two    1.440168

foo  one    0.314172

     two    0.710597

qux  one    1.197275

     two    0.527058

dtype: float64

s.index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],

           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],

           names=['L1', 'L2'])

同样可以使用 rename 将Series 修改回来

s.index.rename(['first','second'],inplace= True)

first  second

bar    one       0.037524

       two      -0.178425

baz    one      -0.778211

       two       1.440168

foo    one       0.314172

       two       0.710597

qux    one       1.197275

       two       0.527058

dtype: float64

使用reset_index 可以将 index 中的两列转化为正常的列

s.reset_index()

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

	first	second	0
0	bar	one	0.037524
1	bar	two	-0.178425
2	baz	one	-0.778211
3	baz	two	1.440168
4	foo	one	0.314172
5	foo	two	0.710597
6	qux	one	1.197275
7	qux	two	0.527058

可以使用 pivot_table 恢复成一开始的样子，将两列重新作为 index 展示出来

s.reset_index().pivot_table(index=['first','second'],values=0,aggfunc=lambda x:x)

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

		0
first	second
bar	one	0.037524
bar	two	-0.178425
baz	one	-0.778211
baz	two	1.440168
foo	one	0.314172
foo	two	0.710597
qux	one	1.197275
qux	two	0.527058

同样可以使用最简单的方式进行更改 index 中的名称

s.index.names=['first1','second1'] ## 此操作，相当于直接赋值，会更改 s

s.index

MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],

           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],

           names=['first1', 'second1'])

first1  second1

bar     one        0.037524

        two       -0.178425

baz     one       -0.778211

        two        1.440168

foo     one        0.314172

        two        0.710597

qux     one        1.197275

        two        0.527058

dtype: float64

df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,                    'B' : ['A', 'B', 'C'] * 4,

                 'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,

                  'D' : np.random.randn(12),

                 'E' : np.random.randn(12)})

df.head()

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

	A	B	C	D	E
0	one	A	foo	0.664180	-0.107764
1	one	B	foo	-0.833609	0.008083
2	two	C	foo	0.117919	-1.365583
3	three	A	bar	-0.116776	-1.201934
4	one	B	bar	-1.315190	-0.157779

df.pivot_table(index=['A','C'],values=['D'],columns='B',aggfunc=np.sum,fill_value='unknown')

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead tr th {

    text-align: left;

}

.dataframe thead tr:last-of-type th {

    text-align: right;

}

		D
	B	A	B	C
A	C
one	bar	2.71452	-1.31519	0.0231296
one	foo	0.66418	-0.833609	-0.96451
three	bar	-0.116776	unknown	0.450891
three	foo	unknown	0.012846	unknown
two	bar	unknown	0.752643	unknown
two	foo	0.963631	unknown	0.117919

df1 =df.pivot_table(index=['A','C'],values=['D'],columns='B',aggfunc=np.sum,fill_value='unknown')

df1.index

MultiIndex(levels=[['one', 'three', 'two'], ['bar', 'foo']],

           labels=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],

           names=['A', 'C'])

df1.index.names=['first','second']

df1

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead tr th {

    text-align: left;

}

.dataframe thead tr:last-of-type th {

    text-align: right;

}

		D
	B	A	B	C
first	second
one	bar	2.71452	-1.31519	0.0231296
one	foo	0.66418	-0.833609	-0.96451
three	bar	-0.116776	unknown	0.450891
three	foo	unknown	0.012846	unknown
two	bar	unknown	0.752643	unknown
two	foo	0.963631	unknown	0.117919

df1_stack=df1.stack()

df1_stack.index.names=['first','second','third']

df1_stack

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

			D
first	second	third
one	bar	A	2.71452
		B	-1.31519
		C	0.0231296
	foo	A	0.66418
		B	-0.833609
		C	-0.96451
three	bar	A	-0.116776
		B	unknown
		C	0.450891
	foo	A	unknown
		B	0.012846
		C	unknown
two	bar	A	unknown
		B	0.752643
		C	unknown
	foo	A	0.963631
		B	unknown
		C	0.117919

df1_stack.columns=['总和']

df1_stack

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

			总和
first	second	third
one	bar	A	2.71452
		B	-1.31519
		C	0.0231296
	foo	A	0.66418
		B	-0.833609
		C	-0.96451
three	bar	A	-0.116776
		B	unknown
		C	0.450891
	foo	A	unknown
		B	0.012846
		C	unknown
two	bar	A	unknown
		B	0.752643
		C	unknown
	foo	A	0.963631
		B	unknown
		C	0.117919

df2 = df1_stack.reset_index()

df2.set_index('first')

.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}

.dataframe tbody tr th {

    vertical-align: top;

}

.dataframe thead th {

    text-align: right;

}

	second	third	总和
first
one	bar	A	2.71452
one	bar	B	-1.31519
one	bar	C	0.0231296
one	foo	A	0.66418
one	foo	B	-0.833609
one	foo	C	-0.96451
three	bar	A	-0.116776
three	bar	B	unknown
three	bar	C	0.450891
three	foo	A	unknown
three	foo	B	0.012846
three	foo	C	unknown
two	bar	A	unknown
two	bar	B	0.752643
two	bar	C	unknown
two	foo	A	0.963631
two	foo	B	unknown
two	foo	C	0.117919