Python数据分析扩展库pandas的DataFrame排序方法小结

原创 2017-08-26 董付国 Python小屋

>>> import numpy as np
>>> import pandas as pd

# 创建DataFrame(数据由np.random.randint随机生成,读者运行得到的数值会与下文不同)
>>> df = pd.DataFrame(data=[np.random.randint(1,10,4) for i in range(5)],\
index=range(5), columns=list('ABCD'))
>>> df
   A  B  C  D
0  3  3  1  4
1  7  9  1  4
2  1  2  6  2
3  1  9  5  7
4  6  9  2  5

# 对index(行索引)进行降序排序
>>> df.sort_index(axis=0, ascending=False)
   A  B  C  D
4  6  9  2  5
3  1  9  5  7
2  1  2  6  2
1  7  9  1  4
0  3  3  1  4

# 对columns(列名)进行降序排序
>>> df.sort_index(axis=1, ascending=False)
   D  C  B  A
0  4  1  3  3
1  4  1  9  7
2  2  6  2  1
3  7  5  9  1
4  5  2  9  6

# 按单列进行排序
>>> df.sort_values('A')
   A  B  C  D
2  1  2  6  2
3  1  9  5  7
0  3  3  1  4
4  6  9  2  5
1  7  9  1  4
>>> df.sort_values('B')
   A  B  C  D
2  1  2  6  2
0  3  3  1  4
1  7  9  1  4
3  1  9  5  7
4  6  9  2  5

# 按多列进行排序

>>> df.sort_values(['A','B'])
   A  B  C  D
2  1  2  6  2
3  1  9  5  7
0  3  3  1  4
4  6  9  2  5
1  7  9  1  4
>>> df.sort_values(['B','A'])
   A  B  C  D
2  1  2  6  2
0  3  3  1  4
3  1  9  5  7
4  6  9  2  5
1  7  9  1  4
# 降序排序
>>> df.sort_values(['B','A'], ascending=False)
   A  B  C  D
1  7  9  1  4
4  6  9  2  5
3  1  9  5  7
0  3  3  1  4
2  1  2  6  2

# 随机排序

>>> sampler = np.random.permutation(len(df.index))
>>> sampler
array([3, 2, 1, 0, 4])
>>> df.take(sampler)
   A  B  C  D
3  1  9  5  7
2  1  2  6  2
1  7  9  1  4
0  3  3  1  4
4  6  9  2  5

# 再次随机排序(每次调用permutation得到的排列不同)
>>> sampler = np.random.permutation(len(df.index))
>>> sampler
array([0, 1, 4, 2, 3])
>>> df.take(sampler)
   A  B  C  D
0  3  3  1  4
1  7  9  1  4
4  6  9  2  5
2  1  2  6  2
3  1  9  5  7