pandas总结(一)——Series的使用

文章作者:Tyan
博客:noahsnail.com  |  CSDN  |  简书

1
2
3
4
5
6
7
8
9
10
# pandas是一个用来进行数据分析的基于numpy的库

import pandas as pd
import numpy as np

# A Series is a one-dimensional labelled data structure.

# Build a Series from a plain list; with no explicit index the labels
# default to 0..n-1. Mixing ints, a str and a float yields dtype object.
series1 = pd.Series([3, 5, 'test', -5, 0.3])
# print() call form: valid on Python 3 (the print statement is a SyntaxError there).
print(series1)
0       3
1       5
2    test
3      -5
4     0.3
dtype: object
1
2
3
# Build a Series from a list plus an explicit list of index labels.
series2 = pd.Series([3, 5, 'test', -5, 0.3], index=['A', 'B', 'C', 'D', 'E'])
# print() call form: valid on Python 3 (the print statement is a SyntaxError there).
print(series2)
A       3
B       5
C    test
D      -5
E     0.3
dtype: object
1
2
3
4
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
companies = {'Baidu': 400, 'Alibaba': 500, 'Tecent': 600, 'Jingdong': 300}
series3 = pd.Series(companies)
# print() call form: valid on Python 3 (the print statement is a SyntaxError there).
print(series3)
Alibaba     500
Baidu       400
Jingdong    300
Tecent      600
dtype: int64
1
2
3
4
5
6
7
# Selecting data from a Series

# Select a single value by its index label.
print(series3['Baidu'])

# Select several values at once by passing a list of labels; the result
# is itself a Series.
print(series3[['Baidu', 'Tecent']])
400
Baidu     400
Tecent    600
dtype: int64
1
2
# Select the elements that satisfy a condition (here: values below 500).
print(series3[series3 < 500])
Baidu       400
Jingdong    300
dtype: int64
1
2
3
4
# 条件选择原理: 先通过比较得到布尔Series, 再用它筛选数据
# The comparison produces a boolean Series with the same index; compute it
# once and reuse it both for display and as the selection mask.
temp = series3 < 500
print(temp)
# Indexing with the boolean Series keeps only the entries marked True.
print(series3[temp])
Alibaba     False
Baidu        True
Jingdong     True
Tecent      False
dtype: bool
Baidu       400
Jingdong    300
dtype: int64
1
2
3
4
# Assign a new value to a single element, addressed by its index label.
# NOTE: the multi-argument print() calls below require Python 3; on Python 2
# the same syntax would print a tuple instead.
print('old value: ', series3['Baidu'])
series3['Baidu'] = 450
print('new value: ', series3['Baidu'])
old value:  400
new value:  450
1
2
3
4
5
6
# Conditional assignment: every element below 500 is overwritten with 500.
print('old series: ')
print(series3)
# The boolean mask on the left-hand side selects which entries are replaced
# in place.
series3[series3 < 500] = 500
print('new series: ')
print(series3)
old series: 
Alibaba     500
Baidu       400
Jingdong    300
Tecent      600
dtype: int64
new series: 
Alibaba     500
Baidu       500
Jingdong    500
Tecent      600
dtype: int64
1
2
3
4
5
6
# Arithmetic on a Series is applied element-wise and keeps the index.
print('Division: ')
# True division yields a float64 Series.
print(series3 / 2)
print('Square: ')
# Both forms square each element: the ** operator and numpy's ufunc.
print(series3 ** 2)
print(np.square(series3))
Division: 
Alibaba     250.0
Baidu       250.0
Jingdong    250.0
Tecent      300.0
dtype: float64
Square: 
Alibaba     250000
Baidu       250000
Jingdong    250000
Tecent      360000
dtype: int64
Alibaba     250000
Baidu       250000
Jingdong    250000
Tecent      360000
dtype: int64
1
2
3
4
# Define a second Series: head count per company. It contains one label
# ('Netease') that the companies/series3 data does not have.
people = {'Baidu': 50000, 'Alibaba': 45000, 'Tecent': 60000, 'Jingdong': 80000, 'Netease': 30000}
series4 = pd.Series(people)
# print() call form: valid on Python 3 (the print statement is a SyntaxError there).
print(series4)
Alibaba     45000
Baidu       50000
Jingdong    80000
Netease     30000
Tecent      60000
dtype: int64
1
2
# Adding two Series matches elements by index label. series3 has no
# 'Netease' entry, so the sum at that label is NaN (and the result is float64).
print(series3 + series4)
Alibaba     45500.0
Baidu       50500.0
Jingdong    80500.0
Netease         NaN
Tecent      60600.0
dtype: float64
1
2
3
# Check whether a label is present in a Series: `in` tests the index
# labels, not the stored values.
print('Netease' in series3)
print('Baidu' in series3)
False
True
1
2
3
4
5
6
7
# Find the elements that are null / non-null.
result = series3 + series4
# Boolean masks: True where a value is present / where it is missing.
print(result.notnull())
print(result.isnull())

# Use the masks to keep only the missing entries, then only the present ones.
print(result[result.isnull()])
# notnull() is the direct form of the former `isnull() != True` comparison.
print(result[result.notnull()])
Alibaba      True
Baidu        True
Jingdong     True
Netease     False
Tecent       True
dtype: bool
Alibaba     False
Baidu       False
Jingdong    False
Netease      True
Tecent      False
dtype: bool
Netease   NaN
dtype: float64
Alibaba     45500.0
Baidu       50500.0
Jingdong    80500.0
Tecent      60600.0
dtype: float64
如果有收获,可以请我喝杯咖啡!