# -*- coding: utf-8 -*-
"""
Created on Mon May 2 14:20:17 2022

@author: hzh
"""

import pandas as pd
# Series: one-dimensional labelled data
lst = [80, 80, 75, 60, 95]
index = ["he", "wang", "liu", "zhang", "chen"]
# Build a Series from a list of values plus an explicit list of labels.
s1 = pd.Series(lst, index=index)
print(s1.values)  # the values as a NumPy array
print(s1.index)
# Positional access goes through .iloc: plain s1[0] on a Series with string
# labels relies on a deprecated fallback from label lookup to position.
print(s1.iloc[0])
print(s1['he'])  # label-based access

# Build a Series from a dict: the keys become the index labels.
dict1 = {"he": 80, "huang": 80, "liu": 75, "peng": 60, "yang": 95}
s1 = pd.Series(dict1)
print(s1)
print(s1.values)  # the values as a one-dimensional NumPy array
print(s1.index)
print(s1.iloc[0])
print(s1['he'])

# DataFrame: two-dimensional labelled data
# Each dict key becomes a column; each list holds that column's values.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'popu': [1.5, 1.7, 3.6, 2.4, 2.9],
}
index = ['one', 'two', 'three', 'four', 'five']  # row labels
df = pd.DataFrame(data, index=index)
print(df)
print(df.columns)
print(df.index)
print(type(df.values))  # the values as a two-dimensional numpy.ndarray

# DataFrame attributes and quick summaries
# Results are printed explicitly: a bare expression is discarded when this
# file is run as a script rather than typed into an interactive console.
print(df.shape)       # number of rows and columns
print(df.dtypes)      # data type of each column
print(df.ndim)        # number of dimensions
print(df.head(3))     # first three rows
print(df.tail(2))     # last two rows
df.info()             # overview of the frame; info() prints by itself
print(df.describe())  # quick summary statistics

# Indexing 1: [] selects columns
print(df.year)
print(df['year'])
# A slice inside [] is applied to the row labels, not to the columns, so
# df['state':'popu'] cannot slice columns: 'state' is not a row label and
# the lookup raises KeyError. Catch it so the rest of the script still runs.
try:
    df['state':'popu']
except KeyError as err:
    print('df[...] cannot slice columns:', repr(err))
# Column slices by label go through .loc instead.
print(df.loc[:, 'state':'popu'])
print(df[['year', 'popu']])  # a list of names selects several columns
# Exercise: output the first column and the last column
print(df[['state', 'popu']])

# Indexing 2: loc selects rows and columns by label
print(df.loc['one', 'year'])                       # a single cell
print(df.loc['one':'three', 'year':'popu'])        # label slices include both ends
print(df.loc[['one', 'five'], ['state', 'popu']])  # explicit lists of labels

# Indexing 3: iloc selects rows and columns by integer position
print(df.iloc[0, 1])      # first row, second column
print(df.iloc[0:3, 1:2])  # positional slices exclude the stop position

# Indexing 4: boolean indexing
print(df.loc[df['year'] > 2000])
print(df.loc[df['state'].str[0] == 'O'])  # state name starts with 'O'
# Combine conditions with & (and) / | (or); each condition needs its own
# parentheses because & and | bind tighter than the comparisons.
print(df[(df.year > 2001) & (df.state.str[0] == 'O')])
print(df[(df.year > 2001) | (df.state.str[0] == 'O')])
# State name starts with 'O' or the popu value is greater than 2
print(df[(df.popu > 2) | (df.state.str[0] == 'O')])

# Adding data
dict2 = {'year': 2003, 'state': 'Louisiana', 'popu': 1.4}
# DataFrame.append was removed in pandas 2.0, so the new row is wrapped in a
# one-row DataFrame and concatenated. ignore_index=True renumbers the rows
# 0..n-1, which puts the new row at label 5.
df1 = pd.concat([df, pd.DataFrame([dict2])], ignore_index=True)
df1['debt'] = 1  # new column holding the same value in every row
print(df1)

# Deleting data
df1.drop(index=5, inplace=True)         # remove the row labelled 5
print(df1)
df1.drop(columns='debt', inplace=True)  # remove the 'debt' column
print(df1)

# Modifying data
df1['popu'] += 1  # add 1 to every value in the 'popu' column
print('df1')
print(df1)

# Combining DataFrames
df2 = df.copy()
print('df2')
print(df2)
# Stack the rows of both frames and renumber them 0..n-1.
df3 = pd.concat([df2, df1], ignore_index=True)
print('df3')
print(df3)
# axis=1 places the frames side by side, aligning rows on their index labels.
df4 = pd.concat([df2, df1], axis=1)
print(df4)

# Sorting
# Ascending by a single column.
df5 = df.sort_values(by=['popu'], ascending=True)
print(df5)
# Descending by 'year', with ties ordered by descending 'popu'.
df6 = df.sort_values(by=['year', 'popu'], ascending=False)
print(df6)