You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
toratoratora/2022春pandas上课练习代码.py

101 lines
2.4 KiB

# -*- coding: utf-8 -*-
"""
Created on Mon May 2 14:20:17 2022
@author: hzh
"""
import pandas as pd
# Series: a one-dimensional array of values with an index of labels.
lst = [80, 80, 75, 60, 95]
index = ["he", "wang", "liu", "zhang", "chen"]
s1 = pd.Series(lst, index=index)
print(s1.values)  # the data as a one-dimensional NumPy array
print(s1.index)
# Positional access goes through .iloc: with a string index, s1[0] relies on
# a deprecated fallback that treats the integer key as a position.
print(s1.iloc[0])
print(s1['he'])  # label-based access
# A Series can also be built from a dict; the keys become the index labels.
dict1 = {"he": 80, "huang": 80, "liu": 75, "peng": 60, "yang": 95}
s1 = pd.Series(dict1)
print(s1)
print(s1.values)  # the data as a one-dimensional NumPy array
print(s1.index)
print(s1.iloc[0])  # first element by position
print(s1['he'])  # element labelled 'he'
# DataFrame: a two-dimensional table built here from a dict of equal-length
# columns plus an explicit list of row labels.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'popu': [1.5, 1.7, 3.6, 2.4, 2.9],
}
index = ['one', 'two', 'three', 'four', 'five']
df = pd.DataFrame(data=data, index=index)
# Show the frame, its column labels, its row labels, and the type of
# df.values (a two-dimensional numpy.ndarray), one item per line.
print(df, df.columns, df.index, type(df.values), sep='\n')
# DataFrame attributes and summaries. Apart from df.info(), which prints its
# report itself, these are bare expressions: their values are only displayed
# when the lines are run interactively.
df.shape  # (number of rows, number of columns)
df.dtypes  # data type of each column
df.ndim  # number of dimensions
df.head(n=3)  # first three rows
df.tail(n=2)  # last two rows
df.info()  # overview of the frame
df.describe()  # quick summary statistics
# Indexing 1: [] selects columns.
df.year  # a single column via attribute access
df['year']  # the same column by label
# A slice inside [] selects ROWS, not columns: df['state':'popu'] looks for
# row labels 'state' and 'popu', raises KeyError, and would abort the rest of
# the script. Slice columns by label through .loc instead.
df.loc[:, 'state':'popu']
df[['year', 'popu']]  # a list of labels selects several columns
# Exercise: output the first and the last column.
df[['state', 'popu']]
# Indexing 2: .loc selects rows and columns by label.
df.loc['one', 'year']  # a single cell
df.loc['one':'three', 'year':'popu']  # label slices include both endpoints
df.loc[['one', 'five'], ['state', 'popu']]  # lists pick specific labels
# Indexing 3: .iloc selects rows and columns by integer position.
df.iloc[0, 1]
df.iloc[:3, 1:2]  # positional slices exclude the stop position
# Indexing 4: a boolean mask keeps the rows where the condition is True.
df.loc[df['year'].gt(2000)]
df.loc[df['state'].str.get(0).eq('O')]
df[df.year.gt(2001) & df.state.str.get(0).eq('O')]
df[df.year.gt(2001) | df.state.str.get(0).eq('O')]
# State name starts with 'O', or popu is greater than 2.
df[df.popu.gt(2) | df.state.str.get(0).eq('O')]
# Add: append one row built from a dict, then add a constant column.
dict2 = {'year': 2003, 'state': 'Louisiana', 'popu': 1.4}
# DataFrame.append was removed in pandas 2.0. Concatenating a one-row frame
# made from the dict is the supported replacement; ignore_index=True
# renumbers the rows 0..n-1 exactly as the old call did.
df1 = pd.concat([df, pd.DataFrame([dict2])], ignore_index=True)
df1['debt'] = 1  # a scalar is broadcast to every row of the new column
myprint(df1)
# Delete: remove the row labelled 5, then the 'debt' column, both in place.
df1.drop(index=5, inplace=True)
myprint(df1)
df1.drop(columns='debt', inplace=True)
myprint(df1)
# Modify: increase every value in the 'popu' column by 1.
df1['popu'] = df1['popu'] + 1
myprint(df1, 'df1')
# Combine: work on a copy of df, then concatenate it with df1.
df2 = df.copy()
myprint(df2, 'df2')
# Stack the rows of both frames and renumber the result from 0.
df3 = pd.concat([df2, df1], axis=0, ignore_index=True)
myprint(df3, 'df3')
# Place the frames side by side; rows are aligned on their index labels.
# NOTE(review): df2 carries the string labels while df1 was built with
# ignore_index=True, so the two appear to share no row labels and the result
# would be padded with NaN - confirm this is the intended demonstration.
df4 = pd.concat([df2, df1], axis='columns')
myprint(df4)
# Sort: by 'popu' in ascending order (the default direction) ...
df5 = df.sort_values('popu')
myprint(df5)
# ... then by 'year' and 'popu', both in descending order.
df6 = df.sort_values(['year', 'popu'], ascending=[False, False])
myprint(df6)