You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
toratoratora/2022春pandas上课练习代码.py

101 lines
2.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
"""
Created on Mon May 2 14:20:17 2022
@author: hzh
"""
import pandas as pd
# Series: one-dimensional labelled data.
# First build one from a list of values plus a parallel list of labels.
lst = [80, 80, 75, 60, 95]
index = ["he", "wang", "liu", "zhang", "chen"]
s1 = pd.Series(lst, index)
print(s1.values)  # .values returns a NumPy array
print(s1.index)
# Positional access must go through .iloc: with string labels, s1[0] relies on
# a positional fallback that pandas has deprecated and later removed.
print(s1.iloc[0])
print(s1['he'])  # label-based access

# Then build one from a dict: the keys become the index labels.
dict1 = {"he": 80, "huang": 80, "liu": 75, "peng": 60, "yang": 95}
s1 = pd.Series(dict1)
print(s1)
print(s1.values)  # .values returns a one-dimensional NumPy array
print(s1.index)
print(s1.iloc[0])  # positional access (see note above)
print(s1['he'])  # label-based access
# DataFrame: two-dimensional labelled data, built from a dict of columns
# plus a list of row labels.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'popu': [1.5, 1.7, 3.6, 2.4, 2.9],
}
index = ['one', 'two', 'three', 'four', 'five']
df = pd.DataFrame(data, index)
print(df)
print(df.columns)
print(df.index)
print(type(df.values))  # .values returns a two-dimensional numpy.ndarray

# DataFrame attributes and quick summaries.
# Results are printed explicitly so they also show up when the file is run as
# a script rather than line by line in an interactive console.
print(df.shape)  # (number of rows, number of columns)
print(df.dtypes)  # data type of each column
print(df.ndim)  # number of dimensions
print(df.head(3))
print(df.tail(2))
df.info()  # overview of the frame; info() prints by itself
print(df.describe())  # quick summary statistics

# Indexing 1: [] selects columns
print(df.year)
print(df['year'])
# A slice inside [] is interpreted as a row slice, so df['state':'popu'] does
# not select columns (with these row labels it raises KeyError).  Column
# slicing by label has to go through .loc:
print(df.loc[:, 'state':'popu'])
print(df[['year', 'popu']])
# Exercise: output the first and the last column
print(df[['state', 'popu']])

# Indexing 2: .loc selects rows and columns by label
print(df.loc['one', 'year'])
print(df.loc['one':'three', 'year':'popu'])
print(df.loc[['one', 'five'], ['state', 'popu']])

# Indexing 3: .iloc selects rows and columns by integer position
print(df.iloc[0, 1])
print(df.iloc[0:3, 1:2])

# Indexing 4: boolean masks
print(df.loc[df['year'] > 2000])
print(df.loc[df['state'].str[0] == 'O'])
print(df[(df.year > 2001) & (df.state.str[0] == 'O')])
print(df[(df.year > 2001) | (df.state.str[0] == 'O')])
# State name starts with 'O' or popu is greater than 2
print(df[(df.popu > 2) | (df.state.str[0] == 'O')])
def myprint(obj, name=None):
    """Print ``obj``, preceded by a ``name:`` header line when ``name`` is given.

    The statements below call ``myprint`` with one or two arguments, but no
    definition precedes them in this file, so a minimal one is provided here.
    """
    if name is not None:
        print("{}:".format(name))
    print(obj)


# Add: append one row, then add a constant-valued column
dict2 = {'year': 2003, 'state': 'Louisiana', 'popu': 1.4}
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame is
# the replacement.  ignore_index=True renumbers the rows 0..5.
df1 = pd.concat([df, pd.DataFrame([dict2])], ignore_index=True)
df1['debt'] = 1
myprint(df1)

# Delete: drop the row labelled 5 (the one just appended), then the new column
df1.drop(index=5, inplace=True)
myprint(df1)
df1.drop(columns='debt', inplace=True)
myprint(df1)

# Modify: add 1 to every value of a column
df1['popu'] += 1
myprint(df1, 'df1')

# Combine
df2 = df.copy()
myprint(df2, 'df2')
# Stack the rows of both frames and renumber them
df3 = pd.concat([df2, df1], ignore_index=True)
myprint(df3, 'df3')
# Side by side: rows are aligned on their labels.  df2 keeps the labels of df
# while df1 was renumbered with integers above, so rows only pair up where the
# two sets of labels coincide.
df4 = pd.concat([df2, df1], axis=1)
myprint(df4)

# Sort
df5 = df.sort_values(by=['popu'], ascending=True)
myprint(df5)
# Descending by year, ties broken by popu (also descending)
df6 = df.sort_values(by=['year', 'popu'], ascending=False)
myprint(df6)