# -*- coding: utf-8 -*-
"""
pandas in-class practice script (spring 2022).

Created on Mon May  2 14:20:17 2022

@author: hzh

Provenance: recovered from git patch
b41799c3aaa4d872f36ec98cdcdea61b2190079f ("[PATCH] ADD file via upload",
From: hnu202110040108, Date: Wed, 22 Jun 2022 14:15:20 +0800), which added
this file as ``2022春pandas上课练习代码.py`` (100 lines).

The script walks through Series/DataFrame construction, the four indexing
styles ([], loc, iloc, boolean masks), and basic add / delete / modify /
concat / sort operations.

Bare expressions such as ``df.shape`` only display a value when the file is
run cell-by-cell in an interactive console; run as a plain script they are
evaluated and discarded.
"""
import pandas as pd


def myprint(obj, name=None):
    """Print *obj*, preceded by a ``name:`` label line when *name* is given.

    The script calls ``myprint`` with one or two positional arguments but the
    helper was never defined in this file, so the first call raised
    ``NameError``. This is a minimal stand-in matching those call shapes.
    """
    if name is not None:
        print(f"{name}:")
    print(obj)


# ---- Series: one-dimensional labelled data ----
lst = [80, 80, 75, 60, 95]
index = ["he", "wang", "liu", "zhang", "chen"]
s1 = pd.Series(lst, index=index)
print(s1.values)  # a one-dimensional numpy array
print(s1.index)
# Use .iloc for positional access: plain s1[0] on a string-labelled Series
# relies on a deprecated positional fallback.
print(s1.iloc[0])
print(s1['he'])

dict1 = {"he": 80, "huang": 80, "liu": 75, "peng": 60, "yang": 95}
s1 = pd.Series(dict1)  # dict keys become the index
print(s1)
print(s1.values)  # a one-dimensional numpy array
print(s1.index)
print(s1.iloc[0])
print(s1['he'])

# ---- DataFrame: two-dimensional labelled data ----
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'popu': [1.5, 1.7, 3.6, 2.4, 2.9],
}
index = ['one', 'two', 'three', 'four', 'five']
df = pd.DataFrame(data, index=index)
print(df)
print(df.columns)
print(df.index)
print(type(df.values))  # a two-dimensional numpy.ndarray

# ---- DataFrame attributes ----
df.shape       # (rows, columns)
df.dtypes      # dtype of each column
df.ndim        # number of dimensions
df.head(3)
df.tail(2)
df.info()      # overview (this one prints by itself)
df.describe()  # quick summary statistics

# ---- Indexing 1: [] selects columns ----
df.year
df['year']
# [] with a slice selects ROWS by label, so df['state':'popu'] raises
# KeyError here; slice columns through .loc instead.
df.loc[:, 'state':'popu']
df[['year', 'popu']]
# Exercise: output the first and the last column.
df[['state', 'popu']]

# ---- Indexing 2: loc, row & column labels (slices include both ends) ----
df.loc['one', 'year']
df.loc['one':'three', 'year':'popu']
df.loc[['one', 'five'], ['state', 'popu']]

# ---- Indexing 3: iloc, row & column positions ----
df.iloc[0, 1]
df.iloc[0:3, 1:2]

# ---- Indexing 4: boolean masks ----
df.loc[df['year'] > 2000]
df.loc[df['state'].str[0] == 'O']
df[(df.year > 2001) & (df.state.str[0] == 'O')]
df[(df.year > 2001) | (df.state.str[0] == 'O')]
# State name starts with 'O' or popu is greater than 2.
df[(df.popu > 2) | (df.state.str[0] == 'O')]

# ---- Add ----
dict2 = {'year': 2003, 'state': 'Louisiana', 'popu': 1.4}
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# is the equivalent and works on older versions as well.
df1 = pd.concat([df, pd.DataFrame([dict2])], ignore_index=True)
df1['debt'] = 1  # a scalar is broadcast to every row of the new column
myprint(df1)

# ---- Delete ----
df1.drop(5, inplace=True)  # drop the row labelled 5 (the one just added)
myprint(df1)
df1.drop('debt', axis=1, inplace=True)  # axis=1 drops a column
myprint(df1)

# ---- Modify ----
df1['popu'] += 1
myprint(df1, 'df1')

# ---- Concatenate ----
df2 = df.copy()
myprint(df2, 'df2')
df3 = pd.concat([df2, df1], ignore_index=True)  # stack rows, renumber index
myprint(df3, 'df3')
# Side by side: rows are aligned on index labels. df2 is labelled
# 'one'..'five' and df1 0..4, so nothing lines up and the gaps become NaN.
df4 = pd.concat([df2, df1], axis=1)
myprint(df4)

# ---- Sort ----
df5 = df.sort_values(by=['popu'], ascending=True)
myprint(df5)
df6 = df.sort_values(by=['year', 'popu'], ascending=False)
myprint(df6)