# -*- coding: utf-8 -*-
"""
Introductory pandas walkthrough: Series, DataFrame, indexing and basic edits.

Created on Mon May 2 14:20:17 2022

@author: hzh
"""
from typing import Optional

import pandas as pd


def myprint(obj: object, name: Optional[str] = None) -> None:
    """Print *obj*, preceded by a ``name:`` label line when *name* is given.

    Args:
        obj: Any printable object (typically a DataFrame or Series).
        name: Optional label shown on its own line before the object.
    """
    if name is not None:
        print(f"{name}:")
    print(obj)


# ---------------------------------------------------------------------------
# Series: one-dimensional labelled data
# ---------------------------------------------------------------------------
lst = [80, 80, 75, 60, 95]
index = ["he", "wang", "liu", "zhang", "chen"]
s1 = pd.Series(lst, index=index)
print(s1.values)  # returns a NumPy array
print(s1.index)
# Positional access through plain [] on a string-labelled Series is
# deprecated in recent pandas; .iloc is the explicit positional accessor.
print(s1.iloc[0])
print(s1['he'])

# A dict builds a Series whose index is taken from the dict keys.
dict1 = {"he": 80, "huang": 80, "liu": 75, "peng": 60, "yang": 95}
s1 = pd.Series(dict1)
print(s1)
print(s1.values)  # returns a one-dimensional NumPy array
print(s1.index)
print(s1.iloc[0])
print(s1['he'])

# ---------------------------------------------------------------------------
# DataFrame data
# ---------------------------------------------------------------------------
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'popu': [1.5, 1.7, 3.6, 2.4, 2.9],
}
index = ['one', 'two', 'three', 'four', 'five']
df = pd.DataFrame(data, index=index)
print(df)
print(df.columns)
print(df.index)
print(type(df.values))  # returns a two-dimensional numpy.ndarray

# DataFrame attributes (printed so the results are visible when run as a script)
print(df.shape)  # number of rows and columns
print(df.dtypes)  # data type of each column
print(df.ndim)  # number of dimensions
print(df.head(3))
print(df.tail(2))
df.info()  # overview of the frame; info() prints by itself
print(df.describe())  # quick summary statistics

# Indexing 1: [] selects columns
print(df.year)
print(df['year'])
# Column slicing is not possible with plain []: df['state':'popu'] slices
# ROWS by label and fails here because those labels are not in the row index.
# Use .loc with a full row slice to slice columns by label instead.
print(df.loc[:, 'state':'popu'])
print(df[['year', 'popu']])
# Exercise: output the first and the last column
print(df[['state', 'popu']])

# Indexing 2: loc selects by row and column labels
print(df.loc['one', 'year'])
print(df.loc['one':'three', 'year':'popu'])
print(df.loc[['one', 'five'], ['state', 'popu']])

# Indexing 3: iloc selects by row and column positions
print(df.iloc[0, 1])
print(df.iloc[0:3, 1:2])

# Indexing 4: boolean indexing
print(df.loc[df['year'] > 2000])
print(df.loc[df['state'].str[0] == 'O'])
print(df[(df.year > 2001) & (df.state.str[0] == 'O')])
print(df[(df.year > 2001) | (df.state.str[0] == 'O')])
# State name starts with 'O' or popu is greater than 2
print(df[(df.popu > 2) | (df.state.str[0] == 'O')])

# ---------------------------------------------------------------------------
# Add
# ---------------------------------------------------------------------------
dict2 = {'year': 2003, 'state': 'Louisiana', 'popu': 1.4}
# DataFrame.append was removed in pandas 2.0; concatenating a one-row frame
# with ignore_index=True appends the row and renumbers the index 0..n-1.
df1 = pd.concat([df, pd.DataFrame([dict2])], ignore_index=True)
df1['debt'] = 1
myprint(df1)

# ---------------------------------------------------------------------------
# Delete
# ---------------------------------------------------------------------------
df1.drop(5, inplace=True)  # drop the row labelled 5 (the row just added)
myprint(df1)
df1.drop('debt', axis=1, inplace=True)  # axis=1 drops a column
myprint(df1)

# ---------------------------------------------------------------------------
# Modify
# ---------------------------------------------------------------------------
df1['popu'] += 1
myprint(df1, 'df1')

# ---------------------------------------------------------------------------
# Combine
# ---------------------------------------------------------------------------
df2 = df.copy()
myprint(df2, 'df2')
df3 = pd.concat([df2, df1], ignore_index=True)  # stack rows
myprint(df3, 'df3')
# axis=1 places the frames side by side, aligning on the row index; df2 and
# df1 share no row labels, so every row is filled with NaN on one side.
df4 = pd.concat([df2, df1], axis=1)
myprint(df4)

# ---------------------------------------------------------------------------
# Sort
# ---------------------------------------------------------------------------
df5 = df.sort_values(by=['popu'], ascending=True)
myprint(df5)
df6 = df.sort_values(by=['year', 'popu'], ascending=False)
myprint(df6)