"""Compare feature-scaling methods and a hand-rolled PCA on the cotton-yield data.

The script reads the spreadsheet, scales the numeric columns three ways
(``scipy.stats.zscore``, a manual z-score and min-max scaling), prints the
intermediate results, cross-checks a covariance eigen-decomposition against
``sklearn.decomposition.PCA`` and finally saves and shows a four-panel scatter
plot of the first two columns under each scaling.
"""

import numpy as np
import pandas as pd
from scipy.stats import zscore

DATA_FILE = "棉花产量论文作业的数据.xlsx"
FIGURE_FILE = "shuju.jpg"
FIGURE_DPI = 2000
# Largest absolute difference tolerated between manual and sklearn components.
COMPONENT_TOLERANCE = 0.1


def standardize(values):
    """Return *values* with each column shifted to mean 0 and scaled to std 1.

    The population standard deviation (``ddof=0``) is used, which is also the
    default of ``scipy.stats.zscore``, so both results are comparable.
    A constant column has zero spread and therefore yields NaN.
    """
    return (values - values.mean(axis=0)) / values.std(axis=0)


def min_max_scale(values):
    """Return *values* with each column rescaled linearly onto ``[0, 1]``.

    A constant column has zero range and therefore yields NaN.
    """
    lowest = values.min(axis=0)
    return (values - lowest) / (values.max(axis=0) - lowest)


def covariance_eigen(values):
    """Eigen-decompose the covariance matrix of the columns of *values*.

    Returns:
        ``(covariance, eigenvalues, eigenvectors)`` where the eigenvalues are
        sorted in descending order and ``eigenvectors[:, i]`` belongs to
        ``eigenvalues[i]``.
    """
    covariance = np.cov(values.T)
    # The covariance matrix is symmetric, so eigh applies: it returns real
    # results, whereas eig gives no ordering guarantee at all.
    eigenvalues, eigenvectors = np.linalg.eigh(covariance)
    # eigh sorts ascending; PCA reports the largest variance first.
    order = np.argsort(eigenvalues)[::-1]
    return covariance, eigenvalues[order], eigenvectors[:, order]


def report_pca(standardized):
    """Print a manual PCA next to scikit-learn's PCA for *standardized* data."""
    # Imported lazily so the numeric helpers above stay importable without
    # scikit-learn installed.
    from sklearn.decomposition import PCA

    model = PCA().fit(standardized)
    covariance, eigenvalues, eigenvectors = covariance_eigen(standardized)

    print(covariance)
    print("特征值:\n", eigenvalues)
    print(model.explained_variance_)

    contribution = eigenvalues / eigenvalues.sum()
    print('贡献率:')
    print(contribution)
    print(model.explained_variance_ratio_)

    print('特征向量:')
    print(eigenvectors.T)
    print(model.components_)

    # PCA keeps at most min(n_samples, n_features) components, so trim the
    # manual result to the same number of rows before comparing.
    manual_components = eigenvectors.T[:model.components_.shape[0]]
    # An eigenvector is only defined up to its sign, hence the comparison of
    # magnitudes; the tolerance is applied in both directions.
    difference = np.abs(np.abs(model.components_) - np.abs(manual_components))
    print(difference <= COMPONENT_TOLERANCE)


def plot_scalings(raw, library_scaled, manual_scaled, unit_scaled):
    """Scatter the first two columns of each data set, then save and show it."""
    # Imported lazily so the numeric helpers above stay importable without
    # matplotlib installed.
    import matplotlib.pyplot as plt

    plt.rcParams['font.sans-serif'] = "SimHei"
    # SimHei has no glyph for the Unicode minus sign; fall back to the ASCII
    # hyphen so negative tick labels remain readable.
    plt.rcParams['axes.unicode_minus'] = False

    def scatter_panel(position, values, colour):
        plt.subplot(4, 1, position)
        plt.scatter(values[:, :1], values[:, 1:2], c=colour)

    scatter_panel(1, raw, 'r')
    plt.ylabel('原始数据')
    plt.title("单产和种子费用的关系")

    scatter_panel(2, library_scaled, 'b')
    plt.ylabel('zscore')

    scatter_panel(3, manual_scaled, 'y')
    plt.ylabel('手写标准正态分布')

    scatter_panel(4, unit_scaled, 'g')
    plt.xlabel('压缩到0~1')

    plt.savefig(FIGURE_FILE, dpi=FIGURE_DPI)
    plt.show()


def main():
    """Run the scaling comparison, the PCA cross-check and the plot."""
    df = pd.read_excel(DATA_FILE)
    # Drop the first column (presumably the year) and keep the measurements.
    raw = df.to_numpy()[:, 1:]
    print(raw)

    # Standardize with the library routine.
    library_scaled = zscore(raw)
    print(library_scaled)

    print(raw.max(axis=0))
    print(raw.std(axis=0))
    print(raw.mean(axis=0))

    # Standardize by hand for comparison with the library result.
    manual_scaled = standardize(raw)
    print(manual_scaled)

    unit_scaled = min_max_scale(raw)

    # Floating-point results are compared with a tolerance, not with ``==``.
    print(np.isclose(library_scaled, manual_scaled))

    report_pca(library_scaled)
    # Plot last: showing the figure blocks until the window is closed.
    plot_scalings(raw, library_scaled, manual_scaled, unit_scaled)


if __name__ == "__main__":
    main()