You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
71 lines
1.7 KiB
71 lines
1.7 KiB
# Compare feature-scaling methods on the cotton-yield spreadsheet and
# cross-check scikit-learn's PCA against a manual eigen-decomposition.
#
# Steps:
#   1. Load the spreadsheet and drop its first column.
#   2. Scatter-plot the first two remaining columns under four scalings:
#      raw, scipy z-score, hand-written z-score, and min-max.
#   3. Fit PCA on the z-scored data and print its eigenvalues, explained
#      variance ratios and components next to the values obtained from the
#      covariance matrix directly.
#   4. Save the figure to "shuju.jpg" and display it.
#
# This is the union of both sides of the former merge conflict: the PCA
# cross-check from one branch and the savefig/show calls from the other.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.pyplot import ylabel
from scipy.stats import zscore
from sklearn.decomposition import PCA

df = pd.read_excel("棉花产量论文作业的数据.xlsx")

# SimHei is selected so the Chinese labels below can be rendered.
plt.rcParams['font.sans-serif'] = "SimHei"
# CJK fonts such as SimHei commonly lack the Unicode minus sign, so fall back
# to the ASCII hyphen for the negative tick labels of the standardized plots.
plt.rcParams['axes.unicode_minus'] = False

# Drop the first column; everything else is the feature matrix.
# NOTE(review): the plot title suggests columns 0 and 1 of `d` are yield per
# unit area and seed cost -- confirm against the spreadsheet.
d = df.to_numpy()[:, 1:]
print(d)

# Panel 1: raw data.
plt.subplot(4, 1, 1)
plt.scatter(d[:, :1], d[:, 1:2], c='r')
ylabel('原始数据')
plt.title("单产和种子费用的关系")

# Panel 2: standardize with the library z-score.
data = zscore(d)
print(data)
plt.subplot(4, 1, 2)
plt.scatter(data[:, :1], data[:, 1:2], c='b')
ylabel('zscore')

print(d.max(axis=0))
print(d.std(axis=0))
print(d.mean(axis=0))

# Panel 3: the same standardization written out by hand.
data1 = (d - d.mean(axis=0)) / d.std(axis=0)
print(data1)
plt.subplot(4, 1, 3)
plt.scatter(data1[:, :1], data1[:, 1:2], c='y')
ylabel('手写标准正态分布')

# Panel 4: min-max scaling of every column into [0, 1].
data2 = (d - d.min(axis=0)) / (d.max(axis=0) - d.min(axis=0))
plt.subplot(4, 1, 4)
plt.scatter(data2[:, :1], data2[:, 1:2], c='g')
plt.xlabel('压缩到0~1')

# The two z-score results come from different floating-point code paths, so
# compare them with a tolerance instead of exact equality.
print(np.isclose(data, data1))

# --- PCA cross-check -------------------------------------------------------
md = PCA().fit(data)

cf = np.cov(data.T)  # covariance matrix of the standardized features
print(cf)

# The covariance matrix is symmetric, so use eigh: it returns real
# eigenvalues in ascending order. Reorder to descending so the values line up
# with PCA's output (largest variance first). A separate name is used for the
# eigenvectors so the raw data matrix `d` is not overwritten.
c, eigvecs = np.linalg.eigh(cf)
order = np.argsort(c)[::-1]
c = c[order]
eigvecs = eigvecs[:, order]

print("特征值:\n", c)
print(md.explained_variance_)

# Contribution (explained-variance) ratio of each eigenvalue.
e = c / c.sum()
print('贡献率:')
print(e)
print(md.explained_variance_ratio_)

print('特征向量:')
print(eigvecs.T)
print(md.components_)
# An eigenvector is only defined up to sign, so compare magnitudes; the outer
# abs keeps large negative differences from passing the tolerance check.
print(np.abs(np.abs(md.components_) - np.abs(eigvecs.T)) <= 0.1)

# Save before show(): the figure may be cleared once the window is closed.
plt.savefig("shuju.jpg", dpi=2000)
plt.show()
|