# -*- coding: utf-8 -*-
"""
Created on Tue Oct 23 20:53:57 2018

@author: Administrator

Rank stocks by a PCA-based composite score.

Reads indicator data from 'Data.xlsx', keeps the rows whose numeric
indicators are all strictly positive, standardizes them, extracts the
principal components that together explain 95% of the variance, and combines
the component scores weighted by their explained-variance ratios.  The
ranking is printed twice: keyed by stock code, and keyed by the stock name
looked up in 'stkcode.xlsx'.
"""
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


def positive_numeric_rows(data):
    """Return the numeric columns of *data*, restricted to all-positive rows.

    Args:
        data: DataFrame that may mix numeric and non-numeric columns.

    Returns:
        DataFrame holding only the numeric columns and only the rows in which
        every numeric value is present and strictly greater than zero.  The
        original row labels are preserved.
    """
    numeric = data.select_dtypes(include=[np.number])
    # Masking turns every non-positive entry into NaN, so a single dropna()
    # removes both incomplete rows and rows with a non-positive value.
    return numeric.where(numeric > 0).dropna()


def composite_scores(data, variance_ratio=0.95):
    """Compute the PCA composite score of every usable row of *data*.

    Args:
        data: DataFrame with a 'ts_code' column plus numeric indicator
            columns.
        variance_ratio: cumulative explained-variance ratio that the retained
            principal components must reach (passed to PCA as n_components).

    Returns:
        Series of composite scores indexed by stock code (as str), in the
        row order of *data*, not sorted.

    Raises:
        ValueError: if no row (or no numeric column) survives the filtering.
    """
    codes = data['ts_code'].astype(str)

    # 'ts_code' is removed by name.  The previous version additionally
    # dropped the first numeric column with iloc[:, 1:], although the code
    # column had already been excluded, which silently discarded a feature.
    # NOTE(review): if Data.xlsx carries a leading numeric index column,
    # drop it before calling this function.
    features = positive_numeric_rows(data.drop(columns='ts_code'))
    if features.empty:
        raise ValueError(
            'no rows with complete, strictly positive numeric indicators'
        )

    scaled = StandardScaler().fit_transform(features)

    pca = PCA(n_components=variance_ratio)
    components = pca.fit_transform(scaled)

    # Composite score: component scores weighted by explained-variance ratio.
    weighted = components @ pca.explained_variance_ratio_

    # Keep the codes aligned with the rows that survived the filtering.
    return pd.Series(weighted, index=codes.loc[features.index].to_numpy())


def relabel_with_names(scores, stk):
    """Re-index code-keyed *scores* by stock name.

    Args:
        scores: Series of scores indexed by stock code (str).
        stk: DataFrame with 'ts_code' and 'name' columns.

    Returns:
        Series with the scores of the codes present in *stk*, indexed by the
        matching stock name, in the order of *scores*.  Codes missing from
        *stk* are left out.
    """
    # Codes are compared as strings on both sides, and only the first name
    # of a repeated code is kept, so the lookup yields one name per score.
    names = pd.Series(
        stk['name'].to_numpy(),
        index=stk['ts_code'].astype(str).to_numpy(),
    )
    names = names[~names.index.duplicated(keep='first')]

    listed = scores[scores.index.isin(names.index)]
    return pd.Series(
        listed.to_numpy(),
        index=names.loc[listed.index].to_numpy(),
    )


def main():
    """Read the Excel inputs and print both score rankings."""
    data = pd.read_excel('Data.xlsx')
    fs1 = composite_scores(data)
    Fscore1 = fs1.sort_values(ascending=False)  # descending order

    stk = pd.read_excel('stkcode.xlsx')
    fs2 = relabel_with_names(fs1, stk)
    Fscore2 = fs2.sort_values(ascending=False)  # descending order

    print("按股票代码排序的综合得分:")
    print(Fscore1)
    print("\n按股票名称排序的综合得分:")
    print(Fscore2)


if __name__ == "__main__":
    main()