|
|
# -*- coding: utf-8 -*-
|
|
|
"""
|
|
|
Created on Tue Oct 23 20:53:57 2018
|
|
|
|
|
|
@author: Administrator
|
|
|
"""
|
|
|
import pandas as pd
|
|
|
import numpy as np
|
|
|
from sklearn.preprocessing import StandardScaler
|
|
|
from sklearn.decomposition import PCA
|
|
|
|
|
|
# Load the raw data set.
data = pd.read_excel('Data.xlsx')

# Debugging aid: uncomment to inspect the column dtypes.
# print(data.dtypes)

# Force 'ts_code' to string dtype and keep an independent copy of it so the
# scores can be labelled with it later.
data['ts_code'] = data['ts_code'].astype(str)
ts_code = data['ts_code'].copy()

# Restrict the working frame to the numeric columns.
numeric_columns = data.select_dtypes(include=[np.number]).columns
positive_only = data.loc[:, numeric_columns]

# Mask every non-positive cell as NaN, then drop each row that contains a
# NaN, so only rows whose numeric values are all present and positive remain.
positive_only = positive_only.where(positive_only > 0)
data_numeric = positive_only.dropna()

# Keep only the codes of the rows that survived the cleaning step.
ts_code = ts_code.loc[data_numeric.index]
|
|
|
|
|
|
# Standardize the indicators.
# 'ts_code' is cast to str before the numeric-column filter, so it is already
# absent from data_numeric; slicing off the first column positionally would
# discard a real indicator instead. Drop 'ts_code' by name (a no-op when it is
# not there) and keep every numeric column.
# NOTE(review): if Data.xlsx carries a leading numeric ID/index column that
# should not enter the PCA, drop it here by name -- confirm against the file.
X = data_numeric.drop(columns=['ts_code'], errors='ignore')

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
|
|
|
|
|
|
# Principal component analysis.
# A fractional n_components keeps as many components as are needed to reach a
# cumulative explained-variance ratio of 95%.
pca = PCA(n_components=0.95)
Y = pca.fit_transform(X_scaled)  # component scores, one row per sample
gxl = pca.explained_variance_ratio_  # explained-variance ratio per component

# Composite score: each sample's component scores weighted by the matching
# explained-variance ratio and summed over the components. The matrix-vector
# product replaces the explicit per-component accumulation loop.
# NOTE(review): the sign of a principal component is a convention of the
# decomposition -- confirm that a higher score really means "better" before
# interpreting the ranking.
F = Y @ gxl
|
|
|
|
|
|
# Composite scores labelled by stock code, sorted from highest to lowest.
fs1 = pd.Series(F, index=ts_code.values)
Fscore1 = fs1.sort_values(ascending=False)

# Load the stock code -> stock name lookup table.
stk = pd.read_excel('stkcode.xlsx')
# Cast the lookup codes to str so they compare equal to the str-typed codes
# in ts_code even when Excel stored them as numbers.
stk = pd.Series(stk['name'].values, index=stk['ts_code'].astype(str).values)
# Keep the first name for a repeated code; a duplicated index would make the
# label lookup below return extra rows and break the length match with the
# filtered scores.
stk = stk[~stk.index.duplicated(keep='first')]

# Keep only the samples whose code exists in the lookup table. The mask is
# computed once, as a plain boolean array, and shared by codes and scores.
in_stk = ts_code.isin(stk.index).to_numpy()
ts_code_filtered = ts_code[in_stk]
F_filtered = F[in_stk]

# Stock names in the same order as the filtered codes.
stk1 = stk.loc[ts_code_filtered.values]

# Composite scores labelled by stock name, sorted from highest to lowest.
fs2 = pd.Series(F_filtered, index=stk1.values)
Fscore2 = fs2.sort_values(ascending=False)
|
|
|
|
|
|
# Print both rankings: first labelled by stock code, then by stock name.
# Each heading is followed by its sorted score Series.
for heading, scores in (
    ("按股票代码排序的综合得分:", Fscore1),
    ("\n按股票名称排序的综合得分:", Fscore2),
):
    print(heading)
    print(scores)
|