You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

78 lines
2.1 KiB

# -*- coding: utf-8 -*-
"""
Created on Tue Oct 23 20:53:57 2018
@author: Administrator
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
def select_positive_indicators(data):
    """Return the numeric indicator columns of *data*, restricted to clean rows.

    The ``ts_code`` identifier column is dropped by name (whatever its dtype),
    every remaining non-numeric column is discarded, and only rows in which
    all numeric values are strictly positive and present are kept. The
    original row labels are preserved so callers can realign other columns.

    Args:
        data: DataFrame holding one row per stock.

    Returns:
        DataFrame of the numeric indicator columns for the surviving rows.
    """
    numeric = data.drop(columns=['ts_code'], errors='ignore')
    numeric = numeric.select_dtypes(include=[np.number])
    # Masking turns non-positive cells into NaN, so a single dropna() removes
    # rows with non-positive values as well as rows with missing values.
    return numeric[numeric > 0].dropna()


def composite_score(components, ratios):
    """Return the weighted sum of component columns.

    Args:
        components: 2-D array, one row per sample and one column per
            principal component.
        ratios: 1-D array with one weight per component column.

    Returns:
        1-D array with one composite score per sample (all zeros when there
        are no components).
    """
    return np.asarray(components) @ np.asarray(ratios)


def scores_by_name(scores, names):
    """Re-index composite scores from stock code to stock name.

    Args:
        scores: Series of scores indexed by stock code.
        names: Series of stock names indexed by stock code.

    Returns:
        Series of the scores whose code appears in *names*, indexed by the
        corresponding name. Codes missing from *names* are dropped.
    """
    # A code listed twice would yield two names for one score and break the
    # one-to-one alignment below, so keep only the first name per code.
    names = names[~names.index.duplicated(keep='first')]
    known = scores[scores.index.isin(names.index)]
    return pd.Series(known.values, index=names.loc[known.index].values)


# The pipeline runs only when the file is executed as a script. The names
# assigned below are still module-level, so they remain inspectable afterwards.
if __name__ == "__main__":
    # Load the indicator table; codes are forced to str so they compare
    # consistently with the codes of the name table.
    data = pd.read_excel('Data.xlsx')
    data['ts_code'] = data['ts_code'].astype(str)

    # Keep numeric indicators only, and only rows that are fully positive.
    data_numeric = select_positive_indicators(data)
    # Keep the codes aligned with the rows that survived the filtering.
    ts_code = data['ts_code'].loc[data_numeric.index]

    # Standardize the indicators. ts_code is already excluded, so every
    # numeric column takes part in the analysis.
    X = data_numeric
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Principal component analysis keeping enough components to reach a
    # cumulative explained-variance ratio of 95%.
    pca = PCA(n_components=0.95)
    Y = pca.fit_transform(X_scaled)
    gxl = pca.explained_variance_ratio_  # per-component contribution ratio

    # Composite score: components weighted by their contribution ratio.
    F = composite_score(Y, gxl)

    # Scores indexed by stock code, highest first (ascending=True would
    # sort from lowest to highest).
    fs1 = pd.Series(F, index=ts_code.values)
    Fscore1 = fs1.sort_values(ascending=False)

    # Load the code -> name table, with codes as str to match Data.xlsx.
    stk = pd.read_excel('stkcode.xlsx')
    stk = pd.Series(stk['name'].values, index=stk['ts_code'].astype(str).values)

    # Scores indexed by stock name (codes without a known name are dropped),
    # highest first.
    fs2 = scores_by_name(fs1, stk)
    Fscore2 = fs2.sort_values(ascending=False)

    # Print both rankings.
    print("按股票代码排序的综合得分:")
    print(Fscore1)
    print("\n按股票名称排序的综合得分:")
    print(Fscore2)