You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

78 lines
2.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
"""
Created on Tue Oct 23 20:53:57 2018
@author: Administrator
"""
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
# 读取数据
data = pd.read_excel('Data.xlsx')
# 检查数据类型
# print(data.dtypes)
# 确保 'ts_code' 列是字符串类型
data['ts_code'] = data['ts_code'].astype(str)
# 保留 'ts_code' 列,以便后续使用
ts_code = data['ts_code'].copy()
# 过滤掉非数值列
numeric_columns = data.select_dtypes(include=[np.number]).columns
data_numeric = data[numeric_columns]
# 过滤掉非正数
data_numeric = data_numeric[data_numeric > 0]
# 删除缺失值
data_numeric = data_numeric.dropna()
# 确保 'ts_code' 列与数据同步
ts_code = ts_code[data_numeric.index]
# 标准化数据
X = data_numeric.iloc[:, 1:] # 排除第一列 ts_code
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
# 主成分分析
pca = PCA(n_components=0.95) # 累计贡献率为95%
Y = pca.fit_transform(X_scaled) # 满足累计贡献率为95%的主成分数据
gxl = pca.explained_variance_ratio_ # 贡献率
# 计算综合得分
F = np.zeros((len(Y)))
for i in range(len(gxl)):
f = Y[:, i] * gxl[i]
F += f
# 创建 Series 并排序
fs1 = pd.Series(F, index=ts_code.values)
Fscore1 = fs1.sort_values(ascending=False) # 降序True 为升序
# 读取股票代码数据
stk = pd.read_excel('stkcode.xlsx')
stk = pd.Series(stk['name'].values, index=stk['ts_code'].values)
# 过滤掉不在 stk 中的 ts_code
ts_code_filtered = ts_code[ts_code.isin(stk.index)]
F_filtered = F[ts_code.isin(stk.index)]
# 获取对应的股票名称
stk1 = stk[ts_code_filtered.values]
# 创建包含股票名称的 Series 并排序
fs2 = pd.Series(F_filtered, index=stk1.values)
Fscore2 = fs2.sort_values(ascending=False) # 降序True 为升序
# 输出结果
print("按股票代码排序的综合得分:")
print(Fscore1)
print("\n按股票名称排序的综合得分:")
print(Fscore2)