You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

78 lines
3.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
import os

import pandas as pd
import tushare as ts
# Initialise the Tushare API client.
# SECURITY: the token literal below is committed in source, so anyone who can
# read this file can use the account. Supply the token through the
# TUSHARE_TOKEN environment variable instead; the literal is kept only as a
# fallback so existing runs keep working, and should be rotated and removed.
ts.set_token(
    os.environ.get('TUSHARE_TOKEN')
    or '64532b1c03637bc0c3ac92931a5d1b53cfaf75de87c22dfdc70ca6a0'
)
pro = ts.pro_api()
# Fetch the basic stock information and save it as an Excel file.
stkcode = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry')
try:
    # Only the file write is guarded: the handler below is about a locked or
    # read-only output file, so an API failure should surface as its own error.
    stkcode.to_excel('stkcode.xlsx', index=False)
except PermissionError as e:
    print(f"无法写入 'stkcode.xlsx' 文件:{e}")
    print("请确保文件没有被其他程序占用,并且你有写入权限。")
    # exit() is injected by the `site` module and may be absent (e.g. under
    # `python -S`); raising SystemExit gives the same exit status 1 everywhere.
    raise SystemExit(1)
else:
    print("股票基本信息已成功导出到 'stkcode.xlsx'")
def _require_ts_code(df, name):
    """Raise KeyError when *df* has no 'ts_code' column.

    'ts_code' is the column used below to de-duplicate each dataset and,
    later in the script, to join the datasets together.

    Args:
        df: DataFrame returned by the API call.
        name: Dataset label inserted into the error message.

    Raises:
        KeyError: if 'ts_code' is not among ``df.columns``.
    """
    if 'ts_code' not in df.columns:
        raise KeyError(f"数据集 {name} 中没有 'ts_code' 列,请检查数据文件")


# Each dataset is validated right after download and BEFORE
# drop_duplicates(subset=['ts_code']): de-duplicating on a missing column
# raises its own bare KeyError, so a check placed afterwards could never
# report the descriptive message.

# Income statement: revenue, operating profit, total profit and net profit.
income = pro.income_vip(period='20161231', fields='ts_code,revenue,operate_profit,total_profit,n_income_attr_p')
_require_ts_code(income, 'income')
income = income.drop_duplicates(subset=['ts_code'])
print(f"收入数据列名: {income.columns}")

# Balance sheet: total assets and fixed assets.
balance = pro.balancesheet_vip(period='20161231', fields='ts_code,total_assets,fix_assets')
_require_ts_code(balance, 'balance')
balance = balance.drop_duplicates(subset=['ts_code'])
print(f"资产负债数据列名: {balance.columns}")

# Financial indicators: return on equity, net assets per share,
# capital reserve per share and earnings per share.
indicator = pro.fina_indicator_vip(period='20161231', fields='ts_code,roe,bps,capital_rese_ps,eps')
_require_ts_code(indicator, 'indicator')
indicator = indicator.drop_duplicates(subset=['ts_code'])
print(f"财务指标数据列名: {indicator.columns}")
# Check for and handle NaN values.
def check_and_handle_nan(df, name, *, fill_value=None):
    """Report and clean missing values in *df*.

    Args:
        df: DataFrame to inspect.
        name: Dataset label, used only in the progress message.
        fill_value: Optional replacement for missing values. When left as
            ``None`` (the default) every row containing a NaN is dropped;
            otherwise NaNs are replaced with this value and no rows are
            removed.

    Returns:
        ``df`` itself when it contains no missing values, otherwise a new
        DataFrame with the missing values dropped or filled.
    """
    # Nothing to clean: hand back the caller's frame untouched and stay silent.
    if not df.isnull().values.any():
        return df

    print(f"数据集 {name} 包含 NaN 值,进行处理...")
    if fill_value is not None:
        return df.fillna(fill_value)
    return df.dropna()
# Run the NaN check/clean-up on each source frame, in the order
# income, balance, indicator, and rebind the names to the results.
income, balance, indicator = (
    check_and_handle_nan(frame, label)
    for frame, label in (
        (income, 'income'),
        (balance, 'balance'),
        (indicator, 'indicator'),
    )
)
# Integrate the datasets: inner joins keyed on the stock code, then export
# the combined frame to Excel.
tempdata = pd.merge(left=income, right=balance, on='ts_code', how='inner')
Data = pd.merge(left=tempdata, right=indicator, on='ts_code', how='inner')

# Make sure the join key is still present in the combined frame.
if 'ts_code' not in Data.columns:
    raise KeyError("合并后的数据集中没有 'ts_code' 列,请检查数据文件")

# Run the NaN check once more on the combined frame.
Data = check_and_handle_nan(Data, 'Data')

# Write the integrated data to Excel without the index column.
Data.to_excel('Data.xlsx', index=False)
print("数据已成功导出到 'Data.xlsx'")
## The other data-retrieval examples from this chapter are also kept in this script.

# SAIC Motor (600104.SH): 2017 daily trading data, sorted by trade date,
# exported to Excel.
dta = pro.daily(
    ts_code='600104.SH',
    start_date='20170101',
    end_date='20171231',
).sort_values('trade_date')
dta.to_excel('dta.xlsx')

# CSI 300 index (399300.SZ): 2017 daily trading data, sorted by trade date,
# exported to Excel.
hs300 = pro.index_daily(
    ts_code='399300.SZ',
    start_date='20170101',
    end_date='20171231',
).sort_values('trade_date')
hs300.to_excel('hs300.xlsx')