Compare commits
5 Commits
Author | SHA1 | Date |
---|---|---|
Yao | 442b502724 | 4 months ago |
Yao | 941c9fe7e0 | 8 months ago |
Yao | eca2019e33 | 8 months ago |
Yao | 0d1f454dae | 8 months ago |
Yao | ebb97b8f8c | 8 months ago |
@ -0,0 +1,6 @@
|
||||
/test
|
||||
/*/__pycache__
|
||||
/models/heatmap.py
|
||||
/models/LSTM_Forecasting.py
|
||||
/data
|
||||
/.vscode
|
@ -0,0 +1,29 @@
|
||||
"""
|
||||
|
||||
"""
|
||||
from . import utils, draw_echarts
|
||||
import streamlit as st
|
||||
|
||||
from models import VAR_Forecasting, ARIMA_Forecasting, SARIMA_Forecasting, RF_Forecasting
|
||||
from typing import List, Type
|
||||
|
||||
|
||||
def run(target: str,
        target_name: str,
        models: List[Type] = [
            VAR_Forecasting, ARIMA_Forecasting, SARIMA_Forecasting,
            RF_Forecasting
        ]):
    """Render the forecast-result page of the Streamlit app.

    Shows a title and a model selector, loads the history data and the
    selected model's forecast from CSV files under ``data/``, and draws both
    on one chart.

    Args:
        target: Column to plot; must exist in both CSV files.
        target_name: Display name of the target, used in the chart legend.
        models: Forecasting model modules offered in the selector. The
            default list is only read, never mutated.
    """
    # The selector shows the short model name: the part of the module name
    # before the first underscore (e.g. "VAR" for "VAR_Forecasting").
    models_name = [
        model.__name__.split('.')[-1].split('_')[0] for model in models
    ]

    st.title("模型预测结果")
    history_data = utils.read_csv("data/normalized_df.csv")

    selected_model = st.selectbox("选择你想看的模型预测结果", models_name)

    pred_data = utils.read_csv(f"data/{selected_model}_Forecasting_df.csv")

    # utils.read_csv returns an empty DataFrame when a file is missing or
    # unreadable; plotting would then fail with a KeyError on the target
    # column. Report the problem in the page instead of crashing.
    if target not in history_data.columns or target not in pred_data.columns:
        st.error(f"无法绘制 {selected_model} 模型的预测结果:数据文件缺失或不包含 {target} 列。")
        return

    draw_echarts.draw_echarts(selected_model, target, target_name,
                              history_data, pred_data)
|
@ -0,0 +1,95 @@
|
||||
import pandas as pd
|
||||
from streamlit_echarts import st_echarts
|
||||
|
||||
|
||||
def draw_echarts(model_name: str, target: str, target_name: str,
                 history_data: pd.DataFrame, pred_data: pd.DataFrame):
    """Build an ECharts line-chart option and render it in the Streamlit app.

    History and forecast share one category x-axis: the history index labels
    followed by the forecast index labels. Each series is padded with
    ``None`` over the other's span so the two lines sit end to end.

    Args:
        model_name (str): Model name, shown in the chart title.
        target (str): Column to plot; read from both data frames.
        target_name (str): Display name used for the legend and series names.
        history_data (pd.DataFrame): Historical data.
        pred_data (pd.DataFrame): Forecast data.

    Returns:
        dict: The option dict that was passed to ``st_echarts``.
    """
    # Pad each series with None so both have one entry per x-axis label.
    history_values = (history_data[target].values.tolist() +
                      [None] * len(pred_data))
    pred_values = ([None] * len(history_data) +
                   pred_data[target].values.tolist())

    history_label = f"{target_name}历史数据"
    pred_label = f"{target_name}预测数据"

    option = {
        "title": {
            "text": f"{model_name}模型",
            "x": "auto"
        },
        # Tooltip follows the axis, so both series are shown for a label.
        "tooltip": {
            "trigger": "axis"
        },
        "legend": {
            "data": [history_label, pred_label],
            "left": "right"
        },
        "xAxis": {
            "type": "category",
            "data": (history_data.index.astype(str).to_list() +
                     pred_data.index.astype(str).to_list())
        },
        "yAxis": {
            "type": "value"
        },
        # Zoom inside the plot area, initially showing the full range.
        "dataZoom": [{
            "type": "inside",
            "start": 0,
            "end": 100
        }],
        "series": []
    }

    # A series is drawn when it holds at least one real point. Compare
    # against None rather than relying on truthiness, so a series whose
    # values are all 0 is still plotted.
    if any(value is not None for value in history_values):
        option["series"].append({
            "name": history_label,
            "type": "line",
            "data": history_values,
            # ECharts documents `smooth` as boolean/number, not a string.
            "smooth": True
        })

    if any(value is not None for value in pred_values):
        option["series"].append({
            "name": pred_label,
            "type": "line",
            "data": pred_values,
            "smooth": True,
            # Dashed line distinguishes the forecast from the history.
            "lineStyle": {
                "type": "dashed"
            }
        })

    # Render the chart in the Streamlit app.
    st_echarts(options=option)
    return option
|
||||
|
||||
|
||||
# history_data = pd.read_csv('data/normalized_df.csv',
|
||||
# index_col="date",
|
||||
# parse_dates=["date"])
|
||||
# pred_data = pd.read_csv('data/VAR_Forecasting_df.csv',
|
||||
# index_col="date",
|
||||
# parse_dates=["date"])
|
||||
# draw_echarts('VAR_Forecasting', 'liugan_index', '流感指数', history_data,
|
||||
# pred_data)
|
@ -0,0 +1,26 @@
|
||||
"""
|
||||
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def read_csv(file_path: str) -> pd.DataFrame:
    """Load a CSV file into a DataFrame indexed by its parsed ``date`` column.

    Loading is best-effort: any failure is reported on stdout and an empty
    DataFrame is returned instead of raising.

    Args:
        file_path (str): Path of the CSV file.

    Returns:
        DataFrame: The loaded data, or an empty DataFrame on failure.
    """
    try:
        loaded = pd.read_csv(file_path,
                             index_col="date",
                             parse_dates=["date"])
        print(f"成功读取文件: {file_path}")
        return loaded
    except FileNotFoundError:
        problem = f"找不到文件: {file_path}"
    except Exception as e:
        problem = f"读取文件时发生错误: {e}"

    # Only reached when loading failed: report it and fall back to empty.
    print(problem)
    return pd.DataFrame()
|
@ -0,0 +1,21 @@
|
||||
from typing import List, Type
|
||||
import pg_request as pg
|
||||
import models as m
|
||||
import echarts_visualization as ev
|
||||
|
||||
|
||||
def main(target: str,
         target_name: str,
         exog_columns: List[str],
         models: List[Type] = [
             m.VAR_Forecasting, m.ARIMA_Forecasting, m.SARIMA_Forecasting,
             m.RF_Forecasting
         ]):
    """Run the pipeline stages in order: ``pg``, then ``m``, then ``ev``.

    Args:
        target: Forecast target, passed to the model and visualization stages.
        target_name: Display name of the target, passed to the visualization
            stage.
        exog_columns: Exogenous column names, passed to the model stage.
        models: Model modules passed to the model and visualization stages.
    """
    # Stage 1: pg_request takes no arguments.
    pg.run()

    # Stage 2: run the models for the target.
    m.run(models=models, forecast_target=target, exog_columns=exog_columns)

    # Stage 3: visualize with the same model list.
    ev.run(models=models, target=target, target_name=target_name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: run the pipeline for the 'liugan_index' target
    # with four exogenous columns.
    main(
        target='liugan_index',
        target_name='流感指数',
        exog_columns=[
            'infection_number.1',
            'infection_number.2',
            'jijin_data',
            'shoupan',
        ],
    )
|
Binary file not shown.
@ -0,0 +1,41 @@
|
||||
# Pool of User-Agent strings available for use.
#
# Long entries are written as adjacent string literals inside parentheses.
# Python joins them at compile time, so - unlike a backslash continuation
# inside a literal - the value does not depend on how the continuation line
# is indented.
agent_list = [
    ("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:87.0) Gecko/20100101 "
     "Firefox/87.0"),
    ("Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
     "(KHTML, like Gecko) Chrome/65.0.3314.0 Safari/537.36 "
     "SE 2.X MetaSr 1.0"),
    ("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
     "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"),
    ("Mozilla/5.0 (Linux; Android 7.0; SM-G950U Build/NRD90M) "
     "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 "
     "Mobile Safari/537.36"),
    ("Mozilla/5.0 (Linux; Android 8.0.0; SM-G965U Build/R16NW) "
     "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.111 "
     "Mobile Safari/537.36"),
    ("Mozilla/5.0 (Linux; Android 8.1.0; SM-T837A) AppleWebKit/537.36 "
     "(KHTML, like Gecko) Chrome/70.0.3538.80 Safari/537.36"),
    ("Mozilla/5.0 (Linux; U; en-us; KFAPWI Build/JDQ39) "
     "AppleWebKit/535.19 (KHTML, like Gecko) Silk/3.13 Safari/535.19 "
     "Silk-Accelerated=true"),
    ("Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; "
     "Lumia 550) AppleWebKit/537.36 (KHTML, like Gecko) "
     "Chrome/46.0.2486.0 Mobile Safari/537.36 Edge/14.14263"),
    ("Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; "
     "Lumia 950) AppleWebKit/537.36 (KHTML, like Gecko) "
     "Chrome/46.0.2486.0 Mobile Safari/537.36 Edge/14.14263"),
    ("Mozilla/5.0 (Linux; Android 11; moto g power (2022)) "
     "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 "
     "Mobile Safari/537.36"),
    # The space before "(KHTML" was missing where the old literal wrapped.
    ("Mozilla/5.0 (Linux; Android 6.0.1; Moto G (4)) "
     "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 "
     "Mobile Safari/537.36"),
    ("Mozilla/5.0 (Linux; Android 6.0.1; Nexus 10 Build/MOB31T) "
     "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 "
     "Safari/537.36"),
    ("Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) "
     "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 "
     "Mobile Safari/537.36"),
    ("Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
     "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 "
     "Mobile Safari/537.36"),
    ("Mozilla/5.0 (Linux; Android 8.0.0; Nexus 5X Build/OPR4.170623.006) "
     "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 "
     "Mobile Safari/537.36"),
]
|
@ -0,0 +1,131 @@
|
||||
import requests
|
||||
import re
|
||||
from multiprocessing.pool import ThreadPool
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def get_jijin_data(*args):
    """Fetch one page of a fund's historical net-value records.

    Sends a GET request to the Eastmoney ``f10/lsjz`` endpoint and extracts
    the ``FSRQ`` / ``DWJZ`` fields of every record in the response text.

    :param args: a single positional argument, a ``(fund_code, page_index)``
        sequence. The pair is packed into one argument so the function can
        be handed directly to ``ThreadPool.map``.
    :return: list of ``[FSRQ, DWJZ]`` string pairs, in response order
        (presumably record date and unit net value - confirm against the
        API). Empty when the response contains no matching records.
    :raises requests.exceptions.RequestException: on connection failure or
        when the server does not answer within the timeout.
    """
    fund_code = args[0][0]
    page_index = args[0][1]

    # NOTE(review): these look like cookies captured from a logged-in
    # browser session ('ct', 'ut', 'pi', 'uidal'). They are kept so the
    # request is unchanged, but session tokens should not live in source
    # control - confirm whether the endpoint needs them at all.
    cookies = {
        'qgqp_b_id': '5c08ebc12f489b4f5ba9e76c2539ce0b',
        'emshistory':
        '%5B%2200005%22%2C%2200002%22%2C%2200002%E3%80%81%22%2C%2200001%22%5D',
        'HAList':
        'ty-0-300411-%u91D1%u76FE%u80A1%u4EFD%2Cty-0-399366-%u80FD%u6E90%u91D1%u5C5E%2Cty-116-00002-%u4E2D%u7535%u63A7%u80A1%2Cty-116-03119-GX%u4E9A%u6D32%u534A%u5BFC%u4F53%2Cty-116-00007-%u667A%u5BCC%u8D44%u6E90%u6295%u8D44%2Cty-116-00001-%u957F%u548C%2Cty-116-00016-%u65B0%u9E3F%u57FA%u5730%u4EA7%2Cty-0-301075-%u591A%u745E%u533B%u836F%2Cty-90-BK1042-%u533B%u836F%u5546%u4E1A%2Cty-1-601607-%u4E0A%u6D77%u533B%u836F',
        'mtp': '1',
        'ct':
        'Rc8QhLQwVpXSsLuf4UOMLbPMtE9gFAEkMTisAatrxh1rv-WFWG9EC-2zw_WFCJnVfsaViwejVO4ziLTZig1GUptw6NORwx36yfzDu9g9zstYkLdwIWvQ-9QqGL-F5C1GCS7xhUtoBrFAibnr_-HA078LL8tr7yWiGM9V3ZmooC8',
        'ut':
        'FobyicMgeV54OLFNgnrRk4fT26HSX01NG2N55VZbVzZlqOMDJ-67DsHyCMk6G-yTMaqRhIAFuiYbVkK6Y-sYY8ghkJ3v9gyvUZyHWYpJnreP78yw4o-H8FNcTvUXmOj4KLsGaYuV1TAHltcdN0WDTy-YCOJ8OlzrX-MQbQc_CBvXfUYn10iBhXwvJY94XBkg4eOCJpu6Dok3ot9Xsr8flPIDz6f3KxJcIgnXZ7QpZKDMIvavpSunuMiR8Q5ezUD2y-JiBEgNkeoH_36wg0elojOfd5k61gTK',
        'pi':
        '6293426663250936%3Bm6293426663250936%3B%E4%BA%89%E5%88%86%E5%A4%BA%E7%A7%92%E7%9A%84%E9%A3%8E%E8%BE%B02%3B4qqIkcy3NvmegD2EnE%2BsOg2O1jjgPTjDxX3du3GmlWaCk8fr0sJ%2FmubqRXtUqqRoZWsMMmMvcfSg1wNNX8p93XE3fanPRZvbcs7bYEjCeUqg5RMcJtmbM9jEifMzwRAAmCipwh9KbqrYLdkLenTwJYqOaG9qmaZ2qDmn2Pa66eitUxhH2q0aU0kerTnJCi2qJnM8Y0Oc%3Bz%2Bzk7gxq8gdHwxSGucOoQSvBZ44Uaf7Um0f7bFnTUgwLnxWm2OMnlrG9SZX6ezbrsEoqVVrOk%2FVRGekqxUH%2BufKtmb89UVNnA0x62lxu6z84Y8dT0sXAWUELHmWZf8cnumRIL8kPvuAcHSXq5P6pTC3OaxbBeQ%3D%3D',
        'uidal':
        '6293426663250936%e4%ba%89%e5%88%86%e5%a4%ba%e7%a7%92%e7%9a%84%e9%a3%8e%e8%be%b02',
        'sid': '',
        'vtpst': '|',
        'websitepoptg_api_time': '1715218615434',
        'st_si': '46368340182479',
        'EmFundFavorVersion': '1686749115372',
        'EmFundFavorVersion2': '1686749115372',
        'st_asi': 'delete',
        'EMFUND0': 'null',
        'st_pvi': '35290886003252',
        'st_sp': '2023-12-17%2018%3A51%3A34',
        'st_inirUrl': 'https%3A%2F%2Fcn.bing.com%2F',
        'st_sn': '27',
        'st_psi': '20240509100744555-112200305283-5067673963',
    }

    headers = {
        'Accept':
        '*/*',
        'Accept-Language':
        'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Connection':
        'keep-alive',
        'Referer':
        'https://fundf10.eastmoney.com/',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0',
    }

    params = {
        'callback': 'jQuery183019015669101010957_1715220464680',
        'fundCode': fund_code,
        'pageIndex': page_index,
        'pageSize': '20',
        'startDate': '',
        'endDate': '',
        '_': '1715220492762',
    }

    # Without a timeout a stalled connection would block this worker
    # forever; requests applies no timeout by default.
    response = requests.get('https://api.fund.eastmoney.com/f10/lsjz',
                            params=params,
                            cookies=cookies,
                            headers=headers,
                            timeout=10)

    # The fields are pulled straight out of the response text with a regex
    # (a callback name is sent, so the body is presumably JSONP, not JSON).
    pattern = r'"FSRQ":"(.*?)","DWJZ":"(.*?)"'
    return [list(record) for record in re.findall(pattern, response.text)]
|
||||
|
||||
|
||||
def get_hx_data():
    """Fetch the historical net-value data of fund 008107.

    Per the original note this is the Huashang medicine & healthcare
    industry equity fund. Pages 1 to 28 are requested concurrently and the
    page results are concatenated in page order.

    :return: list of rows as returned by ``get_jijin_data``
    """
    fundcode = '008107'
    args_list = [(fundcode, page) for page in range(1, 29)]

    # The work is network-bound, so a thread pool (not processes) is used.
    # One worker per page is enough, and the context manager releases the
    # pool even if a request raises.
    with ThreadPool(len(args_list)) as pool:
        pages = pool.map(get_jijin_data, args_list)

    # Flatten the per-page lists into one list of rows.
    hx_data = [row for page in pages for row in page]
    print(hx_data)
    return hx_data
|
||||
|
||||
|
||||
def get_gf_data():
    """Fetch the historical net-value data of fund 010731.

    Per the original note this is the GF innovative healthcare two-year
    holding hybrid fund. Pages 1 to 28 are requested concurrently and the
    page results are concatenated in page order.

    :return: list of rows as returned by ``get_jijin_data``
    """
    fundcode = '010731'
    args_list = [(fundcode, page) for page in range(1, 29)]

    # The work is network-bound, so a thread pool (not processes) is used.
    # One worker per page is enough, and the context manager releases the
    # pool even if a request raises.
    with ThreadPool(len(args_list)) as pool:
        pages = pool.map(get_jijin_data, args_list)

    # Flatten the per-page lists into one list of rows.
    gf_data = [row for page in pages for row in page]
    print(gf_data)
    return gf_data
|
||||
|
||||
|
||||
def save_data_to_csv(data, filename):
    """Write two-column rows to ``<filename>.csv``, sorted by date.

    The first column is named ``date`` and parsed as datetimes; the second
    column is named after ``filename``. The DataFrame index is written too.

    :param data: rows of two values each, date first
    :param filename: output path without the ``.csv`` suffix; also used as
        the name of the value column
    """
    frame = pd.DataFrame(data, columns=['date', filename])
    frame['date'] = pd.to_datetime(frame['date'])

    # Sort chronologically, then write in one chained step.
    frame.sort_values(by='date').to_csv(f'{filename}.csv', encoding="utf_8")
    print(f'成功爬取流感基金数据并保存在{filename}.csv中')
|
||||
|
||||
|
||||
# Runs at import time: fetch each fund's history and write it to its own CSV
# file, one fund after the other.
for fetch_fund, csv_stem in (
    (get_hx_data, 'hx_jijin_data'),
    (get_gf_data, 'gf_jijin_data'),
):
    save_data_to_csv(fetch_fund(), csv_stem)
|
Loading…
Reference in new issue