You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
# add_fund_data.py
|
|
|
|
|
from app_test.tiantian_jijin_spider import get_fund_data_by_code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize_df(df):
    """
    Apply min-max normalization to the numeric columns of a DataFrame.

    Every column whose dtype kind is boolean, signed integer, unsigned
    integer, float or complex (``'biufc'``) is rescaled with
    ``(x - min) / (max - min)``. Columns of any other dtype are returned
    unchanged.

    Args:
        df (DataFrame): The DataFrame to normalize.

    Returns:
        df_normalized (DataFrame): A new DataFrame with the numeric columns
            rescaled. A numeric column whose values are all equal is
            mapped to zeros instead of NaN.
    """
    def _min_max(col):
        # Non-numeric columns are passed through untouched.
        if col.dtype.kind not in 'biufc':
            return col

        # NumPy does not support the `-` operator on boolean arrays, so
        # boolean columns are converted to 0.0 / 1.0 before scaling.
        if col.dtype.kind == 'b':
            col = col.astype(float)

        lowest = col.min()
        span = col.max() - lowest

        # A constant column has a zero range; dividing by it would turn
        # every value into NaN (0 / 0). Use a span of 1 so the column
        # becomes all zeros. The notna() guard keeps all-missing columns
        # on the plain arithmetic path (result stays all-missing).
        if pd.notna(span) and span == 0:
            span = 1

        return (col - lowest) / span

    df_normalized = df.apply(_min_max)
    return df_normalized
|
|
|
|
|
def add_fund_data(fund_code, csv_path='filled_row_data.csv'):
    """
    Join the preprocessed feature table with the data of one fund.

    The feature table is read from ``csv_path`` and inner-joined on the
    ``date`` column with whatever ``get_fund_data_by_code(fund_code)``
    returns. The ``date`` column is then parsed to datetimes and used as
    the index, missing values are interpolated, and rows that still
    contain NaN are dropped.

    Args:
        fund_code: Fund identifier forwarded unchanged to
            ``get_fund_data_by_code``.
        csv_path: Path of the preprocessed CSV file. Defaults to
            ``'filled_row_data.csv'``, the location that was previously
            hard-coded, so existing callers are unaffected.

    Returns:
        DataFrame: The merged table indexed by ``date``, without NaN rows.
        No normalization is applied here.
    """
    df = pd.read_csv(csv_path)

    # NOTE(review): presumably returns a DataFrame that has a 'date'
    # column compatible with the CSV's 'date' column - confirm against
    # the spider module.
    fund_data = get_fund_data_by_code(fund_code)

    # Attach the fund data (the dependent variable to be predicted) to the
    # preprocessed features; only dates present in both tables are kept.
    df_merged = pd.merge(df, fund_data, how='inner', on='date')
    df_merged['date'] = pd.to_datetime(df_merged['date'])
    df_merged.set_index('date', inplace=True)

    # Fill missing values with pandas' default (linear) interpolation.
    # TODO: evaluate other methods, e.g. polynomial interpolation.
    df_merged = df_merged.interpolate()

    # Drop any rows that still contain NaN after interpolation.
    df_merged.dropna(inplace=True)

    print('添加基金数据成功')
    return df_merged
|