# You cannot select more than 25 topics
# Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
import os
import pandas as pd
# add_fund_data.py
from app_test . tiantian_jijin_spider import get_fund_data_by_code
def _min_max_scale(column):
    """Scale one column to the [0, 1] range; return non-numeric columns as-is."""
    # dtype.kind codes: b=bool, i=signed int, u=unsigned int, f=float, c=complex.
    if column.dtype.kind not in 'biufc':
        return column
    if column.dtype.kind == 'b':
        # Boolean subtraction is rejected by NumPy, so treat True/False as 1.0/0.0.
        column = column.astype(float)
    lowest = column.min()
    spread = column.max() - lowest
    # A constant column has zero spread; dividing by 1 maps it to 0 instead of
    # turning the whole column into NaN via 0/0. A NaN spread (e.g. an
    # all-missing column) is not equal to 0 and therefore still yields NaN.
    return (column - lowest) / (spread if spread != 0 else 1)


def normalize_df(df):
    """Apply min-max normalization to the numeric columns of a DataFrame.

    Each column whose dtype kind is boolean, signed/unsigned integer, float or
    complex is rescaled as ``(x - min) / (max - min)``. Columns of any other
    dtype are returned unchanged. Constant numeric columns are mapped to 0
    rather than NaN, and boolean columns are converted to 0.0/1.0 first.

    Args:
        df (DataFrame): The DataFrame to normalize. It is not modified.

    Returns:
        DataFrame: A new DataFrame with the numeric columns rescaled.
    """
    return df.apply(_min_max_scale)
def add_fund_data(fund_code, data_path='filled_row_data.csv'):
    """Join the preprocessed feature table with one fund's data, indexed by date.

    Args:
        fund_code: Fund identifier, passed through to
            ``get_fund_data_by_code``.
        data_path: CSV file holding the preprocessed rows; it must contain a
            ``date`` column. Defaults to ``filled_row_data.csv`` in the
            current working directory, the path that used to be hard-coded.

    Returns:
        DataFrame: Rows whose ``date`` occurs in both tables, indexed by the
        parsed ``date`` column, with missing values interpolated and any row
        that still contains NaN removed.
    """
    features = pd.read_csv(data_path)
    # NOTE(review): presumably returns a DataFrame with a 'date' column whose
    # values are comparable with the CSV's 'date' column -- confirm in the spider.
    fund_data = get_fund_data_by_code(fund_code)

    # Attach the fund series (the dependent variable to predict) to the
    # preprocessed features; the inner join keeps only dates present in both.
    merged = pd.merge(features, fund_data, how='inner', on='date')
    merged['date'] = pd.to_datetime(merged['date'])
    merged = merged.set_index('date')

    # Fill gaps with pandas' default (linear) interpolation, then drop any row
    # that still has a NaN.
    merged = merged.interpolate().dropna()

    print(' 添加基金数据成功 ')
    return merged