You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
132 lines
7.7 KiB
132 lines
7.7 KiB
import requests
|
|
import re
|
|
from multiprocessing.pool import ThreadPool
|
|
import pandas as pd
|
|
|
|
|
|
def get_jijin_data(*args, timeout=10):
    """
    Fetch one page of a fund's historical net value records.

    The function is written to be used with ``pool.map``: the fund code and
    the page index arrive packed together as the first positional argument.

    :param args: ``args[0]`` is a ``(fundCode, page)`` pair; ``fundCode`` is
        sent as the ``fundCode`` query parameter and ``page`` as ``pageIndex``.
    :param timeout: seconds passed to ``requests.get`` so that a stalled
        connection raises instead of blocking the caller forever.
    :return: list of ``[FSRQ, DWJZ]`` string pairs extracted from the response
        text (presumably the net value date and the unit net value -- confirm
        against the API); empty when the pattern matches nothing.
    """
    fund_code, page = args[0][0], args[0][1]

    # NOTE(review): these look like session cookies copied from a logged-in
    # browser; consider loading them from configuration instead of keeping
    # them in source control.
    cookies = {
        'qgqp_b_id': '5c08ebc12f489b4f5ba9e76c2539ce0b',
        'emshistory':
        '%5B%2200005%22%2C%2200002%22%2C%2200002%E3%80%81%22%2C%2200001%22%5D',
        'HAList':
        'ty-0-300411-%u91D1%u76FE%u80A1%u4EFD%2Cty-0-399366-%u80FD%u6E90%u91D1%u5C5E%2Cty-116-00002-%u4E2D%u7535%u63A7%u80A1%2Cty-116-03119-GX%u4E9A%u6D32%u534A%u5BFC%u4F53%2Cty-116-00007-%u667A%u5BCC%u8D44%u6E90%u6295%u8D44%2Cty-116-00001-%u957F%u548C%2Cty-116-00016-%u65B0%u9E3F%u57FA%u5730%u4EA7%2Cty-0-301075-%u591A%u745E%u533B%u836F%2Cty-90-BK1042-%u533B%u836F%u5546%u4E1A%2Cty-1-601607-%u4E0A%u6D77%u533B%u836F',
        'mtp': '1',
        'ct':
        'Rc8QhLQwVpXSsLuf4UOMLbPMtE9gFAEkMTisAatrxh1rv-WFWG9EC-2zw_WFCJnVfsaViwejVO4ziLTZig1GUptw6NORwx36yfzDu9g9zstYkLdwIWvQ-9QqGL-F5C1GCS7xhUtoBrFAibnr_-HA078LL8tr7yWiGM9V3ZmooC8',
        'ut':
        'FobyicMgeV54OLFNgnrRk4fT26HSX01NG2N55VZbVzZlqOMDJ-67DsHyCMk6G-yTMaqRhIAFuiYbVkK6Y-sYY8ghkJ3v9gyvUZyHWYpJnreP78yw4o-H8FNcTvUXmOj4KLsGaYuV1TAHltcdN0WDTy-YCOJ8OlzrX-MQbQc_CBvXfUYn10iBhXwvJY94XBkg4eOCJpu6Dok3ot9Xsr8flPIDz6f3KxJcIgnXZ7QpZKDMIvavpSunuMiR8Q5ezUD2y-JiBEgNkeoH_36wg0elojOfd5k61gTK',
        'pi':
        '6293426663250936%3Bm6293426663250936%3B%E4%BA%89%E5%88%86%E5%A4%BA%E7%A7%92%E7%9A%84%E9%A3%8E%E8%BE%B02%3B4qqIkcy3NvmegD2EnE%2BsOg2O1jjgPTjDxX3du3GmlWaCk8fr0sJ%2FmubqRXtUqqRoZWsMMmMvcfSg1wNNX8p93XE3fanPRZvbcs7bYEjCeUqg5RMcJtmbM9jEifMzwRAAmCipwh9KbqrYLdkLenTwJYqOaG9qmaZ2qDmn2Pa66eitUxhH2q0aU0kerTnJCi2qJnM8Y0Oc%3Bz%2Bzk7gxq8gdHwxSGucOoQSvBZ44Uaf7Um0f7bFnTUgwLnxWm2OMnlrG9SZX6ezbrsEoqVVrOk%2FVRGekqxUH%2BufKtmb89UVNnA0x62lxu6z84Y8dT0sXAWUELHmWZf8cnumRIL8kPvuAcHSXq5P6pTC3OaxbBeQ%3D%3D',
        'uidal':
        '6293426663250936%e4%ba%89%e5%88%86%e5%a4%ba%e7%a7%92%e7%9a%84%e9%a3%8e%e8%be%b02',
        'sid': '',
        'vtpst': '|',
        'websitepoptg_api_time': '1715218615434',
        'st_si': '46368340182479',
        'EmFundFavorVersion': '1686749115372',
        'EmFundFavorVersion2': '1686749115372',
        'st_asi': 'delete',
        'EMFUND0': 'null',
        'st_pvi': '35290886003252',
        'st_sp': '2023-12-17%2018%3A51%3A34',
        'st_inirUrl': 'https%3A%2F%2Fcn.bing.com%2F',
        'st_sn': '27',
        'st_psi': '20240509100744555-112200305283-5067673963',
    }

    headers = {
        'Accept':
        '*/*',
        'Accept-Language':
        'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Connection':
        'keep-alive',
        'Referer':
        'https://fundf10.eastmoney.com/',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0',
    }

    params = {
        'callback': 'jQuery183019015669101010957_1715220464680',
        'fundCode': fund_code,
        'pageIndex': page,
        'pageSize': '20',
        'startDate': '',
        'endDate': '',
        '_': '1715220492762',
    }

    response = requests.get('https://api.fund.eastmoney.com/f10/lsjz',
                            params=params,
                            cookies=cookies,
                            headers=headers,
                            timeout=timeout)

    # The body is scanned with a regex rather than parsed: each match yields
    # the two captured field values of one record.
    pattern = r'"FSRQ":"(.*?)","DWJZ":"(.*?)"'
    return [list(record) for record in re.findall(pattern, response.text)]
|
|
|
|
|
|
def get_hx_data():
    """
    Fetch the historical net value data of the Huashang medicine and
    healthcare industry stock fund (fund code ``008107``).

    Pages 1 through 28 are requested concurrently through
    ``get_jijin_data`` and their rows are concatenated in page order.
    The combined list is also printed to stdout.

    :return: flat list of the rows returned by ``get_jijin_data`` for
        every requested page
    """
    fundcode = '008107'
    args_list = [(fundcode, page) for page in range(1, 29)]

    # A thread pool (not processes) is used for the page requests. One
    # worker per page is enough, and the ``with`` block releases the workers
    # even when a request raises.
    with ThreadPool(len(args_list)) as pool:
        pages = pool.map(get_jijin_data, args_list)

    hx_data = []
    for page_rows in pages:
        hx_data.extend(page_rows)
    print(hx_data)
    return hx_data
|
|
|
|
|
|
def get_gf_data():
    """
    Fetch the historical net value data of the GF innovative healthcare
    two-year holding hybrid fund (fund code ``010731``).

    Pages 1 through 28 are requested concurrently through
    ``get_jijin_data`` and their rows are concatenated in page order.
    The combined list is also printed to stdout.

    :return: flat list of the rows returned by ``get_jijin_data`` for
        every requested page
    """
    fundcode = '010731'
    args_list = [(fundcode, page) for page in range(1, 29)]

    # A thread pool (not processes) is used for the page requests. One
    # worker per page is enough, and the ``with`` block releases the workers
    # even when a request raises.
    with ThreadPool(len(args_list)) as pool:
        pages = pool.map(get_jijin_data, args_list)

    gf_data = []
    for page_rows in pages:
        gf_data.extend(page_rows)
    print(gf_data)
    return gf_data
|
|
|
|
|
|
def save_data_to_csv(data, filename):
    """
    Write two-column rows to ``<filename>.csv``, ordered by date.

    :param data: rows of two values each; the first is parsed as a date
    :param filename: output file name without extension; it is also used
        as the header of the second column
    :return: None
    """
    frame = pd.DataFrame(data, columns=['date', filename])
    # Parse the dates so the ordering is chronological, not lexical.
    frame['date'] = pd.to_datetime(frame['date'])
    ordered = frame.sort_values(by='date')
    ordered.to_csv(f'{filename}.csv', encoding="utf_8")
    print(f'成功爬取流感基金数据并保存在{filename}.csv中')
|
|
|
|
|
|
# Fetch each fund's history and write it to its own CSV file, one fund
# at a time.
for fetch_fund, csv_name in (
    (get_hx_data, 'hx_jijin_data'),
    (get_gf_data, 'gf_jijin_data'),
):
    save_data_to_csv(fetch_fund(), csv_name)
|