"""Scrape historical net-asset-value (NAV) data for funds from Eastmoney.

The module downloads paged NAV history from the ``f10/lsjz`` endpoint,
exposes it as pandas DataFrames, and can persist an averaged series through
the ``JijinData`` Django model.
"""
import os

# NOTE(review): hard-coded, machine-specific working directory; importing this
# module fails on any host where the path does not exist.
os.chdir('D:/python/djangoProject/test_Bootstrap')
# Set the DJANGO_SETTINGS_MODULE environment variable (kept before the
# remaining imports, as in the original file).
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'test_Bootstrap.settings')

import requests
import re
from multiprocessing.pool import ThreadPool
import pandas as pd
from .models import JijinData
import random

# Endpoint serving a fund's paged NAV history (JSONP response).
_LSJZ_URL = 'https://api.fund.eastmoney.com/f10/lsjz'

# Captures (date, unit NAV) pairs straight out of the JSONP payload.
_NAV_PATTERN = re.compile(r'"FSRQ":"(.*?)","DWJZ":"(.*?)"')

# Seconds to wait for the server before giving up; without a timeout a
# stalled connection would block its worker thread indefinitely.
_REQUEST_TIMEOUT = 10

# History pages fetched for every fund (20 records per page).
_HISTORY_PAGES = range(1, 17)

# NOTE(review): these look like session cookies captured from a logged-in
# browser. Keeping them in source control exposes them; consider loading them
# from configuration instead. Values are reproduced unchanged.
_COOKIES = {
    'qgqp_b_id': '5c08ebc12f489b4f5ba9e76c2539ce0b',
    'emshistory': '%5B%2200005%22%2C%2200002%22%2C%2200002%E3%80%81%22%2C%2200001%22%5D',
    'HAList': 'ty-0-300411-%u91D1%u76FE%u80A1%u4EFD%2Cty-0-399366-%u80FD%u6E90%u91D1%u5C5E%2Cty-116-00002-%u4E2D%u7535%u63A7%u80A1%2Cty-116-03119-GX%u4E9A%u6D32%u534A%u5BFC%u4F53%2Cty-116-00007-%u667A%u5BCC%u8D44%u6E90%u6295%u8D44%2Cty-116-00001-%u957F%u548C%2Cty-116-00016-%u65B0%u9E3F%u57FA%u5730%u4EA7%2Cty-0-301075-%u591A%u745E%u533B%u836F%2Cty-90-BK1042-%u533B%u836F%u5546%u4E1A%2Cty-1-601607-%u4E0A%u6D77%u533B%u836F',
    'mtp': '1',
    'ct': 'Rc8QhLQwVpXSsLuf4UOMLbPMtE9gFAEkMTisAatrxh1rv-WFWG9EC-2zw_WFCJnVfsaViwejVO4ziLTZig1GUptw6NORwx36yfzDu9g9zstYkLdwIWvQ-9QqGL-F5C1GCS7xhUtoBrFAibnr_-HA078LL8tr7yWiGM9V3ZmooC8',
    'ut': 'FobyicMgeV54OLFNgnrRk4fT26HSX01NG2N55VZbVzZlqOMDJ-67DsHyCMk6G-yTMaqRhIAFuiYbVkK6Y-sYY8ghkJ3v9gyvUZyHWYpJnreP78yw4o-H8FNcTvUXmOj4KLsGaYuV1TAHltcdN0WDTy-YCOJ8OlzrX-MQbQc_CBvXfUYn10iBhXwvJY94XBkg4eOCJpu6Dok3ot9Xsr8flPIDz6f3KxJcIgnXZ7QpZKDMIvavpSunuMiR8Q5ezUD2y-JiBEgNkeoH_36wg0elojOfd5k61gTK',
    'pi': '6293426663250936%3Bm6293426663250936%3B%E4%BA%89%E5%88%86%E5%A4%BA%E7%A7%92%E7%9A%84%E9%A3%8E%E8%BE%B02%3B4qqIkcy3NvmegD2EnE%2BsOg2O1jjgPTjDxX3du3GmlWaCk8fr0sJ%2FmubqRXtUqqRoZWsMMmMvcfSg1wNNX8p93XE3fanPRZvbcs7bYEjCeUqg5RMcJtmbM9jEifMzwRAAmCipwh9KbqrYLdkLenTwJYqOaG9qmaZ2qDmn2Pa66eitUxhH2q0aU0kerTnJCi2qJnM8Y0Oc%3Bz%2Bzk7gxq8gdHwxSGucOoQSvBZ44Uaf7Um0f7bFnTUgwLnxWm2OMnlrG9SZX6ezbrsEoqVVrOk%2FVRGekqxUH%2BufKtmb89UVNnA0x62lxu6z84Y8dT0sXAWUELHmWZf8cnumRIL8kPvuAcHSXq5P6pTC3OaxbBeQ%3D%3D',
    'uidal': '6293426663250936%e4%ba%89%e5%88%86%e5%a4%ba%e7%a7%92%e7%9a%84%e9%a3%8e%e8%be%b02',
    'sid': '',
    'vtpst': '|',
    'websitepoptg_api_time': '1715218615434',
    'st_si': '46368340182479',
    'EmFundFavorVersion': '1686749115372',
    'EmFundFavorVersion2': '1686749115372',
    'st_asi': 'delete',
    'EMFUND0': 'null',
    'st_pvi': '35290886003252',
    'st_sp': '2023-12-17%2018%3A51%3A34',
    'st_inirUrl': 'https%3A%2F%2Fcn.bing.com%2F',
    'st_sn': '27',
    'st_psi': '20240509100744555-112200305283-5067673963',
}

# Browser User-Agent strings; one is picked at random per request. Written
# with implicit literal concatenation so that wrapping a long string cannot
# inject stray characters into the header value.
_USER_AGENTS = [
    (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:87.0) Gecko/20100101 "
        "Firefox/87.0"
    ),
    (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, "
        "like Gecko) Chrome/65.0.3314.0 Safari/537.36 SE 2.X MetaSr 1.0"
    ),
    (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0'
    ),
]


class JijinSpider():
    """Downloader for fund NAV history from the Eastmoney ``lsjz`` API."""

    def get_jijin_data(self, *args):
        """Fetch one page of a fund's historical NAV records.

        :param args: a single positional argument, a ``(fund_code, page)``
            pair (this shape lets the method be used directly with
            ``pool.map``).
        :return: list of ``[date_string, nav_float]`` rows in the reverse of
            the order they appear in the response.
        :raises requests.RequestException: on network failure or timeout.
        """
        fund_code, page = args[0][0], args[0][1]

        headers = {
            'Accept': '*/*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'Connection': 'keep-alive',
            'Referer': 'https://fundf10.eastmoney.com/',
            'User-Agent': random.choice(_USER_AGENTS),
        }
        params = {
            'callback': 'jQuery183019015669101010957_1715220464680',
            'fundCode': fund_code,
            'pageIndex': page,
            'pageSize': '20',
            'startDate': '',
            'endDate': '',
            '_': '1715220492762',
        }

        response = requests.get(
            _LSJZ_URL,
            params=params,
            cookies=_COOKIES,
            headers=headers,
            timeout=_REQUEST_TIMEOUT,
        )

        rows = []
        for date, nav in reversed(_NAV_PATTERN.findall(response.text)):
            if not nav:
                # An empty NAV field cannot be converted to float; skip the
                # record instead of aborting the whole scrape.
                continue
            rows.append([date, float(nav)])
        return rows

    def _collect_history(self, fundcode, value_column):
        """Fetch every history page of *fundcode* and return one DataFrame.

        Pages are requested concurrently on a thread pool (the work is
        network-bound) from the highest page index down to the first; the
        rows of each page are concatenated in that order.

        :param fundcode: fund code string sent to the API.
        :param value_column: name of the NAV column in the result.
        :return: ``pandas.DataFrame`` with columns ``['date', value_column]``.
        """
        tasks = [(fundcode, page) for page in reversed(_HISTORY_PAGES)]

        # The context manager tears the pool down even if a request raises.
        with ThreadPool(len(tasks)) as pool:
            pages = pool.map(self.get_jijin_data, tasks)

        rows = []
        for page_rows in pages:
            rows.extend(page_rows)
        return pd.DataFrame(rows, columns=['date', value_column])

    def get_hx_data(self):
        """Fetch NAV history for fund 008107 (Huashang medical-industry
        equity fund, per the original note).

        :return: ``pandas.DataFrame`` with columns ``['date', '收盘']``.
        """
        return self._collect_history('008107', '收盘')

    def get_gf_data(self):
        """Fetch NAV history for fund 010731 (GF innovative-healthcare
        two-year-holding hybrid fund, per the original note).

        :return: ``pandas.DataFrame`` with columns ``['date', '收盘']``.
        """
        return self._collect_history('010731', '收盘')


def get_tiantian_jijin_data():
    """Average the two tracked funds per date and store the result.

    Both histories are inner-joined on ``date`` (only dates present in both
    series survive), the two NAV values are averaged, and each
    ``(date, average)`` pair is inserted through ``JijinData`` unless a record
    for that date already exists.
    """
    jijin_spider = JijinSpider()
    gf_frame = jijin_spider.get_gf_data()
    hx_frame = jijin_spider.get_hx_data()

    # Inner join keeps the intersection of dates; pandas suffixes the
    # clashing value columns with _x / _y.
    merged_df = gf_frame.merge(hx_frame, on='date', how='inner')
    merged_df['stock_data'] = (merged_df['收盘_x'] + merged_df['收盘_y']) / 2

    records = merged_df[['date', 'stock_data']].values.tolist()
    for date, value in records:
        obj, created = JijinData.objects.get_or_create(
            date=date, defaults={'jijin_data': value}
        )
        if created:
            print(f"Added new record for date {date} with infections {value}")
        else:
            print(f"Record for date {date} already exists.")


# Scraping for the fund whose data is to be predicted.
def get_fund_data_by_code(code):
    """Fetch NAV history for an arbitrary fund code.

    :param code: fund code string sent to the API.
    :return: ``pandas.DataFrame`` with columns ``['date', 'fund_data']``.
    """
    spider = JijinSpider()
    return spider._collect_history(code, 'fund_data')