You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

134 lines
3.7 KiB

import sys
from download import download
import json
import time
import random
import pandas as pd
from tqdm import tqdm
# Query-string parameters for the East Money K-line endpoint.
# Only the "key" and "value" fields are read when URI is built below; the
# "equals", "description" and "enabled" fields are carried along unchanged.
# NOTE(review): "token", "cb" and "_" look like values captured from a browser
# session -- confirm they do not need to be refreshed.
request_params = [
    {
        "key": "rtntype",
        "value": "5",
        "equals": True,
        "description": "",
        "enabled": True,
    },
    {
        "key": "token",
        "value": "4f1862fc3b5e77c150a2b985b12db0fd",
        "equals": True,
        "description": "",
        "enabled": True,
    },
    {
        "key": "cb",
        "value": "jQuery1124036208821942748104_1574562443091",
        "equals": True,
        "description": "",
        "enabled": True,
    },
    {
        # "%s" is kept as a placeholder: callers substitute the stock id with
        # ``URI % processed_code``.
        "key": "id",
        "value": "%s",
        "equals": True,
        "description": "",
        "enabled": True,
    },
    {
        "key": "type",
        "value": "k",
        "equals": True,
        "description": "",
        "enabled": True,
    },
    {
        "key": "authorityType",
        "value": "",
        "equals": True,
        "description": "",
        "enabled": True,
    },
    {
        "key": "_",
        "value": "1574509941411",
        "equals": True,
        "description": "",
        "enabled": True,
    },
]

# Request URL template. Every pair is appended as "key=value&", so the final
# string ends with a trailing "&" and contains exactly one "%s" (the stock id).
URI = "http://pdfm.eastmoney.com/EM_UBG_PDTI_Fast/api/js?"
for param in request_params:
    URI += '%s=%s&' % (param["key"], param["value"])
class EastMoneyDayLine(object):
    """Download daily K-line data from East Money and save one CSV per stock.

    Settings are read from ``const.json``; the stock codes to crawl come from
    the CSV file named by its ``stock_list_file`` entry.
    """

    def __init__(self, end_date='00000000'):
        """Load the configuration, the stock codes and create the downloader.

        Args:
            end_date: Date string in ``YYYYMMDD`` form. Rows whose date
                compares lower than this value are not written to the output
                CSV. The default ``'00000000'`` keeps every row.
        """
        # const.json is looked up relative to sys.path[0] with the
        # "\craw_data\dayline" suffix removed (Windows-style separators).
        const_path = sys.path[0].replace("\\craw_data\\dayline", "")
        with open(const_path + "\\const.json", "r", encoding="utf8") as f:
            self.consts = json.load(f)
        # CSV file listing all stocks.
        self.stock_list_file = self.consts['stock_list_file']
        # Path prefix (directory) under which the daily-line CSVs are stored.
        self.save_path_prefix = self.consts['day_line_file_prefix']['ease_money']
        # Cutoff date used when filtering rows in save_json_to_csv.
        self.end_date = end_date
        self.codes = self.get_codes()
        # Downloader used to fetch the remote data.
        self.downloader = download.Downloader()

    def craw_one(self, code):
        """Fetch the K-line data for one stock code and save it as CSV."""
        url = URI % self.process_code(code)
        raw = self.downloader.requests_get(url, type="json")
        content = self.handle_jsonp(raw.decode("utf8"))
        data = json.loads(content)
        self.save_json_to_csv(data, code)

    def _read_stock_list(self):
        """Read the stock list CSV (GBK encoded), skipping malformed lines."""
        try:
            return pd.read_csv(
                self.stock_list_file, encoding="gbk", on_bad_lines="skip"
            )
        except TypeError:
            # Older pandas releases do not know ``on_bad_lines``; fall back to
            # the keyword they understand.
            return pd.read_csv(
                self.stock_list_file, encoding="gbk", error_bad_lines=False
            )

    def get_codes(self):
        """Return the stock codes listed in ``self.stock_list_file``.

        The first character of every value in the ``股票代码`` column is
        dropped (presumably a leading marker character -- confirm against the
        stock list file).

        Returns:
            A list of code strings, or an empty list when the file cannot be
            read (an error line is printed in that case).
        """
        try:
            df = self._read_stock_list()
        except (OSError, ValueError):
            print("ERROR Opening File: %s" % self.stock_list_file)
            # An empty (falsy) list keeps controller() iterable.
            return []
        return [value[1:] for value in df['股票代码']]

    def save_json_to_csv(self, data, code):
        """Write the rows of ``data['data']`` to ``<prefix><code>.csv`` (GBK).

        Args:
            data: Decoded response; ``data['data']`` is a sequence of strings
                whose first 10 characters are a ``YYYY-MM-DD`` date.
            code: Stock code used to build the output file name.
        """
        rows = data['data']
        path = self.save_path_prefix + str(code) + ".csv"
        with open(path, "w", encoding="gbk") as f:
            f.write(",".join(['日期', '开盘', '收盘', '最高', '最低', '成交量', '成交额', '振幅']))
            f.write("\n")
            # The last entry is never written.
            # NOTE(review): presumably the still-open current day -- confirm.
            for row in rows[:-1]:
                # Compare the date as YYYYMMDD text against the cutoff.
                if row[:10].replace("-", "") < self.end_date:
                    continue
                # The final two characters of each row are dropped.
                f.write(row[:-2])
                f.write("\n")

    def process_code(self, code):
        """Return ``code`` with ``'1'`` appended, as used in the request URL."""
        return '%s1' % code

    def handle_jsonp(self, response_content):
        """Extract the JSON object text from a JSONP response.

        Returns the text from the first ``{`` through the last ``}``, so a
        trailing ``)``, ``);`` or newline after the payload is ignored.
        Returns an empty string when no object is found.
        """
        start = response_content.find("{")
        end = response_content.rfind("}")
        if start == -1 or end < start:
            return ""
        return response_content[start:end + 1]

    def check_is_downloaded(self, code):
        """Return True when the CSV for ``code`` exists and can be parsed.

        The file is read with the GBK codec because save_json_to_csv writes it
        with that encoding.
        """
        try:
            pd.read_csv(self.save_path_prefix + str(code) + ".csv", encoding="gbk")
        except (OSError, ValueError):
            # Missing file (OSError) or empty/unparsable/undecodable content
            # (pandas parser errors and UnicodeDecodeError are ValueErrors).
            return False
        return True

    def controller(self):
        """Crawl every code in ``self.codes``, skipping ones already saved."""
        for code in tqdm(self.codes):
            if self.check_is_downloaded(code):
                continue
            self.craw_one(code)
            # Random pause of up to two seconds between requests.
            time.sleep(random.random() * 2)
if __name__ == "__main__":
    # When run as a script, crawl every stock from the configured stock list.
    east_money_day_line = EastMoneyDayLine()
    east_money_day_line.controller()
    # east_money_day_line.craw_one(600175) # test craw