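"""Crawler for daily K-line (day-line) stock data from the EastMoney API.

For every code in the configured stock list it downloads the day-line history
and writes one GBK-encoded CSV file per stock. File locations are read from
const.json in the project root.
"""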
import sys
import json
import time
import random

import pandas as pd
from tqdm import tqdm

from download import download

# Query-string parameters for the EastMoney day-line API.
# The "id" value is a "%s" placeholder that is filled with the stock code at request time.
request_params = [
    {"key": "rtntype", "value": "5", "equals": True, "description": "", "enabled": True},
    {"key": "token", "value": "4f1862fc3b5e77c150a2b985b12db0fd", "equals": True, "description": "", "enabled": True},
    {"key": "cb", "value": "jQuery1124036208821942748104_1574562443091", "equals": True, "description": "", "enabled": True},
    {"key": "id", "value": "%s", "equals": True, "description": "", "enabled": True},
    {"key": "type", "value": "k", "equals": True, "description": "", "enabled": True},
    {"key": "authorityType", "value": "", "equals": True, "description": "", "enabled": True},
    {"key": "_", "value": "1574509941411", "equals": True, "description": "", "enabled": True},
]

# Build the request URI template; the "id" parameter keeps its "%s" placeholder.
URI = "http://pdfm.eastmoney.com/EM_UBG_PDTI_Fast/api/js?"
for param in request_params:
    URI += '%s=%s&' % (param["key"], param["value"])

class EastMoneyDayLine(object):

    def __init__(self, end_date='00000000'):
        # const.json lives in the project root, i.e. the script directory with
        # the "\craw_data\dayline" suffix stripped.
        const_path = sys.path[0].replace("\\craw_data\\dayline", "")
        with open(const_path + "\\const.json", "r", encoding="utf8") as f:
            self.consts = json.loads(f.read())

        self.stock_list_file = self.consts['stock_list_file']  # CSV file listing all stocks
        self.save_path_prefix = self.consts['day_line_file_prefix']['ease_money']  # directory for day-line CSV output
        self.end_date = end_date  # cutoff date; rows dated before it are skipped
        self.codes = self.get_codes()

        self.downloader = download.Downloader()  # HTTP downloader

    def craw_one(self, code):
        # Fetch one stock's day-line data, strip the JSONP wrapper, and save it to CSV.
        url = URI % self.process_code(code)
        content = self.handle_jsonp(self.downloader.requests_get(url, type="json").decode("utf8"))
        # print(content)
        data = json.loads(content)
        self.save_json_to_csv(data, code)

    def get_codes(self):
        try:
            # error_bad_lines was removed in pandas 2.x; use on_bad_lines="skip" there instead.
            df = pd.read_csv(self.stock_list_file, encoding="gbk", error_bad_lines=False)
        except Exception:
            print("ERROR Opening File: %s" % self.stock_list_file)
            return []  # empty list so the controller loop still works

        codes = []
        for index, row in df.iterrows():
            codes.append(row['股票代码'][1:])  # '股票代码' is the stock-code column; drop its first character
        return codes

    def save_json_to_csv(self, data, code):
        realdata = data['data']
        with open(self.save_path_prefix + str(code) + ".csv", "w", encoding="gbk") as f:
            # Header: date, open, close, high, low, volume, turnover, amplitude
            f.write(",".join(['日期', '开盘', '收盘', '最高', '最低', '成交量', '成交额', '振幅']))
            f.write("\n")
            for row in realdata[:-1]:
                # Skip rows dated before the cutoff ("2019-11-22" -> "20191122" for comparison).
                if "".join(row[:10].split("-")) < self.end_date:
                    continue
                f.write(row[:-2])
                f.write("\n")

    def process_code(self, code):
        # Build the API "id" value by appending '1' to the stock code.
        return '%s1' % code

    def handle_jsonp(self, response_content):
        # Strip the JSONP callback wrapper: keep everything from the first "{" up to the closing ")".
        return response_content[response_content.find("{"):-1]

    def check_is_downloaded(self, code):
        # A stock counts as downloaded if its CSV already exists and is readable.
        try:
            pd.read_csv(self.save_path_prefix + code + ".csv")
            return True
        except Exception:
            return False

    def controller(self):
        # Crawl every stock code, skipping those already saved, with a short random pause between requests.
        for code in tqdm(self.codes):
            if self.check_is_downloaded(code):
                continue
            self.craw_one(code)
            time.sleep(random.random() * 2)

if __name__ == "__main__":
    east_money_day_line = EastMoneyDayLine()
    east_money_day_line.controller()
    # east_money_day_line.craw_one(600175)  # test craw