|
|
|
|
import urllib
|
|
|
|
|
import json
|
|
|
|
|
import csv
|
|
|
|
|
from tqdm import tqdm
|
|
|
|
|
|
|
|
|
|
# Downloader
|
|
|
|
|
class Downloader(object):
    """Fetch the body of a single URL and return it as text."""

    def __init__(self, url):
        """Remember the address to fetch.

        Args:
            url: Address handed unchanged to ``urllib.request.urlopen``.
        """
        self.url = url

    def download(self):
        """Download ``self.url`` and return the body decoded as UTF-8.

        Returns:
            The response body as ``str``.

        Raises:
            urllib.error.URLError: If the URL cannot be opened.
            UnicodeDecodeError: If the body is not valid UTF-8.
        """
        # A bare ``import urllib`` does not load the ``request`` submodule,
        # so it is imported explicitly here to keep this method
        # self-sufficient.
        import urllib.request

        # The context manager closes the response once the body is read.
        with urllib.request.urlopen(self.url) as response:
            raw_body = response.read()
        return raw_body.decode("utf-8")
|
|
|
|
|
|
|
|
|
|
# Scheduler (controller)
|
|
|
|
|
class Controller(object):
    """Fetch the stock list JSON and hand it to a ``Saver``."""

    # Query URL for the ranking service; ``count`` is the number of rows
    # requested in a single page.
    STOCK_LIST_URL = "http://quotes.money.163.com/hs/service/diyrank.php?host=http%3A%2F%2Fquotes.money.163.com%2Fhs%2Fservice%2Fdiyrank.php&page=0&query=STYPE%3AEQA&fields=NO%2CSYMBOL%2CNAME%2CPRICE%2CPERCENT%2CUPDOWN%2CFIVE_MINUTE%2COPEN%2CYESTCLOSE%2CHIGH%2CLOW%2CVOLUME%2CTURNOVER%2CHS%2CLB%2CWB%2CZF%2CPE%2CMCAP%2CTCAP%2CMFSUM%2CMFRATIO.MFRATIO2%2CMFRATIO.MFRATIO10%2CSNAME%2CCODE%2CANNOUNMT%2CUVSNEWS&sort=PERCENT&order=desc&count=3607&type=query"

    def __init__(self):
        """Create a controller with no collaborators attached yet."""
        self.downloader = None
        self.parser = None
        self.saver = None

    def get_data(self):
        """Download the stock list, parse it as JSON and save it.

        The parsed payload is passed to a new ``Saver``, which is stored on
        ``self.saver`` and asked to write the data out.

        Raises:
            urllib.error.URLError: If the service cannot be reached.
            json.JSONDecodeError: If the response is not valid JSON.
        """
        # A bare ``import urllib`` does not load the ``request`` submodule,
        # so it is imported explicitly here.
        import urllib.request

        # The context manager closes the response once the body is read.
        with urllib.request.urlopen(self.STOCK_LIST_URL) as response:
            payload = response.read()

        # NOTE: do not decode the payload with "unicode_escape" before
        # parsing; per the original author that can break JSON parsing.
        # NOTE (original author): with a count above 2995 parsing failed
        # because stock number 2996 had nested quotes in its announcement.
        data = json.loads(payload)

        self.saver = Saver(data=data)
        self.saver.save()
|
|
|
|
|
|
|
|
|
|
class Saver(object):
    """Write the fetched stock list to a CSV file."""

    def __init__(self, data):
        """Store the payload and resolve the output path from ``const.json``.

        Args:
            data: Parsed payload; ``save`` reads its ``'list'`` entry.

        Raises:
            OSError: If ``const.json`` cannot be opened.
            KeyError: If the expected path entry is missing from the config.
        """
        # ``sys`` is not imported in the visible part of this file, so it is
        # imported locally to avoid a NameError.
        import sys

        self.data = data

        # Strip the script's sub-directory from its own location to obtain
        # the directory that holds const.json.
        const_path = sys.path[0].replace("\\craw_data\\stock_list", "")
        with open(const_path + '\\const.json', 'r', encoding='utf8') as config_file:
            self.consts = json.load(config_file)

        # Output location (CSV content is written to this path).
        self.file_path = self.consts['path']['stock_list']['netease'] + "\\stock_list.json"

    @staticmethod
    def _format_code(raw_code):
        """Return the stock code without its one-character prefix, "`"-prefixed.

        Seven-character codes lose their first character (the source site's
        0/1 prefix); other lengths are kept as-is. The leading backtick is
        there so the code displays correctly in Excel.
        """
        code = str(raw_code)
        if len(code) == 7:
            code = code[1:7]
        return "`" + code

    def save(self):
        """Write the header and one row per entry of ``self.data['list']``.

        The file at ``self.file_path`` is truncated and rewritten. It is
        opened once and closed by the context manager; the text encoding is
        left at the platform default.
        """
        file_header = ["代码", "名称", '流通市值', '每股收益', '总市值']

        with open(self.file_path, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(file_header)

            # tqdm only adds a progress bar around the iteration.
            for item in tqdm(self.data['list']):
                writer.writerow([
                    self._format_code(item['CODE']),
                    item['NAME'],
                    item['MCAP'],
                    item['MFSUM'],
                    item['TCAP'],
                ])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the controller, then fetch and save the
    # stock list.
    controller = Controller()
    controller.get_data()
|