ADD file via upload

master
pbn38v75u 4 years ago
parent 381937a13c
commit 2ffd1197b3

@ -0,0 +1,162 @@
import urllib
import urllib.request
import re
import random
import time
import xlwt
def set_style(font_name, font_height, bold=False):
    """Build the xlwt cell style used for every cell of the sheet.

    Args:
        font_name: Font family name, e.g. 'Times New Roman'.
        font_height: Value assigned unchanged to ``Font.height``.
        bold: Whether the font is bold.

    Returns:
        An ``xlwt.XFStyle`` whose font uses colour index 4 and whose four
        borders all use line-style code 6.
    """
    cell_font = xlwt.Font()
    cell_font.name = font_name
    cell_font.height = font_height
    cell_font.bold = bold
    cell_font.colour_index = 4

    # Same line-style code on every side of the cell.
    cell_borders = xlwt.Borders()
    for side in ("left", "right", "top", "bottom"):
        setattr(cell_borders, side, 6)

    cell_style = xlwt.XFStyle()
    cell_style.font = cell_font
    cell_style.borders = cell_borders
    return cell_style
# Create stockFile.txt, or empty it if it already exists, so that the
# append-mode writes later in the script start from a clean file.
with open("stockFile.txt", "w") as stockFile:
    pass
# Scrape the required content.
# Pool of User-Agent header values; one is picked at random for each request.
# Every entry must be followed by a comma: adjacent string literals without
# one are silently concatenated into a single bogus User-Agent. Long entries
# are wrapped in parentheses so the intended concatenation is explicit.
user_agent = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64)",
    'Mozilla/5.0 (Windows NT 6.3; WOW64)',
    'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070309 Firefox/2.0.0.3',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070803 Firefox/1.5.0.12',
    'Opera/9.27 (Windows NT 5.2; U; zh-cn)',
    'Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en) Opera 8.0',
    'Opera/8.0 (Macintosh; PPC Mac OS X; U; en)',
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
    ('Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) '
     'Version/5.1 Safari/534.50'),
    ('Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 '
     'Safari/534.50'),
    'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
    ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) '
     'Chrome/17.0.963.56 Safari/535.11'),
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
    ('Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, '
     'like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5'),
    # Duplicate of the Safari 5.1 / Mac OS X 10_6_8 entry above; kept so the
    # pool keeps the entries the original list spelled out.
    ('Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) '
     'Version/5.1 Safari/534.50'),
    'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
]
# stock_total: cell strings gathered from every page that was fetched;
# stock_page: the cell strings of the page currently being processed.
stock_total = []
# Compiled once instead of on every iteration. The first pattern grabs
# everything from the first '<tbody' to the last '</tbody>'; the second
# captures the text sitting between a '>' and the next '<' on the same line.
tbody_pattern = re.compile(r'<tbody[\s\S]*</tbody>')
cell_pattern = re.compile(r'>(.*?)<')
for page in range(1, 10):  # pages 1 through 9
    # e.g. http://quote.stockstar.com/stock/ranklist_a_3_1_1.html
    url = 'http://quote.stockstar.com/stock/ranklist_a_3_1_' + str(page) + '.html'
    agent = random.choice(user_agent)  # random User-Agent for this request
    print(agent)
    request = urllib.request.Request(url=url, headers={"User-Agent": agent})
    try:
        # The context manager closes the connection once the body is read.
        with urllib.request.urlopen(request) as response:
            content = response.read().decode('gbk')  # page body decoded as GBK
    except urllib.error.HTTPError as e:
        # Skip this page: there is no response to parse after a failure.
        print('page=', page, '', e.code)
        continue
    except urllib.error.URLError as e:
        print('page=', page, '', e.reason)
        continue
    print('get page', page)  # report the page that was fetched successfully
    body = tbody_pattern.findall(content)
    if not body:
        # No table body in the response; nothing to extract from this page.
        print('page=', page, '', 'no <tbody> found')
        continue
    stock_page = cell_pattern.findall(body[0])
    stock_total.extend(stock_page)
    # Pause 1-3 seconds between pages; adjust the range as needed.
    time.sleep(random.randrange(1, 4))
# Drop the empty strings captured between adjacent tags. stock_last is the
# final list of cell values used for output. A single filtering pass gives
# the same result as copying the list and calling remove('') once per blank,
# without rescanning the list for every blank.
stock_last = [data for data in stock_total if data != '']
# Output the results.
# Column titles. The individual names are kept because the per-row loop
# further down reassigns them with each row's values.
column_titles = ('序号', '代码', '简称', '最新价', '涨跌幅', '涨跌额', '5分钟涨幅')
nums, codes, simple, news, chgs, chge, fivechg = column_titles

# -------------------- write to the txt file --------------------
# Single tabs between the first four titles, double tabs from there on.
topStr = '\t'.join((nums, codes, simple, news)) + '\t\t' + '\t\t'.join((chgs, chge, fivechg))
stockFile = open("stockFile.txt", "a", encoding="utf-8")  # closed after the rows are written
stockFile.write(topStr + "\n")

# -------------------- write to the xls workbook --------------------
new_workbook = xlwt.Workbook()
new_sheet = new_workbook.add_sheet("SheetName_test")
for column, title in enumerate(column_titles):
    # Row 0 holds the bold header cells.
    new_sheet.write(0, column, title, set_style("Times New Roman", 220, True))

# -------------------- print to the console --------------------
# The console header uses wider tab gaps than the txt header.
topStr = '序号\t代码\t\t简称\t\t\t最新价\t\t\t涨跌幅\t\t\t涨跌额\t\t5分钟涨幅'
print(topStr)
# Each stock row consumes six consecutive values from stock_last: code,
# short name, latest price, change percent, change amount and 5-minute change.
# NOTE(review): an earlier comment here spoke of 13 columns per page row;
# confirm that six values per row matches the current page layout.
COLUMNS_PER_ROW = 6

# One style object is enough: every data cell uses the same settings.
row_style = set_style("Times New Roman", 220, False)

# Only walk complete rows so a trailing partial row cannot raise IndexError.
complete_length = len(stock_last) - len(stock_last) % COLUMNS_PER_ROW
if complete_length != len(stock_last):
    print('ignoring', len(stock_last) - complete_length, 'trailing value(s) that do not form a full row')

try:
    for i in range(0, complete_length, COLUMNS_PER_ROW):
        row_number = i // COLUMNS_PER_ROW
        # ---- sequence number (counts from 0) followed by the six fields ----
        nums = str(row_number)
        codes, simple, news, chgs, chge, fivechg = stock_last[i:i + COLUMNS_PER_ROW]
        fields = (nums, codes, simple, news, chgs, chge, fivechg)

        # -------------------- write to the txt file --------------------
        stockInfo = '\t'.join(fields)
        stockFile.write(stockInfo + "\n")

        # -------------------- write to the xls workbook --------------------
        # Sheet row 0 holds the header, so data rows start at row 1.
        for column, value in enumerate(fields):
            new_sheet.write(row_number + 1, column, value, row_style)

        # -------------------- print to the console --------------------
        # The console layout pads some columns with an extra tab or space.
        stockInfo = (nums + '\t' + codes + '\t' + simple + '\t\t' + news
                     + ' ' + '\t' + chgs + ' ' + '\t' + chge + ' ' + '\t' + fivechg)
        print(stockInfo)
finally:
    # Close the text file even if writing a row fails part-way through.
    stockFile.close()
new_workbook.save(r"newestStockInfo.xls")
input("按回车退出。")
Loading…
Cancel
Save