|
|
|
|
@ -0,0 +1,162 @@
|
|
|
|
|
import urllib
|
|
|
|
|
import urllib.request
|
|
|
|
|
import re
|
|
|
|
|
import random
|
|
|
|
|
import time
|
|
|
|
|
import xlwt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_style(font_name, font_height, bold=False):
    """Build an xlwt cell style with a custom font and a border on every edge.

    Args:
        font_name: Font face name, e.g. 'Times New Roman'.
        font_height: Value assigned to the font's ``height`` attribute.
        bold: Whether the font is bold (defaults to False).

    Returns:
        An ``xlwt.XFStyle`` whose font uses colour index 4 and whose four
        borders all use line style 6.
    """
    # Font: caller-supplied face/size/weight, fixed colour index.
    cell_font = xlwt.Font()
    cell_font.name = font_name
    cell_font.height = font_height
    cell_font.bold = bold
    cell_font.colour_index = 4

    # Borders: the same line style on all four edges.
    cell_borders = xlwt.Borders()
    cell_borders.left = cell_borders.right = 6
    cell_borders.top = cell_borders.bottom = 6

    cell_style = xlwt.XFStyle()
    cell_style.font = cell_font
    cell_style.borders = cell_borders
    return cell_style
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Create stockFile.txt, or empty it if it already exists, before this run
# appends to it. ``stockFile`` stays bound to the (now closed) handle.
with open("stockFile.txt", "w") as stockFile:
    pass
|
|
|
|
|
# Fetch the required content.
# Pool of User-Agent strings; one is picked at random for each request.
# Every entry ends with a comma, and entries split across lines are wrapped in
# parentheses, so adjacent string literals can no longer be silently
# concatenated into one oversized, invalid User-Agent.
user_agent = [
    "Mozilla/5.0 (Windows NT 10.0; WOW64)",
    'Mozilla/5.0 (Windows NT 6.3; WOW64)',
    'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070309 Firefox/2.0.0.3',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070803 Firefox/1.5.0.12',
    'Opera/9.27 (Windows NT 5.2; U; zh-cn)',
    'Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en) Opera 8.0',
    'Opera/8.0 (Macintosh; PPC Mac OS X; U; en)',
    'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
    ('Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) '
     'Version/5.1 Safari/534.50'),
    ('Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 '
     'Safari/534.50'),
    'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
    ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) '
     'Chrome/17.0.963.56 Safari/535.11'),
    'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
    ('Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, '
     'like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5'),
    # NOTE(review): same string as the Macintosh Safari entry above; kept as in the original list.
    ('Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) '
     'Version/5.1 Safari/534.50'),
    'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
]
|
|
|
|
|
# stock_total: cell texts collected from every page; stock_page: cell texts of one page.
stock_total = []

# Compiled once, outside the loop. Raw strings keep ``\s``/``\S`` as regex
# escapes instead of (invalid) Python string escapes.
tbody_pattern = re.compile(r'<tbody[\s\S]*</tbody>')
cell_pattern = re.compile(r'>(.*?)<')

for page in range(1, 10):  # pages 1-9
    if page > 1:
        # Pause a random 1-3 seconds between requests; adjust as needed.
        time.sleep(random.randrange(1, 4))

    # e.g. http://quote.stockstar.com/stock/ranklist_a_3_1_1.html
    url = 'http://quote.stockstar.com/stock/ranklist_a_3_1_' + str(page) + '.html'
    a = random.choice(user_agent)  # random User-Agent for this request
    print(a)
    request = urllib.request.Request(url=url, headers={"User-Agent": a})

    try:
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(request) as response:
            content = response.read().decode('gbk')  # page body decoded as GBK
    except urllib.error.HTTPError as e:  # must precede URLError (its base class)
        print('page=', page, '', e.code)
        continue  # nothing was fetched for this page; move on to the next one
    except urllib.error.URLError as e:
        print('page=', page, '', e.reason)
        continue

    print('get page', page)  # report the page that was fetched successfully

    # Greedy match: from the first <tbody to the last </tbody> in the page.
    table = tbody_pattern.search(content)
    if table is None:
        print('page=', page, '', 'no <tbody> found')
        continue

    # Every text run sitting between a '>' and the following '<'.
    stock_page = cell_pattern.findall(table.group(0))
    stock_total.extend(stock_page)
|
|
|
|
|
|
|
|
|
|
# Drop the empty strings captured between adjacent tags.
# stock_last is the final list of stock cell texts, in their original order.
# A single filtering pass replaces the copy plus repeated list.remove('') calls,
# each of which rescanned the list from the start.
stock_last = [cell for cell in stock_total if cell != '']
|
|
|
|
|
# Print the results
# ----------------------------------------
# Column captions: index, code, short name, latest price, change percentage,
# change amount, 5-minute change.
nums, codes, simple, news, chgs, chge, fivechg = (
    '序号', '代码', '简称', '最新价', '涨跌幅', '涨跌额', '5分钟涨幅',
)

# -------------------- write to the txt file --------------------
# Single tabs between the first four captions, double tabs from there on.
topStr = '\t'.join((nums, codes, simple, news)) + '\t\t' + '\t\t'.join((chgs, chge, fivechg))

# Left open on purpose: the data rows are appended through this same handle.
stockFile = open("stockFile.txt", "a", encoding="utf-8")
stockFile.write(topStr + "\n")

# -------------------- write to the xls workbook --------------------
new_workbook = xlwt.Workbook()
new_sheet = new_workbook.add_sheet("SheetName_test")
# Header cells go in row 0, one caption per column, in bold.
for column, caption in enumerate((nums, codes, simple, news, chgs, chge, fivechg)):
    new_sheet.write(0, column, caption, set_style("Times New Roman", 220, True))

# -------------------- print to the console --------------------
# The console header uses its own tab spacing, different from the txt file.
topStr = '序号\t代码\t\t简称\t\t\t最新价\t\t\t涨跌幅\t\t\t涨跌额\t\t5分钟涨幅'
print(topStr)
|
|
|
|
|
|
|
|
|
|
# Write one record per stock to the txt file, the xls sheet and the console.
# Each stock occupies 6 consecutive entries of stock_last:
# code, short name, latest price, change percentage, change amount, 5-minute change.
FIELDS_PER_STOCK = 6

# The row style is identical for every cell, so build it once instead of
# constructing a new XFStyle for each write.
row_style = set_style("Times New Roman", 220, False)

# Only iterate over complete records; a truncated trailing record would
# otherwise raise IndexError when its missing fields are read.
complete_length = len(stock_last) - len(stock_last) % FIELDS_PER_STOCK

try:
    for i in range(0, complete_length, FIELDS_PER_STOCK):
        # nums is the 0-based index of the stock, shown in the first column.
        nums = str(i // FIELDS_PER_STOCK)
        codes, simple, news, chgs, chge, fivechg = stock_last[i:i + FIELDS_PER_STOCK]
        record = (nums, codes, simple, news, chgs, chge, fivechg)

        # -------------------- write to the txt file --------------------
        stockFile.write('\t'.join(record) + '\n')

        # -------------------- write to the xls sheet --------------------
        # Row 0 holds the header, so data rows start at row 1.
        sheet_row = i // FIELDS_PER_STOCK + 1
        for column, value in enumerate(record):
            new_sheet.write(sheet_row, column, value, row_style)

        # -------------------- print to the console --------------------
        stockInfo = (nums + '\t' + codes + '\t' + simple + '\t\t' + news + '  ' + '\t'
                     + chgs + '  ' + '\t' + chge + '  ' + '\t' + fivechg)
        print(stockInfo)
finally:
    # Close the text file even if writing a record fails part-way through.
    stockFile.close()

new_workbook.save(r"newestStockInfo.xls")

# Keep the console window open until the user presses Enter.
input("按回车退出。")
|