parent
ceb881e5ea
commit
0ec0a95f7b
@ -0,0 +1,67 @@
|
|||||||
|
#encoding:utf-8
|
||||||
|
import requests
|
||||||
|
import xlwt
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
def write(booksheet, name, data, n):
    """Write one labelled row into a worksheet.

    The cell at column 0 of row ``n`` receives ``name``; the items of
    ``data`` are written to columns 1, 2, ... of the same row, in order.

    Args:
        booksheet: Sheet object exposing ``write(row, col, value)``.
        name: Value stored in the first cell of the row.
        data: Iterable of values for the remaining cells. Any iterable is
            accepted, not only sequences that support ``len()``/indexing.
        n: Row index, passed unchanged to ``booksheet.write``.
    """
    booksheet.write(n, 0, name)
    # Column 0 holds the label, so the data cells start at column 1.
    for col, value in enumerate(data, start=1):
        booksheet.write(n, col, value)
|
||||||
|
|
||||||
|
def gettext(url, headers):
    """Fetch ``url`` and return the decoded HTML text.

    Args:
        url: Address to request.
        headers: HTTP headers sent with the request.

    Returns:
        The response body as text, or ``None`` when the request fails
        (timeout, connection error, ...) or the server answers with a
        status code other than 200.
    """
    try:
        response = requests.get(url=url, headers=headers, timeout=10)
    except requests.RequestException:
        # Report network failures the same way as a bad status code so
        # callers, which already handle ``None``, are not aborted by a
        # single unreachable page.
        return None

    if response.status_code != 200:
        return None

    # Decode using the encoding detected from the body content.
    response.encoding = response.apparent_encoding
    return response.text
|
||||||
|
|
||||||
|
def getdata(url, headers):
    """Download a stock page and extract its quote fields.

    The page is fetched with ``gettext`` and parsed with ``etree.HTML``;
    three XPath queries under the ``stock_info`` element are evaluated and
    their text results concatenated in query order.

    Args:
        url: Address of the stock detail page.
        headers: HTTP headers forwarded to ``gettext``.

    Returns:
        A list with the text nodes matched by the three queries, or
        ``None`` when the page could not be fetched.
    """
    newtext = gettext(url, headers)
    if newtext is None:
        return None

    tree = etree.HTML(newtext)
    queries = (
        '//*[@id="stock_info"]/div[1]/section/div[1]/em[1]/text()',
        '//*[@id="stock_info"]/div[1]/section/dl/dd/text()',
        '//*[@id="stock_info"]/div[1]/section/div[2]/dl/dd/text()',
    )
    fields = []
    for query in queries:
        fields += tree.xpath(query)
    return fields
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Index page whose links lead to the individual stock pages.
    url = 'https://hq.gucheng.com/gpdmylb.html'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    }
    htmltext = gettext(url, headers)

    if htmltext is None:
        print('打开失败')
        # Raise SystemExit directly; the ``exit`` helper is only present
        # when the ``site`` module has been loaded.
        raise SystemExit(-1)

    html = etree.HTML(htmltext)
    urls = html.xpath('//*[@id="stock_index_right"]/div[3]/section/a/@href')
    texts = html.xpath('//*[@id="stock_index_right"]/div[3]/section/a/text()')

    # Create the workbook and write the header row (row 0).
    workbook = xlwt.Workbook(encoding='utf-8')
    booksheet = workbook.add_sheet('Sheet 1', cell_overwrite_ok=True)
    write(booksheet,"股票",['当前','最高','最低','今开', '昨收', '涨停', '跌停', '换手率', '振幅', '成交量', '成交额', '内盘', '外盘', '量比', '涨跌幅', '市盈率(动)', '市净率', '流通市值', '总市值'],0)

    goodnum = 0
    badnum = 0
    # Process at most the first 100 links. Slicing and zip stop early when
    # the page lists fewer entries instead of raising IndexError.
    for stock_url, stock_name in zip(urls[:100], texts):
        data = getdata(stock_url, headers)
        if data is None:
            print(stock_name, 'bad')
            badnum += 1
            continue
        print(stock_name, 'good')
        goodnum += 1
        # Successful rows are packed consecutively below the header, so
        # the running success count is also the row index.
        write(booksheet, stock_name, data, goodnum)

    print('成功:', goodnum, '\n失败:' , badnum)
    workbook.save('./text.xls')
|
Loading…
Reference in new issue