wpf_branch
原泽 3 years ago
parent 21f2d811d0
commit 23c04a3584

@ -11,4 +11,6 @@
开展测试:
对考试模块开展了单元测试
对添加考试科目进行了单元测试
对在线考试系统开展了确认测试
对在线考试系统开展了确认测试

@ -1,92 +0,0 @@
import requests
from bs4 import BeautifulSoup
from pandas import DataFrame
import xlwt
import time
import re
# Browser-like User-Agent sent with every request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55'}

# Search-result pages to crawl: range(FIRST_PAGE, LAST_PAGE), i.e. 100..249.
FIRST_PAGE = 100
LAST_PAGE = 250
SEARCH_URL = "https://www.hyluz.cn/search.php?q=单选题:&page={}"
OUTPUT_PATH = r'D:\爬虫爬题3.xls'
# The original passed '' here; xlwt validates sheet names and rejects an
# empty one, so a non-empty placeholder is used instead.
SHEET_NAME = 'Sheet1'
REQUEST_TIMEOUT = 10  # seconds; without it a stalled server hangs the crawl
PAGE_DELAY = 1  # seconds to pause between search-result pages

# Compiled once instead of on every loop iteration.
QUESTION_LINK_RE = re.compile(
    r'<h2><a href="(.*?)" target="_blank"><strong><mark>单选题:</mark></strong>',
    re.DOTALL,
)
TITLE_RE = re.compile(r'<h1>(.*?)</h1>', re.DOTALL)
ANSWER_RE = re.compile(r'<br>(.*?)</div>', re.DOTALL)
BR_RUN_RE = re.compile(r'(?:<br>)+')


def extract_question_links(listing_html):
    """Return the href of every single-choice question link in a search page."""
    return QUESTION_LINK_RE.findall(listing_html)


def extract_title(question_html):
    """Return the text of all ``<h1>`` elements of a question page, concatenated.

    Returns an empty string when the page has no ``<h1>``.
    """
    return "".join(TITLE_RE.findall(question_html))


def extract_answer(question_html):
    """Return the options/answer text of a question page, or None if absent.

    The text is whatever lies between the first ``<br>`` and the following
    ``</div>``.  Every run of one or more ``<br>`` tags inside it is replaced
    by a single ``";\\t"`` separator (same result as collapsing repeated
    ``<br>`` first and then replacing each remaining one).
    """
    match = ANSWER_RE.search(question_html)
    if match is None:
        # The original indexed [0] unconditionally and crashed here.
        return None
    return BR_RUN_RE.sub(";\t", match.group(1))


def fetch_text(session, url):
    """GET *url* and return the decoded body; raises requests.RequestException."""
    response = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def scrape_listing_page(session, worksheet, page, next_row):
    """Write every question found on search page *page* into *worksheet*.

    Rows are written starting at *next_row*; the first unused row index is
    returned.  Pages or questions that cannot be fetched or parsed are
    reported and skipped so that one bad page does not abort the crawl.
    """
    try:
        listing_html = fetch_text(session, SEARCH_URL.format(page))
    except requests.RequestException as exc:
        print("skipping search page {}: {}".format(page, exc))
        return next_row

    for link in extract_question_links(listing_html):
        try:
            question_html = fetch_text(session, link)
        except requests.RequestException as exc:
            print("skipping question {}: {}".format(link, exc))
            continue

        answer = extract_answer(question_html)
        if answer is None:
            print("no answer block found in {}".format(link))
            continue

        title = extract_title(question_html)
        print(title)
        # Write plain strings; the original handed xlwt the findall() list.
        worksheet.write(next_row, 0, title)
        worksheet.write(next_row, 1, answer)
        next_row += 1
    return next_row


def main():
    """Crawl the configured search pages and save the questions to an .xls file."""
    # Create the workbook (the Excel file) and its single worksheet.
    workbook = xlwt.Workbook(encoding='utf-8', style_compression=1)
    worksheet = workbook.add_sheet(SHEET_NAME, cell_overwrite_ok=True)
    worksheet.write(0, 0, "题目")
    worksheet.write(0, 1, "选项+答案")
    worksheet.col(0).width = 256*100
    worksheet.col(1).width = 256*100

    next_row = 1
    try:
        with requests.Session() as session:
            for page in range(FIRST_PAGE, LAST_PAGE):
                next_row = scrape_listing_page(session, worksheet, page, next_row)
                time.sleep(PAGE_DELAY)
    finally:
        # Save whatever was collected even if the crawl is interrupted.
        workbook.save(OUTPUT_PATH)


if __name__ == "__main__":
    main()
Loading…
Cancel
Save