diff --git a/爬虫爬题.py b/爬虫爬题.py new file mode 100644 index 0000000..abaf855 --- /dev/null +++ b/爬虫爬题.py @@ -0,0 +1,92 @@ +import requests +from bs4 import BeautifulSoup +from pandas import DataFrame +import xlwt +import time +import re + +headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55'} + +# 创建 workbook 即新建 excel 文件/工作簿 +workbook = xlwt.Workbook(encoding='utf-8', style_compression=1) +worksheet = workbook.add_sheet('题', cell_overwrite_ok=True) #创建工作表 +xuhao=1 +worksheet.write(0, 0, "题目") +worksheet.write(0, 1, "选项+答案") +for i in range(100,250): + url = "https://www.hyluz.cn/search.php?q=单选题:&page={}".format(i) + response = requests.get(url=url, headers=headers) + html = response.content.decode('utf-8') + text = response.text + # print(text) + dynasties = re.findall(r'