From 23c04a3584cbf882d3e52b84e5785ec9d3eca0a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8E=9F=E6=B3=BD?= <2200729686@qq.com> Date: Sun, 27 Nov 2022 23:05:22 +0800 Subject: [PATCH] add --- src/README.md | 4 ++- 爬虫爬题.py | 92 ------------------------------------------------- 2 files changed, 3 insertions(+), 93 deletions(-) delete mode 100644 爬虫爬题.py diff --git a/src/README.md b/src/README.md index d39e842..02b5554 100644 --- a/src/README.md +++ b/src/README.md @@ -11,4 +11,6 @@ 开展测试: 对考试模块开展了单元测试 对添加考试科目进行了单元测试 -对在线考试系统开展了确认测试 \ No newline at end of file +对在线考试系统开展了确认测试 + + diff --git a/爬虫爬题.py b/爬虫爬题.py deleted file mode 100644 index abaf855..0000000 --- a/爬虫爬题.py +++ /dev/null @@ -1,92 +0,0 @@ -import requests -from bs4 import BeautifulSoup -from pandas import DataFrame -import xlwt -import time -import re - -headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55'} - -# 创建 workbook 即新建 excel 文件/工作簿 -workbook = xlwt.Workbook(encoding='utf-8', style_compression=1) -worksheet = workbook.add_sheet('题', cell_overwrite_ok=True) #创建工作表 -xuhao=1 -worksheet.write(0, 0, "题目") -worksheet.write(0, 1, "选项+答案") -for i in range(100,250): - url = "https://www.hyluz.cn/search.php?q=单选题:&page={}".format(i) - response = requests.get(url=url, headers=headers) - html = response.content.decode('utf-8') - text = response.text - # print(text) - dynasties = re.findall(r'

单选题:', text, re.DOTALL) - for j in dynasties: - # print(j) - # break - response = requests.get(url=j, headers=headers) - html = response.content.decode('utf-8') - text = response.text - # print(text) - # break - # print(text) - # print(html) - # soup = BeautifulSoup(html) - # print(soup) - # position = soup.select('#content >div>div.post > h1> em') - # print(position) - - - dynasties = re.findall(r'

(.*?)

', text, re.DOTALL) - print(dynasties) - x = re.findall(r'
(.*?)', text, re.DOTALL) - s = x[0] - while("

" in s): - s = s.replace("

", "
") - s = s.replace("
", ";\t") - - # if "A" in l[5]: - # w="A" - # elif "B" in l[5]: - # w="B" - # elif "C" in l[5]: - # w="C" - # else: - # w="D" - - # option = re.findall(r'




(.*?)


', text, re.DOTALL) - # print(option) - # answer=re.findall(r'


(.*?)', text, re.DOTALL) - # print(answer) - #写入数据 - worksheet.write(xuhao, 0, dynasties) - # worksheet.write(xuhao, 1, l[1]) - # worksheet.write(xuhao, 2, l[2]) - # worksheet.write(xuhao, 3, l[3]) - # worksheet.write(xuhao, 4, l[4]) - worksheet.write(xuhao, 1, s) - xuhao += 1 - # break - # break - time.sleep(1) -worksheet.col(0).width = 256*100 -worksheet.col(1).width = 256*100 -# worksheet.col(2).width = 256*20 -# worksheet.col(3).width = 256*20 -# worksheet.col(4).width = 256*20 -# worksheet.col(5).width = 256*20 -workbook.save(r'D:\爬虫爬题3.xls') - -# for i in name_list: -# url = "https://so.gushiwen.cn/mingjus/default.aspx?astr={}".format(i) -# response = requests.get(url=url, headers=headers) -# html = response.content.decode('utf-8') -# text = response.text -# dynasties = re.findall(r'
.*?(.*?)', text, re.DOTALL) -# for i in range(0, len(dynasties), 2): -# print(dynasties[i], dynasties[i + 1]) -# # 写入数据 -# worksheet.write(k, 0, dynasties[i]) -# worksheet.write(k, 1, dynasties[i+1]) -# k+=1 -