"""Scrape novels from biqugen.net: search by keyword, list chapters, save text.

Third-party dependencies (must be installed): requests, beautifulsoup4, parsel.
"""

# Standard library (no installation needed).
import os
import re
from urllib.parse import quote, urljoin

# Third-party modules (need to be installed).
import parsel
import requests
from bs4 import BeautifulSoup

# Root of the site; every relative link found on a page is resolved against it.
BASE_URL = 'https://www.biqugen.net/'

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# NOTE(review): in the copy of this file that was reviewed, the markup inside
# this pattern had been stripped (only the capture group surrounded by blank
# lines survived). '<h1>...</h1>' is a reconstruction -- confirm it against the
# real page source of a novel index page.
NAME_PATTERN = re.compile(r'<h1>(.*?)</h1>')

# Characters that cannot be used in file or directory names on common systems.
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def _safe_filename(text):
    """Return *text* with characters that are illegal in file names replaced."""
    cleaned = _INVALID_FILENAME_CHARS.sub('_', text)
    return cleaned or '_'


def get_response(html_url):
    """Send a GET request to *html_url* with a browser-like User-Agent.

    Args:
        html_url: Address of the page to download.

    Returns:
        The ``requests.Response`` object for the request.

    Raises:
        requests.RequestException: On connection errors or when the server
            does not answer within ``REQUEST_TIMEOUT`` seconds.
    """
    # Pretend to be a regular browser; the User-Agent carries its identity.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0'
        ),
    }
    return requests.get(url=html_url, headers=headers, timeout=REQUEST_TIMEOUT)


def get_list_url(html_url):
    """Fetch a novel's index page and extract its name and chapter links.

    Args:
        html_url: Address of the novel's chapter-list page.

    Returns:
        A ``(name, url_list)`` tuple: the novel name matched by
        ``NAME_PATTERN`` and the ``href`` of the first link inside every
        ``<dd>`` element, in page order.

    Raises:
        IndexError: If the novel name cannot be found in the page.
    """
    html_data = get_response(html_url).text

    # Novel name: first match of the pattern, like ``re.findall(...)[0]``.
    match = NAME_PATTERN.search(html_data)
    if match is None:
        raise IndexError(f'novel name not found in page: {html_url}')
    name = match.group(1)

    # Chapter links: one per <dd>; entries without a usable link are skipped.
    soup = BeautifulSoup(html_data, 'html.parser')
    url_list = []
    for dd in soup.find_all('dd'):
        anchor = dd.find('a')
        if anchor is not None and anchor.get('href'):
            url_list.append(anchor['href'])
    return name, url_list


def get_content(html_url):
    """Fetch one chapter page and extract its title and body text.

    Args:
        html_url: Address of the chapter page.

    Returns:
        A ``(title, content)`` tuple. A placeholder string is returned for
        whichever part is missing from the page.
    """
    html_data = get_response(html_url).text
    soup = BeautifulSoup(html_data, 'html.parser')

    # Chapter title lives in <h1 class="wap_none">.
    title_tag = soup.find('h1', class_='wap_none')
    title = title_tag.text.strip() if title_tag else "未找到标题"

    # Chapter body lives in <div id="chaptercontent">.
    content_tag = soup.find('div', id='chaptercontent')
    if content_tag:
        # NOTE(review): the first argument of replace() was stripped in the
        # reviewed copy of this file (only a line break was left); '<br>' is
        # a reconstruction -- confirm against the original source.
        content = content_tag.text.strip().replace('<br>', '\n')
    else:
        content = "未找到内容"
    return title, content


def save(name, title, content):
    """Append one chapter to ``<name>/<title>.txt``, creating the folder.

    Args:
        name: Novel name, used as the directory name.
        title: Chapter title, used as the file name and written as first line.
        content: Chapter text written after the title.
    """
    # Build the path portably; a hard-coded backslash only works on Windows.
    folder = _safe_filename(name)
    os.makedirs(folder, exist_ok=True)
    path = os.path.join(folder, _safe_filename(title) + '.txt')

    # Append mode: saving the same chapter again adds to the existing file.
    with open(path, mode='a', encoding='utf-8') as f:
        f.write(f'{title}\n{content}\n')
    print(title, '已经保存')


def get_novel_id(html_url):
    """Return the ``href`` of every link found inside ``.blocks`` elements.

    Args:
        html_url: Address of the page listing novels.

    Returns:
        A list of href strings in page order.
    """
    novel_data = get_response(html_url=html_url).text
    selector = parsel.Selector(novel_data)
    return selector.css('.blocks a::attr(href)').getall()


def search(word):
    """Search the site for *word* and print name, id and writer per result.

    Args:
        word: Search keyword typed by the user.
    """
    # Percent-encode the keyword so '&', '#', spaces, etc. cannot break the URL.
    search_url = f'{BASE_URL}modules/article/search.php?searchkey={quote(word)}'
    search_data = get_response(html_url=search_url).text
    selector = parsel.Selector(search_data)

    for row in selector.css('.grid tr'):
        href = row.css('td.odd a::attr(href)').get()
        if href is None:
            # Rows without a novel link (e.g. a table header) carry no result.
            continue
        name = row.css('td.odd a::text').get()
        # The id is the second-to-last path segment of the link.
        parts = href.split('/')
        novel_id = parts[-2] if len(parts) >= 2 else href
        writer = row.css('td.odd:nth-child(2)::text').get()
        print(name, novel_id, writer)


def main(home_url):
    """Download every chapter of every novel linked from *home_url*.

    Args:
        home_url: Address of a page whose ``.blocks`` links point to novels.
    """
    for novel_id in get_novel_id(html_url=home_url):
        # urljoin copes with absolute links and with a leading '/'.
        novel_url = urljoin(BASE_URL, novel_id)
        name, url_list = get_list_url(html_url=novel_url)
        print(name, url_list)
        for url in url_list:
            index_url = urljoin(BASE_URL, url)
            title, content = get_content(html_url=index_url)
            save(name, title, content)


if __name__ == '__main__':
    # The script entry point only runs a search; main() is not called here.
    word = input('请输入你搜索的小说名:')
    search(word)