"""Download the chapter index and chapter texts of one novel.

Running the script:

1. fetches the index page at ``URL`` and collects every ``dl dd a`` link;
2. writes each chapter title and absolute link to ``偏偏宠爱/偏偏宠爱.csv``;
3. fetches every chapter page and stores the text of all of its ``<p>`` tags
   in ``偏偏宠爱/<chapter title>.txt``.
"""

import csv
import os
import re
import sys
from typing import List, Tuple
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

URL = "https://www.ckxxbz.com/book/pianpianchongai/"
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"
    )
}

# Directory that receives the CSV index and one text file per chapter.
OUTPUT_DIR = "偏偏宠爱"
CSV_FIELDS = ["章节标题", "章节链接"]

# Seconds to wait for the server; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 15

# Characters that are path separators or are rejected in file names on
# common file systems (notably Windows), plus ASCII control characters.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def fetch_soup(session: requests.Session, url: str) -> BeautifulSoup:
    """Fetch *url* and return its body parsed with the ``lxml`` parser.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status (so error pages are never parsed as content).
    """
    response = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, "lxml")


def safe_filename(title: str) -> str:
    """Return *title* turned into a name that is safe to use as a file name."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", title).strip().rstrip(". ")
    # A title made only of whitespace/dots would otherwise yield ".txt".
    return cleaned or "untitled"


def collect_chapters(index_soup: BeautifulSoup) -> List[Tuple[str, str]]:
    """Return ``(title, absolute_url)`` for every chapter link on the index."""
    chapters = []
    for link in index_soup.select("dl dd a"):
        href = link.get("href")
        if not href:
            # An anchor without a target cannot be downloaded.
            continue
        # urljoin resolves root-relative, relative and absolute hrefs alike,
        # unlike plain string concatenation with the host name.
        chapters.append((link.text, urljoin(URL, href)))
    return chapters


def write_index(chapters: List[Tuple[str, str]]) -> None:
    """Write the chapter titles and links to the CSV index, echoing titles."""
    csv_path = os.path.join(OUTPUT_DIR, "偏偏宠爱.csv")
    with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=CSV_FIELDS)
        writer.writeheader()
        for title, chapter_url in chapters:
            writer.writerow({"章节标题": title, "章节链接": chapter_url})
            print(title)


def save_chapter(session: requests.Session, title: str, chapter_url: str) -> None:
    """Download one chapter and save the text of its ``<p>`` tags."""
    page = fetch_soup(session, chapter_url)
    # One paragraph per line; the previous code appended an empty string,
    # which ran all paragraphs together.
    content = "\n".join(p.get_text() for p in page.find_all("p"))
    chapter_path = os.path.join(OUTPUT_DIR, f"{safe_filename(title)}.txt")
    with open(chapter_path, "w", encoding="utf-8") as f:
        f.write(content)


def main() -> None:
    """Scrape the index, write the CSV, then download every chapter."""
    # One session reuses the underlying connection across all requests.
    with requests.Session() as session:
        chapters = collect_chapters(fetch_soup(session, URL))

        # Create the output directory (no error if it already exists).
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        write_index(chapters)

        for title, chapter_url in chapters:
            try:
                save_chapter(session, title, chapter_url)
            except requests.RequestException as exc:
                # One unreachable chapter should not abort the whole run.
                print(
                    f"skipped {title!r} ({chapter_url}): {exc}",
                    file=sys.stderr,
                )


if __name__ == "__main__":
    main()