You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
41 lines
1.5 KiB
import csv
import os
import re

import requests
from bs4 import BeautifulSoup
|
# Table-of-contents page of the novel "偏偏宠爱" on ckxxbz.com.
URL = "https://www.ckxxbz.com/book/pianpianchongai/"

# Browser-like User-Agent so the site serves the page instead of blocking
# the default python-requests agent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"
}

# Fetch the index page; a timeout prevents the script from hanging forever
# on a stalled connection (requests has no default timeout).
response = requests.get(URL, headers=headers, timeout=30)
response.encoding = 'utf-8'

soup = BeautifulSoup(response.text, 'lxml')
# Chapter links live in <dl><dd><a href=...> elements on the index page.
chapter_list = soup.select('dl dd a')

# Create the output directory "偏偏宠爱"; exist_ok avoids the
# check-then-create race of os.path.exists + os.mkdir.
os.makedirs("偏偏宠爱", exist_ok=True)
|
|
|
|
# Write an index CSV that maps each chapter title to its absolute URL,
# echoing every title to stdout as it is recorded.
with open('偏偏宠爱/偏偏宠爱.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['章节标题', '章节链接'])
    writer.writeheader()
    for link in chapter_list:
        title = link.text
        url = 'https://www.ckxxbz.com' + link['href']
        writer.writerow({'章节标题': title, '章节链接': url})
        print(title)
|
|
|
|
# Download every chapter page and save its text into the "偏偏宠爱" directory,
# one .txt file per chapter.
for chapter in chapter_list:
    chapter_url = 'https://www.ckxxbz.com' + chapter['href']
    # Timeout so one dead chapter request cannot stall the whole run.
    response = requests.get(chapter_url, headers=headers, timeout=30)
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "lxml")
    # Chapter body is plain <p> paragraphs. Join them with newlines:
    # the original appended `p.text + ""`, which both concatenated
    # quadratically and ran all paragraphs together with no separator.
    p_tags = soup.find_all('p')
    content = "\n".join(p.text for p in p_tags)
    # Chapter titles come straight from the site and may contain characters
    # that are illegal in filenames (e.g. / \ : * ? " < > |); replace them
    # so open() cannot fail.
    safe_title = re.sub(r'[\\/:*?"<>|]', '_', chapter.text)
    # Save the chapter content as a text file in the "偏偏宠爱" directory.
    with open(f'偏偏宠爱/{safe_title}.txt', 'w', encoding='utf-8') as f:
        f.write(content)