ADD file via upload

1 year ago · 396b4821a9
parent a84914d79b
commit 396b4821a9
1 changed files with 44 additions and 0 deletions
--- a/13.py
+++ b/13.py
@ -0,0 +1,44 @@
+import csv
+import os
+from urllib.parse import urljoin
+
+import requests
+from bs4 import BeautifulSoup
+
+URL = "https://www.ckxxbz.com/book/pianpianchongai/"
+h={"User-Agent":
+"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/122.0.0.0"}
+r=requests.get(url=URL,headers=h)
+text=r.text
+soup=BeautifulSoup(r.text,"lxml")
+re=soup.select("dl dd a")
+
+# 创建CSV文件并写入表头
+with open('偏偏宠爱/偏偏宠爱.csv', 'w', newline='', encoding='utf-8') as csvfile:
+    fieldnames = ['标题', '内容', '网址']
+    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+    writer.writeheader()
+
+    # 遍历链接并获取内容
+    for a in re:
+        img_url = a.get('href')
+        url1="https://www.ckxxbz.com"
+        full_url = url1 + img_url
+
+        response = requests.get(full_url, headers=h)
+        response.encoding = "utf-8"
+        soup = BeautifulSoup(response.text, "lxml")
+        p_tags = soup.find_all('p')
+        content = ""
+        for p in p_tags:
+            content += p.text + ""
+
+        # 将标题和内容写入txt文件
+        title = soup.find('h1').text
+        print(title + '\t' +full_url)
+        print(p_tags)
+
+
+        with open(f'偏偏宠爱/{title}.txt', 'w', encoding='utf-8') as f:
+            for i in content:
+                f.write(i)
+
+        # 将每章的网页地址、标题和正文长度写入到csv中
+        writer.writerow({'标题': title,'网址': full_url,})