Delete '2.py'

1 year ago · 4095b15b87
parent d6f92c8bae
commit 4095b15b87
1 changed file with 0 additions and 43 deletions
--- a/2.py
+++ b/2.py
@@ -1,43 +0,0 @@
-import requests
-from lxml import etree
-import csv
-
-def get_source(url, headers):
-    r = requests.get(url, headers=headers)
-    dom = etree.HTML(r.text)
-    url_list = dom.xpath('//div[@class="book-list clearfix"]/ul/li/a/@href')
-    return url_list
-
-
-def get_title(url, headers):
-
-    r = requests.get(url, headers=headers)
-    dom = etree.HTML(r.content.decode("utf-8"))
-    biaoti = dom.xpath('//h1/text()')[0]
-    zw = dom.xpath('//article/div[1]/div//text()')
-    return biaoti, zw
-
-
-def save_txt(biaoti, zw):
-    a=""
-    for i in zw:
-       a=a+i
-    with open(biaoti+".txt",'a+',encoding='utf-8') as f:
-        f.write(a)
-def save_csv(list):
-    headers=["网址","标题","正文长度"]
-    with open("苏东坡传.csv",'w+',encoding='utf-8') as f:
-        w = csv.writer(f)
-        w.writerow(headers)
-        w.writerows(list)
-
-url = "https://www.xingyueboke.com/sudongpozhuan/"
-headers = {"User-Agent":
-               "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0"}
-url_list = get_source(url, headers)
-list =[]
-for i in url_list:
-    biaoti, zw = get_title(i, headers)
-    save_txt(biaoti,zw)
-    list.append([i,biaoti,len(zw)])
-    save_csv(list)