Delete '2.py'

1 year ago · 88a015d534
parent cefbd68281
commit 88a015d534
1 changed files with 0 additions and 63 deletions
--- a/2.py
+++ b/2.py
@@ -1,63 +0,0 @@
 import requests
 import os
 import csv
 from lxml import etree
 # Default page fetched by get_source(); its HTML is later parsed for chapter links.
 start_url="https://www.xingyueboke.com/sudongpozhuan/"
 # HTTP headers sent with every request; carries a desktop browser User-Agent string.
 h={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0"}
 def get_source(url=start_url, timeout=10):
     """Fetch *url* and return its body decoded as UTF-8.

     Args:
         url: Address to request; defaults to the module-level ``start_url``.
         timeout: Seconds to wait for the server before giving up. Without
             a timeout ``requests.get`` may block indefinitely.

     Returns:
         The decoded page text, or ``""`` when the request cannot be
         completed or the server answers with a status other than 200.
     """
     try:
         response = requests.get(url, headers=h, timeout=timeout)
     except requests.RequestException as exc:
         # Connection errors and timeouts are reported the same way as a bad
         # status code, so callers only need to check for an empty string.
         print("请求失败,错误信息为{}".format(exc))
         return ""
     if response.status_code != 200:
         print("请求失败,状态码为{}".format(response.status_code))
         return ""
     return response.content.decode('utf-8')
 # HTML of the start page, fetched when the script runs; "" if the response status was not 200.
 source = get_source()
 def get_chapter_urls(start_source):
     """Collect the chapter links found in the start page HTML.

     Args:
         start_source: HTML text of the start page.

     Returns:
         A new list with every ``href`` matched under the
         ``book-list clearfix`` block, in the order xpath returns them.
     """
     tree = etree.HTML(start_source)
     hrefs = tree.xpath('//div[@class="book-list clearfix"]/ul/li/a/@href')
     # Hand back a fresh list rather than the xpath result object itself.
     return [href for href in hrefs]
 # Chapter links extracted from the start page; iterated by the download loop below.
 urls = get_chapter_urls(source)
 def get_article(article_html):
     """Extract the title and body text from one chapter page.

     Args:
         article_html: HTML text of the chapter page.

     Returns:
         A ``(title, content)`` tuple: the first text node found under an
         ``<h1>`` and the string value of the first ``<div>`` inside the
         element with id ``nr1``.

     Raises:
         IndexError: If the page has no ``<h1>`` text.
     """
     tree = etree.HTML(article_html)
     heading_texts = tree.xpath('//h1/text()')
     first_heading = heading_texts[0]
     body_text = tree.xpath('string(//div[@id="nr1"]/div)')
     return first_heading, body_text
 def save(title,content):
     """Write one chapter to ``苏东坡传/<title>.txt`` as UTF-8 text.

     The title is turned into a safe file name first: surrounding
     whitespace is removed and characters that are path separators or are
     reserved in file names (``\\ / : * ? " < > |``) are replaced by ``_``.

     Args:
         title: Chapter title, used as the file name.
         content: Chapter text written to the file.
     """
     # A raw "/" or "\\" in the title would be read as a directory
     # separator, making open() fail or write outside the target folder.
     reserved = '\\/:*?"<>|'
     safe_title = title.strip().translate({ord(ch): "_" for ch in reserved})
     # exist_ok avoids the check-then-create race of a separate exists() test.
     os.makedirs("苏东坡传", exist_ok=True)
     filename = "苏东坡传/" + safe_title + ".txt"
     with open(filename, "w", encoding='utf-8') as f:
         f.write(content)
 def saveCsv(list):
     """Write the chapter index to ``苏东坡传/苏东坡传.csv``.

     The file is recreated on every call: one header row followed by one
     row per entry of *list*.

     Args:
         list: Iterable of rows; the header labels the columns as page
             address, title and body length.
     """
     target_dir = "苏东坡传"
     if not os.path.exists(target_dir):
         os.makedirs(target_dir)
     header = ["网页地址", "标题", "正文长度"]
     # newline='' lets the csv module control line endings itself.
     with open("苏东坡传/苏东坡传.csv", "w", encoding="utf-8", newline='') as csv_file:
         writer = csv.writer(csv_file)
         writer.writerow(header)
         writer.writerows(list)
 # Download every chapter, save its text, and keep the CSV index current.
 # The accumulator is named `rows` rather than `list` so the builtin `list`
 # is not shadowed for the rest of the module.
 rows = []
 for url in urls:
     article_html = get_source(url)
     if not article_html:
         # get_source() returns "" when the fetch fails; an empty page has
         # no <h1> for get_article() to read, so skip this chapter instead
         # of aborting the whole crawl.
         continue
     title, content = get_article(article_html)
     print(title)
     rows.append([url, title, len(content)])
     save(title, content)
     # Rewritten after each chapter, so the CSV on disk always lists every
     # chapter saved so far.
     saveCsv(rows)
 # if __name__=='__main__':
 #     source = get_source()
 #     urls=get_chapter_urls(source)