# sudongpozhuan

```python
import csv

import requests
from lxml import etree

start_url = "https://www.xingyueboke.com/sudongpozhuan/"
h = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0"
}


def get_source(url=start_url):
    """Fetch a page and return its HTML, or an empty string on failure."""
    r = requests.get(url, headers=h)
    if r.status_code == 200:
        return r.content.decode('utf-8')
    else:
        print("失败")  # request failed
        return ""


def get_chapter_urls(start_source):
    """Extract the chapter links from the book's index page."""
    se = etree.HTML(start_source)
    urls = se.xpath('//*[@id="content-list"]/div[2]/ul/li/a/@href')
    return urls


def get_article(article_html):
    """Extract the title (as a list) and the body text of one chapter."""
    selector = etree.HTML(article_html)
    title = selector.xpath('//*[@id="nr_title"]/text()')
    content = selector.xpath('string(//*[@id="nr1"]/div)')
    return title, content


def save(title, content):
    """Write one chapter to its own text file."""
    fi = "苏东坡传" + title[0] + ".txt"
    with open(fi, 'a+', encoding='utf-8') as f:
        f.write(content)


def czd(urls, titles, contents):
    """Write a CSV summary: URL, title and article length for every chapter."""
    data = []
    for i in range(len(urls)):  # include every chapter (the original -1 dropped the last one)
        data.append([urls[i], titles[i], contents[i]])
    with open("苏东坡传.csv", 'a', encoding='utf-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["网站", "标题", "文章长度"])
        writer.writerows(data)
```
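As a reference for how the XPath expressions above behave, here is a minimal, self-contained sketch. The inline HTML is a made-up stand-in for the real pages on xingyueboke.com; only the `content-list`, `nr_title` and `nr1` ids match the expressions used in the scraper, everything else is assumed.

```python
from lxml import etree

# Hypothetical markup mimicking the ids the scraper's XPath expressions target
sample = """
<div id="content-list">
  <div>目录</div>
  <div>
    <ul>
      <li><a href="https://www.xingyueboke.com/sudongpozhuan/1.html">第一章</a></li>
      <li><a href="https://www.xingyueboke.com/sudongpozhuan/2.html">第二章</a></li>
    </ul>
  </div>
</div>
<h1 id="nr_title">第一章</h1>
<div id="nr1"><div>正文内容……</div></div>
"""

tree = etree.HTML(sample)
print(tree.xpath('//*[@id="content-list"]/div[2]/ul/li/a/@href'))  # list of chapter links
print(tree.xpath('//*[@id="nr_title"]/text()'))                    # title, returned as a list
print(tree.xpath('string(//*[@id="nr1"]/div)'))                    # article body as plain text
```

The entry point below chains the helpers together: fetch the index page, collect the chapter links, save each chapter as a text file, and summarize everything in a CSV.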

```python
if __name__ == "__main__":
    source = get_source()            # HTML of the book's index page
    urls = get_chapter_urls(source)  # chapter URLs
    titles = []
    contents = []
    for url in urls:
        article_html = get_source(url)
        title, content = get_article(article_html)
        titles.append(title[0] if title else "")  # xpath returns a list; keep the text
        contents.append(len(content))             # record the article length
        save(title, content)
    czd(urls, titles, contents)
```
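After a run there should be one text file per chapter (named 苏东坡传 plus the chapter title) and a 苏东坡传.csv summary. A quick way to inspect that CSV, assuming it was produced by `czd` above:

```python
import csv

# Read back the summary written by czd(): columns are 网站 (URL), 标题 (title), 文章长度 (length)
with open("苏东坡传.csv", encoding='utf-8', newline='') as f:
    for row in csv.reader(f):
        print(row)
```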