ncccu94970/z_a - z_a - 头歌-开源项目托管

Go to file

ncccu94970 042b822531 ADD file via upload		11 months ago
2.py	ADD file via upload	11 months ago
README.md	Update README.md	11 months ago

README.md

import csv

import requests
from lxml import etree

def get_source(url, headers, timeout=10):
    """Download the index page at *url* and return the links listed on it.

    Args:
        url: Address of the page to download.
        headers: HTTP request headers passed through to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        A list with the ``href`` value of every ``<a>`` matched by
        ``//div[@class="book-list clearfix"]/ul/li/a``.

    Raises:
        requests.RequestException: On network failure, timeout, or when the
            server answers with an HTTP error status.
    """
    r = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty list.
    r.raise_for_status()
    dom = etree.HTML(r.text)
    return dom.xpath('//div[@class="book-list clearfix"]/ul/li/a/@href')

def get_title(url, headers, timeout=10):
    """Download one page and extract its title and body text fragments.

    Args:
        url: Address of the page to download.
        headers: HTTP request headers passed through to ``requests.get``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        A ``(biaoti, zw)`` tuple: ``biaoti`` is the first text node of the
        first ``<h1>``; ``zw`` is the list of text nodes matched by
        ``//article/div[1]/div//text()``.

    Raises:
        requests.RequestException: On network failure, timeout, or when the
            server answers with an HTTP error status.
        IndexError: If the page contains no ``<h1>`` text.
    """
    r = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    # Decode the raw bytes explicitly as UTF-8 rather than trusting the
    # encoding requests infers from the response headers.
    dom = etree.HTML(r.content.decode("utf-8"))
    headings = dom.xpath('//h1/text()')
    if not headings:
        # Same exception type as indexing an empty list, but with context.
        raise IndexError(f"no <h1> text found at {url}")
    biaoti = headings[0]
    zw = dom.xpath('//article/div[1]/div//text()')
    return biaoti, zw

# Path separators and characters that are illegal in Windows file names;
# each one is replaced by "_" before a title is used as a file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({c: "_" for c in '\\/:*?"<>|'})


def save_txt(biaoti, zw):
    """Append the joined text fragments to ``<biaoti>.txt``.

    Args:
        biaoti: Title used as the file name (before the ``.txt`` suffix).
            Characters that cannot appear in a file name are replaced by
            ``_`` so the file is always created in the current directory.
        zw: Iterable of strings; they are concatenated without a separator.

    The file is opened in append mode, so calling this twice with the same
    title adds to the existing file rather than overwriting it.
    """
    filename = biaoti.translate(_UNSAFE_FILENAME_CHARS) + ".txt"
    with open(filename, "a", encoding="utf-8") as f:
        # A single join avoids rebuilding the string once per fragment.
        f.write("".join(zw))


def save_csv(list):
    """Write the summary rows to ``苏东坡传.csv``, overwriting any old file.

    Args:
        list: Iterable of rows, each a sequence matching the header columns
            (URL, title, body length). The parameter name shadows the
            builtin but is kept so existing callers are unaffected.
    """
    headers = ["网址", "标题", "正文长度"]
    # newline="" lets the csv module control line endings; without it an
    # extra blank line appears after every row on Windows.
    with open("苏东坡传.csv", "w", encoding="utf-8", newline="") as f:
        w = csv.writer(f)
        w.writerow(headers)
        w.writerows(list)

# Script entry: crawl the index page, save every linked page as a text file,
# and write a CSV summary with one row per page.
url = "https://www.xingyueboke.com/sudongpozhuan/"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0"}
url_list = get_source(url, headers)
rows = []  # named "rows" so the builtin ``list`` is not shadowed
for chapter_url in url_list:
    biaoti, zw = get_title(chapter_url, headers)
    save_txt(biaoti, zw)
    # zw is a list of text fragments, so len(zw) would count fragments;
    # the "正文长度" column needs the total number of characters instead.
    body_length = sum(len(fragment) for fragment in zw)
    rows.append([chapter_url, biaoti, body_length])
save_csv(rows)