# ugkj/poemtry.py

import requests
from lxml import etree

# Index page of the book; its chapter list is what the crawl starts from.
start_url = "https://www.xingyueboke.com/sudongpozhuan/"

# Request headers sent with every fetch (a desktop browser User-Agent string).
head = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/123.0.0.0 Safari/537.36 Edg/123.0.0.0"
    ),
}


def get_source(start_url, head, timeout=10):
    """Download a page and return its body decoded as UTF-8 text.

    Args:
        start_url: URL of the page to fetch.
        head: Mapping of HTTP request headers to send.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the crawl forever.

    Returns:
        The response body as a string.

    Raises:
        requests.exceptions.RequestException: On connection failure, on
            timeout, or when the server answers with a 4xx/5xx status.
    """
    response = requests.get(start_url, headers=head, timeout=timeout)
    # Fail loudly instead of handing an error page to the HTML parser.
    response.raise_for_status()
    # Decode as UTF-8 regardless of the encoding requests inferred.
    response.encoding = 'utf-8'
    return response.text


def get_chapterurl(start_source):
    """Collect the chapter link targets from the index page HTML.

    Args:
        start_source: HTML text of the book's index page.

    Returns:
        The ``href`` values of the ``<a>`` elements found under
        ``div.book-list.clearfix > ul > li``, in document order.
    """
    chapter_href_xpath = '//div[@class="book-list clearfix"]/ul/li/a/@href'
    return etree.HTML(start_source).xpath(chapter_href_xpath)


def get_article(article_html):
    """Pull the title and paragraph text out of one chapter page.

    Args:
        article_html: HTML text of a chapter page.

    Returns:
        A ``(title, content)`` tuple. ``title`` holds the text nodes of
        ``h1.post-title``; ``content`` holds the text nodes of the ``<p>``
        elements under ``div#nr1 > div``. Both are XPath result lists.
    """
    tree = etree.HTML(article_html)
    heading = tree.xpath('//h1[@class="post-title"]/text()')
    paragraphs = tree.xpath('//div[@id="nr1"]/div/p/text()')
    return heading, paragraphs


#
#
#
def save(title, content, path="dufu.text", mode="a"):
    """Write the text fragments of one article to a UTF-8 text file.

    The file is opened in append mode by default, so calling this once per
    chapter accumulates every chapter. Re-opening with "w" on each call
    truncated the file every time and left only the last chapter on disk.
    Because of that, delete the file (or pass ``mode="w"`` for the first
    call) when a fresh output file is wanted.

    Args:
        title: Article title. Accepted for interface compatibility; it is
            not written to the file.
        content: Iterable of strings, written back-to-back with no
            separator added between them.
        path: Destination file. Defaults to "dufu.text" in the current
            working directory.
        mode: Text mode passed to ``open``; "a" appends, "w" truncates.
    """
    with open(path, mode, encoding='utf-8') as f:
        f.writelines(content)
#
#
#
#
#
#
#
#
#
#
#
# source=get_source(start_url,head)
# urls=get_chapterurl(source)
# for i in urls:
#     articl_html=get_source(i)
#     get_article(articl_html)


# Fetch the index page and extract the URL of every chapter it lists.
urls = get_chapterurl(get_source(start_url, head))

# Download each chapter in turn, echo what was parsed, then persist it.
for i in urls:
    artical_html = get_source(i, head)
    title, content = get_article(artical_html)
    print(title)
    print(content)
    save(title, content)
# ADD file via upload 7 months ago
# (The repository page repeated the listing above at this point, wrapped in
# backticks with its indentation stripped. That duplicate was page-extraction
# residue and is not part of the program.)