You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
67 lines
2.1 KiB
67 lines
2.1 KiB
import requests
|
|
from lxml import etree
|
|
import csv
|
|
# Page that the script downloads and parses.
start_url = "https://shici.tqzw.net.cn/poetry/"

# HTTP headers passed to requests.get by get_source; only a browser-style
# User-Agent string is set.
h = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0",
}
|
|
def get_source(url=start_url, timeout=10):
    """Download a page and return its body decoded as UTF-8.

    Args:
        url: Address to request; defaults to the module-level ``start_url``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text when the status code is 200; otherwise an empty
        string, after printing the status code.

    Raises:
        requests.RequestException: Network-level failures (including a
            timeout) are not caught here and propagate to the caller.
    """
    response = requests.get(url, headers=h, timeout=timeout)
    if response.status_code != 200:
        print("请求失败,状态码为{}".format(response.status_code))
        return ""
    # Force UTF-8 before reading .text so the body is decoded as UTF-8
    # regardless of what encoding requests guessed.
    response.encoding = "utf-8"
    return response.text
|
|
|
|
def get_article(acticle_html):
    """Extract the poem fields from a page's HTML.

    Args:
        acticle_html: HTML source of the page as a string. An empty or
            ``None`` value (``get_source`` returns ``""`` on failure) yields
            four empty lists instead of being handed to the parser.

    Returns:
        A 4-tuple of lists ``(title, year, shiren, brief_introduction)``:
        the link text under ``gs-cont-title`` headings, the text of the
        first and second links in each ``gs-poem-sub`` block, and at most
        the first 20 paragraph text nodes found under ``gs-works-text``.
    """
    if not acticle_html:
        return [], [], [], []

    selector = etree.HTML(acticle_html)
    # Defensive: if parsing produced no root element there is nothing to query.
    if selector is None:
        return [], [], [], []

    title = selector.xpath('//div[@class="gs-cont-title"]/h3/a/text()')
    year = selector.xpath('//div[@class="gs-poem-sub"]/a[1]/text()')
    shiren = selector.xpath('//div[@class="gs-poem-sub"]/a[2]/text()')
    # Only the first 20 text nodes are kept.
    brief_introduction = selector.xpath('//div[@class="gs-works-text"]/div/p/text()')[:20]
    return title, year, shiren, brief_introduction
|
|
def save(content):
    """Append ``content`` to the UTF-8 text file ``古诗词.txt``.

    The file is opened in append mode, so it is created on first use and
    earlier contents are preserved.
    """
    with open("古诗词.txt", "a+", encoding='utf-8') as text_file:
        text_file.write(content)
|
|
def save_to_csv(chapter_data):
    """Write the rows in ``chapter_data`` to ``古诗词.csv``.

    The file is opened in write mode (any existing file is replaced) and
    starts with a fixed four-column header row, followed by one CSV row per
    item of ``chapter_data``.
    """
    header = ["诗名", "年代", "诗人", "诗句"]
    # newline="" lets the csv module control line endings itself.
    with open("古诗词.csv", "w", newline="", encoding="utf-8") as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        sheet.writerows(chapter_data)
|
|
# Fetch the start page, pull out the four parallel field lists, and combine
# them into one row per poem.
html = get_source(start_url)
nai = get_article(html)

# zip stops at the shortest of the four lists, so extra entries in the longer
# ones are dropped.
# NOTE: the name `list` shadows the builtin; it is kept because the database
# section later in this file iterates over it.
list = [[a, s, d, f] for a, s, d, f in zip(nai[0], nai[1], nai[2], nai[3])]
print(list)

# Persist the rows: once as a CSV file, then line by line to the text file.
save_to_csv(list)
for i in list:
    save(str(i) + '\n')
|
|
|
|
import pymysql
|
|
# Store the scraped rows in the MySQL table `gushi`, creating it if needed.
# NOTE(review): the connection credentials are hard-coded here; consider
# loading them from configuration or the environment.
con = pymysql.connect(
    user="root",
    password="123456",
    host="localhost",
    port=3306,
    charset="utf8",
    database="27yk",
)

sql = """
create table if not exists gushi(
    title varchar(50),
    year varchar(30) not null,
    author varchar(30) not null,
    text varchar(1000) not null
);
"""

try:
    cur = con.cursor()
    try:
        cur.execute(sql)

        # One parameterized insert per row; each row holds four values in
        # the order title, year, author, text.
        for i in list:
            cur.execute("insert into gushi value(%s,%s,%s,%s);", (i[0], i[1], i[2], i[3]))

        # Commit the changes.
        con.commit()
    finally:
        cur.close()
finally:
    # Close the connection itself. The original called con.cursor() here,
    # which opened another cursor and left the connection open.
    con.close()
|
|
|