|
|
|
|
|
|
|
|
import csv
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
from lxml import etree
|
|
|
|
|
import re
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
# Browser-like User-Agent so the site does not reject the request as a bot.
header={"User-Agent":'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'}

# Landing page of the book-recommendation site being scraped.
url='https://shenmezhidedu.com/'

# Fetch the index page. requests has no default timeout, so without one a
# stalled server would hang the script forever — bound the wait explicitly.
response=requests.get(url,headers=header,timeout=10)

source=response.text

#print(source)

all_link=[]   # detail-page URLs collected from the index page
all_book=[]   # scraped rows (title / metadata / paragraphs), one per CSV line

soup = BeautifulSoup(source, "lxml")

# Scrape the psychology column: book title, author, publisher, release date,
# synopsis and author bio, then store everything in a CSV file.

books=soup.select('div.list-card')
|
|
|
|
|
# Walk every index card; inside the card whose heading mentions 心理学
# (psychology), harvest every detail-page link into all_link.
for b in books:
    book=b.select('div.h6')
    for b1 in book:
        if '心理学' in b1.get_text():
            for group in b1.find_next_siblings('div', class_='list-grouped'):
                # BUG FIX: the original called find_all('di'), which matches
                # nothing ('di' is not an HTML tag) and left all_link empty.
                # The hrefs read below live on <a> elements.
                links=group.find_all('a')
                for l1 in links:
                    # print(l1['href'])
                    all_link.append(l1['href'])
|
|
|
|
|
#print(all_link)
|
|
|
|
|
# Visit each collected detail page and pull the title, the metadata list
# items (presumably author / publisher / release date — TODO confirm against
# the live page) and the description paragraphs into all_book.
for link in all_link:
    # Bounded timeout for the same reason as the index fetch: requests
    # would otherwise wait indefinitely on a stalled server.
    response1 = requests.get(link, headers=header, timeout=10)

    source1 = response1.text

    #print(source1)

    soup1 = BeautifulSoup(source1, "lxml")

    book1=soup1.select('div.post')

    for b in book1:
        # Book title.
        books_name=b.select('div>div.post-name')
        for book_name in books_name:
            title=book_name.get_text(strip=True)
            # BUG FIX: the original appended one-element *sets* ({title});
            # csv.writer happens to iterate them, but a list keeps each row
            # explicit and ordered.
            all_book.append([title])

        # Metadata lines (<li> entries) of the post.
        details= b.select('li')
        for detail1 in details:
            detail=detail1.get_text()
            all_book.append([detail])

        # Body paragraphs: synopsis and author biography.
        contents=b.select('div>p')
        for content1 in contents:
            content=content1.get_text()
            all_book.append([content])
|
|
|
|
|
|
|
|
|
|
#print(all_book)
|
|
|
|
|
filename = "book" + ".csv"

# Dump every collected row to CSV.
# BUG FIX: the csv module requires newline='' on the file object; without it
# every row is followed by a spurious blank line on Windows.
# (Also renamed the handle: 'file' shadows the builtin.)
with open(filename, "w", encoding='utf-8', newline='') as csv_file:
    writer = csv.writer(csv_file)
    for a1 in all_book:
        writer.writerow(a1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|