# dbzy/zy6.py

import csv

import requests
from lxml import etree
import re
from bs4 import BeautifulSoup
# Request headers sent with every HTTP call; the User-Agent mimics a desktop
# browser (presumably so the site does not reject the default client UA).
header = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'}
url = 'https://shenmezhidedu.com/'

# Fetch the landing page. The timeout keeps an unresponsive server from
# hanging the script forever, and raise_for_status() fails loudly on an HTTP
# error instead of silently parsing an error page into an empty result.
response = requests.get(url, headers=header, timeout=10)
response.raise_for_status()
source = response.text

all_link = []  # URLs of the individual book pages found on the landing page
all_book = []  # scraped text rows, filled while visiting each book page
soup = BeautifulSoup(source, "lxml")

# Goal: scrape the title, author, publisher, publication date, content summary
# and author bio of the books in the psychology ("心理学") section and store
# them in a CSV file.
books = soup.select('div.list-card')
for b in books:
    book = b.select('div.h6')
    for b1 in book:
        # Only the card whose heading mentions psychology is of interest.
        if '心理学' not in b1.get_text():
            continue
        for group in b1.find_next_siblings('div', class_='list-grouped'):
            # Links live in <a> tags; the previous 'di' tag name never matched
            # anything, so no link was ever collected. href=True skips anchors
            # without an href attribute, which would raise KeyError below.
            links = group.find_all('a', href=True)
            for l1 in links:
                all_link.append(l1['href'])
# Visit every collected book page and flatten its text into all_book.
# Each entry is a one-element row (a list, not a set: rows are ordered data),
# appended per post as: title rows first, then <li> details, then paragraphs.
for link in all_link:
    try:
        # The timeout keeps one unresponsive page from hanging the whole crawl.
        response1 = requests.get(link, headers=header, timeout=10)
        response1.raise_for_status()
    except requests.RequestException as exc:
        # Skip a page that cannot be fetched so that the rows gathered from
        # the other pages are still available to be written out afterwards.
        print("Skipping", link, "-", exc)
        continue

    source1 = response1.text
    soup1 = BeautifulSoup(source1, "lxml")
    book1 = soup1.select('div.post')
    for b in book1:
        # Book title(s); strip=True removes surrounding whitespace.
        all_book.extend(
            [book_name.get_text(strip=True)]
            for book_name in b.select('div>div.post-name')
        )
        # Text of every <li> inside the post.
        all_book.extend([detail1.get_text()] for detail1 in b.select('li'))
        # Text of every <p> that is a direct child of a <div> in the post.
        all_book.extend([content1.get_text()] for content1 in b.select('div>p'))

# Write every collected row to book.csv, one CSV record per entry of all_book.
filename = "book" + ".csv"
# newline='' is required by the csv module: without it the writer's own
# "\r\n" line endings are translated again on Windows (producing blank rows)
# and newlines embedded in scraped text can be mangled.
with open(filename, "w", encoding='utf-8', newline='') as file:
    writer = csv.writer(file)
    writer.writerows(all_book)