"""Scrape the 'Psychology' (心理学) section of shenmezhidedu.com.

For each book under the 心理学 card it collects the title, the <li>
detail lines (author / publisher / publication date), and the blurb
paragraphs (content and author introductions), then writes one value
per row to ``book.csv``.
"""
import csv

import requests
from lxml import etree  # NOTE(review): unused in this file; kept deliberately (never drop file-level imports)
import re  # NOTE(review): unused in this file; kept deliberately
from bs4 import BeautifulSoup

# Browser-like User-Agent so the site serves the normal desktop page.
HEADERS = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}
BASE_URL = 'https://shenmezhidedu.com/'
TIMEOUT = 15  # seconds; the original had no timeout and could hang forever


def fetch_html(url):
    """GET *url* and return the response body as text.

    Raises requests.HTTPError on a non-2xx status instead of silently
    parsing an error page.
    """
    response = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
    response.raise_for_status()
    return response.text


def collect_psychology_links(index_html):
    """Return the list of book-detail hrefs under the 心理学 card.

    BUG FIX: the original called ``group.find_all('di')`` — ``<di>`` is
    not a real tag, so no links were ever collected and the script
    scraped nothing. The hrefs live on anchor (``<a>``) tags.
    """
    soup = BeautifulSoup(index_html, "lxml")
    links = []
    for card in soup.select('div.list-card'):
        for heading in card.select('div.h6'):
            if '心理学' not in heading.get_text():
                continue
            for group in heading.find_next_siblings('div', class_='list-grouped'):
                for anchor in group.find_all('a'):
                    href = anchor.get('href')
                    # Guard: skip anchors without an href instead of raising KeyError.
                    if href:
                        links.append(href)
    return links


def scrape_book(detail_html):
    """Extract title, detail lines and blurb paragraphs from one book page.

    Returns a flat list of strings, one per scraped value, preserving the
    original output order per post: title(s), then <li> details, then
    <p> blurbs.
    """
    soup = BeautifulSoup(detail_html, "lxml")
    rows = []
    for post in soup.select('div.post'):
        for name in post.select('div>div.post-name'):
            rows.append(name.get_text(strip=True))
        for item in post.select('li'):
            rows.append(item.get_text())
        for para in post.select('div>p'):
            rows.append(para.get_text())
    return rows


def write_csv(values, filename="book.csv"):
    """Write one scraped value per CSV row.

    ``newline=''`` is required by the csv module docs; without it the
    output gains blank lines on Windows. Each row is a one-element list —
    the original appended one-element *sets*, which only worked by
    accident of iterability.
    """
    with open(filename, "w", encoding='utf-8', newline='') as file:
        writer = csv.writer(file)
        for value in values:
            writer.writerow([value])


def main():
    """Fetch the index, follow every psychology-book link, dump to CSV."""
    all_book = []
    for link in collect_psychology_links(fetch_html(BASE_URL)):
        all_book.extend(scrape_book(fetch_html(link)))
    write_csv(all_book)


if __name__ == "__main__":
    main()