You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

61 lines
1.8 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import csv
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from lxml import etree
# Scrape the "心理学" (psychology) shelf of shenmezhidedu.com: for every book
# linked from that shelf, collect the title, the detail list items and the
# descriptive paragraphs, then store them in book.csv (one text fragment per
# row).

header = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'}
url = 'https://shenmezhidedu.com/'

CATEGORY = '心理学'
OUTPUT_FILE = "book.csv"
# requests never times out by default; without a limit one stalled
# connection would hang the whole run.
REQUEST_TIMEOUT = 30  # seconds


def fetch_soup(page_url):
    """Download *page_url* and return it parsed by BeautifulSoup (lxml).

    Args:
        page_url: Absolute URL of the page to fetch.

    Returns:
        The parsed ``BeautifulSoup`` document.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response (so error pages are never parsed as book data).
    """
    response = requests.get(page_url, headers=header, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, "lxml")


def collect_category_links(index_soup, category=CATEGORY, base_url=url):
    """Return the absolute book URLs listed under *category* on the index page.

    Args:
        index_soup: Parsed index page.
        category: Text that must appear in a ``div.h6`` heading for the
            ``div.list-grouped`` siblings following it to be harvested.
        base_url: URL used to resolve relative ``href`` values.

    Returns:
        A list of absolute URLs, in document order.
    """
    links = []
    for card in index_soup.select('div.list-card'):
        for heading in card.select('div.h6'):
            if category not in heading.get_text():
                continue
            for group in heading.find_next_siblings('div', class_='list-grouped'):
                # Links live on <a> elements; href=True skips anchors that
                # carry no target instead of failing with a KeyError.
                for anchor in group.find_all('a', href=True):
                    links.append(urljoin(base_url, anchor['href']))
    return links


def extract_book_rows(book_soup):
    """Return the CSV rows found on one parsed book page.

    For every ``div.post`` the rows are, in order: each title
    (``div>div.post-name``, whitespace-stripped), each ``li`` detail, and
    each ``div>p`` paragraph. Every row is a one-element list so that each
    text fragment lands in its own single-cell CSV row.
    """
    rows = []
    for post in book_soup.select('div.post'):
        for name_tag in post.select('div>div.post-name'):
            rows.append([name_tag.get_text(strip=True)])
        for item in post.select('li'):
            rows.append([item.get_text()])
        for paragraph in post.select('div>p'):
            rows.append([paragraph.get_text()])
    return rows


def write_rows(rows, filename):
    """Write *rows* (an iterable of cell sequences) to *filename* as UTF-8 CSV."""
    # newline='' lets the csv module control line endings itself; without it
    # Windows output gets an extra blank line after every row.
    with open(filename, "w", encoding='utf-8', newline='') as file:
        csv.writer(file).writerows(rows)


def main():
    """Scrape the configured category and save the result to OUTPUT_FILE."""
    all_link = collect_category_links(fetch_soup(url))
    all_book = []
    for link in all_link:
        try:
            book_soup = fetch_soup(link)
        except requests.RequestException as exc:
            # One unreachable book page should not discard the others.
            print(f"skipping {link}: {exc}")
            continue
        all_book.extend(extract_book_rows(book_soup))
    write_rows(all_book, OUTPUT_FILE)


if __name__ == "__main__":
    main()