From cbe8ed5859d9cb7361fff802362d4cf510fcbfd9 Mon Sep 17 00:00:00 2001
From: pcsfy8h5o <1545077945@qq.com>
Date: Wed, 12 Jun 2024 10:59:06 +0800
Subject: [PATCH] ADD file via upload

---
 zy6.py | 45 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 zy6.py

diff --git a/zy6.py b/zy6.py
new file mode 100644
index 0000000..f58a17e
--- /dev/null
+++ b/zy6.py
@@ -0,0 +1,45 @@
+import csv
+
+import requests
+from bs4 import BeautifulSoup
+
+header = {"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'}
+url = 'https://shenmezhidedu.com/'
+
+response = requests.get(url, headers=header)
+source = response.text
+all_link = []
+all_book = []
+soup = BeautifulSoup(source, "lxml")
+# Scrape the psychology section: each book's title, author, publisher,
+# release date, synopsis, and author bio, saved to a CSV file.
+books = soup.select('div.list-card')
+for b in books:
+    for b1 in b.select('div.h6'):
+        # '心理学' means "psychology"; match the section card by its heading text.
+        if '心理学' in b1.get_text():
+            for group in b1.find_next_siblings('div', class_='list-grouped'):
+                # Collect the detail-page URL of every book in the group.
+                for l1 in group.find_all('a'):
+                    all_link.append(l1['href'])
+
+for link in all_link:
+    response1 = requests.get(link, headers=header)
+    soup1 = BeautifulSoup(response1.text, "lxml")
+    for b in soup1.select('div.post'):
+        # Book title.
+        for book_name in b.select('div>div.post-name'):
+            all_book.append([book_name.get_text(strip=True)])
+        # Author, publisher, release date, etc. (one <li> per field).
+        for detail1 in b.select('li'):
+            all_book.append([detail1.get_text()])
+        # Synopsis and author bio paragraphs.
+        for content1 in b.select('div>p'):
+            all_book.append([content1.get_text()])
+
+filename = "book" + ".csv"
+# newline='' keeps csv.writer from inserting blank rows on Windows.
+with open(filename, "w", encoding='utf-8', newline='') as file:
+    writer = csv.writer(file)
+    for a1 in all_book:
+        writer.writerow(a1)
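
Note on the output layout: zy6.py writes every scraped field as its own single-column CSV row, so a reader cannot tell which title, details, and paragraphs belong to the same book. Below is a minimal sketch of grouping each book's fields into one row under a header; the parse_book/write_books helpers and the column names are illustrative, and the CSS selectors are carried over from zy6.py rather than verified against shenmezhidedu.com's actual markup.

import csv
import time

import requests
from bs4 import BeautifulSoup

HEADER = {"User-Agent": "Mozilla/5.0"}

def parse_book(link):
    # Fetch one detail page; timeout and raise_for_status guard against
    # hung connections and HTTP error pages.
    response = requests.get(link, headers=HEADER, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")
    post = soup.select_one('div.post')
    if post is None:
        return None
    title = post.select_one('div>div.post-name')
    details = [li.get_text(strip=True) for li in post.select('li')]
    paragraphs = [p.get_text(strip=True) for p in post.select('div>p')]
    return [
        title.get_text(strip=True) if title else '',
        ' / '.join(details),     # author, publisher, release date, ...
        '\n'.join(paragraphs),   # synopsis and author bio
    ]

def write_books(links, filename="book.csv"):
    with open(filename, "w", encoding="utf-8", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["title", "details", "description"])  # header row
        for link in links:
            row = parse_book(link)
            if row:
                writer.writerow(row)
            time.sleep(1)  # pause between requests to be polite to the server

Keeping one book per row also makes the file loadable with csv.DictReader or pandas.read_csv without any post-processing.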