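# Scrape book listings from a Dangdang search results page with requests + BeautifulSoup
# and save each book's name, author, price, and shop to a CSV file.
# (A commented-out MySQL variant of the storage step is kept further down.)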
import csv

import requests
from bs4 import BeautifulSoup

book_list = []
url = 'https://search.dangdang.com/?key=%CA%E9%BC%AE&category_path=01.76.00.00.00.00#J_tab'
head = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"}

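# Fetch the search results page. Dangdang serves GB2312-encoded HTML, so the
# encoding is set explicitly before reading response.text.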
def get_resource(url):
    response = requests.get(url, headers=head)
    response.encoding = 'gb2312'
    return response.text

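# Parse one results page: every product is an <li> under #component_59.
# For each item, grab the link title (book name), the first author link,
# the current price, and the shop name, and collect them as dicts in book_list.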
def pares_page(html):
    soap = BeautifulSoup(html, 'lxml')
    # CSS selector for the product list
    li_list = soap.select('#component_59>li')
    print(len(li_list))
    for i in li_list:
        name = i.select('a')[0]['title']
        author = i.select('.search_book_author>span>a')[0].text
        price = i.select('.price>.search_now_price')[0].text
        shop = i.select('.search_shangjia>a')[0].text
        book = {'name': name, 'author': author, 'price': price, 'shop': shop}
        book_list.append(book)
    return book_list

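# Optional storage path: write the records to MySQL instead of CSV.
# Kept commented out as in the original; it assumes `import pymysql` and a local
# `moon` database with a `book_information(name, author, price)` table.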
# def connection(pares_page):
#     conn = pymysql.Connection(user='root', password='123456', host='127.0.0.1', port=3306, db='moon', charset='utf8')
#     cur = conn.cursor()
#     try:
#         for i in pares_page:
#             # placeholders must not be quoted; pymysql quotes and escapes the values itself
#             sql_str = "insert into book_information(name,author,price) values (%s,%s,%s)"
#             cur.execute(sql_str, (i['name'], i['author'], i['price']))
#             conn.commit()
#     except Exception as e:
#         print(e)
#         conn.rollback()
#     finally:
#         cur.close()
#         conn.close()

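# Write the scraped records to a CSV file. Note that with mode='a' the header row
# is appended again on every run; use mode='w' if a single header is wanted.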
f = open('网络课程.csv', mode='a', encoding='utf-8', newline='')
csv_write = csv.DictWriter(f, fieldnames=[
    "name",
    "author",
    "price",
    "shop"
])
csv_write.writeheader()

content = get_resource(url)
print(content)
data = pares_page(content)
print(data)
for i in data:
    csv_write.writerow(i)
f.close()