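# NOTE: the six lines below are page residue from the repository web viewer,
# not part of the script; they are kept only as comments.
# You can not select more than 25 topics
# Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# 67 lines
# 2.6 KiB
# 67 lines
# 2.6 KiB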
import csv

import pymysql
import requests
from bs4 import BeautifulSoup

# Dangdang 30-day bestseller scraper.
#
# Downloads the first ranking pages, prints every book and stores each one in
# a CSV file, a plain-text file and a MySQL table.

# First ranking page; identical to ``new_url % 1``.
url = 'http://bang.dangdang.com/books/bestsellers/01.00.00.00.00.00-recent30-0-0-1-1'
# Template for an arbitrary ranking page; ``%d`` is the page number.
new_url = 'http://bang.dangdang.com/books/bestsellers/01.00.00.00.00.00-recent30-0-0-1-%d'

# Browser-like user agent sent with every request.
head = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 '
                  'Safari/537.36'}

# Column names of the CSV output (also the keys of each row dict).
CSV_FIELDS = ['排名', '书名', '现价', '原价', '折扣']

# Ten pages are scraped. Numbering starts at 1 because the ``url`` constant
# above shows that the first page ends in ``-1``; the previous ``range(10)``
# requested pages 0-9 instead.
PAGES = range(1, 11)

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

INSERT_SQL = ("INSERT INTO 书(ranking, book_name, current_price, original_price, discount) "
              "VALUES (%s, %s, %s, %s, %s)")


def format_book_line(top, book_name, sy, my, discount):
    """Return the one-line summary printed and written to the text file.

    Args:
        top: Running rank of the book.
        book_name: Title text.
        sy: Original price text.
        my: Current price text.
        discount: Discount text.
    """
    return 'Top{}:{} 原价:{} 现价:{} 折扣:{}'.format(top, book_name, sy, my, discount)


def parse_book(item):
    """Extract ``(book_name, current_price, original_price, discount)`` from one ``<li>``.

    Returns ``None`` when the item has no title link, so list entries that are
    not books are skipped instead of raising ``AttributeError``. A missing
    price element yields an empty string for that field.
    """
    name_div = item.find("div", class_="name")
    title_link = name_div.find("a") if name_div is not None else None
    if title_link is None:
        return None

    price_div = item.find("div", class_="price")
    price_line = price_div.find("p") if price_div is not None else None

    def span_text(css_class):
        # ``class_=`` matches exactly this CSS class. The earlier
        # ``attrs={"class", "price_n"}`` was a set literal, which
        # BeautifulSoup interprets as "class is 'class' OR 'price_n'".
        span = price_line.find("span", class_=css_class) if price_line is not None else None
        return span.text if span is not None else ''

    return title_link.text, span_text("price_n"), span_text("price_r"), span_text("price_s")


def fetch_books(page):
    """Download ranking page ``page`` and return its parsed books as a list of tuples.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(new_url % page, headers=head, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    book_list = soup.find("ul", attrs={'class': "bang_list clearfix bang_list_mode"})
    if book_list is None:
        # Page layout changed or the page is empty: nothing to report.
        return []
    parsed = (parse_book(item) for item in book_list.find_all("li"))
    return [book for book in parsed if book is not None]


def main():
    """Scrape all pages and persist every book to CSV, text file and MySQL."""
    # Connect to the MySQL database.
    # NOTE(review): credentials are hard-coded; consider reading them from the
    # environment or a config file.
    connection = pymysql.connect(
        host='localhost',
        user='root',
        password='123456',
        database='爬虫',
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor
    )
    try:
        # Both output files stay open for the whole run and are closed (and
        # flushed) by the ``with`` statement even if scraping fails midway.
        with open('当当网书籍.csv', mode='a', encoding='utf-8', newline='') as csv_file, \
                open('当当网.txt', 'a', encoding='utf-8') as txt_file:
            csv_writer = csv.DictWriter(csv_file, fieldnames=CSV_FIELDS)
            # The file is opened in append mode, so only write the header
            # when it is still empty; otherwise every run adds another one.
            if csv_file.tell() == 0:
                csv_writer.writeheader()

            top = 1
            for page in PAGES:
                for book_name, my, sy, discount in fetch_books(page):
                    line = format_book_line(top, book_name, sy, my, discount)
                    print(line)
                    csv_writer.writerow(
                        {'排名': top, '书名': book_name, '现价': my, '原价': sy, '折扣': discount})
                    txt_file.write(line + '\n')

                    # Store the record in the MySQL database.
                    with connection.cursor() as cursor:
                        cursor.execute(INSERT_SQL, (top, book_name, my, sy, discount))
                    connection.commit()
                    print('保存成功')

                    top += 1
    finally:
        # Close the MySQL connection, also when an error interrupted the run.
        connection.close()


if __name__ == "__main__":
    main()