"""Scrape Dangdang's 30-day bestseller ranking into CSV, a text file and MySQL.

For every ranking page the script extracts each book's title, current price,
original price and discount, prints a summary line, and stores the record in
a CSV file, a plain-text file and a MySQL table.
"""
import csv
from pathlib import Path

import pymysql
import requests
from bs4 import BeautifulSoup

CSV_PATH = '当当网书籍.csv'
TXT_PATH = '当当网.txt'
CSV_FIELDS = ['排名', '书名', '现价', '原价', '折扣']

# The trailing %d is replaced with the page number.
PAGE_URL = 'http://bang.dangdang.com/books/bestsellers/01.00.00.00.00.00-recent30-0-0-1-%d'
# NOTE(review): pages are assumed to be 1-based (the listing's first-page URL
# ends in "-1"); the previous loop requested pages 0-9 -- confirm against the site.
FIRST_PAGE = 1
PAGE_COUNT = 10
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled request blocks forever

HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
}

INSERT_SQL = (
    "INSERT INTO 书(ranking, book_name, current_price, original_price, discount) "
    "VALUES (%s, %s, %s, %s, %s)"
)


def connect_mysql():
    """Open the MySQL connection used to store the scraped rows."""
    # NOTE(review): credentials are hard-coded; consider loading them from the
    # environment or a config file that is kept out of version control.
    return pymysql.connect(
        host='localhost',
        user='root',
        password='123456',
        database='爬虫',
        charset='utf8mb4',
        cursorclass=pymysql.cursors.DictCursor,
    )


def fetch_page(page):
    """Download ranking page number *page* and return it as a parsed soup.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            response, so an error page is never parsed as if it were data.
    """
    response = requests.get(PAGE_URL % page, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")


def _span_text(parent, css_class):
    """Return the text of the first <span class=css_class> under *parent*, or ''."""
    span = parent.find("span", class_=css_class)
    return span.text if span is not None else ''


def parse_books(soup):
    """Extract the books listed on one ranking page.

    Returns:
        A list of (book_name, current_price, original_price, discount) tuples
        in page order. The list is empty when the ranking list is not present.
        <li> elements without a title link or a price paragraph are skipped;
        a missing individual price span yields an empty string.
    """
    book_list = soup.find("ul", attrs={'class': "bang_list clearfix bang_list_mode"})
    if book_list is None:
        return []

    books = []
    for item in book_list.find_all("li"):
        name_div = item.find("div", class_="name")
        price_div = item.find("div", class_="price")
        name_link = name_div.find("a") if name_div is not None else None
        price_p = price_div.find("p") if price_div is not None else None
        if name_link is None or price_p is None:
            continue
        books.append((
            name_link.text,
            _span_text(price_p, "price_n"),  # current price
            _span_text(price_p, "price_r"),  # original price
            _span_text(price_p, "price_s"),  # discount
        ))
    return books


def save_to_mysql(connection, top, book_name, current_price, original_price, discount):
    """Insert one book record into MySQL and commit it immediately."""
    with connection.cursor() as cursor:
        cursor.execute(INSERT_SQL, (top, book_name, current_price, original_price, discount))
    connection.commit()


def main():
    """Scrape every configured page and persist each book to all three sinks."""
    connection = connect_mysql()
    try:
        # The CSV is opened in append mode, so only emit the header when the
        # file is new or empty; otherwise every run would add a header row
        # in the middle of the data.
        csv_path = Path(CSV_PATH)
        needs_header = not csv_path.exists() or csv_path.stat().st_size == 0

        with open(CSV_PATH, mode='a', encoding='utf-8', newline='') as csv_file, \
                open(TXT_PATH, 'a', encoding='utf-8') as txt_file:
            csv_writer = csv.DictWriter(csv_file, fieldnames=CSV_FIELDS)
            if needs_header:
                csv_writer.writeheader()

            top = 1  # running rank across all pages
            for page in range(FIRST_PAGE, FIRST_PAGE + PAGE_COUNT):
                books = parse_books(fetch_page(page))
                if not books:
                    print('No books found on page ' + str(page))
                    continue

                for book_name, my, sy, discount in books:
                    line = ('Top' + str(top) + ':' + book_name + ' 原价:' + sy
                            + ' 现价:' + my + ' 折扣:' + discount)
                    print(line)

                    csv_writer.writerow({
                        '排名': top,
                        '书名': book_name,
                        '现价': my,
                        '原价': sy,
                        '折扣': discount,
                    })
                    txt_file.write(line + '\n')

                    # Store the record in the MySQL database.
                    save_to_mysql(connection, top, book_name, my, sy, discount)
                    print('保存成功')

                    top += 1
    finally:
        # Close the MySQL connection even if scraping or writing fails.
        connection.close()


if __name__ == "__main__":
    main()