From 3ce7d1ce6a30a02721b91fc17b54739cfd06322e Mon Sep 17 00:00:00 2001
From: pfe4a538u <2193317051@qq.com>
Date: Wed, 12 Jun 2024 11:42:21 +0800
Subject: [PATCH] ADD file via upload

---
 wangluokecheng.py | 65 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 wangluokecheng.py

diff --git a/wangluokecheng.py b/wangluokecheng.py
new file mode 100644
index 0000000..e48441d
--- /dev/null
+++ b/wangluokecheng.py
@@ -0,0 +1,65 @@
+import csv
+
+import requests
+from bs4 import BeautifulSoup
+
+
+book_list = []
+url = 'https://search.dangdang.com/?key=%CA%E9%BC%AE&category_path=01.76.00.00.00.00#J_tab'
+head = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0"}
+
+
+def get_resource(url):
+    """Download the search result page and return its HTML text."""
+    response = requests.get(url, headers=head)
+    # Dangdang pages are GBK-encoded; 'gbk' is a superset of 'gb2312'
+    response.encoding = 'gbk'
+    return response.text
+
+
+def parse_page(html):
+    """Extract name, author, price and shop for every book on the page."""
+    soup = BeautifulSoup(html, 'lxml')
+    # each search result is an <li> under the #component_59 list
+    li_list = soup.select('#component_59 > li')
+    print(len(li_list))
+    for i in li_list:
+        name = i.select('a')[0]['title']
+        author = i.select('.search_book_author > span > a')[0].text
+        price = i.select('.price > .search_now_price')[0].text
+        # listings sold by Dangdang itself may have no separate shop link
+        shop_tag = i.select_one('.search_shangjia > a')
+        shop = shop_tag.text if shop_tag else ''
+        book = {'name': name, 'author': author, 'price': price, 'shop': shop}
+        book_list.append(book)
+    return book_list
+
+
+# Optional MySQL persistence, kept commented out in favour of the CSV output below.
+# def save_to_mysql(books):
+#     import pymysql
+#     conn = pymysql.connect(user='root', password='123456', host='127.0.0.1',
+#                            port=3306, db='moon', charset='utf8')
+#     cur = conn.cursor()
+#     try:
+#         for book in books:
+#             sql_str = "insert into book_information(name, author, price) values (%s, %s, %s)"
+#             cur.execute(sql_str, (book['name'], book['author'], book['price']))
+#         conn.commit()
+#     except Exception as e:
+#         print(e)
+#         conn.rollback()
+#     finally:
+#         cur.close()
+#         conn.close()
+
+
+# Write the scraped records to a CSV file; mode 'w' avoids duplicating the header on re-runs.
+with open('网络课程.csv', mode='w', encoding='utf-8', newline='') as f:
+    csv_write = csv.DictWriter(f, fieldnames=['name', 'author', 'price', 'shop'])
+    csv_write.writeheader()
+    content = get_resource(url)
+    print(content)
+    data = parse_page(content)
+    print(data)
+    for book in data:
+        csv_write.writerow(book)
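
A minimal sketch for checking the script's output, assuming wangluokecheng.py has already been run once and 网络课程.csv exists in the working directory; the filename and field names are the ones used in the patch above:

import csv

with open('网络课程.csv', encoding='utf-8', newline='') as f:
    # read back the rows written by the scraper and print each record
    for row in csv.DictReader(f):
        print(row['name'], row['author'], row['price'], row['shop'])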