import random
from time import sleep

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# Static request headers reused for every fetch.
HEADERS = {
    'Cookie': 'OCSSID=4df0bjva6j7ejussu8al3eqo03',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
}


def get_page(url, retries=3):
    """Fetch *url* and return its HTML text.

    Retries up to *retries* times on network errors; returns ``None`` when
    every attempt fails (the caller must handle that case).
    """
    for attempt in range(1, retries + 1):
        try:
            response = requests.get(url, headers=HEADERS, timeout=20)
            return response.text
        # Catch only requests' own errors (connect/read timeouts, DNS,
        # connection reset...) instead of the original blanket Exception.
        except requests.RequestException:
            if attempt == retries:
                print('请求3次均失败,放弃此url请求,检查请求条件')
                return None
            print('请求失败,重新请求')


def extract_steps(html_content):
    """Return the cleaned text of the first ``div.step`` element.

    Returns ``''`` when the page could not be fetched (``None``) or has no
    matching element — mirrors the original's fallback of appending ``a``.
    """
    # Guard: get_page() returns None after exhausted retries; the original
    # passed None straight to BeautifulSoup and crashed with TypeError.
    if not html_content:
        return ''
    soup = BeautifulSoup(html_content, 'html.parser')
    # Class name 'step' is specific to the target site's recipe markup.
    recipe_elements = soup.find_all('div', class_='step')
    if not recipe_elements:
        return ''
    text = recipe_elements[0].text
    # Strip newlines, carriage returns and spaces, as the original did
    # (replace \n/\r with spaces, then delete all spaces).
    text = text.replace("\n", " ")
    text = text.replace("\r", " ")
    return text.replace(" ", "")


def main():
    """Read recipe links from cai.csv, scrape each page's preparation
    steps, and write the augmented table to 菜谱1.csv."""
    df = pd.read_csv(
        'cai.csv',
        encoding="utf-8",  # switch to gb18030 if the file is GBK-encoded
        names=['名称', '链接', '素材', '评分', '作者', '图片', '说明'],
    )

    zuofa = []
    for link in tqdm(df['链接']):
        sleep(3)  # throttle requests to avoid hammering the server
        zuofa.append(extract_steps(get_page(link)))

    # One entry was appended per row, so lengths always match.
    df['做法'] = zuofa
    df.to_csv('菜谱1.csv', index=False)


if __name__ == '__main__':
    main()