You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

63 lines
1.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import pandas as pd
import requests
from bs4 import BeautifulSoup
import random
from time import sleep
from tqdm import tqdm
def get_page(url, retries=3):
    """Fetch *url* and return the response body as text, retrying on failure.

    Args:
        url: Address handed to ``requests.get``.
        retries: Maximum number of attempts before giving up. Defaults to 3,
            the count that used to be hard-coded.

    Returns:
        The response text of the first attempt that completes without
        raising, or ``None`` when every attempt raised (or when ``retries``
        is smaller than 1, in which case no request is made).
    """
    # Built once instead of on every attempt; the values never change.
    headers = {
        'Cookie': 'OCSSID=4df0bjva6j7ejussu8al3eqo03',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
    }
    for attempt in range(1, retries + 1):
        try:
            # NOTE: the HTTP status code is not inspected, so an error page
            # (404, 500, ...) is returned like any other body.
            response = requests.get(url, headers=headers, timeout=20)
            return response.text
        except Exception:
            # Kept deliberately broad: any failure while fetching counts as a
            # failed attempt. Retries are immediate (no delay between them).
            if attempt < retries:
                print('请求失败,重新请求')
    # Every attempt failed: report it and signal the failure with None.
    print('请求{}次均失败放弃此url请求,检查请求条件'.format(retries))
    return None
def _extract_steps(html_content):
    """Return the text of the first ``<div class="step">`` with whitespace removed.

    Args:
        html_content: HTML text as returned by ``get_page``, or ``None`` when
            the download failed.

    Returns:
        The text of the first matching element with newlines, carriage
        returns and spaces stripped out, or ``''`` when there is no page or
        no matching element.
    """
    # get_page() returns None once its retries are exhausted; passing that to
    # BeautifulSoup raises, which used to abort the whole run.
    if html_content is None:
        return ''
    # Parse the HTML content.
    soup = BeautifulSoup(html_content, 'html.parser')
    # Locate the data; adjust the tag and class name to the target site.
    step_divs = soup.find_all('div', class_='step')
    if not step_divs:
        return ''
    text = step_divs[0].text
    # Line breaks are turned into spaces first, then every space is dropped.
    return text.replace("\n", " ").replace("\r", " ").replace(" ", "")


lists = ['']  # not referenced anywhere else in the visible script
# Alternative encoding that may be needed for this file: gb18030.
df = pd.read_csv(
    'cai.csv',
    encoding="utf-8",
    names=['名称', '链接', '素材', '评分', '作者', '图片', '说明'],
)
zuofa = []
for link in tqdm(df['链接']):
    # Fixed pause before every request.
    sleep(3)
    # One entry per row, so the list lines up with the DataFrame index even
    # when a page could not be fetched or has no matching element.
    zuofa.append(_extract_steps(get_page(link)))
df['做法'] = zuofa
df.to_csv('菜谱1.csv', index=False)