import requests
from bs4 import BeautifulSoup
# URL of the page to scrape
url = "https://tophub.today/n/KqndgxeLl9"
headers = {
'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36"
}
# Fetch the page
response = requests.get(url, headers=headers)
response.raise_for_status()  # raise an exception if the request failed
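
# A minimal sketch of a more defensive fetch, not wired into the flow above
# (fetch_html and its parameters are illustrative names): passing a timeout
# keeps the script from hanging indefinitely on a slow response.
def fetch_html(target_url, request_headers, timeout_seconds=10):
    """Fetch a page and return its HTML, raising on HTTP or network errors."""
    resp = requests.get(target_url, headers=request_headers, timeout=timeout_seconds)
    resp.raise_for_status()
    return resp.text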
# response.text is already a decoded string, so no conversion is needed
res = response.text
# Parse the HTML
soup = BeautifulSoup(res, 'lxml')  # 'lxml' requires the lxml package; 'html.parser' is a slower stdlib fallback
# Initialize the CSV content with a header row
csv_content = 'num,title,link,heat\n'  # a 'link' field is included to store each item's URL
for tr in soup.find_all('tr'):
    tds = tr.find_all('td')
    if len(tds) >= 3:
        num = tds[0].get_text(strip=True)
        title_element = tds[1]  # the td element containing the title

        # Look for an <a> tag inside the title cell, if there is one
        link_element = title_element.find('a')
        if link_element:
            link = link_element.get('href')  # extract the link
        else:
            link = ''  # no link: leave the field empty

        title = title_element.get_text(strip=True)  # title text
        heat = tds[2].get_text(strip=True)

        # Build the CSV row
        csv_row = f"{num},{title},{link},{heat}\n"
        csv_content += csv_row
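
# The href values extracted above may be site-relative paths (an assumption
# about tophub.today's markup, not verified here). A sketch for resolving them
# against the page URL; absolutize is an illustrative helper name:
from urllib.parse import urljoin

def absolutize(href, base=url):
    """Return href as an absolute URL; an empty href stays empty."""
    return urljoin(base, href) if href else ''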
# Write the CSV file
with open('weibo_hot_search.csv', 'w', newline='', encoding='utf-8') as csvfile:
    csvfile.write(csv_content)
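
# Note: the manual f-string concatenation above yields malformed CSV whenever
# a title contains a comma or a double quote. A minimal sketch of a safer
# variant using the standard csv module (write_rows and rows are illustrative
# names; rows would be (num, title, link, heat) tuples collected in the loop):
import csv

def write_rows(rows, path='weibo_hot_search.csv'):
    """Write rows with proper quoting via csv.writer."""
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['num', 'title', 'link', 'heat'])
        writer.writerows(rows)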
print("数据已保存到weibo_hot_search.csv文件中。")