"""Scrape a tophub.today hot-search board and save it as a CSV file.

Fetches the Weibo hot-search page, extracts (num, title, link, heat)
from each table row, and writes them to ``weibo_hot_search.csv``.

Fixes over the original:
- Rows are written with the stdlib ``csv`` module so titles containing
  commas or quotes are properly quoted instead of corrupting the file.
- ``href`` defaults to ``''`` when missing (the original could emit the
  literal string "None").
- ``requests.get`` is given a timeout so the script cannot hang forever.
"""
import csv

import requests
from bs4 import BeautifulSoup

# Target board URL (Weibo hot search on tophub.today).
URL = "https://tophub.today/n/KqndgxeLl9"
HEADERS = {
    'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36"
}


def parse_rows(html):
    """Return a list of (num, title, link, heat) tuples parsed from *html*.

    Rows with fewer than three ``<td>`` cells (headers, separators) are
    skipped. ``link`` is the ``href`` of the first anchor inside the
    title cell, or ``''`` when no anchor / no href is present.
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = []
    for tr in soup.find_all('tr'):
        tds = tr.find_all('td')
        if len(tds) < 3:
            continue  # header or malformed row — nothing to extract
        num = tds[0].get_text(strip=True)
        title_td = tds[1]
        anchor = title_td.find('a')
        # Default to '' so a missing href never becomes the string "None".
        link = anchor.get('href', '') if anchor else ''
        title = title_td.get_text(strip=True)
        heat = tds[2].get_text(strip=True)
        rows.append((num, title, link, heat))
    return rows


def main():
    """Fetch the page, parse it, and write the results to a CSV file."""
    # timeout guards against the request hanging indefinitely.
    response = requests.get(URL, headers=HEADERS, timeout=30)
    response.raise_for_status()  # fail fast on non-2xx responses

    rows = parse_rows(response.text)

    # csv.writer handles quoting/escaping of commas and quotes inside
    # titles, which manual f-string concatenation silently corrupted.
    with open('weibo_hot_search.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['num', 'title', 'link', 'heat'])
        writer.writerows(rows)

    print("数据已保存到weibo_hot_search.csv文件中。")


if __name__ == "__main__":
    main()