"""Scrape titles, excerpts, dates and view counts from the fy6b.com home page.

The page is fetched once, parsed with BeautifulSoup, and the four extracted
columns are written row by row to ``fy6b_data.csv`` in the current directory.
"""

import csv

import requests
from bs4 import BeautifulSoup

url = "https://www.fy6b.com/"

headers = {
    # Browser-like User-Agent sent with the request.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0"
    ),
}

# Seconds to wait for the server; without a timeout requests.get can block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10


def _select_texts(document, selector):
    """Return the stripped text of every element matching the CSS selector."""
    return [node.get_text(strip=True) for node in document.select(selector)]


def _value_at(values, index):
    """Return ``values[index]``, or an empty string when the list is too short."""
    return values[index] if index < len(values) else ''


# Send the request and obtain the response.
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of parsing an error page and reporting
# a "successful" export of zero rows.
response.raise_for_status()
response.encoding = "utf-8"

# Parse the HTML content.
soup = BeautifulSoup(response.text, "lxml")

# Extract the data. Each column is collected independently from the page.
# NOTE(review): if an item on the page lacks one of these elements, the
# columns can drift out of alignment -- confirm against the real HTML.
ruanjianname = _select_texts(soup, "h3.item-title")  # titles
raunjianjianjie = _select_texts(soup, "div.item-excerpt")  # excerpts
# The two selectors below may need adjusting to the actual HTML structure.
shijian = _select_texts(soup, "span.item-meta-li.date")  # dates
yuedu = _select_texts(soup, "span.item-meta-li.views")  # view counts

# Write the CSV file: one row per title, with missing cells left empty.
fieldnames = ['App名字', '简介', '时间', '阅读量']
with open('fy6b_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()  # header row
    for i, name in enumerate(ruanjianname):
        writer.writerow({
            'App名字': name,
            # Guarded like the date/view columns so a missing excerpt yields
            # an empty cell instead of an IndexError.
            '简介': _value_at(raunjianjianjie, i),
            '时间': _value_at(shijian, i),
            '阅读量': _value_at(yuedu, i),
        })

print(f"已成功写入 {len(ruanjianname)} 条数据到 fy6b_data.csv 文件中。")