You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

94 lines
3.2 KiB

import requests
from lxml import etree
import re
import csv
from bs4 import BeautifulSoup
# Address of the Sina home page that is downloaded and parsed.
url = "https://www.sina.com.cn/"
# Browser-like User-Agent header sent with the requests in this script.
h = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}
# Fetch the page source and its status code.
# An explicit timeout is needed because requests otherwise waits forever
# on an unresponsive server.
r = requests.get(url, headers=h, timeout=10)
c = r.status_code
# Stop on an HTTP error instead of parsing an error page further down.
r.raise_for_status()
# Force UTF-8 decoding of the body before reading it as text.
r.encoding = "utf-8"
ym = r.text
# Extract the title and link of each headline with a regular expression.
# One pattern captures the href and the anchor text of the same <a> tag, so
# a link can never be paired with another headline's title. The closing
# quote is matched explicitly so the whole URL is captured (the previous
# pattern swallowed the last character of every link).
headline_pattern = (
    r'<a target="_blank" class="linkNewsTopBold" '
    r'href=[\'"](.*?)[\'"][^>]*>(.*?)</a>'
)
tt = [
    {'链接': href, '标题': title}
    for href, title in re.findall(headline_pattern, ym)
]
# Extract news titles and links with XPath.
dom = etree.HTML(ym)
# Iterate over the anchors themselves and read both fields from the same
# element. Querying @href and text() as two separate lists and pairing them
# by index breaks as soon as one anchor has no text node or several.
xw = [
    {
        '标题': "".join(anchor.xpath('text()')),
        '链接': anchor.get('href'),
    }
    for anchor in dom.xpath('//div[@class="top_newslist"]/ul/li/a[@href]')
]
# Save the news entries to a CSV file.
# csv.DictWriter maps each dict onto the columns named in fieldnames;
# newline="" leaves line-ending handling to the csv module.
ttname = "新闻.csv"
# The encoding is given explicitly because the platform default may be
# unable to encode the Chinese text; the BOM written by utf-8-sig lets
# spreadsheet programs detect UTF-8.
with open(ttname, "w", newline="", encoding="utf-8-sig") as file:
    writer = csv.DictWriter(file, fieldnames=["标题", "链接"])
    writer.writeheader()  # header row
    writer.writerows(xw)
# The message now names the news list; it previously mentioned the
# standings list, which is written to a different file.
print(f"已将新闻列表保存到{ttname}文件中。")
# Collect the image links of the "find a car" section into a list,
# skipping <img> elements that have no src attribute.
tp = dom.xpath('//div[@class="carbrand-logo clearfix"]/a/img')
tp_urls = [img.get('src') for img in tp if img.get('src')]
# Download every image referenced in the list.
for tp_url in tp_urls:
    # Links without a scheme get "http:" prepended; links that are already
    # absolute are used unchanged.
    if tp_url.startswith(('http://', 'https://')):
        img_url = tp_url
    else:
        img_url = 'http:' + tp_url
    # The response is not stored under the name `re`, which would shadow
    # the regular-expression module imported at the top of the file.
    img_resp = requests.get(img_url, headers=h, timeout=10)
    img_bytes = img_resp.content
    # Writing the bytes to disk is currently disabled ('wb' = binary write):
    # with open('png' + tp_url.split('/')[-1], 'wb') as f:
    #     f.write(img_bytes)
# Fetch the football team ranking from the Sina sports API and save the
# rank, team name and score of each team to a CSV file.
jfurl = "http://api.sports.sina.com.cn/?p=sports&s=sport_client&a=index&_sport_t_=football&_sport_s_=opta&_sport_a_=teamOrder&type=4"
# Bound the wait and fail early on an HTTP error before decoding JSON.
response = requests.get(jfurl, headers=h, timeout=10)
response.raise_for_status()
data = response.json()
# Parse the data: the team list sits under result -> data. `or` covers keys
# that are present but null, which a .get() default does not.
result = (data.get("result") or {}).get("data") or []
name = "英超积分榜.csv"
# Explicit encoding so the Chinese text does not depend on the platform
# default; the utf-8-sig BOM lets spreadsheet programs detect UTF-8.
with open(name, "w", newline="", encoding="utf-8-sig") as file:
    writer = csv.writer(file)
    writer.writerow(["排名", "球队", "积分"])  # header row
    writer.writerows(
        [team["team_order"], team["team_cn"], team["score"]]
        for team in result
    )