
import requests
from lxml import etree
import re
import csv
from bs4 import BeautifulSoup  # only referenced in the commented-out sketch below
url="https://www.sina.com.cn/"
h={'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'}
# Fetch the page source and the HTTP status code
r = requests.get(url, headers=h)
c = r.status_code
r.encoding = "utf-8"
ym = r.text
# print(r.text, c)
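# Optional robustness sketch (not in the original script): raise_for_status()
# turns a 4xx/5xx response into an exception instead of silently parsing an
# error page, and a timeout keeps the request from hanging indefinitely.
# r = requests.get(url, headers=h, timeout=10)
# r.raise_for_status()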
# Extract the headline news links and titles with regular expressions
tt = []
lj = r'<a target="_blank" class="linkNewsTopBold" href="(.*?)">'
bt = r'<a target="_blank" class="linkNewsTopBold" href=".*?">(.*?)</a>'
resultlj = re.findall(lj, ym)
resultbt = re.findall(bt, ym)
# print(resultlj)
# print(resultbt)
for i in range(len(resultlj)):
    tt.append({
        '链接': resultlj[i],
        '标题': resultbt[i]
    })
# print(*tt, sep="\n")
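# Hedged alternative (a sketch, not part of the original flow): BeautifulSoup,
# already imported above, does the same extraction without a brittle regex by
# selecting the anchors through their class attribute.
# soup = BeautifulSoup(ym, 'lxml')
# for a in soup.find_all('a', class_='linkNewsTopBold'):
#     print(a.get('href'), a.get_text(strip=True))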
# Use XPath to extract the same news titles and links
xw = []
dom = etree.HTML(ym)
lj1 = dom.xpath('//div[@class="top_newslist"]/ul/li/a/@href')
bt1 = dom.xpath('//div[@class="top_newslist"]/ul/li/a/text()')
# print(lj1)
# print(bt1)
for i in range(len(lj1)):
    xw.append({
        '标题': bt1[i],
        '链接': lj1[i]
    })
# print(*xw, sep="\n")
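# A minimal alternative sketch (not original code): zip() pairs the two lists
# directly and stops at the shorter one, so a length mismatch between titles
# and links cannot raise an IndexError.
# xw = [{'标题': t, '链接': l} for t, l in zip(bt1, lj1)]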
# csv.DictWriter is a helper class in the csv module for writing dictionaries
# to a CSV file. The newline="" argument ensures line endings are handled
# correctly across operating systems.
ttname = "新闻.csv"
with open(ttname, "w", newline="", encoding="utf-8-sig") as file:  # utf-8-sig so Excel shows the Chinese headers correctly
    writer = csv.DictWriter(file, fieldnames=["标题", "链接"])
    writer.writeheader()  # write the header row
    for item in xw:
        writer.writerow(item)
print(f"Saved the news list to {ttname}.")
# Collect the image links from the car channel's brand-logo section into a list
tp = dom.xpath('//div[@class="carbrand-logo clearfix"]/a/img')
tp_urls = []
for img in tp:
    tp_url = img.get('src')
    if tp_url:
        tp_urls.append(tp_url)
# for tp_url in tp_urls:
#     print(tp_url)
# # Download and save a single sample image
# urls1 = 'https://k.sinaimg.cn/auto4/autoimg/brand/07/07/64a7d61acc5fc8040707_95.jpg/w49h49l50t50q80a38.jpg'
# resp = requests.get(urls1, headers=h)
# # print(resp.content)
# with open('img.jpg', 'wb') as file:
#     # write the response body to the file
#     file.write(resp.content)
# Fetch the content of every image in the list
for tp_url in tp_urls:
    tp_url1 = 'http:' + tp_url
    resp = requests.get(tp_url1, headers=h)  # renamed from `re`, which shadowed the re module imported above
    a = resp.content
    # print(a)
    # The commented-out lines below would save each image: "w" opens the file
    # for writing and "b" selects binary mode.
    # with open('png' + tp_url.split('/')[-1], 'wb') as f:
    #     f.write(a)
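# A hedged saving sketch (assumption: each URL path ends in a usable file
# name): derive the name from the URL instead of prefixing "png".
# import os
# fname = os.path.basename(tp_url)
# with open(fname, 'wb') as f:
#     f.write(a)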
jfurl = "http://api.sports.sina.com.cn/?p=sports&s=sport_client&a=index&_sport_t_=football&_sport_s_=opta&_sport_a_=teamOrder&type=4"
response = requests.get(jfurl, headers=h)
data = response.json()
# Parse the standings data out of the JSON payload
result = data.get("result", {}).get("data", [])
# for team in result:
#     print(f"Rank: {team['team_order']}, Team: {team['team_cn']}, Points: {team['score']}")
name = "英超积分榜.csv"
with open(name, "w", newline="", encoding="utf-8-sig") as file:
    writer = csv.writer(file)
    writer.writerow(["排名", "球队", "积分"])  # write the header row
    for team in result:
        writer.writerow([team["team_order"], team["team_cn"], team["score"]])
# print(f"Saved the standings table to {name}.")