"""Scrape the Sina home page and a Sina sports API, saving the results as CSV.

Running this file as a script performs, in order:

1. Download https://www.sina.com.cn/ and decode it as UTF-8.
2. Extract the headline titles/links (regex demo and XPath).
3. Write the XPath headlines to ``新闻.csv`` and print a confirmation.
4. Collect the logo image URLs of the car-brand section and download them.
5. Fetch a team ranking from the sports API and write it to ``英超积分榜.csv``.
"""
# Source: 13.py (recovered from a git diff whose lines had been collapsed).

import csv
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup  # noqa: F401  (imported by the original script, unused)
from lxml import etree

PAGE_URL = "https://www.sina.com.cn/"
HEADERS = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}
# Seconds to wait for a server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# NOTE(review): both patterns match only empty strings, so the regex-based
# extraction yields one empty pair per character position. They look like
# HTML-anchored patterns whose markup was lost -- TODO restore the originals.
LINK_PATTERN = r''
TITLE_PATTERN = r'(.*?)'

HEADLINE_ANCHOR_XPATH = '//div[@class="top_newslist"]/ul/li/a'
CAR_LOGO_XPATH = '//div[@class="carbrand-logo clearfix"]/a/img'

STANDINGS_API_URL = "http://api.sports.sina.com.cn/?p=sports&s=sport_client&a=index&_sport_t_=football&_sport_s_=opta&_sport_a_=teamOrder&type=4"

NEWS_CSV = "新闻.csv"
STANDINGS_CSV = "英超积分榜.csv"
# UTF-8 with a BOM: writing the Chinese text no longer depends on the
# platform's default encoding, and the BOM marks the file as UTF-8.
CSV_ENCODING = "utf-8-sig"


def fetch_page(url):
    """Return the body of *url* decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = "utf-8"
    return response.text


def extract_headlines_regex(html):
    """Pair up the matches of LINK_PATTERN and TITLE_PATTERN found in *html*.

    Returns a list of ``{'链接': link, '标题': title}`` dicts. ``zip`` stops at
    the shorter match list, so unequal match counts cannot raise IndexError.
    """
    links = re.findall(LINK_PATTERN, html)
    titles = re.findall(TITLE_PATTERN, html)
    return [{'链接': link, '标题': title} for link, title in zip(links, titles)]


def extract_headlines_xpath(dom):
    """Return ``{'标题': title, '链接': link}`` dicts for the headline anchors.

    Title and link are read from the *same* ``<a>`` element, so an anchor
    without an href or without direct text is skipped instead of shifting
    every following title onto the wrong link.
    """
    headlines = []
    for anchor in dom.xpath(HEADLINE_ANCHOR_XPATH):
        link = anchor.get("href")
        title = "".join(anchor.xpath("text()"))
        if link is not None and title:
            headlines.append({'标题': title, '链接': link})
    return headlines


def write_headlines_csv(path, headlines):
    """Write *headlines* to *path* with a ``标题,链接`` header row."""
    # newline="" lets the csv module control line endings itself.
    with open(path, "w", newline="", encoding=CSV_ENCODING) as file:
        writer = csv.DictWriter(file, fieldnames=["标题", "链接"])
        writer.writeheader()
        writer.writerows(headlines)


def extract_logo_urls(dom, base_url):
    """Return absolute URLs of the ``<img>`` tags in the car-brand section.

    ``src`` values are resolved against *base_url*, which handles
    protocol-relative (``//host/x.jpg``), path-relative and already-absolute
    values alike; images without a ``src`` are ignored.
    """
    urls = []
    for img in dom.xpath(CAR_LOGO_XPATH):
        src = img.get("src")
        if src:
            urls.append(urljoin(base_url, src))
    return urls


def download_images(image_urls):
    """Download every URL in *image_urls* and return the raw bytes in order.

    The bytes are not written to disk (the original script had that step
    commented out); to save one, open a file in ``'wb'`` mode and write it.
    """
    contents = []
    for image_url in image_urls:
        # Named image_response, not "re", so the regex module is not shadowed.
        image_response = requests.get(
            image_url, headers=HEADERS, timeout=REQUEST_TIMEOUT
        )
        contents.append(image_response.content)
    return contents


def fetch_standings(api_url):
    """Return the list found at ``result.data`` in the JSON served by *api_url*.

    A missing or null ``result``/``data`` yields an empty list.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
        ValueError: if the response body is not valid JSON.
    """
    response = requests.get(api_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    payload = response.json()
    return (payload.get("result") or {}).get("data") or []


def write_standings_csv(path, teams):
    """Write rank, team name and score of each entry in *teams* to *path*.

    Each entry must provide the keys ``team_order``, ``team_cn`` and ``score``.
    """
    with open(path, "w", newline="", encoding=CSV_ENCODING) as file:
        writer = csv.writer(file)
        writer.writerow(["排名", "球队", "积分"])
        writer.writerows(
            [team["team_order"], team["team_cn"], team["score"]] for team in teams
        )


def main():
    """Run the scraping steps in the same order as the original flat script."""
    html = fetch_page(PAGE_URL)

    # Regex variant of the headline extraction; its result is not exported.
    regex_headlines = extract_headlines_regex(html)  # noqa: F841

    dom = etree.HTML(html)
    headlines = extract_headlines_xpath(dom)
    write_headlines_csv(NEWS_CSV, headlines)
    # The original message wrongly called this the "standings list" (积分列表).
    print(f"已将新闻列表保存到{NEWS_CSV}文件中。")

    logo_urls = extract_logo_urls(dom, PAGE_URL)
    download_images(logo_urls)

    teams = fetch_standings(STANDINGS_API_URL)
    write_standings_csv(STANDINGS_CSV, teams)


if __name__ == "__main__":
    main()