"""Scrape sina.com.cn headlines and car-brand logos, plus a football standings API.

Running this script:

1. Downloads the Sina home page.
2. Extracts the headline links/titles from the ``top_newslist`` block and
   writes them to ``新闻.csv``.
3. Collects the image URLs found in the ``carbrand-logo`` block and fetches
   each image.
4. Fetches a team-order listing from the Sina sports API and writes it to
   ``英超积分榜.csv``.
"""
import csv
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from lxml import etree

# Seconds to wait for any single HTTP request before giving up; without a
# timeout `requests` may block forever on a stalled connection.
REQUEST_TIMEOUT = 10

url = "https://www.sina.com.cn/"
h = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'}


def _absolute_image_url(src):
    """Return a fetchable URL for an ``<img src>`` value taken from the home page."""
    if src.startswith("//"):
        # Protocol-relative URL: keep the plain-HTTP scheme the script has always used.
        return "http:" + src
    # Already-absolute URLs pass through unchanged; relative paths are resolved
    # against the home page.
    return urljoin(url, src)


# --- Fetch the page source and status code --------------------------------
r = requests.get(url, headers=h, timeout=REQUEST_TIMEOUT)
c = r.status_code
r.encoding = "utf-8"
ym = r.text

# --- Headline titles and links via regular expressions ---------------------
# NOTE(review): both patterns look like they lost their HTML markup (the link
# pattern is empty), so they match at every position and `tt` carries no
# useful data. TODO: restore the intended patterns.
lj = r''
bt = r'(.*?)'
resultlj = re.findall(lj, ym)
resultbt = re.findall(bt, ym)
# zip() pairs the two result lists without risking an IndexError when they
# differ in length.
tt = [
    {'链接': link, '标题': title}
    for link, title in zip(resultlj, resultbt)
]

# --- Headline titles and links via XPath ------------------------------------
dom = etree.HTML(ym)
# Read the href and the title from the *same* <a> element. Querying hrefs and
# text nodes as two separate lists and pairing them by index misaligns rows
# (or raises IndexError) as soon as one anchor has no text or several text nodes.
xw = [
    {'标题': anchor.xpath('string(.)'), '链接': anchor.get('href')}
    for anchor in dom.xpath('//div[@class="top_newslist"]/ul/li/a[@href]')
]

# newline="" lets the csv module control line endings itself; the explicit
# encoding keeps the Chinese text intact regardless of the platform locale.
ttname = "新闻.csv"
with open(ttname, "w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=["标题", "链接"])
    writer.writeheader()
    writer.writerows(xw)
print(f"已将新闻列表保存到{ttname}文件中。")

# --- Image links from the car-brand section ---------------------------------
tp = dom.xpath('//div[@class="carbrand-logo clearfix"]/a/img')
tp_urls = [img.get('src') for img in tp if img.get('src')]

# Fetch every collected image. The bytes are currently not persisted; write
# `image_bytes` to a file opened in "wb" mode to save them.
for tp_url in tp_urls:
    # Named `image_response` (not `re`) so the regex module is not shadowed.
    image_response = requests.get(
        _absolute_image_url(tp_url), headers=h, timeout=REQUEST_TIMEOUT
    )
    image_bytes = image_response.content

# --- Football standings from the sports API ---------------------------------
jfurl = "http://api.sports.sina.com.cn/?p=sports&s=sport_client&a=index&_sport_t_=football&_sport_s_=opta&_sport_a_=teamOrder&type=4"
response = requests.get(jfurl, headers=h, timeout=REQUEST_TIMEOUT)
data = response.json()

# Tolerate a missing or null "result"/"data" entry by falling back to no rows.
result = (data.get("result") or {}).get("data") or []

name = "英超积分榜.csv"
with open(name, "w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["排名", "球队", "积分"])  # header row
    writer.writerows(
        [team["team_order"], team["team_cn"], team["score"]] for team in result
    )