"""Scrape the table at boxofficecn.com/the-red-box-office into ./movie.text.

Every ``<tr>`` under a ``<tbody>`` yields one output line holding the text of
its ``column-1`` .. ``column-4`` cells (named year, name, cast and num below),
joined with ``---``.
"""
import requests
from lxml import etree

data_list = []


def _first_text(row, *xpaths):
    """Return the first text node matched by any of *xpaths*, or "" if none.

    The expressions are tried in order, so the preferred location is listed
    first and progressively looser fallbacks after it.  Returning "" instead
    of indexing blindly keeps one malformed row from aborting the whole run.
    """
    for xpath in xpaths:
        found = row.xpath(xpath)
        if found:
            return found[0]
    return ""


def _cell_text(row, column):
    """Return the text of the ``column-<column>`` cell of *row*.

    Lookup order: text directly inside the cell, then text inside a ``<font>``
    child (the only nested case the page was known to use, for column 4),
    then any descendant text as a last resort.
    """
    cell = f"./td[@class='column-{column}']"
    return _first_text(
        row,
        f"{cell}/text()",
        f"{cell}/font/text()",
        f"{cell}//text()",
    )


# Fetch the data.
url = "http://www.boxofficecn.com/the-red-box-office"
headers = {
    'User-Agent': (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/86.0.4240.111 Safari/537.36"
    )
}
# Request the ranking page with a GET.  The timeout stops the script from
# hanging forever on an unresponsive server.
r = requests.get(url=url, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx rather than parsing an error page into an empty file.
r.raise_for_status()
r.encoding = 'utf-8'
html = r.text
doc = etree.HTML(html)
trs = doc.xpath("//tbody/tr")
for tr in trs:
    year = _cell_text(tr, 1)
    name = _cell_text(tr, 2)
    cast = _cell_text(tr, 3)
    num = _cell_text(tr, 4)
    # Rows with none of the four cells (e.g. spacer rows) carry no data.
    if not (year or name or cast or num):
        continue
    data_list.append([year, name, cast, num])

# Format each record as a single "---"-separated line.
data_list_new = [
    f"{data[0]}---{data[1]}---{data[2]}---{data[3]}" for data in data_list
]

# Write the lines to disk.  The encoding is explicit because the scraped text
# is non-ASCII and the platform default encoding may be unable to represent it.
with open("./movie.text", "w", encoding="utf-8") as f:
    f.writelines(f"{line}\n" for line in data_list_new)