You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

34 lines
877 B

import requests
from lxml import etree
# Page to scrape and the browser-like headers sent with the request.
url = 'https://www.maoyan.com/board/4?offset=0'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}

# Absolute XPath expressions for the three text fields of every <dd> entry.
# NOTE(review): absolute paths break as soon as the page layout changes;
# consider class-based selectors once the markup has been confirmed.
TITLE_XPATH = '/html/body/div[4]/div/div/div[1]/dl/dd/div/div/div[1]/p[1]/a/text()'
ACTOR_XPATH = '/html/body/div[4]/div/div/div[1]/dl/dd/div/div/div[1]/p[2]/text()'
TIME_XPATH = '/html/body/div[4]/div/div/div[1]/dl/dd/div/div/div[1]/p[3]/text()'

# Number of leading characters dropped from the stripped actor / time text.
# Presumably these are the label prefixes shown on the page -- TODO confirm.
ACTOR_PREFIX_LEN = 3
TIME_PREFIX_LEN = 5

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def build_items(titles, actors, times):
    """Combine three parallel lists of scraped text into one dict per entry.

    Each value is stripped of surrounding whitespace; the actor and time
    texts additionally lose their first 3 and 5 characters respectively.

    Args:
        titles: Raw title strings, one per entry.
        actors: Raw actor strings, in the same order as ``titles``.
        times: Raw time strings, in the same order as ``titles``.

    Returns:
        A list of ``{'title', 'actor', 'time'}`` dicts. If the inputs differ
        in length, the result stops at the shortest one.
    """
    # Distinct per-item names keep the input lists intact across iterations;
    # rebinding the list names inside the loop corrupted every entry after
    # the first one.
    return [
        {
            'title': title.strip(),
            'actor': actor.strip()[ACTOR_PREFIX_LEN:],
            'time': release.strip()[TIME_PREFIX_LEN:],
        }
        for title, actor, release in zip(titles, actors, times)
    ]


def fetch_board(page_url=url, request_headers=headers):
    """Download the page and return its entries as a list of dicts.

    Args:
        page_url: Address of the page to download.
        request_headers: HTTP headers sent with the request.

    Returns:
        The list produced by :func:`build_items` for the downloaded page.

    Raises:
        requests.RequestException: On connection problems, a timeout, or a
            non-success HTTP status.
    """
    response = requests.get(
        url=page_url, headers=request_headers, timeout=REQUEST_TIMEOUT
    )
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    tree = etree.HTML(response.text)
    return build_items(
        tree.xpath(TITLE_XPATH),
        tree.xpath(ACTOR_XPATH),
        tree.xpath(TIME_XPATH),
    )


def main():
    """Fetch the page and print one dict per entry."""
    for item in fetch_board():
        print(item)


if __name__ == '__main__':
    main()