You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
34 lines
877 B
34 lines
877 B
import requests
|
|
from lxml import etree
|
|
|
|
# Page to scrape and the request headers sent with it. The User-Agent is a
# desktop browser string.
url = 'https://www.maoyan.com/board/4?offset=0'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0',
}

# Absolute XPath expressions selecting the text nodes of each entry's three
# <p> elements. They are tied to the exact page layout and will return empty
# lists if that layout changes.
TITLE_XPATH = '/html/body/div[4]/div/div/div[1]/dl/dd/div/div/div[1]/p[1]/a/text()'
ACTOR_XPATH = '/html/body/div[4]/div/div/div[1]/dl/dd/div/div/div[1]/p[2]/text()'
TIME_XPATH = '/html/body/div[4]/div/div/div[1]/dl/dd/div/div/div[1]/p[3]/text()'

# Number of leading characters dropped from the stripped actor / time text.
# NOTE(review): presumably these skip a fixed label prefix on the page
# (3 and 5 characters long respectively) -- confirm against the live markup.
ACTOR_PREFIX_LEN = 3
TIME_PREFIX_LEN = 5

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def build_items(titles, actors, times):
    """Combine parallel lists of raw text into a list of record dicts.

    Each value is stripped of surrounding whitespace; the actor and time
    texts additionally lose their first ``ACTOR_PREFIX_LEN`` /
    ``TIME_PREFIX_LEN`` characters.

    Args:
        titles: Raw title strings.
        actors: Raw actor strings, parallel to ``titles``.
        times: Raw time strings, parallel to ``titles``.

    Returns:
        A list of ``{'title', 'actor', 'time'}`` dicts, one per entry. If the
        inputs differ in length, the extra trailing entries are ignored.
    """
    # Per-entry names are kept distinct from the input lists: rebinding the
    # lists inside the loop would corrupt every entry after the first.
    return [
        {
            'title': title_text.strip(),
            'actor': actor_text.strip()[ACTOR_PREFIX_LEN:],
            'time': time_text.strip()[TIME_PREFIX_LEN:],
        }
        for title_text, actor_text, time_text in zip(titles, actors, times)
    ]


def fetch_items(page_url=url, request_headers=None):
    """Download ``page_url`` and return the records extracted from it.

    Args:
        page_url: Address of the page to fetch; defaults to ``url``.
        request_headers: HTTP headers to send; defaults to ``headers``.

    Returns:
        The list produced by ``build_items`` for the fetched page.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    if request_headers is None:
        request_headers = headers
    response = requests.get(
        url=page_url,
        headers=request_headers,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on 4xx/5xx rather than parsing an error page into nothing.
    response.raise_for_status()

    tree = etree.HTML(response.text)
    return build_items(
        tree.xpath(TITLE_XPATH),
        tree.xpath(ACTOR_XPATH),
        tree.xpath(TIME_XPATH),
    )


def main():
    """Fetch the page and print one record dict per line."""
    for item in fetch_items():
        print(item)


if __name__ == '__main__':
    main()