You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

94 lines
2.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"下载完成!\n"
]
}
],
"source": [
"import requests\n",
"import re\n",
"import csv, time\n",
"\n",
"# Build a timestamped output file name and create the CSV with its header row.\n",
"# strftime formats the current local time when no time tuple is passed.\n",
"timenow = time.strftime(\"%Y-%m-%d-%H%M%S\")\n",
"file = '猫眼电影top100榜-%s.csv' % timenow\n",
"header_row = ['排名', '电影名称', '主演', '上映时间', '评分']\n",
"# Write the header row (the file is encoded as gb18030).\n",
"with open(file, 'a+', encoding='gb18030', newline='') as f:\n",
"    writer_f = csv.writer(f)\n",
"    writer_f.writerow(header_row)\n",
"\n",
"\n",
"# Scrape the movie entries from a single board page.\n",
"def get_one_page(url):\n",
"    \"\"\"Download ``url`` and append every movie found on it to the CSV ``file``.\n",
"\n",
"    Each regex match yields six captured groups: rank, title, cast,\n",
"    release time, and the integer and fraction parts of the score. The two\n",
"    score parts are concatenated into one string. Every row is printed and\n",
"    then appended to the module-level ``file`` (gb18030 encoded).\n",
"\n",
"    Args:\n",
"        url: Address of the page to download.\n",
"\n",
"    Raises:\n",
"        requests.exceptions.RequestException: If the request fails or does\n",
"            not complete within the timeout.\n",
"    \"\"\"\n",
"    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'}\n",
"    # Without a timeout a stalled connection would hang the notebook forever.\n",
"    response = requests.get(url, headers=headers, timeout=10).text\n",
"\n",
"    pattern = re.compile(\n",
"        '<dd>.*?board-index.*?>(.*?)</i>.*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>',\n",
"        re.S)\n",
"    items = pattern.findall(response)\n",
"    if not items:\n",
"        # Nothing matched on this page, so there is nothing to write.\n",
"        return\n",
"\n",
"    # Open the CSV once per page instead of once per row.\n",
"    with open(file, 'a+', encoding='gb18030', newline='') as f:\n",
"        writer = csv.writer(f)\n",
"        for rank, title, cast, release, score_int, score_frac in items:\n",
"            row = [rank, title, cast.strip(), release, score_int + score_frac]\n",
"            print(*row)\n",
"            writer.writerow(row)\n",
"\n",
"\n",
"# The board has 10 pages (offsets 0, 10, ..., 90); fetch and save each one.\n",
"for i in range(10):\n",
"    page = i * 10\n",
"    url = 'https://maoyan.com/board/4?offset=' + str(page)\n",
"    # The fetch and the pause must run inside the loop; at column 0 they\n",
"    # would execute only once, after the loop, for the last page (offset=90).\n",
"    get_one_page(url)\n",
"    # Pause one second between requests.\n",
"    time.sleep(1)\n",
"print('下载完成!')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}