You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

94 lines
2.6 KiB

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"下载完成!\n"
]
}
],
"source": [
"import requests\n",
"import re\n",
"import csv, time\n",
"\n",
"# 创建一个保存的csv文件并设置好表头\n",
"timenow = time.strftime(\"%Y-%m-%d-%H%M%S\", time.localtime())\n",
"file = '猫眼电影top100榜-%s.csv' % (timenow)\n",
"# 写入表头\n",
"with open(file, 'a+', encoding='gb18030', newline='') as f:\n",
" writer_f = csv.writer(f)\n",
" writer_f.writerow(['排名', '电影名称', '主演', '上映时间', '评分'])\n",
"\n",
"\n",
"# 定义一个爬取其中一页的电影信息\n",
"def get_one_page(url):\n",
" headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'}\n",
" response = requests.get(url, headers=headers).text\n",
"\n",
" pattern = re.compile(\n",
" '<dd>.*?board-index.*?>(.*?)</i>.*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>',\n",
" re.S)\n",
" items = re.findall(pattern, response)\n",
" # print(items)\n",
" for item in items:\n",
" pm = item[0]\n",
" mc = item[1]\n",
" zy = item[2].strip()\n",
" sj = item[3]\n",
" pf = item[4] + item[5]\n",
" print(pm, mc, zy, sj, pf)\n",
" # 写入到csv文件\n",
" with open(file, 'a+', encoding='gb18030', newline='') as f:\n",
" writer = csv.writer(f)\n",
" writer.writerow([pm, mc, zy, sj, pf])\n",
"\n",
"\n",
"# 共有10个分页分批写入\n",
"for i in range(10):\n",
" page = i * 10\n",
" url = 'https://maoyan.com/board/4?offset=' + str(page)\n",
"get_one_page(url)\n",
"time.sleep(1)\n",
"print('下载完成!')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}