diff --git a/zh.ipynb b/zh.ipynb
new file mode 100644
index 0000000..d6f8521
--- /dev/null
+++ b/zh.ipynb
@@ -0,0 +1,101 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "下载完成!\n"
+     ]
+    }
+   ],
+   "source": [
+    "import requests\n",
+    "import re\n",
+    "import csv, time\n",
+    "\n",
+    "# Create the output CSV file; its name carries the current timestamp\n",
+    "timenow = time.strftime(\"%Y-%m-%d-%H%M%S\", time.localtime())\n",
+    "file = '猫眼电影top100榜-%s.csv' % (timenow)\n",
+    "# Write the header row\n",
+    "with open(file, 'a+', encoding='gb18030', newline='') as f:\n",
+    "    writer_f = csv.writer(f)\n",
+    "    writer_f.writerow(['排名', '电影名称', '主演', '上映时间', '评分'])\n",
+    "\n",
+    "\n",
+    "# Scrape the movie entries from a single page of the board\n",
+    "def get_one_page(url):\n",
+    "    \"\"\"Fetch one board page, print each movie found and append it to the CSV.\n",
+    "\n",
+    "    url: address of the board page to download.\n",
+    "    Rows are appended to the module-level `file`; nothing is returned.\n",
+    "    \"\"\"\n",
+    "    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'}\n",
+    "    # Without a timeout a stalled connection would block the run forever\n",
+    "    response = requests.get(url, headers=headers, timeout=10).text\n",
+    "\n",
+    "    # Six groups per <dd> entry: rank, title, cast, release time, and the\n",
+    "    # integer and fractional halves of the score\n",
+    "    pattern = re.compile(\n",
+    "        r'<dd>.*?board-index.*?>(.*?)</i>.*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>',\n",
+    "        re.S)\n",
+    "    items = pattern.findall(response)\n",
+    "    # Open the CSV once per page rather than once per row\n",
+    "    with open(file, 'a+', encoding='gb18030', newline='') as f:\n",
+    "        writer = csv.writer(f)\n",
+    "        for item in items:\n",
+    "            pm = item[0]\n",
+    "            mc = item[1]\n",
+    "            zy = item[2].strip()\n",
+    "            sj = item[3]\n",
+    "            pf = item[4] + item[5]\n",
+    "            print(pm, mc, zy, sj, pf)\n",
+    "            writer.writerow([pm, mc, zy, sj, pf])\n",
+    "\n",
+    "\n",
+    "# The board is split into 10 pages; the offset advances by 10 per page\n",
+    "for i in range(10):\n",
+    "    page = i * 10\n",
+    "    url = 'https://maoyan.com/board/4?offset=' + str(page)\n",
+    "    # Fetch inside the loop so every page is scraped, not only the last one\n",
+    "    get_one_page(url)\n",
+    "    time.sleep(1)\n",
+    "print('下载完成!')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}