From af474de4c1eaafc1ed9d1374265a09999c417f34 Mon Sep 17 00:00:00 2001
From: p7mq6zalo <2715209264@qq.com>
Date: Fri, 15 Apr 2022 23:55:22 +0800
Subject: [PATCH] ADD file via upload
---
zh.ipynb | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 93 insertions(+)
create mode 100644 zh.ipynb
diff --git a/zh.ipynb b/zh.ipynb
new file mode 100644
index 0000000..d6f8521
--- /dev/null
+++ b/zh.ipynb
@@ -0,0 +1,93 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "下载完成!\n"
+ ]
+ }
+ ],
+ "source": [
+ "import requests\n",
+ "import re\n",
+ "import csv, time\n",
+ "\n",
+ "# Name the output CSV after the current local time, then create it with a header row.\n",
+ "timenow = time.strftime(\"%Y-%m-%d-%H%M%S\")\n",
+ "file = '猫眼电影top100榜-%s.csv' % timenow\n",
+ "# Header columns: rank, movie title, starring, release time, score.\n",
+ "with open(file, 'a+', encoding='gb18030', newline='') as f:\n",
+ "    writer_f = csv.writer(f)\n",
+ "    writer_f.writerow(['排名', '电影名称', '主演', '上映时间', '评分'])\n",
+ "\n",
+ "\n",
+ "# 定义一个爬取其中一页的电影信息\n",
+ "def get_one_page(url):\n",
+ "    \"\"\"Fetch one board page, print each movie found, and append it to the CSV named by `file`.\"\"\"\n",
+ "    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0'}\n",
+ "    # Bound the request so a stalled connection cannot hang the whole run.\n",
+ "    response = requests.get(url, headers=headers, timeout=10).text\n",
+ "\n",
+ "    # Six capture groups per <dd> entry: rank, title, stars, release time, score integer, score fraction.\n",
+ "    pattern = re.compile(\n",
+ "        '<dd>.*?board-index.*?>(.*?)</i>.*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>',\n",
+ "        re.S)\n",
+ "    items = re.findall(pattern, response)\n",
+ "    # Open the CSV once per page rather than once per movie.\n",
+ "    with open(file, 'a+', encoding='gb18030', newline='') as f:\n",
+ "        writer = csv.writer(f)\n",
+ "        for item in items:\n",
+ "            pm, mc, zy, sj = item[0], item[1], item[2].strip(), item[3]\n",
+ "            # The score is captured as separate integer and fraction parts; join them.\n",
+ "            pf = item[4] + item[5]\n",
+ "            print(pm, mc, zy, sj, pf)\n",
+ "            writer.writerow([pm, mc, zy, sj, pf])\n",
+ "\n",
+ "\n",
+ "# Ten result pages; the offset advances by 10 per page. Each page is fetched and written in turn.\n",
+ "for i in range(10):\n",
+ "    page = i * 10\n",
+ "    url = 'https://maoyan.com/board/4?offset=' + str(page)\n",
+ "    get_one_page(url)  # must run inside the loop, otherwise only the last offset is fetched\n",
+ "    time.sleep(1)  # pause between requests\n",
+ "print('下载完成!')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "collapsed": true
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}