|
|
|
|
@ -0,0 +1,160 @@
|
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "2f1c464b",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
"# Import the required libraries\n",
"import requests\n",
"from lxml import etree\n",
"from matplotlib import font_manager\n",
"from matplotlib import pyplot as plt\n",
"\n",
"# Request headers: send a browser-like User-Agent to reduce the chance of being blocked as a bot\n",
"headers = {\n",
"    \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36\"\n",
"}\n",
"\n",
"# The loop below requests PAGE_COUNT pages (offset 0, 10, ..., 90) and reads\n",
"# MOVIES_PER_PAGE movies from each; every movie contributes FIELDS_PER_MOVIE values.\n",
"PAGE_COUNT = 10\n",
"MOVIES_PER_PAGE = 10\n",
"FIELDS_PER_MOVIE = 4\n",
"\n",
"# One entry per page. Each entry is a flat list laid out as\n",
"# [rank, name, release time, stars, rank, name, release time, stars, ...].\n",
"message = []\n",
"\n",
"for i in range(PAGE_COUNT):\n",
"    url = \"https://maoyan.com/board/4?offset=\" + str(i * MOVIES_PER_PAGE)\n",
"    # The timeout keeps the cell from hanging on a stalled connection, and\n",
"    # raise_for_status() reports an HTTP error here rather than as an IndexError below.\n",
"    response = requests.get(url, headers=headers, timeout=10)\n",
"    response.raise_for_status()\n",
"    html = response.text\n",
"\n",
"    data = etree.HTML(html)  # parse the page into an element tree\n",
"\n",
"    # Each XPath query is evaluated once per page; its result does not depend on h.\n",
"    number = data.xpath('//div[@class=\"main\"]/dl/dd/i/text()')  # rank\n",
"    name = data.xpath('//div[@class=\"movie-item-info\"]//p[@class=\"name\"]/a/text()')  # title\n",
"    release_time = data.xpath('//p[@class=\"releasetime\"]/text()')  # release time\n",
"    star = data.xpath('//p[@class=\"star\"]/text()')  # starring\n",
"    # The score is not collected: the regrouping step relies on exactly\n",
"    # FIELDS_PER_MOVIE values per movie.\n",
"\n",
"    # Fail with a clear message when the page does not contain the expected entries.\n",
"    if min(len(number), len(name), len(release_time), len(star)) < MOVIES_PER_PAGE:\n",
"        raise RuntimeError(\"Fewer than %d movies parsed from %s\" % (MOVIES_PER_PAGE, url))\n",
"\n",
"    passage = []\n",
"    for h in range(MOVIES_PER_PAGE):\n",
"        passage.append(number[h])\n",
"        passage.append(name[h])\n",
"        passage.append(release_time[h])\n",
"        passage.append(star[h])\n",
"    # Append once per page, after the inner loop. Appending inside the inner loop\n",
"    # would add the same page list 10 times and inflate every count tenfold.\n",
"    message.append(passage)\n",
"\n",
"# Split each flat page list into one [rank, name, release time, stars] list per movie.\n",
"every_movie_list = []\n",
"for every_html in message:\n",
"    for start in range(0, len(every_html), FIELDS_PER_MOVIE):\n",
"        every_movie_list.append(every_html[start:start + FIELDS_PER_MOVIE])\n",
"\n",
"# Characters 5-8 of the release-time text are taken as the 4-digit year.\n",
"# NOTE(review): this assumes a 5-character label precedes the date - confirm against the page.\n",
"timelist = [every_movie[2][5:9] for every_movie in every_movie_list]\n",
"\n",
"# (inclusive upper bound, chart label) for each release-year bucket, in ascending order.\n",
"year_buckets = [\n",
"    (1950, \"1950年之前\"),\n",
"    (1960, \"1950-1960年\"),\n",
"    (1970, \"1960-1970年\"),\n",
"    (1980, \"1970-1980年\"),\n",
"    (1990, \"1980-1990年\"),\n",
"    (2000, \"1990-2000年\"),\n",
"    (2010, \"2000-2010年\"),\n",
"    (2020, \"2010-2020年\"),\n",
"]\n",
"years_by_bucket = [[] for _ in year_buckets]\n",
"later_years = []  # years above the last upper bound; reported below, not plotted\n",
"\n",
"for year in timelist:\n",
"    num = int(year)\n",
"    # Put the year into the first bucket whose upper bound it does not exceed.\n",
"    for bucket_index, (upper_bound, _label) in enumerate(year_buckets):\n",
"        if num <= upper_bound:\n",
"            years_by_bucket[bucket_index].append(year)\n",
"            break\n",
"    else:\n",
"        later_years.append(year)\n",
"\n",
"# Show the years collected in each bucket, then the size of each bucket.\n",
"for bucket_years in years_by_bucket:\n",
"    print(bucket_years)\n",
"\n",
"for bucket_years in years_by_bucket:\n",
"    print(len(bucket_years))\n",
"\n",
"# Make years that fall outside every bucket visible instead of dropping them silently.\n",
"if later_years:\n",
"    print(\"Years after 2020 (not plotted):\", later_years)\n",
"\n",
"# Bar chart of the number of movies per release period\n",
"# NOTE: this font path exists only on Windows; use another CJK-capable font elsewhere.\n",
"# The raw string keeps the backslashes literal (no invalid-escape warnings).\n",
"my_font = font_manager.FontProperties(fname=r\"C:\\Windows\\Fonts\\SimSun.ttc\")\n",
"\n",
"x = [label for _upper_bound, label in year_buckets]\n",
"# Bar heights come from the scraped data rather than hard-coded numbers.\n",
"y = [len(bucket_years) for bucket_years in years_by_bucket]\n",
"\n",
"fig = plt.figure(figsize=(13,8),dpi=80)\n",
"ax = fig.add_subplot(1,1,1)\n",
"plt.bar(range(len(x)),y,width=0.5,color=\"pink\")\n",
"plt.xticks(range(len(x)),x,fontproperties=my_font,rotation=0)\n",
"plt.grid(alpha=0.4)\n",
"ax.set_title('各年份电影上架数量条形图',fontproperties=my_font)\n",
"ax.set_xlabel('年份时间段',fontproperties=my_font)\n",
"\n",
"plt.savefig(\"./年份条形图\")\n",
"plt.show()"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.9.7"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
}
|