|
|
|
|
@ -0,0 +1,164 @@
|
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "9a5582a5",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import re\n",
|
|
|
|
|
"import pandas\n",
|
|
|
|
|
"import requests\n",
|
|
|
|
|
"from bs4 import BeautifulSoup\n",
|
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
|
"from matplotlib import font_manager\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def get_html(url, timeout=10):\n",
"    \"\"\"Download a page and return its decoded text.\n",
"\n",
"    Args:\n",
"        url: Address of the page to fetch.\n",
"        timeout: Seconds to wait for the server before giving up.\n",
"\n",
"    Returns:\n",
"        The response body as text, or the sentinel string '错误' when the\n",
"        request fails or the server answers with an error status.\n",
"    \"\"\"\n",
"    try:\n",
"        # Without a timeout requests.get() may block forever on a stalled server.\n",
"        r = requests.get(url, timeout=timeout)\n",
"        r.raise_for_status()  # raises HTTPError for 4xx/5xx responses\n",
"        r.encoding = r.apparent_encoding  # decode with the encoding guessed from the body\n",
"        return r.text\n",
"    except requests.RequestException as exc:\n",
"        # Only network/HTTP failures are expected here; anything else is a\n",
"        # programming error and should surface instead of being swallowed.\n",
"        print('请求失败:', exc)\n",
"        return '错误'\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def _to_wan(text):\n",
"    \"\"\"Convert a counter such as '1.2亿', '345.6万' or '9876' to units of 万.\n",
"\n",
"    Args:\n",
"        text: Raw text of one counter; surrounding whitespace is allowed.\n",
"\n",
"    Returns:\n",
"        The count as a float expressed in 万 (ten thousands).\n",
"\n",
"    Raises:\n",
"        ValueError: If ``text`` contains no number.\n",
"    \"\"\"\n",
"    match = re.search(r'\\d+(?:\\.\\d+)?', text)\n",
"    if match is None:\n",
"        raise ValueError('无法解析数值: %r' % text)\n",
"    value = float(match.group())\n",
"    if '亿' in text:\n",
"        return value * 10000  # 1 亿 = 10000 万\n",
"    if '万' in text:\n",
"        return value\n",
"    # No unit suffix: a raw count, converted to 万 and kept to one decimal.\n",
"    return round(value / 10000, 1)\n",
"\n",
"\n",
"def save(html):\n",
"    \"\"\"Parse the ranking page, export the extracted data and return it.\n",
"\n",
"    Side effects: writes the page text to '1.txt', writes the table to\n",
"    'Dongman.xlsx' and prints every extracted list.\n",
"\n",
"    Args:\n",
"        html: HTML source of the ranking page.\n",
"\n",
"    Returns:\n",
"        A tuple ``(name, bfl, pls, scs, TScore)``: titles, play counts,\n",
"        comment counts and favourite counts (all three in 万), and the\n",
"        overall scores.\n",
"    \"\"\"\n",
"    soup = BeautifulSoup(html, 'html.parser')\n",
"\n",
"    # 'w' creates the file if needed and discards any previous content.\n",
"    with open('1.txt', 'w', encoding='UTF-8') as f:\n",
"        f.write(soup.text)\n",
"\n",
"    # Titles: text of the first link inside every 'info' block.\n",
"    name = [str(tag.a.string) for tag in soup.find_all('div', class_='info')]\n",
"    print(name)\n",
"\n",
"    bfl = []  # play counts\n",
"    pls = []  # comment counts\n",
"    scs = []  # favourite counts\n",
"    for tag in soup.find_all('div', class_='detail'):\n",
"        play = tag.find('span', class_='data-box')\n",
"        # Two next_sibling hops per counter: presumably one of them skips the\n",
"        # text node between the tags -- confirm against the live markup.\n",
"        review = play.next_sibling.next_sibling\n",
"        favorite = review.next_sibling.next_sibling\n",
"        bfl.append(_to_wan(play.get_text()))\n",
"        pls.append(_to_wan(review.get_text()))\n",
"        scs.append(_to_wan(favorite.get_text()))\n",
"    print(bfl)\n",
"    print(pls)\n",
"    print(scs)\n",
"\n",
"    # Overall score: the first nested div inside every 'pts' block.\n",
"    TScore = [int(tag.find('div').get_text()) for tag in soup.find_all('div', class_='pts')]\n",
"    print('综合评分', TScore)\n",
"\n",
"    # Export everything to an Excel sheet.\n",
"    info = {'动漫名': name, '播放量(万)': bfl, '评论数(万)': pls, '收藏数(万)': scs, '综合评分': TScore}\n",
"    dm_file = pandas.DataFrame(info)\n",
"    dm_file.to_excel('Dongman.xlsx', sheet_name='动漫数据分析')\n",
"    return name, bfl, pls, scs, TScore\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def view(info, font_path='./data/STHeiti Medium.ttc', out_path=r'E:1.png'):\n",
"    \"\"\"Draw the scores as bars with the play counts as a line and save the chart.\n",
"\n",
"    Args:\n",
"        info: Sequence whose items 0, 1 and 4 hold the titles, the play\n",
"            counts and the overall scores (the tuple returned by save()).\n",
"        font_path: Font file used to render the chart title.\n",
"        out_path: File the PNG image is written to.\n",
"    \"\"\"\n",
"    from matplotlib.lines import Line2D\n",
"\n",
"    my_font = font_manager.FontProperties(fname=font_path)  # font able to render the Chinese title\n",
"    dm_name = info[0]  # titles\n",
"    dm_play = info[1]  # play counts\n",
"    dm_com_score = info[4]  # overall scores\n",
"\n",
"    # Let axis text render Chinese characters and a proper minus sign.\n",
"    plt.rcParams['font.sans-serif'] = ['SimHei']\n",
"    plt.rcParams['axes.unicode_minus'] = False\n",
"\n",
"    # Primary axis: overall score as bars.\n",
"    fig, ax1 = plt.subplots()\n",
"    ax1.bar(dm_name, dm_com_score, color='red')\n",
"    ax1.set_title('综合评分和播放量数据分析', fontproperties=my_font)\n",
"    ax1.tick_params(labelsize=6)\n",
"    ax1.set_xlabel('番剧名')\n",
"    ax1.set_ylabel('综合评分')\n",
"    plt.xticks(rotation=90, color='green')  # ax1 is still the current axes here\n",
"\n",
"    # Secondary axis sharing the x axis: play count as a line.\n",
"    ax2 = ax1.twinx()\n",
"    ax2.plot(dm_play, color='cyan')\n",
"    ax2.set_ylabel('播放量')\n",
"\n",
"    # Proxy handles give the legend its entries without adding data to the axes.\n",
"    legend_handles = [\n",
"        Line2D([], [], label='综合评分', color='red', linewidth=5.0),\n",
"        Line2D([], [], label='播放量', color='cyan', linewidth=1.0, linestyle='-'),\n",
"    ]\n",
"    ax2.legend(handles=legend_handles)\n",
"\n",
"    fig.savefig(out_path, dpi=1000, bbox_inches='tight')\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" # plt.show()\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def main():\n",
"    \"\"\"Fetch the bilibili bangumi ranking page, export its data and chart it.\"\"\"\n",
"    url = 'https://www.bilibili.com/v/popular/rank/bangumi'  # ranking page address\n",
"    html = get_html(url)\n",
"    if html == '错误':\n",
"        # get_html() returns this sentinel on failure; parsing it would only\n",
"        # produce an empty workbook and an empty chart.\n",
"        print('获取网页失败,已终止')\n",
"        return\n",
"    info = save(html)\n",
"    view(info)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"if __name__ == \"__main__\":\n",
"    # Run the whole pipeline only when executed directly, not when imported.\n",
"    main()\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3 (ipykernel)",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.9.7"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 5
|
|
|
|
|
}
|