ADD file via upload

3 years ago · 87159e6766
parent 0f45253d4a
commit 87159e6766
1 changed files with 178 additions and 0 deletions
--- a/王烽——b站视频排行版.ipynb
+++ b/王烽——b站视频排行版.ipynb
@ -0,0 +1,178 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import requests\n",
+    "import xlwt\n",
+    "from lxml import html\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def bug(key_once):\n",
+    "    \"\"\"Scrape the bilibili ranking page and append one record per video to key_once.\n",
+    "\n",
+    "    Args:\n",
+    "        key_once: list that receives the records; it is extended in place.\n",
+    "\n",
+    "    Returns:\n",
+    "        The same key_once list, holding one dict per video with the keys\n",
+    "        'rank', 'topic', 'play', 'up' and 'hot'.\n",
+    "\n",
+    "    Raises:\n",
+    "        requests.RequestException: the request timed out, failed, or came back\n",
+    "            with an HTTP error status.\n",
+    "    \"\"\"\n",
+    "    url = 'https://www.bilibili.com/ranking?spm_id_from=333.851.b_7072696d61727950616765546162.3'\n",
+    "    # A timeout keeps the cell from hanging forever on a stalled connection.\n",
+    "    response = requests.get(url, timeout=10)\n",
+    "    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty ranking.\n",
+    "    response.raise_for_status()\n",
+    "    tree = html.fromstring(response.text)\n",
+    "\n",
+    "    # One \"info\" node per ranked entry.\n",
+    "    matter = tree.xpath('//li[@class=\"rank-item\"]/div[@class=\"content\"]/div[@class=\"info\"]')\n",
+    "\n",
+    "    for rank, item in enumerate(matter, start=1):  # ranks start at 1\n",
+    "        topic = \"\".join(item.xpath('./a/text()')).replace('\"','')\n",
+    "        # Take the first non-empty counter text as the play count, as the original\n",
+    "        # did by reading the front of the joined text. Using it verbatim avoids\n",
+    "        # gluing it to the following counter and avoids appending a unit suffix\n",
+    "        # the page did not show (e.g. for counts below 10k).\n",
+    "        # NOTE(review): assumes the first detail span is the play count - confirm against the page.\n",
+    "        detail_texts = [text.strip() for text in item.xpath('./div[@class=\"detail\"]/span/text()')]\n",
+    "        play = next((text for text in detail_texts if text), \"\")\n",
+    "        up = \"\".join(item.xpath('./div[@class=\"detail\"]/a/span/text()'))\n",
+    "        hot = \"\".join(item.xpath('./div[@class=\"pts\"]/div/text()'))\n",
+    "\n",
+    "        key_once.append({\n",
+    "            'rank': rank,    # position in the ranking\n",
+    "            'topic': topic,  # video title\n",
+    "            'play': play,    # play count as displayed on the page\n",
+    "            'up': up,        # uploader name\n",
+    "            'hot': hot,      # overall score\n",
+    "        })\n",
+    "    return key_once\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "生成的文件位于：b站视频排行榜前100.xls\n"
+     ]
+    }
+   ],
+   "source": [
+    "def importance(key_once):\n",
+    "    \"\"\"Export the ranking records to an .xls workbook in the working directory.\n",
+    "\n",
+    "    Args:\n",
+    "        key_once: iterable of dicts with the keys 'topic', 'up', 'hot', 'play'\n",
+    "            and 'rank'.\n",
+    "\n",
+    "    Side effects:\n",
+    "        Replaces any existing 'b站视频排行榜前100.xls' and prints where the\n",
+    "        file was written.\n",
+    "    \"\"\"\n",
+    "    # One path for the existence check and the save, with no Windows-only\n",
+    "    # '.\\\\' prefix, so the same file is targeted on every platform.\n",
+    "    filename = 'b站视频排行榜前100.xls'\n",
+    "    workbook = xlwt.Workbook()\n",
+    "    table = workbook.add_sheet('b站视频排行榜前100')\n",
+    "\n",
+    "    # Centering is configured on the style's Alignment object; setting\n",
+    "    # horz/vert directly on XFStyle has no effect on the written cells.\n",
+    "    centered = xlwt.Alignment()\n",
+    "    centered.horz = xlwt.Alignment.HORZ_CENTER\n",
+    "    centered.vert = xlwt.Alignment.VERT_CENTER\n",
+    "    al = xlwt.XFStyle()\n",
+    "    al.alignment = centered\n",
+    "\n",
+    "    # (header text, record key) for each column, in sheet order.\n",
+    "    columns = [\n",
+    "        ('标题', 'topic'),\n",
+    "        ('up主', 'up'),\n",
+    "        ('综合热度', 'hot'),\n",
+    "        ('播放量', 'play'),\n",
+    "        ('排名', 'rank'),\n",
+    "    ]\n",
+    "\n",
+    "    # Widen the title and uploader columns once, before writing any rows\n",
+    "    # (xlwt widths are in units of 1/256 of a character).\n",
+    "    table.col(0).width = 256 * 40\n",
+    "    table.col(1).width = 256 * 25\n",
+    "\n",
+    "    # Row 0 is the header; data rows start at 1.\n",
+    "    for col, (title, _) in enumerate(columns):\n",
+    "        table.write(0, col, title, al)\n",
+    "    for row, item in enumerate(key_once, start=1):\n",
+    "        for col, (_, key) in enumerate(columns):\n",
+    "            table.write(row, col, item[key], al)\n",
+    "\n",
+    "    # Remove a stale copy first so the saved file is always freshly created.\n",
+    "    if os.path.exists(filename):\n",
+    "        os.remove(filename)\n",
+    "    workbook.save(filename)\n",
+    "    print(\"生成的文件位于：\" + filename)\n",
+    "\n",
+    "if __name__ == '__main__':\n",
+    "    # Scrape into a fresh list, then export that same list to Excel;\n",
+    "    # bug() fills and returns the list it is handed.\n",
+    "    key_once = []\n",
+    "    bug(key_once)\n",
+    "    importance(key_once)\n",
+    "\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}