import os

import requests
import xlwt
from lxml import html


def _first_text(nodes):
    """Return the first non-blank string in *nodes*, stripped, or ''."""
    for node in nodes:
        text = node.strip()
        if text:
            return text
    return ''


def bug(key_once=None):
    """Scrape the Bilibili ranking page and collect one record per entry.

    Args:
        key_once: Optional list to append the records to. A new list is
            created when it is omitted or ``None``.

    Returns:
        The same list that was passed in (or the newly created one), extended
        with one dict per ranked item. Each dict has the keys ``'rank'``,
        ``'topic'``, ``'play'``, ``'up'`` and ``'hot'``.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
    """
    if key_once is None:
        key_once = []

    url = 'https://www.bilibili.com/ranking?spm_id_from=333.851.b_7072696d61727950616765546162.3'

    # Without a timeout a stalled connection would block forever, and without
    # the status check an error page would be parsed as an empty ranking.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    tree = html.fromstring(response.text)

    # One "info" div per ranked entry.
    entries = tree.xpath(
        '//li[@class="rank-item"]/div[@class="content"]/div[@class="info"]'
    )

    # The rank is the 1-based position of the entry on the page.
    for rank, item in enumerate(entries, start=1):
        topic = "".join(item.xpath('./a/text()')).replace('"', '')
        # Take the first counter on its own instead of joining all counters
        # and splitting on "万": a play count without "万" would otherwise be
        # glued to the counter that follows it.
        # NOTE(review): assumes the first <span> under the detail div is the
        # play count - confirm against the live markup.
        play = _first_text(item.xpath('./div[@class="detail"]/span/text()'))
        up = "".join(item.xpath('./div[@class="detail"]/a/span/text()'))
        hot = "".join(item.xpath('./div[@class="pts"]/div/text()'))

        key_once.append({
            'rank': rank,    # position in the ranking
            'topic': topic,  # video title
            'play': play,    # play count as displayed on the page
            'up': up,        # uploader name
            'hot': hot,      # overall score
        })
    return key_once
def importance(key_once, filename='b站视频排行榜前100.xls'):
    """Write the scraped ranking records to an ``.xls`` workbook.

    Args:
        key_once: Iterable of dicts with the keys ``'topic'``, ``'up'``,
            ``'hot'``, ``'play'`` and ``'rank'``.
        filename: Path of the workbook to write. Defaults to the original
            file name in the current working directory.
    """
    workbook = xlwt.Workbook()
    table = workbook.add_sheet('b站视频排行榜前100')

    # Centering lives on the style's Alignment object; attributes set directly
    # on the XFStyle instance are ignored by xlwt.
    style = xlwt.XFStyle()
    style.alignment.horz = xlwt.Alignment.HORZ_CENTER
    style.alignment.vert = xlwt.Alignment.VERT_CENTER

    # Header text paired with the record key shown in that column, so the
    # header row and the data rows cannot drift apart.
    columns = (
        ('标题', 'topic'),
        ('up主', 'up'),
        ('综合热度', 'hot'),
        ('播放量', 'play'),
        ('排名', 'rank'),
    )

    # Widen the title and uploader columns once, up front, so the widths are
    # applied even when there are no data rows.
    table.col(0).width = 256 * 40
    table.col(1).width = 256 * 25

    # write() arguments: row, column, value, style.
    for col, (title, _) in enumerate(columns):
        table.write(0, col, title, style)

    # Data rows start directly below the header row.
    for row, item in enumerate(key_once, start=1):
        for col, (_, key) in enumerate(columns):
            table.write(row, col, item[key], style)

    # save() overwrites an existing file, so no separate delete is needed.
    # Using one path for saving and reporting keeps them consistent on every
    # platform (the previous '.\\' prefix is only a directory on Windows).
    workbook.save(filename)
    print("生成的文件位于:" + filename)


if __name__ == '__main__':  # run the scrape, then export the result
    key_once = []
    importance(bug(key_once))
+ "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}