|
|
|
@ -0,0 +1,178 @@
|
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 12,
|
|
|
|
|
"metadata": {
|
|
|
|
|
"scrolled": true
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import os\n",
|
|
|
|
|
"import requests\n",
|
|
|
|
|
"import xlwt\n",
|
|
|
|
|
"from lxml import html\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 13,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def bug(key_once):\n",
"    \"\"\"Scrape the bilibili ranking page and append one record per video.\n",
"\n",
"    Args:\n",
"        key_once: list that receives the records; it is mutated in place.\n",
"\n",
"    Returns:\n",
"        The same ``key_once`` list. Every appended record is a dict with the\n",
"        keys 'rank', 'topic', 'play', 'up' and 'hot'.\n",
"\n",
"    Raises:\n",
"        requests.RequestException: the request failed, timed out, or the\n",
"            server answered with an HTTP error status.\n",
"    \"\"\"\n",
"    url = 'https://www.bilibili.com/ranking?spm_id_from=333.851.b_7072696d61727950616765546162.3'\n",
"    # A timeout keeps the cell from hanging forever on a stalled connection,\n",
"    # and raise_for_status() stops an error page from being parsed into an\n",
"    # empty ranking.\n",
"    response = requests.get(url, timeout=10)\n",
"    response.raise_for_status()\n",
"    # response.text is the response body as a str; lxml turns it into an element tree.\n",
"    page = html.fromstring(response.text)\n",
"\n",
"    # Select the div class=\"info\" nested in each li class=\"rank-item\" / div class=\"content\".\n",
"    matter = page.xpath('//li[@class=\"rank-item\"]/div[@class=\"content\"]/div[@class=\"info\"]')\n",
"\n",
"    # Ranks are 1-based and follow the order of the matched nodes.\n",
"    for rank, item in enumerate(matter, start=1):\n",
"        topic = \"\".join(item.xpath('./a/text()')).replace('\"', '')\n",
"        stats = \"\".join(item.xpath('./div[@class=\"detail\"]/span/text()'))\n",
"        # Keep everything up to and including the first \"万\". When the text has\n",
"        # no \"万\" at all, partition() leaves unit empty, so no unit is invented.\n",
"        # NOTE(review): stats concatenates every span under \"detail\"; confirm\n",
"        # against the live markup that the play count is the first one.\n",
"        count, unit, _ = stats.partition(\"万\")\n",
"        play = count + unit\n",
"        up = \"\".join(item.xpath('./div[@class=\"detail\"]/a/span/text()'))\n",
"        hot = \"\".join(item.xpath('./div[@class=\"pts\"]/div/text()'))\n",
"\n",
"        key_once.append({\n",
"            'rank': rank,    # position in the ranking\n",
"            'topic': topic,  # video title\n",
"            'play': play,    # play count text\n",
"            'up': up,        # uploader name\n",
"            'hot': hot,      # overall score text\n",
"        })\n",
"    return key_once\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 16,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"生成的文件位于:b站视频排行榜前100.xls\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"def importance(key_once):\n",
"    \"\"\"Write the ranking records to an .xls workbook in the working directory.\n",
"\n",
"    Args:\n",
"        key_once: iterable of dicts with the keys 'topic', 'up', 'hot',\n",
"            'play' and 'rank'.\n",
"\n",
"    The workbook has one sheet with a header row followed by one row per\n",
"    record. It is saved as 'b站视频排行榜前100.xls' and a confirmation line is\n",
"    printed afterwards. Nothing is returned.\n",
"    \"\"\"\n",
"    filename = 'b站视频排行榜前100.xls'\n",
"    workbook = xlwt.Workbook()\n",
"    table = workbook.add_sheet('b站视频排行榜前100')\n",
"\n",
"    # Centring lives on the style's Alignment object. Attributes assigned\n",
"    # directly on XFStyle (al.horz / al.vert) are never read by xlwt.\n",
"    al = xlwt.XFStyle()\n",
"    al.alignment.horz = xlwt.Alignment.HORZ_CENTER\n",
"    al.alignment.vert = xlwt.Alignment.VERT_CENTER\n",
"\n",
"    # Widen the first two columns (title and uploader). This is done once,\n",
"    # outside the row loop, so it also applies when there are no records.\n",
"    table.col(0).width = 256 * 40\n",
"    table.col(1).width = 256 * 25\n",
"\n",
"    # Header row; write() takes row, column, value and an optional style.\n",
"    head = ['标题', 'up主','综合热度','播放量', '排名']\n",
"    for y, title in enumerate(head):\n",
"        table.write(0, y, title, al)\n",
"\n",
"    # Record keys in the same left-to-right order as the header columns.\n",
"    fields = ('topic', 'up', 'hot', 'play', 'rank')\n",
"    for i, item in enumerate(key_once, start=1):\n",
"        for y, field in enumerate(fields):\n",
"            table.write(i, y, item[field], al)\n",
"\n",
"    # save() overwrites an existing file, so no delete is needed first. A plain\n",
"    # relative name avoids the Windows-only '.\\\\' prefix, which on other\n",
"    # systems would create a file whose name contains a backslash.\n",
"    workbook.save(filename)\n",
"    print(\"生成的文件位于:b站视频排行榜前100.xls\")\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Entry point: scrape the ranking into a fresh list, then export that list.\n",
"if __name__ == '__main__':\n",
"    key_once = bug([])  # bug() returns the list it was handed\n",
"    importance(key_once)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.6.4"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|