ADD file via upload

master
pw4e8vz7f 3 years ago
parent 0f45253d4a
commit 87159e6766

@ -0,0 +1,178 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"import xlwt\n",
"from lxml import html\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def bug(key_once):\n",
" url = 'https://www.bilibili.com/ranking?spm_id_from=333.851.b_7072696d61727950616765546162.3'\n",
" #获得一个网页最简单直接的方法就是 r = requests.get(url) ,向服务器请求资源。\n",
" response = requests.get(url).text #response.text返回的是Unicode格式http响应内容的字符串形式url对应的页面内容\n",
" l = html.fromstring(response) #html.fromstring 会解析html 头部中charset属性并且自动的decode\n",
" \n",
" #下面我们使用相对路径查找一下相对路径下li标签下的div标签下的class=\"rank-item\",class=content属性的值\n",
" matter = l.xpath('//li[@class=\"rank-item\"]/div[@class=\"content\"]/div[@class=\"info\"]')\n",
" rank = 0 #一开始排名为0\n",
"\n",
" #Python join() 方法用于将序列中的元素以指定的字符连接生成一个新的字符串。\n",
" for item in matter: #for循环遍历matter\n",
" rank += 1 #排名+1\n",
" topic = \"\".join(item.xpath('./a/text()')).replace('\"','')\n",
" #split()拆分字符串。通过指定分隔符对字符串进行切片并返回分割后的字符串列表list\n",
" playinfo = \"\".join(item.xpath('./div[@class=\"detail\"]/span/text()')).split(\"万\") \n",
" play = playinfo[0] + \"万\"\n",
" up = \"\".join(item.xpath('./div[@class=\"detail\"]/a/span/text()'))\n",
" hot = \"\".join(item.xpath('./div[@class=\"pts\"]/div/text()'))\n",
" \n",
" key_once.append({ #在video_list.append列表末尾添加新的对象\n",
" 'rank': rank, #排名\n",
" 'topic': topic, #题目 \n",
" 'play': play, #播放量\n",
" 'up': up, #up名\n",
" 'hot': hot #热度\n",
" \n",
" })\n",
" return key_once\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"生成的文件位于b站视频排行榜前100.xls\n"
]
}
],
"source": [
"def importance(key_once):\n",
" workbook = xlwt.Workbook() # 定义、创建一个workbook\n",
" table = workbook.add_sheet('b站视频排行榜前100') # 添加、创建一个sheet\n",
" al = xlwt.XFStyle() # 初始化样式\n",
" al.horz = 0x02 # 为样式创建字体,设置字体水平居中\n",
" al.vert = 0x01 # 设置字体垂直居中\n",
" head = ['标题', 'up主','综合热度','播放量', '排名'] # 表头\n",
" \n",
" for y in range(len(head)):\n",
" table.write(0, y, head[y],al) # 把表头写到Excel里面去 参数对应行,列,值,(格式)\n",
" i = 1\n",
" \n",
" for item in key_once:\n",
" # 设置第一、二行的宽度\n",
" first_col=table.col(0)\n",
" sec_col= table.col(1)\n",
" first_col.width=256*40 \n",
" sec_col.width=256*25\n",
" \n",
" table.write(i, 0, item['topic'],al)\n",
" table.write(i, 1, item['up'], al)\n",
" table.write(i, 2, item['hot'], al)\n",
" table.write(i, 3, item['play'], al)\n",
" table.write(i, 4, item['rank'], al)\n",
" i += 1\n",
" \n",
" # 如果文件存在,则将其删除\n",
" if os.path.exists('b站视频排行榜前100.xls'):\n",
" os.remove('b站视频排行榜前100.xls')\n",
" workbook.save('.\\\\b站视频排行榜前100.xls') #保存excel文件的路径\n",
" print(\"生成的文件位于b站视频排行榜前100.xls\")\n",
"\n",
"if __name__ == '__main__': #调用前面两个函数\n",
" key_once = []\n",
" importance(bug(key_once))\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading…
Cancel
Save