# Scrape the ShanghaiRanking "bcur/202011" table, save the first rows to an
# Excel workbook, and render those rows as a pyecharts bar chart (HTML file).

# --- Imports -----------------------------------------------------------------
# NOTE(review): `time`, `random` and `xlrd` are no longer used by the code
# below; the imports are retained only so the cell's import surface is
# unchanged. They can be dropped once nothing else relies on them.
import random
import time

import bs4
import pandas as pd
import requests
import xlrd
from pyecharts.charts import Bar

# --- Configuration -----------------------------------------------------------
RANKING_URL = 'https://www.shanghairanking.cn/rankings/bcur/202011'
REQUEST_HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'}
REQUEST_TIMEOUT = 10           # seconds; avoids hanging forever on a dead connection
TOP_N = 10                     # number of leading table rows to keep
EXCEL_PATH = 'university.xlsx'
CHART_PATH = '柱形图.html'
NAME_COL = '学校'              # column holding the school name
SCORE_COL = '总分'             # column holding the total score
NAME_CELL = 1                  # index of the <td> holding the school name
SCORE_CELL = 4                 # index of the <td> holding the total score


def fetch_html(url, headers, timeout=REQUEST_TIMEOUT):
    """Download ``url`` and return the response body decoded as UTF-8.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    response.raise_for_status()  # fail loudly instead of parsing an error page
    response.encoding = "utf-8"  # force the character encoding used for .text
    return response.text


def parse_rankings(html):
    """Extract (school, total score) rows from the first ``<tbody>`` in ``html``.

    Returns:
        pandas.DataFrame with columns ``学校`` (stripped cell text) and
        ``总分`` (numeric; cells that cannot be parsed become NaN).

    Raises:
        ValueError: if the page has no ``<tbody>`` or no usable rows.
    """
    soup = bs4.BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        raise ValueError('no <tbody> found in the downloaded page')

    records = []
    for tr in tbody.children:
        # .children also yields whitespace text nodes; only real tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr.find_all('td')  # equivalent to tr('td')
        if len(tds) <= max(NAME_CELL, SCORE_CELL):
            continue  # row is too short to hold both cells; skip it
        records.append({
            NAME_COL: tds[NAME_CELL].text.strip(),
            SCORE_COL: tds[SCORE_CELL].text.strip(),
        })

    if not records:
        raise ValueError('ranking table contained no parsable rows')

    # Build the frame once from all records instead of concatenating
    # one single-row DataFrame per table row.
    rankings = pd.DataFrame(records, columns=[NAME_COL, SCORE_COL])
    rankings[SCORE_COL] = pd.to_numeric(rankings[SCORE_COL], errors='coerce')
    return rankings


def render_bar_chart(frame, output_path=CHART_PATH):
    """Render ``frame`` as a bar chart (x: school, y: total score) to an HTML file.

    Returns whatever ``Bar.render`` returns for ``output_path``.
    """
    names = frame[NAME_COL].tolist()
    # Plain Python floats (None for missing) so the values serialize cleanly.
    scores = [None if pd.isna(value) else float(value) for value in frame[SCORE_COL]]

    bar = Bar()                      # create the bar chart object
    bar.add_xaxis(names)             # x axis: school names
    bar.add_yaxis('总分', scores)    # y axis values and series name
    return bar.render(output_path)   # write the HTML file that shows the chart


# --- Crawler part ------------------------------------------------------------
rankings = parse_rankings(fetch_html(RANKING_URL, REQUEST_HEADERS))
top_universities = rankings.head(TOP_N)
top_universities.to_excel(EXCEL_PATH, index=False)

# --- Visualization part ------------------------------------------------------
# Plot directly from the in-memory frame. The original re-read the workbook
# from a hard-coded absolute path that did not match where it was written.
render_bar_chart(top_universities)