"""Render an HTML dashboard for a scraped Taobao product listing.

Script form of the ``draw.ipynb`` notebook; every ``# %%`` marker starts one
notebook cell.  The script reads ``商品信息.csv`` and writes
``draw_result.html`` containing three charts:

1. listing count (bars) and total sales (line) per price range,
2. a word cloud of the product names,
3. a China map of shipping origins with a bar chart of the top origins.
"""

# %% Imports
from collections import Counter
import datetime
import random

import jieba
import pandas as pd
from pyecharts import options as opts
# The star import supplies Bar, Line, WordCloud, Map, Grid and Page.
from pyecharts.charts import *  # noqa: F401,F403
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ThemeType

# %% Price-range statistics
# Edges of the ten price intervals, shared by the counts and the sales sums.
PRICE_BINS = [0, 100, 300, 500, 700, 900, 1000, 2000, 4000, 8000, 12000]

data_taobao = pd.read_csv('商品信息.csv')

# include_lowest=True keeps a price equal to the first edge inside the first
# interval, so the counts and the sales sums below use identical bins.
data_taobao['price_range'] = pd.cut(
    x=data_taobao['售价'], bins=PRICE_BINS, include_lowest=True
)

# Keep only the leading run of digits of the sales text.  expand=False makes
# extract() return a Series rather than a one-column DataFrame.
# NOTE(review): a unit suffix such as "万" would be discarded by this regex;
# confirm against the raw data.
data_taobao['销量'] = pd.to_numeric(
    data_taobao['销量'].str.extract(r'(\d+)', expand=False),
    downcast='integer',
)

# One pass over the same grouping yields both series; observed=False keeps
# empty price ranges so the x axis always shows all ten intervals.
price_stats = (
    data_taobao
    .groupby('price_range', observed=False)['销量']
    .agg(['size', 'sum'])
)
x = [str(interval) for interval in price_stats.index]  # interval labels
y = price_stats['size'].tolist()    # number of listings per price range
y2 = price_stats['sum'].tolist()    # total sales per price range
colors = ["#5793f3", "#d14a61", "#675bba"]


def multiPage():
    """Build the price-range chart: listing counts as bars, sales as a line.

    Reads the module-level ``x``, ``y``, ``y2`` and ``colors``.

    Returns:
        The bar chart with the sales line overlapped on it.
    """
    bar = (
        Bar(
            init_opts=opts.InitOpts(
                width="1500px", height="650px", theme=ThemeType.ESSOS
            )
        )
        .add_xaxis(x)
        .add_yaxis(
            "数量",
            y,
            z=0,
            yaxis_index=1,  # the extended (right-hand) axis added below
            color=colors[0],
        )
        # y-axis index 1: listing count, drawn on the right.
        .extend_axis(
            yaxis=opts.AxisOpts(
                name="数量",
                type_="value",
                position="right",
                axisline_opts=opts.AxisLineOpts(
                    linestyle_opts=opts.LineStyleOpts(color=colors[0])
                ),
                axislabel_opts=opts.LabelOpts(formatter="{value}"),
            )
        )
        .set_global_opts(
            xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),
            # y-axis index 0 (the chart's own axis): sales, drawn on the left.
            # Styling the default axis, instead of extending a third one,
            # puts the sales line on the axis that is labelled for it.
            yaxis_opts=opts.AxisOpts(
                type_="value",
                name="销量",
                position="left",
                axisline_opts=opts.AxisLineOpts(
                    linestyle_opts=opts.LineStyleOpts(color=colors[1])
                ),
                axislabel_opts=opts.LabelOpts(formatter="{value}"),
                splitline_opts=opts.SplitLineOpts(
                    is_show=True, linestyle_opts=opts.LineStyleOpts(opacity=1)
                ),
            ),
            # The two series are count (数量) and sales (销量).
            title_opts=opts.TitleOpts(title="各价格区间的数量和销量"),
            tooltip_opts=opts.TooltipOpts(
                trigger="axis", axis_pointer_type="cross"
            ),
        )
    )
    line = (
        Line()
        .add_xaxis(xaxis_data=x)
        .add_yaxis(
            series_name="销量", y_axis=y2, yaxis_index=0, color=colors[1]
        )
    )

    return bar.overlap(line)


# %% Word cloud of product names
# Join the actual names; str(Series) would tokenise the Series repr instead
# (row indices, "..." truncation and the "Name/dtype" footer).
goods_name = ' '.join(data_taobao['商品名称'].dropna().astype(str))
words = jieba.lcut(goods_name)

# Single-character tokens and whitespace tokens are not counted.
counts = Counter(
    word for word in words if len(word) > 1 and not word.isspace()
)
data = list(counts.items())  # (word, frequency) pairs for the word cloud


def wordcloudPage():
    """Build the product-name word cloud from the module-level ``data``.

    Returns:
        The configured ``WordCloud`` chart.
    """
    wordcloud = (
        WordCloud(
            init_opts=opts.InitOpts(
                width="1500px", height="650px", theme=ThemeType.ESSOS
            )
        )
        .add(series_name="商品名称词云", data_pair=data, word_size_range=[12, 132])
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="商品名称词云",
                title_textstyle_opts=opts.TextStyleOpts(font_size=23),
            ),
            tooltip_opts=opts.TooltipOpts(is_show=True),
        )
    )
    return wordcloud


# %% Shipping-origin map, top-origin bar chart and final page
TOP_ORIGINS = 7  # number of origins shown in the bar chart

# Listings without a shipping origin are skipped.  most_common() orders by
# count, descending, keeping first-seen order between ties.
province_counts = Counter(data_taobao['发货地'].dropna())
pro_lcounts = province_counts.most_common()
provincs = [name for name, _ in pro_lcounts[:TOP_ORIGINS]]
pro_num = [num for _, num in pro_lcounts[:TOP_ORIGINS]]

m = (
    Map()
    .add("", pro_lcounts, "china", is_map_symbol_show=False)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="全国发货地"),
        visualmap_opts=opts.VisualMapOpts(),
    )
)

province_bar = (
    Bar()
    .add_xaxis(xaxis_data=provincs)
    .add_yaxis(
        series_name="",
        y_axis=pro_num,
        label_opts=opts.LabelOpts(
            is_show=True, position="right", formatter="{b}: {c}"
        ),
    )
    .reversal_axis()  # horizontal bars
    .set_global_opts(
        xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),
        yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),
        tooltip_opts=opts.TooltipOpts(is_show=False),
        visualmap_opts=opts.VisualMapOpts(
            is_calculable=True,
            dimension=0,
            pos_left="10",
            pos_top="center",
            range_text=["High", "Low"],
            range_color=["lightskyblue", "yellow", "orangered"],
            textstyle_opts=opts.TextStyleOpts(color="#ddd"),
        ),
    )
)

# The bar chart sits in the lower-left corner of the same canvas as the map.
gchart = (
    Grid(
        init_opts=opts.InitOpts(
            width="1500px", height="650px", theme=ThemeType.ESSOS
        )
    )
    .add(
        province_bar,
        grid_opts=opts.GridOpts(
            pos_left="10", pos_right="45%", pos_top="70%", pos_bottom="5"
        ),
    )
    .add(m, grid_opts=opts.GridOpts())
)

page = (
    Page()
    .add(
        multiPage(),
        wordcloudPage(),
        gchart,
    )
)
page.render("draw_result.html")