data complete

master
Zhao 2 years ago
parent af9d1b1d5d
commit 8c94273cb2

@ -0,0 +1,266 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "3a2ed9c7",
"metadata": {},
"outputs": [],
"source": [
"from pyecharts.charts import *\n",
"from pyecharts import options as opts\n",
"import random\n",
"import datetime\n",
"import pandas as pd\n",
"import jieba\n",
"from pyecharts.commons.utils import JsCode\n",
"from pyecharts.globals import ThemeType"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1e98896c",
"metadata": {},
"outputs": [],
"source": [
"# Price-bucket edges for the '售价' column, shared by the count and sales aggregations below.\n",
"PRICE_BINS = [0, 100, 300, 500, 700, 900, 1000, 2000, 4000, 8000, 12000]\n",
"\n",
"data_taobao = pd.read_csv('商品信息.csv')\n",
"# include_lowest=True matches the bucketing that value_counts(bins=...) applies below,\n",
"# so a price equal to the lowest edge lands in the first bucket for both series.\n",
"data_taobao['price_range'] = pd.cut(x=data_taobao['售价'], bins=PRICE_BINS, include_lowest=True)\n",
"# Keep the first run of digits of the sales text. The raw string avoids the invalid '\\d' escape\n",
"# and expand=False returns a Series rather than a one-column DataFrame.\n",
"# NOTE(review): a value such as '1万+' would be reduced to 1 - confirm the CSV never uses that form.\n",
"data_taobao['销量'] = data_taobao['销量'].str.extract(r'(\\d+)', expand=False)\n",
"data_taobao['销量'] = pd.to_numeric(data_taobao['销量'], downcast='integer')\n",
"\n",
"# Number of items per price bucket, kept in bucket order (sort=False).\n",
"b = data_taobao['售价'].value_counts(bins=PRICE_BINS, sort=False)\n",
"x = [str(interval) for interval in b.index]\n",
"y = list(b)\n",
"# observed=False keeps empty buckets so y2 stays positionally aligned with x.\n",
"sales_sum = data_taobao.groupby('price_range', observed=False)['销量'].sum()\n",
"y2 = list(sales_sum)\n",
"colors = [\"#5793f3\", \"#d14a61\", \"#675bba\"]\n",
"\n",
"def multiPage():\n",
"    \"\"\"Build the price-bucket chart: item counts as bars with summed sales as an overlaid line.\n",
"\n",
"    Reads the module-level ``x`` (bucket labels), ``y`` (item counts),\n",
"    ``y2`` (summed sales) and ``colors``.\n",
"\n",
"    Returns:\n",
"        The Bar chart with the sales Line overlaid on it.\n",
"    \"\"\"\n",
"    bar = (\n",
"        Bar(init_opts=opts.InitOpts(width=\"1500px\", height=\"650px\", theme=ThemeType.ESSOS))\n",
"        .add_xaxis(x)\n",
"        .add_yaxis(\n",
"            \"数量\",\n",
"            y,\n",
"            z=0,\n",
"            yaxis_index=1,\n",
"            color=colors[0],\n",
"        )\n",
"        # y-axis 1 (right side): item count, used by the bar series.\n",
"        .extend_axis(\n",
"            yaxis=opts.AxisOpts(\n",
"                name=\"数量\",\n",
"                type_=\"value\",\n",
"                position=\"right\",\n",
"                axisline_opts=opts.AxisLineOpts(\n",
"                    linestyle_opts=opts.LineStyleOpts(color=colors[0])\n",
"                ),\n",
"                axislabel_opts=opts.LabelOpts(formatter=\"{value}\"),\n",
"            )\n",
"        )\n",
"        .set_global_opts(\n",
"            # y-axis 0 (left side): summed sales, used by the overlaid line series.\n",
"            # Configuring the default axis here, instead of extending a third axis,\n",
"            # avoids an extra left-hand axis that no series is bound to.\n",
"            yaxis_opts=opts.AxisOpts(\n",
"                type_=\"value\",\n",
"                name=\"销量\",\n",
"                position=\"left\",\n",
"                axisline_opts=opts.AxisLineOpts(\n",
"                    linestyle_opts=opts.LineStyleOpts(color=colors[1])\n",
"                ),\n",
"                axislabel_opts=opts.LabelOpts(formatter=\"{value}\"),\n",
"                splitline_opts=opts.SplitLineOpts(\n",
"                    is_show=True, linestyle_opts=opts.LineStyleOpts(opacity=1)\n",
"                ),\n",
"            ),\n",
"            xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),\n",
"            # The chart shows count and sales (not price), so the title says so.\n",
"            title_opts=opts.TitleOpts(title=\"各价格区间的数量和销量\"),\n",
"            tooltip_opts=opts.TooltipOpts(trigger=\"axis\", axis_pointer_type=\"cross\"),\n",
"        )\n",
"    )\n",
"    line = (\n",
"        Line()\n",
"        .add_xaxis(xaxis_data=x)\n",
"        .add_yaxis(\n",
"            series_name=\"销量\", y_axis=y2, yaxis_index=0, color=colors[1]\n",
"        )\n",
"    )\n",
"\n",
"    return bar.overlap(line)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "781f04aa",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Building prefix dict from the default dictionary ...\n",
"Loading model from cache C:\\Users\\FXBL\\AppData\\Local\\Temp\\jieba.cache\n",
"Loading model cost 0.909 seconds.\n",
"Prefix dict has been built successfully.\n"
]
}
],
"source": [
"# Join every product title into one text. str() on the Series would only give its\n",
"# truncated repr (row numbers, '...', and the 'Name/Length/dtype' footer).\n",
"goods_name = ' '.join(data_taobao['商品名称'].dropna().astype(str))\n",
"# Tokenize and drop whitespace-only tokens in a single pass.\n",
"words = [word for word in jieba.lcut(goods_name) if word.strip()]\n",
"counts = {}\n",
"for word in words:\n",
"    if len(word) == 1:  # single-character tokens are not counted\n",
"        continue\n",
"    counts[word] = counts.get(word, 0) + 1\n",
"# (word, count) pairs in the form WordCloud.add(data_pair=...) receives below.\n",
"lcounts = list(counts.items())\n",
"data = lcounts\n",
"def wordcloudPage():\n",
"    \"\"\"Create the product-title word cloud from the module-level ``data`` pairs.\n",
"\n",
"    Returns:\n",
"        The configured WordCloud chart.\n",
"    \"\"\"\n",
"    canvas = opts.InitOpts(width=\"1500px\", height=\"650px\", theme=ThemeType.ESSOS)\n",
"    heading = opts.TitleOpts(\n",
"        title=\"商品名称词云\",\n",
"        title_textstyle_opts=opts.TextStyleOpts(font_size=23),\n",
"    )\n",
"    cloud = WordCloud(init_opts=canvas)\n",
"    cloud.add(series_name=\"商品名称词云\", data_pair=data, word_size_range=[12, 132])\n",
"    cloud.set_global_opts(\n",
"        title_opts=heading,\n",
"        tooltip_opts=opts.TooltipOpts(is_show=True),\n",
"    )\n",
"    return cloud"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c00b29cc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'c:\\\\Users\\\\FXBL\\\\Desktop\\\\draw_result.html'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"# Number of shipping origins shown in the ranking bar.\n",
"TOP_PROVINCES = 7\n",
"\n",
"# Count listings per shipping origin. Rows with a missing origin are skipped so that\n",
"# no NaN key ends up in the map data.\n",
"province_counts = {}\n",
"for origin in data_taobao['发货地'].dropna():\n",
"    province_counts[origin] = province_counts.get(origin, 0) + 1\n",
"\n",
"# (origin, count) pairs, most frequent first; sorted() is stable, so ties keep first-seen order.\n",
"pro_lcounts = sorted(province_counts.items(), key=lambda item: item[1], reverse=True)\n",
"provincs = [origin for origin, _ in pro_lcounts[:TOP_PROVINCES]]\n",
"pro_num = [count for _, count in pro_lcounts[:TOP_PROVINCES]]\n",
"# Map series fed with the (origin, count) pairs, drawn on the \"china\" map type.\n",
"m = Map()\n",
"m.add(\"\", pro_lcounts, \"china\", is_map_symbol_show=False)\n",
"m.set_global_opts(\n",
"    title_opts=opts.TitleOpts(title=\"全国发货地\"),\n",
"    visualmap_opts=opts.VisualMapOpts(),\n",
")\n",
"\n",
"# Bar chart of the origins in `provincs` with swapped axes; axis labels and the tooltip\n",
"# are hidden and each bar is labelled with the \"{b}: {c}\" formatter instead.\n",
"province_bar = Bar()\n",
"province_bar.add_xaxis(xaxis_data=provincs)\n",
"province_bar.add_yaxis(\n",
"    series_name=\"\",\n",
"    y_axis=pro_num,\n",
"    label_opts=opts.LabelOpts(is_show=True, position=\"right\", formatter=\"{b}: {c}\"),\n",
")\n",
"province_bar.reversal_axis()\n",
"province_bar.set_global_opts(\n",
"    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),\n",
"    yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),\n",
"    tooltip_opts=opts.TooltipOpts(is_show=False),\n",
"    visualmap_opts=opts.VisualMapOpts(\n",
"        is_calculable=True,\n",
"        dimension=0,\n",
"        pos_left=\"10\",\n",
"        pos_top=\"center\",\n",
"        range_text=[\"High\", \"Low\"],\n",
"        range_color=[\"lightskyblue\", \"yellow\", \"orangered\"],\n",
"        textstyle_opts=opts.TextStyleOpts(color=\"#ddd\"),\n",
"    ),\n",
")\n",
"\n",
"# Grid holding the bar in the lower-left region and the map with default grid options.\n",
"gchart = Grid(init_opts=opts.InitOpts(width=\"1500px\", height=\"650px\", theme=ThemeType.ESSOS))\n",
"gchart.add(\n",
"    province_bar,\n",
"    grid_opts=opts.GridOpts(pos_left=\"10\", pos_right=\"45%\", pos_top=\"70%\", pos_bottom=\"5\"),\n",
")\n",
"gchart.add(m, grid_opts=opts.GridOpts())\n",
"\n",
"# Assemble the three charts on one page and write it to an HTML file.\n",
"page = Page()\n",
"page.add(multiPage(), wordcloudPage(), gchart)\n",
"page.render(\"draw_result.html\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.8 64-bit (microsoft store)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
},
"vscode": {
"interpreter": {
"hash": "bf2667e6cf4b8fac0f8544cb57d81a6418925f70e2172127655c1ef276bfd501"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading…
Cancel
Save