You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
267 lines
8.8 KiB
267 lines
8.8 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "3a2ed9c7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from pyecharts.charts import *\n",
|
|
"from pyecharts import options as opts\n",
|
|
"import random\n",
|
|
"import datetime\n",
|
|
"import pandas as pd\n",
|
|
"import jieba\n",
|
|
"from pyecharts.commons.utils import JsCode\n",
|
|
"from pyecharts.globals import ThemeType"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "1e98896c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Bucket edges for the 售价 (price) column, declared once so every aggregate uses the same ranges.\n",
"PRICE_BINS = [0, 100, 300, 500, 700, 900, 1000, 2000, 4000, 8000, 12000]\n",
"\n",
"data_taobao = pd.read_csv('商品信息.csv')\n",
"data_taobao['price_range'] = pd.cut(x=data_taobao['售价'], bins=PRICE_BINS)\n",
"# Keep the first run of digits from the raw sales text. The pattern is a raw string so the\n",
"# backslash is not parsed as a string escape; expand=False yields a Series, not a DataFrame.\n",
"# NOTE(review): any unit suffix after the digits is discarded -- confirm the raw format.\n",
"data_taobao['销量'] = data_taobao['销量'].str.extract(r'(\\d+)', expand=False)\n",
"data_taobao['销量'] = pd.to_numeric(data_taobao['销量'], downcast='integer')\n",
"\n",
"# Derive counts, sales totals and axis labels from the same price_range grouping so the\n",
"# bars, the line and the x labels always describe identical buckets.\n",
"price_groups = data_taobao.groupby('price_range', observed=False)\n",
"b = price_groups.size()  # listings per price bucket, empty buckets included\n",
"x = [str(i) for i in b.index]\n",
"y = list(b)\n",
"sales_sum = price_groups['销量'].sum()  # summed 销量 per price bucket\n",
"y2 = list(sales_sum)\n",
"colors = [\"#5793f3\", \"#d14a61\", \"#675bba\"]\n",
|
|
"\n",
|
|
"def multiPage():\n",
"    \"\"\"Build the price-bucket overview: listing counts as bars, total sales as a line.\n",
"\n",
"    Reads the module-level ``x`` (bucket labels), ``y`` (counts), ``y2`` (sales)\n",
"    and ``colors``.\n",
"\n",
"    Returns:\n",
"        The result of overlapping the sales line onto the count bar chart.\n",
"    \"\"\"\n",
"    # Primary y axis (index 0, left side): the sales line is plotted against it.\n",
"    sales_axis = opts.AxisOpts(\n",
"        type_=\"value\",\n",
"        name=\"销量\",\n",
"        position=\"left\",\n",
"        axisline_opts=opts.AxisLineOpts(\n",
"            linestyle_opts=opts.LineStyleOpts(color=colors[1])\n",
"        ),\n",
"        axislabel_opts=opts.LabelOpts(formatter=\"{value}\"),\n",
"        splitline_opts=opts.SplitLineOpts(\n",
"            is_show=True, linestyle_opts=opts.LineStyleOpts(opacity=1)\n",
"        ),\n",
"    )\n",
"    # First extended y axis (index 1, right side): the count bars are plotted against it.\n",
"    count_axis = opts.AxisOpts(\n",
"        name=\"数量\",\n",
"        type_=\"value\",\n",
"        position=\"right\",\n",
"        axisline_opts=opts.AxisLineOpts(\n",
"            linestyle_opts=opts.LineStyleOpts(color=colors[0])\n",
"        ),\n",
"        axislabel_opts=opts.LabelOpts(formatter=\"{value}\"),\n",
"    )\n",
"    bar = (\n",
"        Bar(init_opts=opts.InitOpts(width=\"1500px\", height=\"650px\", theme=ThemeType.ESSOS))\n",
"        .add_xaxis(x)\n",
"        .add_yaxis(\n",
"            \"数量\",\n",
"            y,\n",
"            z=0,\n",
"            yaxis_index=1,\n",
"            color=colors[0],\n",
"        )\n",
"        .extend_axis(yaxis=count_axis)\n",
"        .set_global_opts(\n",
"            xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-15)),\n",
"            # The sales styling is applied to axis 0 itself. Adding it as another\n",
"            # extended axis would give it index 2, which no series refers to, and the\n",
"            # line (yaxis_index=0) would be drawn on an unstyled default axis.\n",
"            yaxis_opts=sales_axis,\n",
"            title_opts=opts.TitleOpts(title=\"各价格区间的数量和价格\"),\n",
"            tooltip_opts=opts.TooltipOpts(trigger=\"axis\", axis_pointer_type=\"cross\"),\n",
"        )\n",
"    )\n",
"    line = (\n",
"        Line()\n",
"        .add_xaxis(xaxis_data=x)\n",
"        .add_yaxis(\n",
"            series_name=\"销量\", y_axis=y2, yaxis_index=0, color=colors[1]\n",
"        )\n",
"    )\n",
"\n",
"    return bar.overlap(line)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "781f04aa",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Building prefix dict from the default dictionary ...\n",
|
|
"Loading model from cache C:\\Users\\FXBL\\AppData\\Local\\Temp\\jieba.cache\n",
|
|
"Loading model cost 0.909 seconds.\n",
|
|
"Prefix dict has been built successfully.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Join every product title into one text before tokenising. Calling str() on the Series\n",
"# would tokenise its printed summary (index labels, row truncation, dtype footer) instead\n",
"# of the titles themselves.\n",
"goods_name = ' '.join(data_taobao['商品名称'].dropna().astype(str))\n",
"# Drop whitespace-only tokens in a single pass.\n",
"words = [token for token in jieba.lcut(goods_name) if token.strip()]\n",
"counts = {}\n",
"for word in words:\n",
"    # Single-character tokens are not counted.\n",
"    if len(word) > 1:\n",
"        counts[word] = counts.get(word, 0) + 1\n",
"# (word, frequency) pairs consumed by wordcloudPage() through the module-level name data.\n",
"lcounts = list(counts.items())\n",
"data = lcounts\n",
|
|
"def wordcloudPage():\n",
"    \"\"\"Build the word cloud chart from the module-level ``data`` (word, count) pairs.\n",
"\n",
"    Returns:\n",
"        The configured WordCloud chart.\n",
"    \"\"\"\n",
"    canvas = opts.InitOpts(width=\"1500px\", height=\"650px\", theme=ThemeType.ESSOS)\n",
"    heading = opts.TitleOpts(\n",
"        title=\"商品名称词云\", title_textstyle_opts=opts.TextStyleOpts(font_size=23)\n",
"    )\n",
"    cloud = WordCloud(init_opts=canvas)\n",
"    cloud.add(series_name=\"商品名称词云\", data_pair=data, word_size_range=[12, 132])\n",
"    cloud.set_global_opts(\n",
"        title_opts=heading,\n",
"        tooltip_opts=opts.TooltipOpts(is_show=True),\n",
"    )\n",
"    return cloud"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "c00b29cc",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'c:\\\\Users\\\\FXBL\\\\Desktop\\\\draw_result.html'"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Count listings per 发货地 value.\n",
"province_counts = {}\n",
"for origin in data_taobao['发货地']:\n",
"    province_counts[origin] = province_counts.get(origin, 0) + 1\n",
"\n",
"# (name, count) pairs, largest count first; sorted() is stable, so ties keep first-seen order.\n",
"pro_lcounts = sorted(province_counts.items(), key=lambda pair: pair[1], reverse=True)\n",
"# The bar chart only shows the seven most frequent entries.\n",
"provincs = [pair[0] for pair in pro_lcounts[:7]]\n",
"pro_num = [pair[1] for pair in pro_lcounts[:7]]\n",
"\n",
"# Map fed with every (name, count) pair.\n",
"m = Map()\n",
"m.add(\"\", pro_lcounts, \"china\", is_map_symbol_show=False)\n",
"m.set_global_opts(\n",
"    title_opts=opts.TitleOpts(title=\"全国发货地\"),\n",
"    visualmap_opts=opts.VisualMapOpts(),\n",
")\n",
"\n",
"# Horizontal bar chart of the top entries: axis labels and tooltip are hidden,\n",
"# each bar carries its own label instead.\n",
"bar_labels = opts.LabelOpts(is_show=True, position=\"right\", formatter=\"{b}: {c}\")\n",
"bar_visualmap = opts.VisualMapOpts(\n",
"    is_calculable=True,\n",
"    dimension=0,\n",
"    pos_left=\"10\",\n",
"    pos_top=\"center\",\n",
"    range_text=[\"High\", \"Low\"],\n",
"    range_color=[\"lightskyblue\", \"yellow\", \"orangered\"],\n",
"    textstyle_opts=opts.TextStyleOpts(color=\"#ddd\"),\n",
")\n",
"province_bar = Bar()\n",
"province_bar.add_xaxis(xaxis_data=provincs)\n",
"province_bar.add_yaxis(series_name=\"\", y_axis=pro_num, label_opts=bar_labels)\n",
"province_bar.reversal_axis()\n",
"province_bar.set_global_opts(\n",
"    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),\n",
"    yaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(is_show=False)),\n",
"    tooltip_opts=opts.TooltipOpts(is_show=False),\n",
"    visualmap_opts=bar_visualmap,\n",
")\n",
"\n",
"# Grid holding the bar chart (positioned via explicit grid offsets) and the map.\n",
"gchart = Grid(init_opts=opts.InitOpts(width=\"1500px\", height=\"650px\", theme=ThemeType.ESSOS))\n",
"gchart.add(\n",
"    province_bar,\n",
"    grid_opts=opts.GridOpts(\n",
"        pos_left=\"10\", pos_right=\"45%\", pos_top=\"70%\", pos_bottom=\"5\"\n",
"    ),\n",
")\n",
"gchart.add(m, grid_opts=opts.GridOpts())\n",
"\n",
"# Put the three charts on one page. render() stays the final expression so its\n",
"# return value is displayed as the cell output.\n",
"page = Page()\n",
"page.add(multiPage(), wordcloudPage(), gchart)\n",
"page.render(\"draw_result.html\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3.10.8 64-bit (microsoft store)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.8"
|
|
},
|
|
"vscode": {
|
|
"interpreter": {
|
|
"hash": "bf2667e6cf4b8fac0f8544cb57d81a6418925f70e2172127655c1ef276bfd501"
|
|
}
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|