From acb8f0f3e7c42bbad476c14af1afcb4d550942d5 Mon Sep 17 00:00:00 2001
From: hut22412030117
Date: Wed, 12 Nov 2025 21:41:48 +0800
Subject: [PATCH 1/3] ADD file via upload

---
 8.2.2 计算Pearson相关系数.py | 46 ++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 8.2.2 计算Pearson相关系数.py

diff --git a/8.2.2 计算Pearson相关系数.py b/8.2.2 计算Pearson相关系数.py
new file mode 100644
index 0000000..0317c9f
--- /dev/null
+++ b/8.2.2 计算Pearson相关系数.py
@@ -0,0 +1,46 @@
+"""Plot the Pearson correlation matrix of the income-tax data."""
+
+import numpy as np
+import pandas as pd
+from pyecharts import options as opts
+from pyecharts.charts import HeatMap
+
+# Load the data; the first CSV column is used as the row index.
+income_data = pd.read_csv('data/income_tax.csv', index_col=0)  # relative path
+
+# Pearson correlation matrix, rounded to two decimals.
+data_cor = np.round(income_data.corr(method='pearson'), 2)
+
+y_data = list(data_cor.columns)  # y-axis labels
+x_data = list(data_cor.index)  # x-axis labels
+# Correlation matrix as a nested list.
+values = data_cor.values.tolist()
+# [x position, y position, coefficient] triples, one per heatmap cell.
+value = [[i, j, values[i][j]] for i in range(len(x_data))
+         for j in range(len(y_data))]
+
+heatmap = (
+    # Create the heatmap.
+    HeatMap()
+    # Set the x axis.
+    .add_xaxis(x_data)
+    # Set the y axis and the cell values.
+    .add_yaxis(
+        '', y_data,
+        value,
+        label_opts=opts.LabelOpts(
+            is_show=True, position='inside'),
+    )
+    .set_global_opts(
+        # Chart title.
+        title_opts=opts.TitleOpts(title='相关系数热力图'),
+        # Hidden visual map; its range runs from 0.9 to 1.
+        visualmap_opts=opts.VisualMapOpts(
+            is_show=False, pos_bottom='center',
+            max_=1, min_=0.9
+        )
+    )
+)
+
+# Write the chart to an HTML file (relative path).
+heatmap.render('tmp/相关系数热力图.html')
-- 
2.34.1

From 0f62895c1b75fe24a5a042e1aa2c28e4370a7df6 Mon Sep 17 00:00:00 2001
From: hut22412030117
Date: Wed, 12 Nov 2025 21:42:30 +0800
Subject: [PATCH 2/3] ADD file via upload

---
 8.3.2 选取关键特征.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 8.3.2 选取关键特征.py

diff --git a/8.3.2 选取关键特征.py b/8.3.2 选取关键特征.py
new file mode 100644
index 0000000..50e28cd
--- /dev/null
+++ b/8.3.2 选取关键特征.py
@@ -0,0 +1,39 @@
+"""Select key features for the income-tax model with Lasso regression."""
+
+import pandas as pd
+import numpy as np
+from sklearn.linear_model import Lasso
+
+# Load the data; the first CSV column is used as the row index.
+income_data = pd.read_csv('data/income_tax.csv', index_col=0)
+
+# The first 13 columns are the candidate features used for modelling.
+feature_data = income_data.iloc[:, 0:13]
+data_train = feature_data.copy()
+data_mean = data_train.mean()
+data_std = data_train.std()
+data_train = (data_train - data_mean) / data_std  # standardise the features
+
+# Fit the Lasso regression model.
+lasso = Lasso(alpha=1000, random_state=1234)
+lasso.fit(data_train, income_data['y'])
+
+# Print the regression coefficients, rounded to 5 decimals.
+print('Lasso回归系数为:', np.round(lasso.coef_, 5))
+
+# Features are kept when their coefficient is non-zero.
+mask = lasso.coef_ != 0
+print('系数非零个数为:', np.sum(mask))
+
+# Take the names from the same 13 columns the model was fitted on, so the
+# mask always lines up with them whatever other columns the CSV contains.
+print('系数非零特征:', feature_data.columns[mask])
+
+# Keep only the selected feature columns.
+new_reg_data = feature_data.iloc[:, mask]
+
+# Save the selected features (relative path).
+new_reg_data.to_csv('tmp/new_reg_data.csv')
+
+# Print the shape of the selected data.
+print('输出数据的维度为:', new_reg_data.shape)
-- 
2.34.1

From cd104443563cfff4cbb6e91df8ed1a60ff6b1b70 Mon Sep 17 00:00:00 2001
From: hut22412030117
Date: Wed, 12 Nov 2025 21:42:57 +0800
Subject: [PATCH 3/3] ADD file via upload

---
 8.4.3 预测企业所得税.py | 115 ++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 8.4.3 预测企业所得税.py

diff --git a/8.4.3 预测企业所得税.py b/8.4.3 预测企业所得税.py
new file mode 100644
index 0000000..f69fef6
--- /dev/null
+++ b/8.4.3 预测企业所得税.py
@@ -0,0 +1,115 @@
+"""Forecast enterprise income tax with grey prediction and LinearSVR."""
+
+import numpy as np
+import pandas as pd
+from pyecharts import options as opts
+from pyecharts.charts import Grid, Scatter, Line
+from sklearn.metrics import (explained_variance_score, mean_absolute_error,
+                             median_absolute_error, r2_score)
+from sklearn.svm import LinearSVR
+
+from gm11 import gm11  # project-local grey prediction function
+
+# Features selected in the previous step (relative path).
+new_reg_data = pd.read_csv('tmp/new_reg_data.csv', index_col=0)
+# Original data; supplies the 'y' column.
+income_data = pd.read_csv('data/income_tax.csv', index_col=0)
+new_reg_data.index = range(2005, 2020)
+# Append empty rows for the two years to be forecast.
+new_reg_data.loc[2020] = None
+new_reg_data.loc[2021] = None
+
+c = []
+p = []
+# Grey prediction of every feature for 2020 and 2021.
+for i in list(new_reg_data.columns):
+    # Fit the grey model once per column; every value used below comes from
+    # this single result.
+    gm11_result = gm11(np.array(new_reg_data.loc[range(2005, 2020), i]))
+    f = gm11_result[0]  # prediction function, called with a row position below
+    # NOTE(review): items 4 and 5 look like model accuracy statistics;
+    # confirm against gm11.
+    c.append(gm11_result[4])
+    p.append(gm11_result[5])
+    # The frame has 17 rows here, so the positions passed are 16 and 17.
+    new_reg_data.loc[2020, i] = f(len(new_reg_data) - 1)
+    new_reg_data.loc[2021, i] = f(len(new_reg_data))
+    new_reg_data[i] = new_reg_data[i].round(2)  # keep two decimals
+
+new_reg_data = pd.concat([new_reg_data, income_data['y']], axis=1)
+new_reg_data.to_csv('tmp/new_reg_data_GM11.csv')  # relative path
+print('预测结果为:\n', new_reg_data.iloc[-2:, :6])  # show the forecasts
+
+# Reload the grey-prediction output.
+gm11_data = pd.read_csv('tmp/new_reg_data_GM11.csv', index_col=0)
+feature = gm11_data.columns[:-1]  # every column except the last one, 'y'
+
+# Fit on the 2005-2019 rows.
+data_train = gm11_data.loc[range(2005, 2020)].copy()
+data_mean = data_train.mean()
+data_std = data_train.std()
+data_train = (data_train - data_mean) / data_std  # standardise
+
+x_train = np.array(data_train[feature])  # feature matrix
+y_train = np.array(data_train['y'])  # target values
+linearsvr = LinearSVR(random_state=1234)
+linearsvr.fit(x_train, y_train)
+
+# Predict every row, then undo the standardisation of the target.
+x = np.array(((gm11_data[feature] - data_mean[feature]) / data_std[feature]))
+gm11_data['y_pred'] = linearsvr.predict(x) * data_std['y'] + data_mean['y']
+
+# Save the predictions (relative path).
+gm11_data.to_csv('tmp/new_reg_data_GM11_revenue.csv')
+print('真实值与预测值分别为:\n', gm11_data[['y', 'y_pred']])
+
+# Evaluation metrics, computed without the last two rows.
+y_true = gm11_data['y'][:-2]
+y_fit = gm11_data['y_pred'][:-2]
+print('可解释方差值:', explained_variance_score(y_true, y_fit))
+print('平均绝对误差:', mean_absolute_error(y_true, y_fit))
+print('中值绝对误差:', median_absolute_error(y_true, y_fit))
+print('R2值:', r2_score(y_true, y_fit))
+
+# x-axis labels.
+x_data = ['2005', '2006', '2007', '2008', '2009', '2010',
+          '2011', '2012', '2013', '2014', '2015', '2016',
+          '2017', '2018', '2019', '2020', '2021']
+
+# Line series.
+line = (
+    Line(init_opts=opts.InitOpts(width='800px', height='310px'))
+    .add_xaxis(x_data)
+    .add_yaxis('真实值', gm11_data['y'].tolist(),
+               label_opts=opts.LabelOpts(is_show=False))
+    .add_yaxis('预测值', gm11_data['y_pred'].tolist(),
+               label_opts=opts.LabelOpts(is_show=False))
+)
+
+# Scatter series.
+scatter = (
+    Scatter(init_opts=opts.InitOpts(width='800px', height='310px'))
+    .add_xaxis(x_data)
+    .add_yaxis('真实值', gm11_data['y'].tolist(),
+               label_opts=opts.LabelOpts(is_show=False),
+               symbol_size=10, symbol='diamond')
+    .add_yaxis('预测值', gm11_data['y_pred'].tolist(),
+               label_opts=opts.LabelOpts(is_show=False),
+               symbol_size=10, symbol='pin')
+    .set_global_opts(
+        title_opts=opts.TitleOpts(title='真实值与预测值对比'),
+        yaxis_opts=opts.AxisOpts(name='企业所得税(万元)',
+                                 name_location='middle', name_gap=70),
+        xaxis_opts=opts.AxisOpts(name='年份', name_location='middle',
+                                 name_gap=30),
+    )
+)
+
+# Overlay the line on the scatter chart.
+scatter.overlap(line)
+
+# Place the chart in a grid and save it (relative path).
+grid = Grid()
+grid.add(scatter, grid_opts=opts.GridOpts(pos_top='10%', pos_left='12%',
+                                          pos_bottom='35%'))
+grid.render('tmp/真实值与预测值对比.html')
-- 
2.34.1