# -*- coding: utf-8 -*-


def GM11(x0):
    """Fit a GM(1,1) grey forecasting model to a one-dimensional series.

    Args:
        x0: The observed series, oldest value first. Any 1-D sequence that
            ``numpy.asarray`` can convert to floats (ndarray, list, tuple).

    Returns:
        A tuple ``(f, a, b, first, C, P)`` where

        * ``f`` is the restoring function: ``f(k)`` gives the model value of
          the k-th term (1-based). ``f(1)`` is the first observation and
          ``k > len(x0)`` extrapolates beyond the data. ``k`` may be a scalar
          or an array.
        * ``a`` and ``b`` are the fitted model parameters.
        * ``first`` is the first observation, ``x0[0]``, as a float.
        * ``C`` is the ratio of the residual standard deviation to the
          standard deviation of ``x0``.
        * ``P`` is the fraction of residuals whose distance from the mean
          residual is below ``0.6745 * std(x0)``.

    Raises:
        numpy.linalg.LinAlgError: If the normal equations are singular (for
            example when the series is too short to determine ``a`` and ``b``).
    """
    import numpy as np

    # Accept plain sequences as well as arrays, and work in floating point.
    x0 = np.asarray(x0, dtype=float)

    x1 = x0.cumsum()  # 1-AGO (accumulated) series
    # Mean of each pair of neighbouring accumulated values, as a column.
    z1 = ((x1[:-1] + x1[1:]) / 2.0).reshape(-1, 1)
    B = np.hstack((-z1, np.ones_like(z1)))
    Yn = x0[1:].reshape(-1, 1)

    # Least-squares estimate of (a, b) from the normal equations. solve() is
    # used rather than forming an explicit inverse; it still raises
    # LinAlgError when B.T @ B is singular.
    [[a], [b]] = np.linalg.solve(B.T @ B, B.T @ Yn)

    first = x0[0]
    scale = first - b / a

    def f(k):
        """Return the restored model value(s) for 1-based position(s) ``k``."""
        k = np.asarray(k, dtype=float)
        restored = scale * np.exp(-a * (k - 1)) - scale * np.exp(-a * (k - 2))
        # The differenced formula is only meaningful from k = 2 onwards; the
        # first restored value is the first observation itself.
        restored = np.where(k == 1, first, restored)
        # Indexing with () turns a 0-d result back into a NumPy scalar and
        # leaves real arrays untouched.
        return restored[()]

    # Absolute residuals between the observations and the fitted values.
    delta = np.abs(x0 - f(np.arange(1, len(x0) + 1)))
    C = delta.std() / x0.std()
    P = 1.0 * (np.abs(delta - delta.mean()) < 0.6745 * x0.std()).sum() / len(x0)
    return f, a, b, first, C, P
+5210706,533.55,1494.27,15002.59,11570.58,7251888,1175.17,3758.62,109.91,222.51,100.1,1.456,12360,300.55 +5407087,598.33,1677.77,16884.16,13120.83,7376720,1348.93,4450.55,117.15,249.01,101.7,1.424,14174,338.45 +5744550,665.32,1905.84,18287.24,14468.24,7505322,1519.16,5154.23,130.22,303.41,101.5,1.456,16394,408.86 +5994973,738.97,2199.14,19850.66,15444.93,7607220,1696.38,6081.86,128.51,356.99,102.3,1.438,17881,476.72 +6236312,877.07,2624.24,22469.22,18951.32,7734787,1863.34,7140.32,149.87,429.36,103.4,1.474,20058,838.99 +6529045,1005.37,3187.39,25316.72,20835.95,7841695,2105.54,8287.38,169.19,508.84,105.9,1.515,22114,843.14 +6791495,1118.03,3615.77,27609.59,22820.89,7946154,2659.85,9138.21,172.28,557.74,97.5,1.633,24190,1107.67 +7110695,1304.48,4476.38,30658.49,25011.61,8061370,3263.57,10748.28,188.57,664.06,103.2,1.638,29549,1399.16 +7431755,1700.87,5243.03,34438.08,28209.74,8145797,3412.21,12423.44,204.54,710.66,105.5,1.67,34214,1535.14 +7512997,1969.51,5977.27,38053.52,30490.44,8222969,3758.39,13551.21,213.76,760.49,103,1.825,37934,1579.68 +7599295,2110.78,6882.85,42049.14,33156.83,8323096,4454.55,15420.14,228.46,852.56,102.6,1.906,41972,2088.14 diff --git a/代码/new_reg_data.csv b/代码/new_reg_data.csv new file mode 100644 index 0000000..992ce34 --- /dev/null +++ b/代码/new_reg_data.csv @@ -0,0 +1,21 @@ +,x1,x3,x4,x5,x6,x7,x8,x13 +0,3831732,448.19,7571.0,6212.7,6370241,525.71,985.31,5321 +1,3913824,549.97,9038.16,7601.73,6467115,618.25,1259.2,6529 +2,3928907,686.44,9905.31,8092.82,6560508,638.94,1468.06,7008 +3,4282130,802.59,10444.6,8767.98,6664862,656.58,1678.12,7694 +4,4453911,904.57,11255.7,9422.33,6741400,758.83,1893.52,8027 +5,4548852,1000.69,12018.52,9751.44,6850024,878.26,2139.18,8549 +6,4962579,1121.13,13966.53,11349.47,7006896,923.67,2492.74,9566 +7,5029338,1248.29,14694.0,11467.35,7125979,978.21,2841.65,10473 +8,5070216,1370.68,13380.47,10671.78,7206229,1009.24,3203.96,11469 +9,5210706,1494.27,15002.59,11570.58,7251888,1175.17,3758.62,12360 
import time
import keyboard

# Text file whose contents are typed out, and the pause before typing starts.
SOURCE_PATH = r'C:\Users\唐志超\Desktop\pp.txt'
STARTUP_DELAY_SECONDS = 3

# Wait before typing (presumably so the target window can be focused first).
time.sleep(STARTUP_DELAY_SECONDS)

# Send the file to the keyboard one line at a time.
with open(SOURCE_PATH, encoding='utf-8') as source:
    for text_line in source:
        keyboard.write(text_line)
# -*- coding: utf-8 -*-
"""Task 8.3: select the key features for fiscal revenue prediction with Lasso.

Code 8-2. Fits a Lasso regression of ``y`` on the first 13 columns of
``data.csv``, reports the coefficients, and writes the columns whose
coefficient is non-zero to ``new_reg_data.csv``.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso

inputfile = './data.csv'  # input data file
outputfile = './new_reg_data.csv'  # output data file

data = pd.read_csv(inputfile)
candidate_features = data.iloc[:, 0:13]  # the 13 candidate feature columns
target = data['y']

# alpha is the regularisation strength; max_iter is raised to 5000.
lasso = Lasso(alpha=1000, max_iter=5000)
lasso.fit(candidate_features, target)
coefficients = lasso.coef_

# Report the fitted coefficients rounded to five decimals.
print('相关系数为:', np.round(coefficients, 5))

# Count the coefficients that were not shrunk to zero.
print('相关系数非零个数为:', np.sum(coefficients != 0))

# Boolean array marking the features with a non-zero coefficient.
mask = coefficients != 0
print('相关系数是否为零:', mask)

# Keep only the selected feature columns and store them.
new_reg_data = candidate_features.loc[:, mask]
new_reg_data.to_csv(outputfile)
print('输出数据的维度为:', new_reg_data.shape)
# -*- coding: utf-8 -*-
"""Task 8.4: build a fiscal revenue forecast with grey prediction and SVR.

Code 8-3 extrapolates every selected feature to 2014 and 2015 with the
GM(1,1) model and writes the extended table to ``new_reg_data_GM11.xlsx``.
Code 8-4 fits a LinearSVR on the standardised 1994-2013 rows, predicts ``y``
for every year, writes the result to ``new_reg_data_GM11_revenue.xlsx`` and
plots the real values against the predictions.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import explained_variance_score,\
mean_absolute_error,mean_squared_error,\
median_absolute_error,r2_score
from sklearn.svm import LinearSVR

from GM11 import GM11  # project-local grey prediction function

# ----------------------------------------------------------------------------
# Code 8-3: grey prediction of the selected features
# ----------------------------------------------------------------------------
observed_years = list(range(1994, 2014))  # years with real observations
forecast_years = [2014, 2015]  # years filled in by GM(1,1)

inputfile = './new_reg_data.csv'  # features kept by the Lasso step
inputfile1 = './data.csv'  # full data set, needed for the y column
# The CSV was written together with its index, so the first column is read
# back as the index instead of becoming a stray "Unnamed: 0" data column.
new_reg_data = pd.read_csv(inputfile, index_col=0)
raw_data = pd.read_csv(inputfile1)

# Label the rows by year and append empty rows for the forecast years.
new_reg_data.index = observed_years
new_reg_data = new_reg_data.reindex(observed_years + forecast_years)

# Columns to extrapolate.
l = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13']
outputfile = './new_reg_data_GM11.xlsx'

# Fiscal revenue column, padded with NaN for the two forecast years.
y = list(raw_data['y'].values)
y.extend([np.nan, np.nan])

# GM11's restoring function is 1-based, so the first forecast year is
# position len(observed_years) + 1 and the second one follows it.
n_observed = len(observed_years)
for column in l:
    f = GM11(new_reg_data.loc[observed_years, column].values)[0]
    new_reg_data.loc[2014, column] = f(n_observed + 1)  # 2014 forecast
    new_reg_data.loc[2015, column] = f(n_observed + 2)  # 2015 forecast
    new_reg_data[column] = new_reg_data[column].round(2)  # two decimals

new_reg_data['y'] = y  # attach the fiscal revenue column
# The result is stored as an .xlsx workbook, with the year index included.
new_reg_data.to_excel(outputfile, index=True)
print('预测结果为:', new_reg_data.loc[2014:2015, :])

# ----------------------------------------------------------------------------
# Code 8-4: SVR model on top of the grey prediction output
# ----------------------------------------------------------------------------
inputfile = './new_reg_data_GM11.xlsx'  # file written by the step above
data = pd.read_excel(inputfile, index_col=0)
feature = ['x1', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x13']

# Train on the observed years only and standardise with their statistics.
data_train = data.loc[range(1994, 2014)].copy()
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean) / data_std

x_train = data_train[feature].values
y_train = data_train['y'].values

# NOTE(review): LinearSVR is not seeded here; consider passing random_state
# if reproducible predictions are required - confirm before changing.
linearsvr = LinearSVR()
linearsvr.fit(x_train, y_train)

# Standardise every year with the training statistics, predict, and map the
# prediction back to the original scale of y.
x = ((data[feature] - data_mean[feature]) / data_std[feature]).values
data['y_pred'] = linearsvr.predict(x) * data_std['y'] + data_mean['y']

# Store the SVR predictions next to the inputs.
outputfile = './new_reg_data_GM11_revenue.xlsx'
data.to_excel(outputfile)
print('真实值与预测值分别为:', data[['y', 'y_pred']])

print('预测图为:', data[['y', 'y_pred']].plot(subplots=True,
      style=['b-o', 'r-*']))

# Draw the real and the predicted values in a single figure.
# A font with CJK glyphs is selected so the Chinese labels render.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign readable
ax = data[['y', 'y_pred']].plot(style=['r-o', 'b-*'], figsize=(10, 5))
plt.title('真实值与预测值对比')
plt.xlabel('时间')
plt.ylabel('值')

# Legend entries follow the plotted column order: y first, then y_pred.
plt.legend(['真实值', '预测值'])
plt.show()
+5210706,533.55,1494.27,15002.59,11570.58,7251888,1175.17,3758.62,109.91,222.51,100.1,1.456,12360,300.55 +5407087,598.33,1677.77,16884.16,13120.83,7376720,1348.93,4450.55,117.15,249.01,101.7,1.424,14174,338.45 +5744550,665.32,1905.84,18287.24,14468.24,7505322,1519.16,5154.23,130.22,303.41,101.5,1.456,16394,408.86 +5994973,738.97,2199.14,19850.66,15444.93,7607220,1696.38,6081.86,128.51,356.99,102.3,1.438,17881,476.72 +6236312,877.07,2624.24,22469.22,18951.32,7734787,1863.34,7140.32,149.87,429.36,103.4,1.474,20058,838.99 +6529045,1005.37,3187.39,25316.72,20835.95,7841695,2105.54,8287.38,169.19,508.84,105.9,1.515,22114,843.14 +6791495,1118.03,3615.77,27609.59,22820.89,7946154,2659.85,9138.21,172.28,557.74,97.5,1.633,24190,1107.67 +7110695,1304.48,4476.38,30658.49,25011.61,8061370,3263.57,10748.28,188.57,664.06,103.2,1.638,29549,1399.16 +7431755,1700.87,5243.03,34438.08,28209.74,8145797,3412.21,12423.44,204.54,710.66,105.5,1.67,34214,1535.14 +7512997,1969.51,5977.27,38053.52,30490.44,8222969,3758.39,13551.21,213.76,760.49,103,1.825,37934,1579.68 +7599295,2110.78,6882.85,42049.14,33156.83,8323096,4454.55,15420.14,228.46,852.56,102.6,1.906,41972,2088.14 diff --git a/财政收入预测分析报告.docx b/财政收入预测分析报告.docx new file mode 100644 index 0000000..3233aee Binary files /dev/null and b/财政收入预测分析报告.docx differ