ADD file via upload

选取关键特征
hut22412030117 5 months ago
parent acb8f0f3e7
commit 0f62895c1b

@ -0,0 +1,35 @@
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
# Select key features with Lasso regression: fit an L1-penalised linear model
# on standardised features and keep only the columns whose coefficient is
# non-zero.

# Load the raw data; the first CSV column is used as the row index.
# NOTE: adjust this path to wherever the data file actually lives.
income_data = pd.read_csv('data/income_tax.csv', index_col=0)

# Candidate features are the first 13 columns (all rows are used).
# NOTE(review): the original comment says the data covers 2005-2019 —
# confirm against the data file; nothing here filters by year.
features = income_data.iloc[:, 0:13]

# Standardise every feature column (subtract mean, divide by the sample
# standard deviation) so the L1 penalty is applied on a common scale.
data_mean = features.mean()
data_std = features.std()
data_train = (features - data_mean) / data_std

# Build and fit the Lasso model against the target column 'y'.
lasso = Lasso(alpha=1000, random_state=1234)
lasso.fit(data_train, income_data['y'])

# Report the regression coefficients rounded to 5 decimal places.
print('Lasso回归系数为', np.round(lasso.coef_, 5))

# Boolean mask of the features that survived the L1 penalty.
mask = lasso.coef_ != 0
print('系数非零个数为:', np.sum(mask))

# Take the names from the training frame itself so the mask always lines up
# with the columns the model saw, regardless of how many extra columns the
# CSV contains or where 'y' sits.
print('系数非零特征:', data_train.columns[mask])

# Keep the selected features in their original (unstandardised) units.
new_reg_data = features.iloc[:, mask]

# Persist the reduced data set, creating the output directory if needed so a
# fresh checkout does not fail on the write.
# NOTE: adjust this path to the desired output location.
output_path = 'tmp/new_reg_data.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
new_reg_data.to_csv(output_path)

# Report the shape of the reduced data set.
print('输出数据的维度为:', new_reg_data.shape)
Loading…
Cancel
Save