# -*- coding: utf-8 -*-
"""
Created on Sat May 24 20:16:34 2025

@author: LENOVO
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston

boston = load_boston()
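# Note (not in the original script): load_boston was removed in scikit-learn 1.2.
# On newer versions, a sketch of the replacement suggested by the deprecation notice
# (assumes network access to the original CMU source) would be:
#
#   data_url = "http://lib.stat.cmu.edu/datasets/boston"
#   raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
#   data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
#   target = raw_df.values[1::2, 2]
#   feature_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
#                    'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']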

#print(boston.data.shape, boston.target.shape, boston.feature_names)

df = pd.DataFrame(boston.data, columns=boston.feature_names)
df["PRICE"] = boston.target   # assigning the column is enough; a separate Index.insert step is not needed
#print(df)

print("各种回归模型预测波士顿房价及区间:")
|
|
|
|
|
print("注:性能以均方误差MSE来衡量 (除logistic regression)")
|
|
|
|
|
print()
|
|
|
|
|
|
|
|
|
|
print("1.多元线性回归:")
|
|
|
|
|
from sklearn.linear_model import LinearRegression
|
|
|
|
|
lr = LinearRegression()
|
|
|
|
|
x = df.drop('PRICE',axis = 1)
|
|
|
|
|
y = df['PRICE']
|
|
|
|
|
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25,
|
|
|
|
|
random_state=33)
|
|
|
|
|
lr.fit(x_train, y_train)
|
|
|
|
|
y_pred = lr.predict(x_test)
|
|
|
|
|
#print(lr.intercept_,lr.coef_)
|
|
|
|
|
print("R²: {}".format(lr.score(x_test,y_test)))
|
|
|
|
|
print("MSE: {}".format(round(mean_squared_error(y_pred,y_test),3)))
|
|
|
|
|
print()
|
|
|
|
|
|
|
|
|
|
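
# Illustrative sketch (not part of the original script): the header above also promises
# price intervals, so one option is a 95% prediction interval from a statsmodels OLS fit
# of the same linear model.
import statsmodels.api as sm
ols_res = sm.OLS(y_train, sm.add_constant(x_train)).fit()
ols_pred_frame = ols_res.get_prediction(sm.add_constant(x_test)).summary_frame(alpha=0.05)
#print(ols_pred_frame[['mean', 'obs_ci_lower', 'obs_ci_upper']].head())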
print("2.多项式回归:")
|
|
|
|
|
from sklearn.preprocessing import PolynomialFeatures
|
|
|
|
|
for i in range(1,4):
|
|
|
|
|
poly_features = PolynomialFeatures(degree=i, include_bias=False)
|
|
|
|
|
x_train_poly = poly_features.fit_transform(x_train)
|
|
|
|
|
x_test_poly = poly_features.fit_transform(x_test)
|
|
|
|
|
lr.fit(x_train_poly,y_train)
|
|
|
|
|
y_pred = lr.predict(x_test_poly)
|
|
|
|
|
#print(lr.intercept_,lr.coef_)
|
|
|
|
|
print("R²: {}".format(lr.score(x_test_poly,y_test)))
|
|
|
|
|
print("MSE: {}".format(round(mean_squared_error(y_pred,y_test),3)))
|
|
|
|
|
print()
|
|
|
|
|
|
|
|
|
|
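
# Illustrative sketch (not part of the original script): the same degree sweep can be written
# as a scikit-learn Pipeline, which keeps the polynomial expansion and the linear fit together.
from sklearn.pipeline import make_pipeline
poly_pipeline = make_pipeline(PolynomialFeatures(degree=2, include_bias=False), LinearRegression())
poly_pipeline.fit(x_train, y_train)
#print("Pipeline R²:", round(poly_pipeline.score(x_test, y_test), 4))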
print("3.逻辑回归:")
|
|
|
|
|
import pandas as pd
|
|
|
|
|
from sklearn.linear_model import LogisticRegression
|
|
|
|
|
from sklearn.metrics import accuracy_score
|
|
|
|
|
y_train1 = pd.Series(y_train)
|
|
|
|
|
y_test1 = pd.Series(y_test)
|
|
|
|
|
y_train_binned = pd.cut(y_train1, bins=3, labels=['low', 'medium', 'high'])
|
|
|
|
|
y_test_binned = pd.cut(y_test1, bins=3, labels=['low', 'medium', 'high'])
|
|
|
|
|
log_reg = LogisticRegression(random_state=42)
|
|
|
|
|
log_reg.fit(x_train, y_train_binned)
|
|
|
|
|
y_pred_log = log_reg.predict(x_test)
|
|
|
|
|
print("准确率:", round(accuracy_score(y_test_binned, y_pred_log),4))
|
|
|
|
|
print()
|
|
|
|
|
|
|
|
|
|
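
# Illustrative sketch (not part of the original script): the features span very different scales,
# so a standardized pipeline is a common way to help the logistic solver converge.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
log_reg_scaled = make_pipeline(StandardScaler(), LogisticRegression(random_state=42, max_iter=1000))
log_reg_scaled.fit(x_train, y_train_binned)
#print("Accuracy (scaled):", round(accuracy_score(y_test_binned, log_reg_scaled.predict(x_test)), 4))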
print("4.分位数回归:")
|
|
|
|
|
import statsmodels.api as sm
|
|
|
|
|
import statsmodels.formula.api as smf
|
|
|
|
|
|
|
|
|
|
for q in [0.25,0.5,0.75]:
|
|
|
|
|
model = smf.quantreg('PRICE ~ RM + LSTAT', data=df)
|
|
|
|
|
result = model.fit(q=q)
|
|
|
|
|
#print(result.summary())
|
|
|
|
|
pred = result.predict(x_test)
|
|
|
|
|
#print(f"预测分段{q}房价:", pred)
|
|
|
|
|
print(f"预测分段{q}房价 MSE:",round(mean_squared_error(pred,y_test),3))
|
|
|
|
|
print()
|
|
|
|
|
|
|
|
|
|
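
# Illustrative sketch (not part of the original script): the 0.25 and 0.75 quantile fits can be
# combined into a rough 50% prediction interval for each test house.
pred_q25 = smf.quantreg('PRICE ~ RM + LSTAT', data=df).fit(q=0.25).predict(x_test)
pred_q75 = smf.quantreg('PRICE ~ RM + LSTAT', data=df).fit(q=0.75).predict(x_test)
#print("Average width of the 50% interval:", round((pred_q75 - pred_q25).mean(), 3))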
print("5.XGboost:")
|
|
|
|
|
import xgboost as xgb
|
|
|
|
|
data_dmatrix = xgb.DMatrix(data=x,label=y)
|
|
|
|
|
xg_reg = xgb.XGBRegressor(objective ='reg:linear', colsample_bytree = 0.3, learning_rate = 0.1,max_depth = 5, alpha = 10, n_estimators = 10)
|
|
|
|
|
xg_reg.fit(x_train,y_train)
|
|
|
|
|
preds = xg_reg.predict(x_test)
|
|
|
|
|
print("MSE: {}".format(round(mean_squared_error(preds,y_test),3)))
|
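
# Illustrative sketch (not part of the original script): data_dmatrix above is otherwise unused,
# so xgb.cv can reuse it for a quick 3-fold cross-validated RMSE with the same hyperparameters.
params = {"objective": "reg:squarederror", "colsample_bytree": 0.3, "learning_rate": 0.1,
          "max_depth": 5, "alpha": 10}
cv_results = xgb.cv(params=params, dtrain=data_dmatrix, num_boost_round=10, nfold=3,
                    metrics="rmse", seed=123)
print("CV RMSE (last round): {}".format(round(cv_results["test-rmse-mean"].iloc[-1], 3)))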