# -*- coding: utf-8 -*-
"""
Created on Sat May 24 20:16:34 2025

@author: LENOVO

Compare several regression models on the Boston housing data set.

The script fits, in order: multiple linear regression, polynomial regression
(degrees 1-3), logistic regression on price bands, quantile regression and an
XGBoost regressor.  Every model is trained on the same 75 % training split and
evaluated on the same 25 % held-out split; performance is reported as MSE
(accuracy for the logistic-regression classifier).
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

try:
    from sklearn.datasets import load_boston
except ImportError:
    # load_boston was removed in scikit-learn 1.2; fall back to the raw file.
    load_boston = None

# Column order of the raw Boston file, used only by the fallback loader.
BOSTON_FEATURE_NAMES = [
    "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
    "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
]
BOSTON_RAW_URL = "http://lib.stat.cmu.edu/datasets/boston"
PRICE_LABELS = ["low", "medium", "high"]


def _load_boston_frame():
    """Return the Boston data as a DataFrame with a trailing ``PRICE`` column.

    Uses ``sklearn.datasets.load_boston`` when the installed scikit-learn
    still ships it; otherwise downloads the raw file (network access needed)
    and reassembles it following the recipe from scikit-learn's deprecation
    notice: each record is spread over two physical rows of the file.
    """
    if load_boston is not None:
        boston = load_boston()
        frame = pd.DataFrame(boston.data, columns=boston.feature_names)
        frame["PRICE"] = boston.target
        return frame

    raw = pd.read_csv(BOSTON_RAW_URL, sep=r"\s+", skiprows=22, header=None)
    values = raw.values
    # Even rows hold the first 11 features, odd rows the last 2 plus target.
    features = np.hstack([values[::2, :], values[1::2, :2]])
    frame = pd.DataFrame(features, columns=BOSTON_FEATURE_NAMES)
    frame["PRICE"] = values[1::2, 2]
    return frame


df = _load_boston_frame()

print("各种回归模型预测波士顿房价及区间:")
print("注:性能以均方误差MSE来衡量 (除logistic regression)")
print()

# One shared split so that every model is scored on identical held-out rows.
x = df.drop("PRICE", axis=1)
y = df["PRICE"]
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=33
)

# ---------------------------------------------------------------------------
# 1. Multiple linear regression
# ---------------------------------------------------------------------------
print("1.多元线性回归:")
lr = LinearRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)
print("R²: {}".format(lr.score(x_test, y_test)))
print("MSE: {}".format(round(mean_squared_error(y_test, y_pred), 3)))
print()

# ---------------------------------------------------------------------------
# 2. Polynomial regression, degrees 1 to 3 (results printed in that order)
# ---------------------------------------------------------------------------
print("2.多项式回归:")
for degree in range(1, 4):
    poly_features = PolynomialFeatures(degree=degree, include_bias=False)
    x_train_poly = poly_features.fit_transform(x_train)
    # Only transform the test set: the transformer is fitted on train data.
    x_test_poly = poly_features.transform(x_test)
    lr.fit(x_train_poly, y_train)
    y_pred = lr.predict(x_test_poly)
    print("R²: {}".format(lr.score(x_test_poly, y_test)))
    print("MSE: {}".format(round(mean_squared_error(y_test, y_pred), 3)))
print()

# ---------------------------------------------------------------------------
# 3. Logistic regression on three equal-width price bands
# ---------------------------------------------------------------------------
print("3.逻辑回归:")
# Learn the band edges from the training targets only and reuse them for the
# test targets, so that "low/medium/high" mean the same price ranges in both.
_, price_edges = pd.cut(y_train, bins=3, retbins=True)
# Open the outer edges so test prices outside the training range still get a
# band instead of NaN.
price_edges[0] = -np.inf
price_edges[-1] = np.inf
y_train_binned = pd.cut(y_train, bins=price_edges, labels=PRICE_LABELS)
y_test_binned = pd.cut(y_test, bins=price_edges, labels=PRICE_LABELS)

log_reg = LogisticRegression(random_state=42)
log_reg.fit(x_train, y_train_binned)
y_pred_log = log_reg.predict(x_test)
print("准确率:", round(accuracy_score(y_test_binned, y_pred_log), 4))
print()

# ---------------------------------------------------------------------------
# 4. Quantile regression of PRICE on RM and LSTAT
# ---------------------------------------------------------------------------
print("4.分位数回归:")
# Fit on the training rows only; fitting on the full frame would leak the
# test rows into the model that is then evaluated on them.
train_df = x_train.assign(PRICE=y_train)
quantile_model = smf.quantreg("PRICE ~ RM + LSTAT", data=train_df)
for q in [0.25, 0.5, 0.75]:
    result = quantile_model.fit(q=q)
    pred = result.predict(x_test)
    print(f"预测分段{q}房价 MSE:", round(mean_squared_error(y_test, pred), 3))
print()

# ---------------------------------------------------------------------------
# 5. XGBoost regressor
# ---------------------------------------------------------------------------
print("5.XGboost:")
xg_reg = xgb.XGBRegressor(
    objective="reg:squarederror",  # current name of the deprecated reg:linear
    colsample_bytree=0.3,
    learning_rate=0.1,
    max_depth=5,
    reg_alpha=10,
    n_estimators=10,
)
xg_reg.fit(x_train, y_train)
preds = xg_reg.predict(x_test)
print("MSE: {}".format(round(mean_squared_error(y_test, preds), 3)))