You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

86 lines
3.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
"""
Created on Sat May 24 20:16:34 2025
@author: LENOVO
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston
# Load the Boston housing data into a DataFrame: one column per feature,
# plus the regression target stored under 'PRICE'.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this script
# needs scikit-learn < 1.2 unless the data source is replaced.
boston = load_boston()
df = pd.DataFrame(boston.data, columns=boston.feature_names)
# Assigning a new key appends the column. The earlier
# `df.columns.insert(-1, 'PRICE')` call returned a new Index that was
# discarded, so it had no effect and has been dropped.
df["PRICE"] = boston.target

# Report header (runtime strings are kept exactly as they were).
print("各种回归模型预测波士顿房价及区间:")
print("性能以均方误差MSE来衡量 (除logistic regression)")
print()
print("1.多元线性回归:")
from sklearn.linear_model import LinearRegression

# Target is the PRICE column; the features are every other column.
y = df['PRICE']
x = df.drop(columns='PRICE')

# Hold out 25% of the rows for evaluation; the seed fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=33
)

# Ordinary least squares on all features.
lr = LinearRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)

# Test-set R² and MSE (MSE rounded to three decimals for display).
r2_linear = lr.score(x_test, y_test)
mse_linear = round(mean_squared_error(y_pred, y_test), 3)
print("R²: {}".format(r2_linear))
print("MSE: {}".format(mse_linear))
print()
print("2.多项式回归:")
from sklearn.preprocessing import PolynomialFeatures

# For polynomial degrees 1..3: expand the features, refit the shared linear
# model `lr` on the expanded training set and print test-set R² and MSE.
# NOTE(review): the source arrived with indentation stripped; the loop is
# assumed to cover everything up to the MSE print, with the blank-line
# print() after it — confirm against the original file.
for i in range(1, 4):
    poly_features = PolynomialFeatures(degree=i, include_bias=False)
    x_train_poly = poly_features.fit_transform(x_train)
    # Apply the transformer fitted on the training set; the test set must
    # only be transformed, never used to fit a preprocessing step.
    x_test_poly = poly_features.transform(x_test)
    lr.fit(x_train_poly, y_train)
    y_pred = lr.predict(x_test_poly)
    print("R²: {}".format(lr.score(x_test_poly, y_test)))
    print("MSE: {}".format(round(mean_squared_error(y_pred, y_test), 3)))
print()
print("3.逻辑回归:")
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Logistic regression is a classifier, so the continuous price is first
# discretised into three ordered classes.
price_labels = ['low', 'medium', 'high']
y_train1 = pd.Series(y_train)
y_test1 = pd.Series(y_test)

# Derive the three equal-width bins from the TRAINING prices only and keep
# the edges. Binning train and test independently would give each set its
# own edges, so the same label would mean different price ranges.
y_train_binned, price_bins = pd.cut(
    y_train1, bins=3, labels=price_labels, retbins=True
)
# Open the outermost edges so a test price below/above the training range
# still falls in 'low'/'high' instead of becoming NaN.
price_bins[0], price_bins[-1] = -np.inf, np.inf
y_test_binned = pd.cut(y_test1, bins=price_bins, labels=price_labels)

log_reg = LogisticRegression(random_state=42)
log_reg.fit(x_train, y_train_binned)
y_pred_log = log_reg.predict(x_test)
print("准确率:", round(accuracy_score(y_test_binned, y_pred_log),4))
print()
print("4.分位数回归:")
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Quantile regression of PRICE on RM and LSTAT at the 25%, 50% and 75%
# quantiles, scored by MSE on the held-out test rows.
# Fit on the training rows only: fitting on the full `df` would include the
# very rows the model is then evaluated on.
train_df = x_train.assign(PRICE=y_train)
# The model specification does not depend on q, so build it once.
model = smf.quantreg('PRICE ~ RM + LSTAT', data=train_df)
# NOTE(review): the source arrived with indentation stripped; the loop is
# assumed to end at the MSE print — confirm against the original file.
for q in [0.25, 0.5, 0.75]:
    result = model.fit(q=q)
    pred = result.predict(x_test)
    print(f"预测分段{q}房价 MSE:",round(mean_squared_error(pred,y_test),3))
print()
print("5.XGboost:")
import xgboost as xgb

# NOTE(review): data_dmatrix is not used by any code visible here; it is kept
# in case a later part of the file relies on it — remove it if not.
data_dmatrix = xgb.DMatrix(data=x, label=y)

# Gradient-boosted trees with squared-error loss. 'reg:squarederror' is the
# current name of the objective formerly spelled 'reg:linear', which is
# deprecated.
xg_reg = xgb.XGBRegressor(
    objective='reg:squarederror',
    colsample_bytree=0.3,
    learning_rate=0.1,
    max_depth=5,
    alpha=10,
    n_estimators=10,
)
xg_reg.fit(x_train, y_train)
preds = xg_reg.predict(x_test)
print("MSE: {}".format(round(mean_squared_error(preds,y_test),3)))