# -*- coding: utf-8 -*-
"""
Created on Mon May 10 14:06:20 2021

@author: hzh

Linear-regression walkthrough:

1. Exploratory scatter plots of the advertising data set.
2. Simple regression of ``sales`` on ``TV``.
3. Multiple regression of ``sales`` on ``TV``, ``radio`` and ``newspaper``.
4. A small simple-regression example predicting film income from cost.
"""
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

df = pd.read_csv('C:\\Users\\HP\\Desktop\\Artificial Intelligence\\advertising.csv')
print(df)

# Exploratory analysis: one scatter plot of sales per advertising channel.
plt.figure('fig1')
plt.plot(df['TV'], df['sales'], 'ro')
plt.title('TV')
plt.grid()

plt.figure('fig2')
plt.title('radio')
plt.plot(df['radio'], df['sales'], 'g^')
plt.grid()

plt.figure('fig3')
plt.plot(df['newspaper'], df['sales'], 'b*')
plt.title('newspaper')
plt.grid()

# Build the training and test data (single feature: TV).
x = df[['TV']]
print(x)
y = df['sales']
print(y)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=33
)
print((x_train, y_train))
print((x_test, y_test))
print(len(x_train))
print(len(x_test))

# Create and train the model.
# NOTE: the former ``normalize=True`` argument is gone from current
# scikit-learn releases and raises TypeError there; it is omitted here.
lr = LinearRegression()
lr.fit(x_train, y_train)
print('回归方程的系数:', lr.coef_)
print('回归方程的截距:', lr.intercept_)

# Predict and check the fit.
y_predict = lr.predict(x_test)
print(y_predict)
# This is the score on the *training* split, as in the original script.
print(lr.score(x_train, y_train))

# Visualisation of the single-feature fit (note: a different 80/20 split).
x = df[['TV']]
y = df['sales']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=33
)
lr = LinearRegression()
lr.fit(x_train, y_train)
# Use a dedicated figure so the fit is not drawn on top of 'fig3'.
plt.figure('fig4')
plt.scatter(x_test, y_test)
plt.plot(x_test, lr.predict(x_test), color='blue', linewidth=3)  # draw the regression line

# Multiple linear regression.
feature_columns = ['TV', 'radio', 'newspaper']
x = df[feature_columns]
y = df['sales']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=33
)
lr = LinearRegression()
lr.fit(x_train, y_train)
print('回归方程的系数:', lr.coef_)
print('回归方程的截距:', lr.intercept_)
y_pred = lr.predict(x_test)
print(y_pred)
print('MSE:', mean_squared_error(y_test, y_pred))
print(lr.score(x_train, y_train))

# Predict sales for two new advertising budgets.
test_data = [[200, 40, 50], [300, 50, 60]]
# The model was fitted on a DataFrame, so pass the same column names at
# prediction time instead of a bare list.
y_preds = lr.predict(pd.DataFrame(test_data, columns=feature_columns))
print(y_preds)
# Dedicated figure again. ``test_data`` is 2 x 3, so matplotlib draws one
# series per feature column against the two predictions.
plt.figure('fig5')
plt.plot(test_data, y_preds, 'b*')
plt.plot(test_data, y_preds, color='blue', linewidth=3)  # draw the regression line
plt.show()


# Simple linear regression: film box-office prediction.
def drawplt():
    """Open a new figure with the title, axis labels, limits and grid of the film plot."""
    plt.figure()
    plt.title('Cost and Income Of a Film')
    plt.xlabel('Cost(Million Yuan)')
    plt.ylabel('Income(Million Yuan)')
    plt.axis([0, 25, 0, 60])
    plt.grid(True)


X = [[6], [9], [12], [14], [16]]
y = [[9], [12], [29], [35], [59]]
model = linear_model.LinearRegression()
model.fit(X, y)

# Predict once and reuse the result.
predicted_income = model.predict([[20]])
print("投资2千万的电影预计票房收入为:{:.2f}百万元".format(predicted_income[0][0]))
print("回归模型的系数是:", model.coef_)
print("回归模型的截距是:", model.intercept_)

# ``y`` is 2-D, so ``coef_`` has shape (1, 1) and ``intercept_`` shape (1,).
# Extract plain Python floats before converting or doing arithmetic.
slope = model.coef_.item()
intercept = model.intercept_.item()
print("最佳拟合线: y = ", int(intercept), "+", int(slope), "× x")

drawplt()
plt.plot(X, y, 'k.')
plt.plot([0, 25], [intercept, 25 * slope + intercept])
plt.show()

# Child height prediction