|
|
|
@ -0,0 +1,112 @@
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
"""
|
|
|
|
|
Created on Mon May 10 14:06:20 2021
|
|
|
|
|
|
|
|
|
|
@author: hzh
|
|
|
|
|
"""
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
from sklearn.linear_model import LinearRegression
|
|
|
|
|
from sklearn.metrics import mean_squared_error
|
|
|
|
|
|
|
|
|
|
# Advertising data set: fit sales against advertising spend with linear
# regression, first with TV alone and then with TV + radio + newspaper.
# NOTE(review): the path below is absolute and machine-specific; the script
# only runs where this exact file exists.
df = pd.read_csv('C:\\Users\\HP\\Desktop\\Artificial Intelligence\\advertising.csv')
print(df)

# Exploratory analysis: one scatter plot per advertising channel vs. sales.
plt.figure('fig1')
plt.plot(df['TV'], df['sales'], 'ro')
plt.title('TV')
plt.grid()

plt.figure('fig2')
plt.title('radio')
plt.plot(df['radio'], df['sales'], 'g^')
plt.grid()

plt.figure('fig3')
plt.plot(df['newspaper'], df['sales'], 'b*')
plt.title('newspaper')
plt.grid()

# Build the training and test sets (single feature: TV).
x = df[['TV']]
print(x)
y = df['sales']
print(y)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)
print((x_train, y_train))
print((x_test, y_test))
print(len(x_train))
print(len(x_test))

# Build and train the model.
# The former `normalize=True` argument is gone on purpose: the keyword was
# deprecated in scikit-learn 1.0 and removed in 1.2, where passing it raises
# TypeError. For plain least squares it did not change the fitted line, so
# the default constructor is the drop-in replacement.
lr = LinearRegression()
lr.fit(x_train, y_train)
print('回归方程的系数:', lr.coef_)
print('回归方程的截距:', lr.intercept_)

# Predict on the held-out split and report the fit quality.
y_predict = lr.predict(x_test)
print(y_predict)
# print(mean_squared_error(y_test,y_predict))
# NOTE(review): this is R^2 on the *training* split, not the test split --
# confirm that is the intended metric.
print(lr.score(x_train, y_train))

# Visualisation: refit on an 80/20 split and draw the test points together
# with the fitted regression line.
x = df[['TV']]
y = df['sales']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=33)
lr = LinearRegression()
lr.fit(x_train, y_train)
# Open a dedicated figure; without it the drawing lands on 'fig3' (the
# newspaper scatter), which is still the current figure at this point.
plt.figure('fig4')
plt.scatter(x_test, y_test)
plt.plot(x_test, lr.predict(x_test), color='blue', linewidth=3)  # regression line

# Multiple linear regression on all three channels.
x = df[['TV', 'radio', 'newspaper']]
y = df['sales']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)
lr = LinearRegression()
lr.fit(x_train, y_train)
print('回归方程的系数:', lr.coef_)
print('回归方程的截距:', lr.intercept_)
y_pred = lr.predict(x_test)
print(y_pred)
print('MSE:', mean_squared_error(y_test, y_pred))
# NOTE(review): as above, this scores the training split.
print(lr.score(x_train, y_train))

# Predict sales for two hand-written [TV, radio, newspaper] budgets.
test_data = [[200, 40, 50], [300, 50, 60]]
y_preds = lr.predict(test_data)
print(y_preds)
# Separate figure so the predictions are not overlaid on the TV regression.
plt.figure('fig5')
# test_data is 2-D (2 rows x 3 columns); matplotlib draws one data set per
# column, each against the same two predicted values.
plt.plot(test_data, y_preds, 'b*')
plt.plot(test_data, y_preds, color='blue', linewidth=3)  # connect the two predictions
plt.show()
|
|
|
|
|
|
|
|
|
|
# 一元线性回归电影票房预测
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
from sklearn import linear_model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def drawplt():
    """Open a new figure prepared for the film cost-versus-income chart.

    Creates a fresh matplotlib figure and sets its title, both axis labels,
    fixed axis limits and a visible grid. Nothing is plotted and nothing is
    returned; callers draw onto the current figure afterwards.
    """
    axis_limits = [0, 25, 0, 60]  # [xmin, xmax, ymin, ymax]

    plt.figure()
    plt.xlabel('Cost(Million Yuan)')
    plt.ylabel('Income(Million Yuan)')
    plt.title('Cost and Income Of a Film')
    plt.grid(True)
    plt.axis(axis_limits)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Film box-office example: fit income against production cost (both in
# million yuan) with simple linear regression and plot the fitted line.
X = [[6], [9], [12], [14], [16]]
y = [[9], [12], [29], [35], [59]]

model = linear_model.LinearRegression()
model.fit(X, y)

# Predicted income for a cost of 20 (million yuan); computed once and reused.
a = model.predict([[20]])
print("投资2千万的电影预计票房收入为:{:.2f}百万元".format(a[0][0]))
print("回归模型的系数是:", model.coef_)
print("回归模型的截距是:", model.intercept_)

# y is given as a column (2-D), so coef_ has shape (1, 1) and intercept_ has
# shape (1,). Pull out plain scalars once: int() on a non-0-d array is
# deprecated in NumPy, and mixing the two shapes in one list (as the plot
# call used to do) is rejected as a ragged sequence by NumPy >= 1.24.
slope = model.coef_[0][0]
intercept = model.intercept_[0]
print("最佳拟合线: y = ", int(intercept), "+", int(slope), "× x")

drawplt()
plt.plot(X, y, 'k.')
# Fitted line drawn across the full x-range configured by drawplt().
plt.plot([0, 25], [intercept, 25 * slope + intercept])
plt.show()
|
|
|
|
|
|
|
|
|
|
# 儿童身高预测
|