# toratoratora/数据分析3 学生上课代码清单.py

# -*- coding: utf-8 -*-
"""
Created on Mon May 10 14:06:20 2021

@author: hzh
"""
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the advertising data set and echo it for inspection.
df = pd.read_csv('C:\\Users\\HP\\Desktop\\Artificial Intelligence\\advertising.csv')
print(df)

# Exploratory analysis: one scatter plot of sales against each ad channel,
# each in its own named figure with its own marker style.
for fig_name, column, marker in (
    ('fig1', 'TV', 'ro'),
    ('fig2', 'radio', 'g^'),
    ('fig3', 'newspaper', 'b*'),
):
    plt.figure(fig_name)
    plt.plot(df[column], df['sales'], marker)
    plt.title(column)
    plt.grid()

# Build the training and test sets: TV spend is the only feature.
x = df[['TV']]
print(x)
y = df['sales']
print(y)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)
print((x_train, y_train))
print((x_test, y_test))
print(len(x_train))
print(len(x_test))

# Build and train the model.
# NOTE: the `normalize` argument was removed from LinearRegression in
# scikit-learn 1.2 (passing it raises TypeError there), so it is not used.
# Ordinary least squares reports coef_/intercept_ on the original feature
# scale either way.
lr = LinearRegression()
lr.fit(x_train, y_train)
print('回归方程的系数：', lr.coef_)
print('回归方程的截距：', lr.intercept_)

# Predict on the held-out data and evaluate the fit.
y_predict = lr.predict(x_test)
print(y_predict)
# Test-set error, reported the same way as in the multi-feature section.
print('MSE:', mean_squared_error(y_test, y_predict))
# R^2 on the training data.
print(lr.score(x_train, y_train))

# Visualisation: refit on TV alone (80/20 split) and draw the fitted line
# over the held-out points.
x = df[['TV']]
y = df['sales']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=33)
lr = LinearRegression()
lr.fit(x_train, y_train)
# Open a dedicated figure; without it the scatter and line are drawn onto
# whichever figure is current (the 'fig3' newspaper plot).
plt.figure('fig4')
plt.scatter(x_test, y_test)
plt.plot(x_test, lr.predict(x_test), color='blue', linewidth=3)  # fitted regression line

# Multiple linear regression: all three ad channels together predict sales.
feature_columns = ['TV', 'radio', 'newspaper']
x, y = df[feature_columns], df['sales']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=33, test_size=0.25
)

# fit() returns the estimator itself, so construction and training chain.
lr = LinearRegression().fit(x_train, y_train)
print('回归方程的系数：', lr.coef_)
print('回归方程的截距：', lr.intercept_)

# Predict the held-out rows, then report test MSE and training R^2.
y_pred = lr.predict(x_test)
print(y_pred)
mse = mean_squared_error(y_test, y_pred)
print('MSE:', mse)
train_r2 = lr.score(x_train, y_train)
print(train_r2)

# Predict sales for two new (TV, radio, newspaper) budgets.
test_data = [[200, 40, 50], [300, 50, 60]]
# The model was fitted on a DataFrame, so pass the new rows with the same
# column names; a bare list triggers scikit-learn's feature-names warning.
y_preds = lr.predict(pd.DataFrame(test_data, columns=['TV', 'radio', 'newspaper']))
print(y_preds)
# Use a separate figure so these points are not drawn over an earlier plot.
plt.figure('fig5')
# Each feature column of test_data becomes its own series against y_preds.
plt.plot(test_data, y_preds, 'b*')
plt.plot(test_data, y_preds, color='blue', linewidth=3)  # join the predicted points
plt.show()

# Simple (single-feature) linear regression: film box-office prediction
import matplotlib.pyplot as plt
from sklearn import linear_model


def drawplt():
    """Open a new figure prepared for the film cost-vs-income plot.

    Sets the title and axis labels, fixes the axis limits to x in [0, 25]
    and y in [0, 60], and turns the grid on. Returns nothing; the new
    figure is left as the current pyplot figure for later plot calls.
    """
    plt.figure()
    axes = plt.gca()
    axes.set_title('Cost and Income Of a Film')
    axes.set_xlabel('Cost(Million Yuan)')
    axes.set_ylabel('Income(Million Yuan)')
    axes.axis([0, 25, 0, 60])
    axes.grid(True)


# Training data: film cost -> box-office income (both in million yuan,
# matching the axis labels set by drawplt()).
X = [[6], [9], [12], [14], [16]]
y = [[9], [12], [29], [35], [59]]
model = linear_model.LinearRegression()
model.fit(X, y)

# Predict once for a cost of 20 and reuse the result.
a = model.predict([[20]])
print("投资2千万的电影预计票房收入为：{:.2f}百万元".format(a[0][0]))
print("回归模型的系数是：", model.coef_)
print("回归模型的截距是：", model.intercept_)

# y is 2-D, so coef_ has shape (1, 1) and intercept_ has shape (1,).
# Extract plain floats: int() on these arrays is deprecated in NumPy and
# truncating to integers misreports the fitted line.
film_slope = model.coef_.item()
film_intercept = model.intercept_.item()
print("最佳拟合线: y = ", round(film_intercept, 2), "＋", round(film_slope, 2), "× x")

drawplt()
plt.plot(X, y, 'k.')
# Scalar endpoints keep the y-list homogeneous; mixing the (1,) and (1, 1)
# arrays directly yields a ragged sequence that NumPy refuses to convert.
plt.plot([0, 25], [film_intercept, 25 * film_slope + film_intercept])
plt.show()

# 儿童身高预测