You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
toratoratora/数据分析3 学生上课代码清单.py

113 lines
3.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
"""
Created on Mon May 10 14:06:20 2021
@author: hzh
"""
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Load the advertising data set from disk and echo it to the console.
df = pd.read_csv('C:\\Users\\HP\\Desktop\\Artificial Intelligence\\advertising.csv')
print(df)

# Exploratory analysis: one named figure per feature column, each plotting
# that column against 'sales' with its own marker style, titled after the
# column and with the grid switched on.
for figure_name, column, marker in (
    ('fig1', 'TV', 'ro'),
    ('fig2', 'radio', 'g^'),
    ('fig3', 'newspaper', 'b*'),
):
    plt.figure(figure_name)
    plt.plot(df[column], df['sales'], marker)
    plt.title(column)
    plt.grid()
# Build the training and test data for the single-feature model (TV -> sales).
x = df[['TV']]
print(x)
y = df['sales']
print(y)
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)
print((x_train, y_train))
print((x_test, y_test))
print(len(x_train))
print(len(x_test))
# Create and train the model.
# NOTE: the former `normalize=True` argument is gone on purpose. It was
# deprecated in scikit-learn 1.0 and removed in 1.2, where passing it raises
# TypeError. For plain least squares the fitted line does not depend on it,
# because coefficients were always reported on the original feature scale.
lr = LinearRegression()
lr.fit(x_train, y_train)
print('回归方程的系数:', lr.coef_)
print('回归方程的截距:', lr.intercept_)
# Predict on the held-out rows and report the fit quality.
y_predict = lr.predict(x_test)
print(y_predict)
# The test-set error is available via mean_squared_error(y_test, y_predict).
# The score below is R^2 measured on the *training* split.
print(lr.score(x_train, y_train))
# Visualization: refit the single-feature model (TV -> sales) on an 80/20
# split and draw the fitted line over the held-out points.
x = df[['TV']]
y = df['sales']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=33)
lr = LinearRegression()
lr.fit(x_train, y_train)
# Open a dedicated figure first. Without it pyplot draws onto whichever
# figure is current, i.e. the most recently opened exploratory scatter
# (newspaper vs. sales), mixing two unrelated plots on one set of axes.
plt.figure('fig4')
plt.scatter(x_test, y_test)
plt.plot(x_test, lr.predict(x_test), color='blue', linewidth=3)  # draw the regression line
# Multiple linear regression: predict 'sales' from all three feature columns.
feature_columns = ['TV', 'radio', 'newspaper']
x, y = df[feature_columns], df['sales']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=33
)

# fit() returns the estimator itself, so construction and training chain.
lr = LinearRegression().fit(x_train, y_train)
coefficients = lr.coef_
intercept = lr.intercept_
print('回归方程的系数:', coefficients)
print('回归方程的截距:', intercept)

# Evaluate: predictions and MSE on the test split, then R^2 on the training split.
y_pred = lr.predict(x_test)
print(y_pred)
test_mse = mean_squared_error(y_test, y_pred)
print('MSE:', test_mse)
train_r2 = lr.score(x_train, y_train)
print(train_r2)
# Predict sales for two new rows of (TV, radio, newspaper) values.
test_data = [[200, 40, 50], [300, 50, 60]]
# The model was fitted on a DataFrame, so hand predict() a DataFrame with the
# same column names. A bare list makes scikit-learn (>= 1.0) warn that
# "X does not have valid feature names".
test_frame = pd.DataFrame(test_data, columns=['TV', 'radio', 'newspaper'])
y_preds = lr.predict(test_frame)
print(y_preds)
# test_data is 2x3, so matplotlib draws one series per feature column, each
# against the same two predicted values: first as star markers, then as a
# thick segment joining the two predictions. These segments connect
# predictions; they are not the fitted regression plane.
plt.plot(test_data, y_preds, 'b*')
plt.plot(test_data, y_preds, color='blue', linewidth=3)
plt.show()
# 一元线性回归电影票房预测
import matplotlib.pyplot as plt
from sklearn import linear_model
def drawplt():
    """Open a new figure preconfigured for the film cost/income plot.

    Creates a fresh current figure and sets its title, axis labels, fixed
    axis limits (x from 0 to 25, y from 0 to 60) and grid. Nothing is
    plotted here; callers draw onto the figure afterwards.
    """
    plt.figure()
    plt.title('Cost and Income Of a Film')
    plt.xlabel('Cost(Million Yuan)')
    plt.ylabel('Income(Million Yuan)')
    plt.axis([0, 25, 0, 60])
    plt.grid(True)
# Training data: film cost -> box-office income, both in million yuan
# (units as given by the axis labels and the printed message below).
X = [[6], [9], [12], [14], [16]]
y = [[9], [12], [29], [35], [59]]
model = linear_model.LinearRegression()
model.fit(X, y)

# Predict once and reuse the result instead of calling predict() twice.
a = model.predict([[20]])

# y is 2-D, so the fitted attributes are arrays: coef_ has shape (1, 1) and
# intercept_ has shape (1,). Extract plain floats once. Calling int() on the
# arrays truncated the values and relies on array-to-scalar conversion that
# NumPy has deprecated; mixing the two shapes in one list passed to
# plt.plot() also forms a ragged sequence.
slope = float(model.coef_[0][0])
intercept = float(model.intercept_[0])

print("投资2千万的电影预计票房收入为{:.2f}百万元".format(a[0][0]))
print("回归模型的系数是:", model.coef_)
print("回归模型的截距是:", model.intercept_)
# Show the fitted line with two decimals and an explicit '+' between terms
# (the old output read like "y = -25  4 × x").
print("最佳拟合线: y = {:.2f} + {:.2f} × x".format(intercept, slope))

drawplt()
plt.plot(X, y, 'k.')
# Draw the fitted line across the full x-range configured by drawplt().
plt.plot([0, 25], [intercept, 25 * slope + intercept])
plt.show()
# 儿童身高预测