# -*- coding: utf-8 -*-
"""
Created on Mon May 10 14:06:20 2021

@author: hzh

Classroom code listing: simple and multiple linear regression on the
advertising data set, followed by a one-variable film box-office example.
"""
# Provenance: recovered from git patch 1353d391e26d79165a7665c1b3686ec62166ed3e
# ("ADD file via upload", hnu202110040108, Wed, 22 Jun 2022 14:16:09 +0800),
# which created the file "数据分析3 学生上课代码清单.py".
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Location of the advertising data set; adjust to the local machine.
ADVERTISING_CSV = 'C:\\Users\\HP\\Desktop\\Artificial Intelligence\\advertising.csv'

df = pd.read_csv(ADVERTISING_CSV)
print(df)

# Exploratory analysis: scatter plot of each advertising channel against sales.
plt.figure('fig1')
plt.plot(df['TV'], df['sales'], 'ro')
plt.title('TV')
plt.grid()

plt.figure('fig2')
plt.title('radio')
plt.plot(df['radio'], df['sales'], 'g^')
plt.grid()

plt.figure('fig3')
plt.plot(df['newspaper'], df['sales'], 'b*')
plt.title('newspaper')
plt.grid()

# Build the training and test sets (single feature: TV).
x = df[['TV']]
print(x)
y = df['sales']
print(y)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=33
)
print((x_train, y_train))
print((x_test, y_test))
print(len(x_train))
print(len(x_test))

# Create and train the model.
# The original passed normalize=True; that parameter was removed from
# LinearRegression in scikit-learn 1.2 and raises TypeError there. It is
# dropped here: an ordinary least-squares fit is not changed by rescaling
# the features, and coefficients were reported on the original scale anyway.
lr = LinearRegression()
lr.fit(x_train, y_train)
print('回归方程的系数:', lr.coef_)
print('回归方程的截距:', lr.intercept_)

# Predict on the test set and report the score on the training set.
y_predict = lr.predict(x_test)
print(y_predict)
print(lr.score(x_train, y_train))

# Visualisation of the simple regression (TV -> sales) with an 80/20 split.
x = df[['TV']]
y = df['sales']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=33
)
lr = LinearRegression()
lr.fit(x_train, y_train)
# Open a dedicated figure: without it the points and the line would be drawn
# on top of the still-current 'fig3' (newspaper) scatter plot.
plt.figure('fig4')
plt.scatter(x_test, y_test)
plt.plot(x_test, lr.predict(x_test), color='blue', linewidth=3)  # regression line

# Multiple linear regression on all three channels.
feature_names = ['TV', 'radio', 'newspaper']
x = df[feature_names]
y = df['sales']
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=33
)
lr = LinearRegression()
lr.fit(x_train, y_train)
print('回归方程的系数:', lr.coef_)
print('回归方程的截距:', lr.intercept_)
y_pred = lr.predict(x_test)
print(y_pred)
print('MSE:', mean_squared_error(y_test, y_pred))
print(lr.score(x_train, y_train))

# Predict sales for two new advertising budgets (TV, radio, newspaper).
test_data = [[200, 40, 50], [300, 50, 60]]
# The model was fitted on a DataFrame, so hand it the same column names to
# avoid scikit-learn's "X does not have valid feature names" warning.
y_preds = lr.predict(pd.DataFrame(test_data, columns=feature_names))
print(y_preds)
# test_data is 2-D, so matplotlib draws one data set per feature column
# against the predictions.
plt.plot(test_data, y_preds, 'b*')
plt.plot(test_data, y_preds, color='blue', linewidth=3)
plt.show()


# Simple linear regression: film box-office prediction.
def drawplt():
    """Open a new figure preconfigured for the film cost/income plot.

    Sets the title, both axis labels, the axis limits (x: 0-25, y: 0-60)
    and turns the grid on. Returns nothing; later ``plt`` calls draw on
    the figure created here.
    """
    plt.figure()
    plt.title('Cost and Income Of a Film')
    plt.xlabel('Cost(Million Yuan)')
    plt.ylabel('Income(Million Yuan)')
    plt.axis([0, 25, 0, 60])
    plt.grid(True)


X = [[6], [9], [12], [14], [16]]
y = [[9], [12], [29], [35], [59]]
model = linear_model.LinearRegression()
model.fit(X, y)
# Predict once and reuse the result (the original predicted twice).
predicted_income = model.predict([[20]])
print("投资2千万的电影预计票房收入为:{:.2f}百万元".format(predicted_income[0][0]))
print("回归模型的系数是:", model.coef_)
print("回归模型的截距是:", model.intercept_)
# y is 2-D, so coef_ has shape (1, 1) and intercept_ has shape (1,). Pull out
# plain floats instead of calling int() on the arrays (deprecated for
# ndim > 0 since NumPy 1.25) or mixing array shapes in the plot data.
slope = float(model.coef_[0][0])
intercept = float(model.intercept_[0])
print("最佳拟合线: y = ", int(intercept), "+", int(slope), "× x")
drawplt()
plt.plot(X, y, 'k.')
plt.plot([0, 25], [intercept, 25 * slope + intercept])
plt.show()

# Child height prediction