From 1353d391e26d79165a7665c1b3686ec62166ed3e Mon Sep 17 00:00:00 2001
From: hnu202110040108 <napoleonandcaesar@163.com>
Date: Wed, 22 Jun 2022 14:16:09 +0800
Subject: [PATCH] ADD file via upload

---
 数据分析3 学生上课代码清单.py | 112 ++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 数据分析3 学生上课代码清单.py

diff --git a/数据分析3 学生上课代码清单.py b/数据分析3 学生上课代码清单.py
new file mode 100644
index 0000000..b3859ec
--- /dev/null
+++ b/数据分析3 学生上课代码清单.py	
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon May 10 14:06:20 2021
+
+@author: hzh
+"""
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error
+
+# Load the advertising data; the columns 'TV', 'radio', 'newspaper' and 'sales' are used below.
+# NOTE: the location is a hard-coded absolute Windows path.
+df = pd.read_csv('C:\\Users\\HP\\Desktop\\Artificial Intelligence\\advertising.csv')
+print(df)
+# Exploratory analysis: scatter plots of sales against each advertising column.
+# Every (figure name, column, marker style) entry is drawn in its own named figure:
+#   fig1 -> TV (red circles), fig2 -> radio (green triangles), fig3 -> newspaper (blue stars).
+scatter_specs = (
+    ('fig1', 'TV', 'ro'),
+    ('fig2', 'radio', 'g^'),
+    ('fig3', 'newspaper', 'b*'),
+)
+for figure_name, column, marker in scatter_specs:
+    plt.figure(figure_name)
+    plt.plot(df[column], df['sales'], marker)
+    plt.title(column)
+    plt.grid()
+
+# Build the single-feature (TV -> sales) training and test sets; 25% is held out, seed fixed.
+x = df[['TV']]  # double brackets keep the features as a one-column DataFrame
+print(x)
+y = df['sales']
+print(y)
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)
+for features, target in ((x_train, y_train), (x_test, y_test)):
+    print((features, target))
+for subset in (x_train, x_test):
+    print(len(subset))
+
+# Build and train the model. The normalize=True argument is not passed: it was removed in
+# scikit-learn 1.2 (TypeError) and an unregularized fit reports coefficients in the original scale.
+lr = LinearRegression()
+lr.fit(x_train, y_train)
+print('回归方程的系数：', lr.coef_)
+print('回归方程的截距：', lr.intercept_)
+
+# Predict on the held-out test rows, then print the model's R^2 score.
+y_predict = lr.predict(x_test)
+print(y_predict)
+print(lr.score(x_train, y_train))  # NOTE: scored on the training rows, not on x_test/y_test
+
+# Visualization: refit TV -> sales on an 80/20 split and draw the test points with the fitted line.
+# A dedicated figure is opened so the drawing is not overlaid on the still-current 'fig3' plot.
+x, y = df[['TV']], df['sales']
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=33)
+lr = LinearRegression().fit(x_train, y_train)
+plt.figure('fig4')
+plt.scatter(x_test, y_test)
+plt.plot(x_test, lr.predict(x_test), color='blue', linewidth=3)  # regression line
+
+# Multiple linear regression: model sales from all three advertising columns.
+feature_columns = ['TV', 'radio', 'newspaper']
+x, y = df[feature_columns], df['sales']
+x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)
+lr = LinearRegression().fit(x_train, y_train)
+for label, fitted_value in (('回归方程的系数：', lr.coef_), ('回归方程的截距：', lr.intercept_)):
+    print(label, fitted_value)
+y_pred = lr.predict(x_test)
+print(y_pred)
+test_mse = mean_squared_error(y_test, y_pred)  # error on the held-out 25%
+print('MSE:', test_mse)
+print(lr.score(x_train, y_train))  # R^2 on the training rows
+
+# Predict sales for two new (TV, radio, newspaper) rows and plot every input value against its prediction.
+test_data = [[200, 40, 50], [300, 50, 60]]
+y_preds = lr.predict(pd.DataFrame(test_data, columns=['TV', 'radio', 'newspaper']))  # named columns, as in training
+print(y_preds)
+plt.plot(test_data, y_preds, 'b*')
+plt.plot(test_data, y_preds, color='blue', linewidth=3)  # line through the two predictions, one per input column
+plt.show()
+
+# Univariate linear regression: film box-office prediction
+import matplotlib.pyplot as plt
+from sklearn import linear_model
+
+
+def drawplt():
+    """Open a new figure set up for the film plot: title, axis labels, fixed axis limits, grid."""
+    axes = plt.figure().gca()
+    axes.set_title('Cost and Income Of a Film')
+    axes.set(xlabel='Cost(Million Yuan)', ylabel='Income(Million Yuan)')
+    axes.axis([0, 25, 0, 60])  # x from 0 to 25, y from 0 to 60
+    axes.grid(True)
+
+
+X = [[6], [9], [12], [14], [16]]  # film cost, one single-feature row per film
+y = [[9], [12], [29], [35], [59]]  # film income; 2-D target, so coef_ is (1, 1) and intercept_ is (1,)
+model = linear_model.LinearRegression().fit(X, y)
+a = model.predict([[20]])  # predicted once and reused in the message below
+print("投资2千万的电影预计票房收入为：{:.2f}百万元".format(a[0][0]))
+print("回归模型的系数是：", model.coef_)
+print("回归模型的截距是：", model.intercept_)
+# Print the fitted scalars rounded to 2 decimals; int() on the arrays truncated them toward zero.
+print("最佳拟合线: y = ", round(float(model.intercept_[0]), 2), "＋", round(float(model.coef_[0][0]), 2), "× x")
+drawplt()
+plt.plot(X, y, 'k.')
+plt.plot([0, 25], model.predict([[0], [25]]).ravel())  # fitted line over the x-range; ravel() keeps y flat, not ragged
+plt.show()
+
+# Children's height prediction