diff --git a/数据拟合上课学生代码.py b/数据拟合上课学生代码.py
new file mode 100644
index 0000000..020e3c1
--- /dev/null
+++ b/数据拟合上课学生代码.py
@@ -0,0 +1,118 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Jun 13 06:23:11 2021
+
+@author: hzh
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Generate 100 data points
+np.random.seed(7)
+X = 2 * np.random.rand(100)  # 100 random numbers simulating x
+Y = 15 + 3 * X + np.random.randn(100)  # simulated y values; true a=3, b=15
+fig, ax = plt.subplots()
+ax.scatter(X, Y)
+fig.savefig('scatter.pdf')
+plt.show()
+
+learning_rate = 0.1
+roundN = 7  # number of passes (epochs) over the data set
+np.random.seed(3)
+a = np.random.randn()
+b = np.random.randn()
+print(a)
+print(b)
+
+
+# Mean squared error of the current fit
+def errorCompute(a, b):
+    error = 0
+    for j in range(len(X)):
+        error += 1 / 2 * (a * X[j] + b - Y[j]) ** 2
+    return error / len(X)
+
+
+# Fit a and b by stochastic gradient descent.
+# Iterate for several epochs, printing the error as it falls; once the error
+# barely changes, the current a and b define the fitted line.
+for i in range(roundN):
+    for j in range(len(X)):
+        if j % 50 == 0:
+            print("round=%d,iter=%d,a=%f,b=%f,E=%f" % (i, j, a, b, errorCompute(a, b)))
+        gradA = (a * X[j] + b - Y[j]) * X[j]  # partial derivative of the error w.r.t. a
+        gradB = a * X[j] + b - Y[j]  # partial derivative of the error w.r.t. b
+        a = a - learning_rate * gradA  # update rule
+        b = b - learning_rate * gradB  # update rule
+print(a, b)  # print the fitted a and b
+# Plot the result
+maxX = max(X)
+minX = min(X)
+X_fit = np.arange(minX, maxX, 0.01)
+Y_fit = a * X_fit + b  # line found by gradient descent
+plt.plot(X, Y, '.')  # scatter of the 100 points
+plt.plot(X_fit, Y_fit, 'r-', label='Gradient Descent')  # fitted line
+plt.plot(X_fit, 15 + 3 * X_fit, 'b-', label='True')  # true line
+plt.legend()  # show the legend
+plt.show()
+
+# Exercises:
+#################begin##################
+# 1. Simulate the function y = 10x + 30 and generate 100 data points.
+#    Use gradient descent to fit the underlying linear function y = ax + b
+#    and find a and b. (A solution sketch follows after this diff.)
+
+
+# Data fitting 2
+# 2. Simulate the two-variable function f(x, y) = 5*x + 10*y + 30 and
+#    generate 1000 data points. Use gradient descent to fit the underlying
+#    linear function f(x, y) = w1*x + w2*y + b and find w1, w2, b.
+#    (A solution sketch follows after this diff.)
+
+
+# Linear prediction
+from sklearn.model_selection import train_test_split
+
+# Data preparation
+data = np.loadtxt('advertising.txt', delimiter=',')
+print(data)
+X = data[:, 0:-1]
+y = data[:, -1]
+X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))  # min-max normalization
+# print(X)
+# Split into training and test sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
+# Gradient descent for y = w1*x1 + w2*x2 + w3*x3 + b
+# Initialization
+np.random.seed(10)
+w1, w2, w3, b = np.random.randn(4)
+lr = 0.001  # also tried: 0.00001
+rounds = 1000  # earlier trial: 300 rounds, lr=0.001, error 4.54
+
+
+def computeErr(X, y):  # mean squared error
+    err = 0
+    for i in range(len(X)):
+        err += 1 / 2 * (X[i, 0] * w1 + X[i, 1] * w2 + X[i, 2] * w3 + b - y[i]) ** 2
+    return err / len(X)
+
+
+for i in range(rounds):  # fit the training set with stochastic gradient descent
+    for j in range(len(X_train)):
+        # compute the residual once so all four gradients use the same weights
+        residual = X_train[j, 0] * w1 + X_train[j, 1] * w2 + X_train[j, 2] * w3 + b - y_train[j]
+        w1 -= lr * residual * X_train[j, 0]
+        w2 -= lr * residual * X_train[j, 1]
+        w3 -= lr * residual * X_train[j, 2]
+        b -= lr * residual
+    if i % 100 == 0:
+        print('epoch %i: training error = %.2f' % (i, computeErr(X_train, y_train)))
+# Model evaluation
+print('test error:', computeErr(X_test, y_test))
+print('weights:', w1, w2, w3)
+print('intercept:', b)
+predict = (X_test * np.array([w1, w2, w3])).sum(axis=1) + b
+print(len(predict))
+mse = ((predict - y_test) ** 2).sum() / len(y_test)
+print('rmse=', mse ** 0.5)  # RMSE on the test set
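
For exercise 1 in the file above, a minimal self-contained sketch of one possible solution, reusing the same per-sample gradient-descent update as the lesson code; the seed, the noise scale, and the names a_hat/b_hat are assumptions, not part of the original file:

# Exercise 1 (hypothetical sketch): fit y = 10x + 30 by stochastic gradient descent
import numpy as np

np.random.seed(0)                       # assumed seed; any value works
x = 2 * np.random.rand(100)             # 100 points with x in [0, 2)
y = 30 + 10 * x + np.random.randn(100)  # true a=10, b=30, unit Gaussian noise (assumed)

a_hat, b_hat = np.random.randn(2)       # random initial guesses for a and b
lr = 0.1
for epoch in range(20):                 # a few passes suffice at this scale
    for j in range(len(x)):
        residual = a_hat * x[j] + b_hat - y[j]
        a_hat -= lr * residual * x[j]   # d(error)/da
        b_hat -= lr * residual          # d(error)/db
print(a_hat, b_hat)                     # should land close to 10 and 30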
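For exercise 2, the same update extends to two features; again a hedged sketch, with the seed, noise, and learning rate chosen by assumption rather than taken from the original:

# Exercise 2 (hypothetical sketch): fit f(x, y) = 5x + 10y + 30 by stochastic gradient descent
import numpy as np

np.random.seed(1)
x1 = 2 * np.random.rand(1000)                       # 1000 points, each feature in [0, 2)
x2 = 2 * np.random.rand(1000)
f = 5 * x1 + 10 * x2 + 30 + np.random.randn(1000)   # noisy samples of the target function

w1, w2, b = np.random.randn(3)                      # random initial weights and intercept
lr = 0.05
for epoch in range(30):
    for j in range(1000):
        # compute the residual once so all three gradients use the same weights
        residual = w1 * x1[j] + w2 * x2[j] + b - f[j]
        w1 -= lr * residual * x1[j]
        w2 -= lr * residual * x2[j]
        b -= lr * residual
print(w1, w2, b)                                    # should approach 5, 10, 30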
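As an optional follow-up not present in the original script, the SGD weights for the advertising model can be cross-checked against the closed-form least-squares solution; this assumes X_train and y_train from the script above are in scope:

# Optional sanity check (assumed extension): compare SGD weights with the lstsq solution
import numpy as np

A = np.hstack([X_train, np.ones((len(X_train), 1))])  # append a ones column for the intercept
coef, *_ = np.linalg.lstsq(A, y_train, rcond=None)
print('lstsq weights:', coef[:-1], 'intercept:', coef[-1])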