# -*- coding: utf-8 -*-
"""
Created on Sun Jun 13 06:23:11 2021

@author: hzh

Two gradient-descent exercises:

1. Fit ``y = a*x + b`` to 100 synthetic points generated from ``y = 15 + 3x``
   plus Gaussian noise, and plot the fitted line against the true line.
2. Fit ``y = w1*x1 + w2*x2 + w3*x3 + b`` to the data in ``advertising.txt``
   and report the error on a held-out test split.
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# Part 1: fit y = a*x + b to synthetic data
# ---------------------------------------------------------------------------

# Generate 100 data points.
np.random.seed(7)
X = 2 * np.random.rand(100)  # 100 random numbers used as x
Y = 15 + 3 * X + np.random.randn(100)  # noisy y; the true values are a=3, b=15

fig, ax = plt.subplots()
ax.scatter(X, Y)
fig.savefig('scatter.pdf')
fig.show()

learning_rate = 0.1
roundN = 7  # number of passes over the data set

# Random starting point for the parameters.
np.random.seed(3)
a = np.random.randn()
b = np.random.randn()
print(a)
print(b)


def errorCompute(a, b):
    """Return the mean half squared error of the line ``a*x + b``.

    The error is evaluated over the module-level arrays ``X`` and ``Y`` as
    they are bound at call time.

    Args:
        a: slope of the candidate line.
        b: intercept of the candidate line.

    Returns:
        The mean over all points of ``0.5 * (a*x + b - y) ** 2``.
    """
    return np.mean(0.5 * (a * X + b - Y) ** 2)


# Estimate a and b with per-sample gradient descent.
# The error is printed every 50 samples; once it stops changing much, the
# current a and b are the parameters of the fitted line.
for i in range(roundN):
    for j, (x_j, y_j) in enumerate(zip(X, Y)):
        if j % 50 == 0:
            print("round=%d,iter=%d,a=%f,b=%f,E=%f" % (i, j, a, b, errorCompute(a, b)))
        residual = a * x_j + b - y_j
        gradA = residual * x_j  # partial derivative with respect to a
        gradB = residual  # partial derivative with respect to b
        a = a - learning_rate * gradA  # update step
        b = b - learning_rate * gradB  # update step

print(a, b)  # fitted a and b

# Plot the data, the fitted line and the true line.
X_fit = np.arange(X.min(), X.max(), 0.01)
Y_fit = a * X_fit + b  # line found by gradient descent

plt.plot(X, Y, '.')  # the 100 data points
plt.plot(X_fit, Y_fit, 'r-', label='Gradient Descent')  # fitted line
plt.plot(X_fit, 15 + 3 * X_fit, 'b-', label='True')  # true line
plt.legend()  # show the legend
plt.show()

# Exercises:
#################begin##################
# Simulate the function y = 10x + 30 and generate 100 data points.
# Use gradient descent to fit the underlying linear function y = ax + b
# and find a and b.

# Data fitting 2
# Simulate the two-variable function f(x, y) = 5*x + 10*y + 30 and generate
# 1000 data points.
# Use gradient descent to fit the underlying linear function
# f(x, y) = w1*x + w2*y + b and find w1, w2 and b.

# ---------------------------------------------------------------------------
# Part 2: linear prediction on advertising.txt
# ---------------------------------------------------------------------------

# Data preparation: every column but the last is a feature, the last column
# is the target.
data = np.loadtxt('advertising.txt', delimiter=',')
print(data)
X = data[:, 0:-1]
y = data[:, -1]
# Min-max normalization of each feature column.
# NOTE(review): the scaling statistics are taken from the full data set,
# before the train/test split below; a constant column would divide by zero.
X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Gradient descent for y = w1*x1 + w2*x2 + w3*x3 + b.
# Initialization. Five values are drawn but only four are used; the unused
# draw is kept so that the initial b is the same fifth value as before.
np.random.seed(10)
w1, w2, w3, _, b = np.random.randn(5)
lr = 0.001  # original author's note: 0.00001
rounds = 1000  # original author's note: 300,0.001 ,4.54


def computeErr(X, y):
    """Return the mean half squared error of the current linear model.

    The model ``w1*x1 + w2*x2 + w3*x3 + b`` is read from the module-level
    ``w1``, ``w2``, ``w3`` and ``b`` at call time.

    Args:
        X: 2-D array whose first three columns are the features x1, x2, x3.
        y: 1-D array of targets, one per row of ``X``.

    Returns:
        The sum of ``0.5 * residual ** 2`` over all rows divided by the
        number of rows.
    """
    residual = X[:, 0] * w1 + X[:, 1] * w2 + X[:, 2] * w3 + b - y
    return np.sum(0.5 * residual ** 2) / len(X)


# Fit the training set with per-sample gradient descent.
for i in range(rounds):
    for x_row, target in zip(X_train, y_train):
        # Evaluate the residual once so that all four parameters are updated
        # from the same point (simultaneous update), as in part 1.
        # Recomputing it after each assignment would feed already-updated
        # weights into the gradients of the remaining ones.
        residual = x_row[0] * w1 + x_row[1] * w2 + x_row[2] * w3 + b - target
        w1 -= lr * residual * x_row[0]
        w2 -= lr * residual * x_row[1]
        w3 -= lr * residual * x_row[2]
        b -= lr * residual
    if i % 100 == 0:
        print('第%i轮迭代训练集误差=%.2f' % (i, computeErr(X_train, y_train)))

# Model evaluation.
print('测试集误差:', computeErr(X_test, y_test))
print('权重:', w1, w2, w3)
print('截距:', b)

predict = (X_test * np.array([w1, w2, w3])).sum(axis=1) + b
print(len(predict))
mse = ((predict - y_test) ** 2).sum() / len(y_test)
print('rmse=', mse ** 0.5)