# -*- coding: utf-8 -*-
"""
Created on Sun Jun 13 06:23:11 2021
@author: hzh
"""
import numpy as np
import matplotlib.pyplot as plt
# Generate 100 data points
np.random.seed(7)
X = 2 * np.random.rand(100)  # 100 random x values in [0, 2)
Y = 15 + 3 * X + np.random.randn(100)  # y = 3x + 15 plus Gaussian noise (true a=3, b=15)
fig, ax = plt.subplots()
ax.scatter(X, Y)
fig.savefig('scatter.pdf')
plt.show()  # fig.show() needs an interactive backend; plt.show() is the portable call
learning_rate = 0.1
roundN = 7  # number of passes (epochs) over the data set
np.random.seed(3)
a = np.random.randn()  # random initial value for a
b = np.random.randn()  # random initial value for b
print(a)
print(b)
# Mean squared error of the current fit over all data points
def errorCompute(a, b):
    error = 0
    for j in range(len(X)):
        error += 1 / 2 * (a * X[j] + b - Y[j]) ** 2
    return error / len(X)
# Fit a and b by stochastic gradient descent
# Run several passes over the data, printing the error as we go; when the error
# stops changing much, the current a, b define the fitted line
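# Derivation of the update (added for clarity): for a single point (X[j], Y[j])
# the loss is E_j = 1/2 * (a*X[j] + b - Y[j])**2, so
#     dE_j/da = (a*X[j] + b - Y[j]) * X[j]
#     dE_j/db = (a*X[j] + b - Y[j])
# and each step moves a, b a small amount against these gradients.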
for i in range(roundN):
    for j in range(len(X)):
        if j % 50 == 0:
            print("round=%d, iter=%d, a=%f, b=%f, E=%f" % (i, j, a, b, errorCompute(a, b)))
        gradA = (a * X[j] + b - Y[j]) * X[j]  # partial derivative of E_j w.r.t. a
        gradB = a * X[j] + b - Y[j]  # partial derivative of E_j w.r.t. b
        a = a - learning_rate * gradA  # update step for a
        b = b - learning_rate * gradB  # update step for b
print(a, b)  # print the fitted a and b
# Plot the data, the fitted line, and the true line
maxX = max(X)
minX = min(X)
X_fit = np.arange(minX, maxX, 0.01)
Y_fit = a * X_fit + b  # the fitted line found by gradient descent
plt.plot(X, Y, '.')  # scatter of the 100 data points
plt.plot(X_fit, Y_fit, 'r-', label='Gradient Descent')  # fitted line
plt.plot(X_fit, 15 + 3 * X_fit, 'b-', label='True')  # the true line
plt.legend()  # show the legend
plt.show()
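# Aside (not from the original lesson): the same line can be fitted with
# full-batch, vectorized gradients instead of per-sample updates. A minimal
# sketch, reusing the X, Y and learning_rate defined above; a_v and b_v are
# illustrative names:
a_v, b_v = 0.0, 0.0
for _ in range(2000):  # full-batch steps; plenty for this small problem
    resid = a_v * X + b_v - Y                  # residuals of all 100 points at once
    a_v -= learning_rate * (resid * X).mean()  # mean gradient w.r.t. a
    b_v -= learning_rate * resid.mean()        # mean gradient w.r.t. b
print('vectorized fit:', a_v, b_v)  # should land close to a, b above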
# Exercises:
#################begin##################
# Exercise 1: simulate the function y = 10x + 30 and generate 100 data points.
# Use gradient descent to fit the underlying linear function y = a*x + b and
# find a and b. (A sketch follows.)
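# A possible solution sketch for Exercise 1 (one way to do it, using the same
# SGD scheme as the lesson; the names x1, y1, a1, b1 and the hyperparameters
# are illustrative assumptions, not the official answer):
np.random.seed(0)
x1 = 2 * np.random.rand(100)              # 100 random x values
y1 = 30 + 10 * x1 + np.random.randn(100)  # y = 10x + 30 plus noise
a1, b1 = 0.0, 0.0
for _ in range(20):           # 20 passes over the data
    for j in range(len(x1)):  # one SGD step per point
        r = a1 * x1[j] + b1 - y1[j]
        a1 -= 0.1 * r * x1[j]
        b1 -= 0.1 * r
print('exercise 1 fit: a=%.2f, b=%.2f' % (a1, b1))  # expect roughly a=10, b=30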
# Data fitting 2
# Simulate the two-variable function f(x, y) = 5*x + 10*y + 30 and generate
# 1000 data points.
# Use gradient descent to fit the underlying linear function
# f(x, y) = w1*x + w2*y + b and find w1, w2 and b. (A sketch follows.)
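# A possible solution sketch for data fitting 2 (again just one way; xs, ys, zs
# and the hyperparameters are illustrative assumptions):
np.random.seed(1)
xs = 2 * np.random.rand(1000)
ys = 2 * np.random.rand(1000)
zs = 5 * xs + 10 * ys + 30 + np.random.randn(1000)  # f(x, y) plus noise
w1_, w2_, b_ = 0.0, 0.0, 0.0
for _ in range(20):
    for j in range(len(xs)):
        r = w1_ * xs[j] + w2_ * ys[j] + b_ - zs[j]
        w1_ -= 0.05 * r * xs[j]
        w2_ -= 0.05 * r * ys[j]
        b_ -= 0.05 * r
print('exercise 2 fit: w1=%.2f, w2=%.2f, b=%.2f' % (w1_, w2_, b_))  # expect ~5, ~10, ~30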
# Linear prediction
import numpy as np
from sklearn.model_selection import train_test_split
# Data preparation
data = np.loadtxt('advertising.txt', delimiter=',')
print(data)
X = data[:, 0:-1]
y = data[:, -1]
X = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))  # min-max normalization to [0, 1]
# print(X)
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
# Gradient descent for y = w1*x1 + w2*x2 + w3*x3 + b
# Initialization
np.random.seed(10)
w1, w2, w3, w4, b = np.random.randn(5)  # w4 is never used; kept so the seeded values of the others stay the same
lr = 0.001  # learning rate (0.00001 was also tried)
rounds = 1000  # number of epochs (earlier tuning notes: 300, 0.001, 4.54)
def computeErr(X, y):  # mean squared error of the current weights
    err = 0
    for i in range(len(X)):
        err += 1 / 2 * (X[i, 0] * w1 + X[i, 1] * w2 + X[i, 2] * w3 + b - y[i]) ** 2
    return err / len(X)
for i in range(rounds):  # fit the training set with gradient descent
    for j in range(len(X_train)):
        # note: each line below reuses the weights already updated on this sample
        w1 -= lr * (X_train[j, 0] * w1 + X_train[j, 1] * w2 + X_train[j, 2] * w3 + b - y_train[j]) * X_train[j, 0]
        w2 -= lr * (X_train[j, 0] * w1 + X_train[j, 1] * w2 + X_train[j, 2] * w3 + b - y_train[j]) * X_train[j, 1]
        w3 -= lr * (X_train[j, 0] * w1 + X_train[j, 1] * w2 + X_train[j, 2] * w3 + b - y_train[j]) * X_train[j, 2]
        b -= lr * (X_train[j, 0] * w1 + X_train[j, 1] * w2 + X_train[j, 2] * w3 + b - y_train[j])
    if i % 100 == 0:
        print('round %i: training set error = %.2f' % (i, computeErr(X_train, y_train)))
# Model evaluation
print('test set error:', computeErr(X_test, y_test))
print('weights:', w1, w2, w3)
print('intercept:', b)
predict = (X_test * np.array([w1, w2, w3])).sum(axis=1) + b  # predictions on the test set
print(len(predict))  # number of test samples
mse = ((predict - y_test) ** 2).sum() / len(y_test)
print('rmse=', mse ** 0.5)
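# Sanity check (an addition, not part of the original lesson): compare against
# scikit-learn's closed-form LinearRegression on the same split; the weights
# and RMSE should be in the same ballpark as the hand-rolled fit.
from sklearn.linear_model import LinearRegression
reg = LinearRegression().fit(X_train, y_train)
print('sklearn weights:', reg.coef_, 'intercept:', reg.intercept_)
sk_pred = reg.predict(X_test)
print('sklearn rmse =', (((sk_pred - y_test) ** 2).mean()) ** 0.5)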