FunctionFitting/X2.py

import inspect
from tkinter import *
import tkinter as tk
import mpl_toolkits.axisartist as axisartist
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.optimize import approx_fprime
import data as gl_data
import numpy as np

from X1 import *


#------------------借用setphoto（）
set_img1 = None
set_img2 = None


# 这里有个将图像输出到主界面的函数
def set_phtot(index1):

    global set_img1,set_img2

    # 显示图像
    Canvas2 = gl_data.Canvas2

    if index1 == 1:
        if set_img2 != None:
            Canvas2.delete(set_img2)

        gl_data.Img1=PhotoImage(file=r"dot.png")
        # print(gl_data.Img1)
        gl_data.Img1 = gl_data.Img1.zoom(2, 2)
        set_img1=Canvas2.create_image(2, 10, image=gl_data.Img1, anchor=NW)
        # Canvas2.itemconfigure(set_img1,int(420 * gl_data.MAGNIDICATION), int(300 * gl_data.MAGNIDICATION))
        Canvas2.update()
        print("已输出数据点")
    elif index1 == 2:
        if set_img1 != None:
            Canvas2.delete(set_img1)

        gl_data.Img2=PhotoImage(file=r"line.png")
        # print(gl_data.Img2)
        gl_data.Img2 = gl_data.Img2.zoom(2, 2)
        set_img2=Canvas2.create_image(2, 10, image=gl_data.Img2, anchor=NW)
        # Canvas2.itemconfigure(set_img2, 0,0,int(420*gl_data.MAGNIDICATION), int(300*gl_data.MAGNIDICATION))
        Canvas2.update()
        print("已输出数据点和曲线")


def window():
    root_window = Tk()
    root_window.title('函数拟合')

    root_window.geometry('900x600')  # 设置窗口大小:宽x高,注,此处不能为 "*",必须使用 "x"
    # 设置主窗口的背景颜色,颜色值可以是英文单词，或者颜色值的16进制数,除此之外还可以使用Tk内置的颜色常量
    root_window["background"] = "white"
    root_window.resizable(0, 0)  # 防止用户调整尺寸
    label1 = tk.Label(root_window, text="样本数据\n集文件", font=('Times', 8), bg="white",
                      width=13, height=3,  # 设置标签内容区大小
                      padx=0, pady=0, borderwidth=0, )
    label1.place(x=10, y=4)  # 设置填充区距离、边框宽度和其样式（凹陷式）
    label3 = tk.Label(root_window, text="", font=('Times', 8), bg="white", fg="black",
                      width=39, height=2, padx=0, pady=0, borderwidth=0, relief="ridge", highlightcolor="blue")
    label3.place(x=122, y=10)
    return root_window
# if __name__ == '__main__':
#     root_window = window()
#     root_window.mainloop()

def draw_axis(low, high, step=250):
    fig = plt.figure(figsize=(4.4, 3.2)) # 设置显示大小
    ax = axisartist.Subplot(fig, 111)	# 使用axisartist.Subplot方法创建一个绘图区对象ax
    fig.add_axes(ax)   # 将绘图区对象添加到画布中
    ax.axis[:].set_visible(False)# 通过set_visible方法设置绘图区所有坐标轴隐藏
    ax.axis["x"] = ax.new_floating_axis(0, 0)	# 添加新的x坐标轴
    ax.axis["x"].set_axisline_style("-|>", size=1.0) # 给x坐标轴加上箭头
    ax.axis["y"] = ax.new_floating_axis(1, 0)	# 添加新的y坐标轴
    ax.axis["y"].set_axisline_style("-|>", size=1.0) # y坐标轴加上箭头
    ax.axis["x"].set_axis_direction("bottom")	# 设置x、y轴上刻度显示方向
    ax.axis["y"].set_axis_direction("left")	# 设置x、y轴上刻度显示方向
    # plt.xlim(low, high)  # 把x轴的刻度范围设置
    # plt.ylim(low, high)  # 把y轴的刻度范围设置
    # 把x轴和y轴的刻度范围设置
    ax.set_xlim(low, high)
    ax.set_ylim(low, high)
    ax.set_xticks(np.arange(low, high + 5, step))  # 把x轴的刻度间隔设置
    ax.set_yticks(np.arange(low, high + 5, step))  # 把y轴的刻度间隔设置
    # plt.show()
    return ax
# if __name__ == '__main__':
#     draw_axis(gl_data.LOW, gl_data.HIGH)


def selfdata_show(sampleData, low, high):
    x = sampleData[:,0]
    y = sampleData[:,1]
    ax = draw_axis(low, high)   # 绘制x、y轴
    ax.scatter(x, y, c='red')   # 绘制样本数据点
    # plt.show()  #显示图片
    plt.savefig(r"dot.png", facecolor='w', bbox_inches='tight')  # 保存为png文件
    plt.clf()
    set_phtot(1) # 后续函数将图像显示在主界面中
# if __name__ == '__main__':
#     sampleData = random_points(20, 0, 1000)  # 生成样本点
#     selfdata_show(sampleData, 0, 1000)   # 绘制样本数据点


# 编程4.5-----------------------------------------

# m次多项式的最小二乘法

# 这里是没有对数据进行了标准化处理的
def least_square_method_UnNormalized(m, sampleData):
    x = sampleData[:, 0]
    y = sampleData[:, 1]

    # 使用标准化的x构造A矩阵
    A = np.vander(x, m + 1, increasing=True)

    # 计算拟合系数theta，使用最小二乘法的正规方程
    theta = np.linalg.inv(A.T @ A) @ A.T @ y

    # 计算残差平方和和其他统计量
    residuals = y - A @ theta
    residuals_squared_sum = residuals.T @ residuals
    degrees_of_freedom = len(y) - (m + 1)
    mean_squared_error = residuals_squared_sum / degrees_of_freedom
    covariance_matrix = np.linalg.inv(A.T @ A) * mean_squared_error

    return theta, covariance_matrix

# 这里是对数据进行了标准化处理的
def least_square_method(m, sampleData):
    x = sampleData[:, 0]
    y = sampleData[:, 1]

    # 标准化x
    x_mean = x.mean()
    x_std = x.std()
    x_normalized = (x - x_mean) / x_std

    # 使用标准化的x构造A矩阵
    A = np.vander(x_normalized, m + 1, increasing=True)

    # 计算拟合系数theta，使用最小二乘法的正规方程
    theta = np.linalg.inv(A.T @ A) @ A.T @ y

    # 计算残差平方和和其他统计量
    residuals = y - A @ theta
    residuals_squared_sum = residuals.T @ residuals
    degrees_of_freedom = len(y) - (m + 1)
    mean_squared_error = residuals_squared_sum / degrees_of_freedom
    covariance_matrix = np.linalg.inv(A.T @ A) * mean_squared_error

    return theta, covariance_matrix

# def curve_draw_line(curveData,sampleData):
#     x_fit = curveData[:,0]
#     y_fit = curveData[:,1]
#     x = sampleData[:,0]
#     y = sampleData[:,1]
#     # 创建图形对象并绘制拟合曲线和样本点
#     ax = draw_axis(0, 1000,step=250)
#     ax.plot(x_fit, y_fit, label=f'FitCurve')
#     ax.scatter(x, y, color='red', label='SampleData')
#     # ax.set_xlabel('X')
#     # ax.set_ylabel('Y')
#     plt.legend()
#     plt.show()


def draw_dots_and_line(curveData, sampleData, low,high):
    x_fit, y_fit = curveData[:, 0], curveData[:, 1]
    x, y = sampleData[:, 0], sampleData[:, 1]
    # 创建图形对象并绘制拟合曲线和样本点
    ax = draw_axis(low,high,step=250)
    ax.plot(x_fit, y_fit, label=f'FitCurve')
    ax.scatter(x, y, color='red', label='SampleData')
    # ax.set_xlabel('X')
    # ax.set_ylabel('Y')
    plt.legend()
    plt.show()

# 重写compute_curveData以适应标准化的需要
def compute_curveData2(low, high, step, theta, m, x_mean, x_std):
    x_fit = np.arange(low, high, step)
    # 将x_fit标准化
    x_fit_normalized = (x_fit - x_mean) / x_std
    A_fit = np.vander(x_fit_normalized, m + 1, increasing=True)
    y_fit = A_fit @ theta
    return np.column_stack((x_fit, y_fit))


if __name__ == '__main__':
    # sampleData = random_points(20,0, 1000)
    # x = sampleData[:,0]
    # y = sampleData[:,1]
    # 这里为了便于观察使用设计好的数据
    x = np.array([0, 100, 200, 300, 400, 500, 600, 700, 800, 900])
    sy = np.array([10, 20, 10, 50, 80, 130, 210, 340, 550, 890])
    sampleData = np.array(list(zip(x, sy)))# 将两个一维数组拼接成二维数组
    m = 3   # 使用三次函数拟合
    theta, covariance_matrix = least_square_method(m, sampleData)
    curveData = compute_curveData2(0,1000,1,theta,m, x.mean(), x.std())
    draw_dots_and_line(curveData,sampleData,0,1000)

# 编程4.5 END-----------------------------------------


# 编程 4.12 -----------------------------------------
def gradient_descent_method(sampleData, degree, learning_rate=0.001, iterations=50000):

    def error_function(y_pred, y): # 定义误差函数error_function()
        return y_pred - y

    def gradient(degree,error,x_normalized,sampleNum,learning_rate): # 定义梯度函数gradient()
        # 对每个参数计算梯度
        for i in range(degree + 1):
            # 计算损失函数对参数的偏导数（梯度）
            gradient = np.dot(error, x_normalized**i) * 2 / sampleNum
            # 梯度下降更新参数
            theta[i] -= learning_rate * gradient
        return theta

    sx, sy = sampleData[:, 0], sampleData[:, 1]

    # 初始化参数（多项式系数）为0
    theta = np.zeros(degree + 1)
    sampleNum = len(sx)  # 样本数量

    # 归一化x以改善数值稳定性
    x_normalized = (sx - sx.mean()) / sx.std()
    # 迭代梯度下降
    for _ in range(iterations):
        # 通过当前参数计算多项式的值
        y_pred = np.polyval(theta[::-1], x_normalized)

        # 计算预测值与真实值之间的误差
        error = error_function(y_pred, sy)
        # 计算梯度并更新theta
        theta = gradient(degree, error, x_normalized, sampleNum, learning_rate)

    # --------------------
    # 设计矩阵
    A = np.vander(x_normalized, N=degree + 1, increasing=True)
    # 计算残差方差
    residuals = sy - np.polyval(theta[::-1], x_normalized)
    residuals_squared_sum = np.sum(residuals ** 2)
    variance = residuals_squared_sum / (sampleNum - (degree + 1))
    # 估计协方差矩阵
    # 这里使用A的伪逆来近似最小二乘法中的(A^T A)^-1项
    covariance_matrix = np.linalg.pinv(A.T @ A) * variance
    # --------------------

    return theta, covariance_matrix


def compute_curveData2(low, high, step, theta, m, x_mean, x_std):
    x_fit = np.arange(low, high, step)
    # 将x_fit标准化
    x_fit_normalized = (x_fit - x_mean) / x_std
    A_fit = np.vander(x_fit_normalized, m + 1, increasing=True)
    y_fit = A_fit @ theta
    return np.column_stack((x_fit, y_fit))


if __name__ == '__main__':
    # sampleData = random_points(20,0, 1000)
    # sx, sy = sampleData[:, 0], sampleData[:, 1]
    # 这里为了便于观察使用设计好的数据
    sx = np.array([0, 100, 200, 300, 400, 500, 600, 700, 800, 900])
    sy = np.array([10, 20, 10, 50, 80, 130, 210, 340, 550, 890])
    sampleData = np.array(list(zip(sx, sy)))

    # 设置一个较小的学习率和较多的迭代次数
    learning_rate = 0.001
    iterations = 50000
    degree = 4

    # 梯度下降算法求解
    theta, covariance_matrix = gradient_descent_method(sampleData, degree, learning_rate, iterations)
    # 计算画图所需要的拟合数据
    curveData = compute_curveData2(0, 1000, 1, theta, degree, sx.mean(), sx.std())
    # 画拟合结果
    draw_dots_and_line(curveData, sampleData, 0, 1000)


# 这里是为了后续fit函数中调用梯度下降法
# 这里将梯度下降法的输入变为了与curve_fit相同，由于协方差矩阵只对于多项式函数适用，因此这里返回的第二个值为False
# 这里搞好了，就是函数的梯度不能够确定，因此调用了包approx_fprime
def gradient_descent_method_X5(func, sx, sy, learning_rate=0.001, iterations=5000):

    def error_function(y_pred, y): # 定义误差函数error_function()
        return (y_pred - y) ** 2

    # 定义损失函数和梯度的计算
    def loss_grad(theta):
        y_pred = func(sx_normalized, *theta)
        # 计算预测值与真实值之间的误差
        error = error_function(y_pred, sy)
        # 调用包来实现对任意函数计算梯度
        grad = approx_fprime(theta, lambda t: np.mean((func(sx_normalized, *t) - sy) ** 2), epsilon=1e-6)
        return error, grad

    # 因为X4和X5中的函数方式不一样，X5中不是function类，没有self参数，因此要处理不同的参数要求
    sig = inspect.signature(func)    # 获取所有参数的名称
    params = [param.name for param in sig.parameters.values()]
    # print(params)
    # 动态确定func需要的参数数量（减去self, sx或者只有sx）
    if 'self' in params:
        params_count = func.__code__.co_argcount - 2
    else:
        params_count = func.__code__.co_argcount - 1
    # print(params_count)

    # 基于参数数量初始化theta
    theta = np.random.randn(params_count) * 0.01

    # 标准化sx
    mean_sx = np.mean(sx)
    std_sx = np.std(sx)
    sx_normalized = (sx - mean_sx) / std_sx

    # 梯度下降循环
    for _ in range(iterations):
        err, grad = loss_grad(theta)
        theta -= learning_rate * grad

    # 由于不再特定于多项式函数，以下部分相关于残差方差和协方差矩阵的计算需要根据实际情况调整
    # 但是实际没有使用到这个矩阵，因此返回False

    return theta, False