From 5f60bf1169ba62d61dcc2e98d465486a93166248 Mon Sep 17 00:00:00 2001
From: p4w2aybsf <2363061197@qq.com>
Date: Thu, 29 Apr 2021 17:16:35 +0800
Subject: [PATCH] Add 'optimizer.py'

---
 optimizer.py | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 optimizer.py

diff --git a/optimizer.py b/optimizer.py
new file mode 100644
index 0000000..9c22bcd
--- /dev/null
+++ b/optimizer.py
@@ -0,0 +1,130 @@
+# coding: utf-8
+import numpy as np
+
+class SGD:
+
+    """Stochastic Gradient Descent"""
+
+    def __init__(self, lr=0.01):
+        self.lr = lr
+
+    def update(self, params, grads):
+        for key in params.keys():
+            params[key] -= self.lr * grads[key]
+
+
+class Momentum:
+
+    """Momentum SGD"""
+
+    def __init__(self, lr=0.01, momentum=0.9):
+        self.lr = lr
+        self.momentum = momentum
+        self.v = None
+
+    def update(self, params, grads):
+        if self.v is None:
+            self.v = {}
+            for key, val in params.items():
+                self.v[key] = np.zeros_like(val)
+
+        for key in params.keys():
+            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
+            params[key] += self.v[key]
+
+
+class Nesterov:
+
+    """Nesterov's Accelerated Gradient (http://arxiv.org/abs/1212.0901)"""
+
+    def __init__(self, lr=0.01, momentum=0.9):
+        self.lr = lr
+        self.momentum = momentum
+        self.v = None
+
+    def update(self, params, grads):
+        if self.v is None:
+            self.v = {}
+            for key, val in params.items():
+                self.v[key] = np.zeros_like(val)
+
+        for key in params.keys():
+            self.v[key] *= self.momentum
+            self.v[key] -= self.lr * grads[key]
+            params[key] += self.momentum * self.momentum * self.v[key]
+            params[key] -= (1 + self.momentum) * self.lr * grads[key]
+
+
+class AdaGrad:
+
+    """AdaGrad"""
+
+    def __init__(self, lr=0.01):
+        self.lr = lr
+        self.h = None
+
+    def update(self, params, grads):
+        if self.h is None:
+            self.h = {}
+            for key, val in params.items():
+                self.h[key] = np.zeros_like(val)
+
+        for key in params.keys():
+            self.h[key] += grads[key] * grads[key]
+            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)  # 1e-7 avoids division by zero
+
+
+class RMSprop:
+
+    """RMSprop"""
+
+    def __init__(self, lr=0.01, decay_rate=0.99):
+        self.lr = lr
+        self.decay_rate = decay_rate
+        self.h = None
+
+    def update(self, params, grads):
+        if self.h is None:
+            self.h = {}
+            for key, val in params.items():
+                self.h[key] = np.zeros_like(val)
+
+        for key in params.keys():
+            self.h[key] *= self.decay_rate
+            self.h[key] += (1 - self.decay_rate) * grads[key] * grads[key]
+            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
+
+
+class Adam:
+
+    """Adam (http://arxiv.org/abs/1412.6980v8)"""
+
+    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
+        self.lr = lr
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.iter = 0
+        self.m = None
+        self.v = None
+
+    def update(self, params, grads):
+        if self.m is None:
+            self.m, self.v = {}, {}
+            for key, val in params.items():
+                self.m[key] = np.zeros_like(val)
+                self.v[key] = np.zeros_like(val)
+
+        self.iter += 1
+        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)  # folds the bias correction into the step size
+
+        for key in params.keys():
+            #self.m[key] = self.beta1*self.m[key] + (1-self.beta1)*grads[key]
+            #self.v[key] = self.beta2*self.v[key] + (1-self.beta2)*(grads[key]**2)
+            self.m[key] += (1 - self.beta1) * (grads[key] - self.m[key])  # same EMA as the comment above, incremental form
+            self.v[key] += (1 - self.beta2) * (grads[key]**2 - self.v[key])
+
+            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
+
+            #unbias_m += (1 - self.beta1) * (grads[key] - self.m[key]) # correct bias
+            #unbias_b += (1 - self.beta2) * (grads[key]*grads[key] - self.v[key]) # correct bias
+            #params[key] += self.lr * unbias_m / (np.sqrt(unbias_b) + 1e-7)
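
All six classes in the patch share the same update(params, grads) interface: params and grads are dicts mapping parameter names to NumPy arrays, and update() mutates params in place. A minimal usage sketch follows, on the toy quadratic f(x, y) = x**2 / 20 + y**2; the loss function, its gradient, and the chosen learning rate are illustrative assumptions, not part of the patch:

    # coding: utf-8
    import numpy as np
    from optimizer import Adam

    def gradient(params):
        # analytic gradient of f(x, y) = x**2 / 20 + y**2
        return {'x': params['x'] / 10.0, 'y': 2.0 * params['y']}

    # start away from the minimum at (0, 0)
    params = {'x': np.array(-7.0), 'y': np.array(2.0)}
    optimizer = Adam(lr=0.3)  # SGD, Momentum, AdaGrad, etc. can be swapped in
    for _ in range(100):
        optimizer.update(params, gradient(params))
    print(params['x'], params['y'])  # both coordinates approach 0

Because update() mutates params in place, the same dict can be passed on every training step, and switching optimizers only means constructing a different class; the shared interface needs no other changes to the training loop.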