# solvers.py # ---------- # Licensing Information: You are free to use or extend these projects for # educational purposes provided that (1) you do not distribute or publish # solutions, (2) you retain this notice, and (3) you provide clear # attribution to UC Berkeley, including a link to http://ai.berkeley.edu. # # Attribution Information: The Pacman AI projects were developed at UC Berkeley. # The core projects and autograders were primarily created by John DeNero # (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu). # Student side autograding was added by Brad Miller, Nick Hay, and # Pieter Abbeel (pabbeel@cs.berkeley.edu). import numpy as np import tensorflow as tf import tensorflow_util as tfu from tensorflow_util import MinibatchIndefinitelyGenerator import plotUtil import util def categorical_crossentropy(predictions, targets): return tf.reduce_mean(-tf.reduce_sum(targets * tf.log(tf.clip_by_value(predictions, 1e-10, float('inf'))), reduction_indices=[1])) def squared_error(predictions, targets): return tf.reduce_mean(tf.reduce_sum(tf.square(targets - predictions), reduction_indices=[1])) class Solver(object): """ Solver abstract class. """ def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None): raise NotImplementedError class GradientDescentSolver(Solver): def __init__(self, learning_rate, iterations, momentum=0, weight_decay=1e-3, loss_function=None, plot=0): """ Gradient descent solver for optimizing a model given some data. Args: learning_rate: also known as alpha. Used for parameter updates. iterations: number of gradient steps (i.e. updates) to perform when solving a model. momentum: also known as mu. Used for velocity updates. weight_decay: coefficient for l2 regularization on the loss. loss_function: loss function to use for the objective being optimized. plot: whether to show a plot of the loss for every iteration. """ if learning_rate < 0: raise ValueError('learning_rate should be a non-negative number, %r given' % learning_rate) self.learning_rate = learning_rate if not isinstance(iterations, int) or iterations < 0: raise ValueError('iterations should be a non-negative integer, %d given' % iterations) self.iterations = iterations if not (0 <= momentum <= 1): raise ValueError('momentum should be between 0 and 1 (inclusive), %r given' % momentum) self.momentum = momentum if weight_decay < 0: raise ValueError('weight_decay should be a non-negative number, %r given' % weight_decay) self.weight_decay = weight_decay self.loss_function = loss_function or categorical_crossentropy self.plot = plot def get_updates_without_momentum(self, loss_tensor, param_vars): """ Question 4: Returns the gradient descent updates when no momentum is used. Args: loss_tensor: loss tensor used to compute the gradients. param_vars: list of parameter variables. Returns: A list of tuples, where each tuple is an update of the form (param_var, new_param_tensor) indicating that, at runtime, the parameter param_var should be updated with new_param_tensor. You implementation should use the gradient tensors (provided below) and the member variable self.learning_rate. """ grad_tensors = tf.gradients(loss_tensor, param_vars) updates = [] "*** YOUR CODE HERE ***" for i in range(len(param_vars)): param_var = param_vars[i] new_param_tensor = param_var - self.learning_rate * grad_tensors[i] updates.append((param_var, new_param_tensor)) return updates def get_updates_with_momentum(self, loss_tensor, param_vars): """ Question 5: Returns the gradient descent updates when momentum is used. Args: loss_tensor: loss tensor used to compute the gradients. param_vars: list of parameter variables. Returns: A list of tuples, where each tuple is an update of the form (var, new_tensor) indicating that, at runtime, the variable var should be updated with new_tensor. You implementation should use the gradient tensors and the velocity variables (both provided below), and the member variables self.learning_rate and self.momentum. """ grad_tensors = tf.gradients(loss_tensor, param_vars) vel_vars = [tf.Variable(np.zeros(param_var.get_shape(), dtype=np.float32)) for param_var in param_vars] tfu.get_session().run([vel_var.initializer for vel_var in vel_vars]) updates = [] "*** YOUR CODE HERE ***" for i in range(len(param_vars)): new_vel = self.momentum * vel_vars[i] - self.learning_rate * grad_tensors[i] updates.append((vel_vars[i], new_vel)) new_tensor = param_vars[i] + new_vel updates.append((param_vars[i], new_tensor)) return updates def get_loss_tensor(self, prediction_tensor, target_ph, param_vars): loss_tensor = self.loss_function(prediction_tensor, target_ph) loss_tensor += self.weight_decay * sum(tf.nn.l2_loss(param_var) for param_var in param_vars) return loss_tensor def get_updates(self, loss_tensor, param_vars): """ Returns the gradient descent updates. Args: loss_tensor: loss tensor used to compute the gradients. param_vars: list of parameter variables. Returns: A list of tuples, where each tuple is an update of the form (var, new_tensor) indicating that, at runtime, the variable var should be updated with new_tensor. """ if self.momentum == 0: return self.get_updates_without_momentum(loss_tensor, param_vars) else: return self.get_updates_with_momentum(loss_tensor, param_vars) def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None): """ Question 6.a: Optimize the model and return the intermediate losses. Optimize the model using gradient descent by running the variable updates for self.iterations iterations. Args: input_train_data: a numpy.array with shape (N, R) target_train_data: a numpy.array with shape (N, S) input_val_data: a numpy.array with shape (M, R) target_val_data: a numpy.array with shape (M, S) model: the model from which the parameters are optimized Returns: A tuple of lists, where the first list contains the training loss of each iteration and the second list contains the validation loss of each iteration. N and M are the numbers of training points, respectively, and R and S are the dimensions for each input and target data point, respectively. You may not need to fill in both "*** YOUR CODE HERE ***" blanks, but they are both provided so you can define variables outside and inside the for loop. Useful method: session.run """ session = tfu.get_session() target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:]) placeholders = [model.input_ph, target_ph] train_data = [input_train_data, target_train_data] val_data = [input_val_data, target_val_data] # You may want to initialize some variables that are shared across iterations "*** YOUR CODE HERE ***" loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True)) updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True)) update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates] train_losses = [] val_losses = [] for iter_ in range(self.iterations): "*** YOUR CODE HERE ***" # train_loss should be the loss of this iteration using all of the training data # val_loss should be the loss of this iteration using all of the validation data train_loss = session.run(loss_tensor, feed_dict = {model.input_ph: input_train_data, target_ph: target_train_data}) session.run(update_ops, feed_dict = {model.input_ph: input_train_data, target_ph: target_train_data}) val_loss = session.run(loss_tensor, feed_dict = {model.input_ph: input_val_data, target_ph: target_val_data}) train_losses.append(train_loss) val_losses.append(val_loss) if callback is not None: callback(model) self.display_progress(iter_, train_losses, val_losses) return train_losses, val_losses def display_progress(self, iter_, train_losses, val_losses): print("Iteration {} of {}".format(iter_, self.iterations)) print(" training loss = {:.6f}".format(train_losses[-1])) print(" validation loss = {:.6f}".format(val_losses[-1])) if self.plot and iter_ % self.plot == 0: plotUtil.plotTwoCurves(range(len(train_losses)), train_losses, range(len(val_losses)), val_losses, label1='training loss', label2='validation loss', showLegend=True, figureIdx=2, figureTitle="%s: Training and Validation Loss" % self.__class__.__name__) class StochasticGradientDescentSolver(GradientDescentSolver): def __init__(self, learning_rate, iterations, momentum=0, weight_decay=1e-3, shuffle=None, loss_function=None, plot=0): """ Stochastic gradient descent solver for optimizing a model given some data. Args: learning_rate: also known as alpha. Used for parameter updates. iterations: number of gradient steps (i.e. updates) to perform when solving a model. momentum: also known as mu. Used for velocity updates. weight_decay: coefficient for l2 regularization on the loss. shuffle: whether the order of the data points should be randomized when iterating over the data loss_function: loss function to use for the objective being optimized. plot: whether to show a plot of the loss for every iteration. """ super(StochasticGradientDescentSolver, self).__init__( learning_rate, iterations, momentum=momentum, weight_decay=weight_decay, loss_function=loss_function, plot=plot) self.shuffle = True if shuffle is None else shuffle def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None): """ Question 6.b: Optimize the model and return the intermediate losses. Optimize the model using stochastic gradient descent by running the variable updates for self.iterations iterations. Args: input_train_data: a numpy.array with shape (N, R) target_train_data: a numpy.array with shape (N, S) input_val_data: a numpy.array with shape (M, R) target_val_data: a numpy.array with shape (M, S) model: the model from which the parameters are optimized Returns: A tuple of lists, where the first list contains the training loss of each iteration and the second list contains the validation loss of each iteration. The validation loss should be computed using the same amount of data as the training loss, but using the validation data. N and M are the numbers of training points, respectively, and R and S are the dimensions for each input and target data point, respectively. In here, the gradient descent is stochastic, meaning that you don't need to use all the data at once before you update the model parameters. Instead, you update the model parameters as you iterate over the data. You must use MinibatchIndefinitelyGenerator to iterate over the data, otherwise your solution might differ from the one of the autograder. You will need to instantiate two generators (one for the training data and another one for the validation data) and you should do it before the for loop. You should read the docstring of MinibatchIndefinitelyGenerator in tensorflow_util.py to figure out how to use it. Make sure to pass in self.shuffle when you instantiate the generator. You will have to choose a proper batch size too. Useful member variables and methods: self.shuffle session.run(...) generator.next() """ session = tfu.get_session() target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:]) placeholders = [model.input_ph, target_ph] train_data = [input_train_data, target_train_data] val_data = [input_val_data, target_val_data] # You may want to initialize some variables that are shared across iterations "*** YOUR CODE HERE ***" train_gen = MinibatchIndefinitelyGenerator(train_data, 1, self.shuffle) val_gen = MinibatchIndefinitelyGenerator(val_data, 1, self.shuffle) loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True)) updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True)) update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates] train_losses = [] val_losses = [] for iter_ in range(self.iterations): "*** YOUR CODE HERE ***" # train_loss should be the loss of this iteration using only the training data that was used for the updates # val_loss should be the loss of this iteration using the same amount of data used for the updates, but using the validation data instead a, b = train_gen.next() train_loss = session.run(loss_tensor, feed_dict = {model.input_ph: a, target_ph: b}) session.run(update_ops, feed_dict = {model.input_ph: a, target_ph: b}) c, d = val_gen.next() val_loss = session.run(loss_tensor, feed_dict = {model.input_ph: c, target_ph: d}) train_losses.append(train_loss) val_losses.append(val_loss) if callback is not None: callback(model) self.display_progress(iter_, train_losses, val_losses) return train_losses, val_losses class MinibatchStochasticGradientDescentSolver(GradientDescentSolver): def __init__(self, learning_rate, iterations, batch_size, momentum=0, weight_decay=1e-3, shuffle=None, loss_function=None, plot=0): """ Minibatch stochastic gradient descent solver for optimizing a model given some data. Args: learning_rate: also known as alpha. Used for parameter updates. iterations: number of gradient steps (i.e. updates) to perform when solving a model. batch_size: minibatch size to use when iterating the training and validation data. momentum: also known as mu. Used for velocity updates. weight_decay: coefficient for l2 regularization on the loss. shuffle: whether the order of the data points should be randomized when iterating over the data loss_function: loss function to use for the objective being optimized. plot: whether to show a plot of the loss for every iteration. """ super(MinibatchStochasticGradientDescentSolver, self).__init__( learning_rate, iterations, momentum=momentum, weight_decay=weight_decay, loss_function=loss_function, plot=plot) self.shuffle = True if shuffle is None else shuffle if not isinstance(batch_size, int) or batch_size < 0: raise ValueError('batch_size should be a non-negative integer, %d given' % batch_size) self.batch_size = batch_size def solve(self, input_train_data, target_train_data, input_val_data, target_val_data, model, callback=None): """ Question 6.c: Optimize the model and return the intermediate losses. Optimize the model using minibatch stochastic gradient descent by running the variable updates for self.iterations iterations. Args: input_train_data: a numpy.array with shape (N, R) target_train_data: a numpy.array with shape (N, S) input_val_data: a numpy.array with shape (M, R) target_val_data: a numpy.array with shape (M, S) model: the model from which the parameters are optimized Returns: A tuple of lists, where the first list contains the training loss of each iteration and the second list contains the validation loss of each iteration. The validation loss should be computed using the same amount of data as the training loss, but using the validation data. N and M are the numbers of training points, respectively, and R and S are the dimensions for each input and target data point, respectively. For minibatch stochastic gradient descent, you will need to iterate over the data in minibatches. As before, you must use MinibatchIndefinitelyGenerator to iterate over the data. You will need to instantiate two generators (one for the training data and another one for the validation data) and you should do it before the for loop. You should read the docstring of MinibatchIndefinitelyGenerator in tensorflow_util.py to figure out how to use it. Make sure to pass in self.batch_size and self.shuffle when you instantiate the generator. Useful member variables and methods: self.batch_size self.shuffle session.run(...) generator.next() """ session = tfu.get_session() target_ph = tf.placeholder(tf.float32, shape=(None,) + target_train_data.shape[1:]) placeholders = [model.input_ph, target_ph] train_data = [input_train_data, target_train_data] val_data = [input_val_data, target_val_data] # You may want to initialize some variables that are shared across iterations "*** YOUR CODE HERE ***" train_gen = MinibatchIndefinitelyGenerator(train_data, self.batch_size, self.shuffle) val_gen = MinibatchIndefinitelyGenerator(val_data, self.batch_size, self.shuffle) loss_tensor = self.get_loss_tensor(model.prediction_tensor, target_ph, model.get_param_vars(regularizable=True)) updates = self.get_updates(loss_tensor, model.get_param_vars(trainable=True)) update_ops = [tf.assign(old_var, new_var_or_tensor) for (old_var, new_var_or_tensor) in updates] train_losses = [] val_losses = [] for iter_ in range(self.iterations): "*** YOUR CODE HERE ***" a, b = train_gen.next() train_loss = session.run(loss_tensor, feed_dict = {model.input_ph: a, target_ph: b}) session.run(update_ops, feed_dict = {model.input_ph: a, target_ph: b}) c, d = val_gen.next() val_loss = session.run(loss_tensor, feed_dict = {model.input_ph: c, target_ph: d}) # train_loss should be the loss of this iteration using only the training data that was used for the updates # val_loss should be the loss of this iteration using the same amount of data used for the updates, but using the validation data instead train_losses.append(train_loss) val_losses.append(val_loss) if callback is not None: callback(model) self.display_progress(iter_, train_losses, val_losses) return train_losses, val_losses