Diffstat (limited to 'QNetwork/optimizers.py')
-rw-r--r-- | QNetwork/optimizers.py | 116 |
1 file changed, 116 insertions, 0 deletions
diff --git a/QNetwork/optimizers.py b/QNetwork/optimizers.py
new file mode 100644
index 0000000..7d28f92
--- /dev/null
+++ b/QNetwork/optimizers.py
@@ -0,0 +1,116 @@
+import numpy as np
+
+######################################################################
+## class Optimizers()
+######################################################################
+
+class Optimizers():
+
+    def __init__(self, all_weights):
+        '''all_weights is a one-dimensional vector of all of a neural network's weights concatenated together'''
+
+        self.all_weights = all_weights
+
+        # The following initializations are only used by adam.
+        # Initializing mt, vt, beta1t and beta2t here allows multiple calls to adam to handle training
+        # with multiple subsets (batches) of training data.
+        self.mt = np.zeros_like(all_weights)
+        self.vt = np.zeros_like(all_weights)
+        self.beta1 = 0.9
+        self.beta2 = 0.999
+        self.beta1t = 1
+        self.beta2t = 1
+
+    def sgd(self, error_f, gradient_f, fargs=[], n_epochs=100, learning_rate=0.001, verbose=True, error_convert_f=None):
+        '''
+error_f: function that requires X and T as arguments (given in fargs) and returns mean squared error.
+gradient_f: function that requires X and T as arguments (in fargs) and returns the gradient of mean squared error
+            with respect to each weight.
+error_convert_f: function that converts the standardized error from error_f to original T units.
+        '''
+
+        error_trace = []
+        epochs_per_print = n_epochs // 10
+
+        for epoch in range(n_epochs):
+
+            error = error_f(*fargs)
+            grad = gradient_f(*fargs)
+
+            # Update all weights using -= to modify their values in-place.
+            self.all_weights -= learning_rate * grad
+
+            if error_convert_f:
+                error = error_convert_f(error)
+            error_trace.append(error)
+
+            if verbose and ((epoch + 1) % max(1, epochs_per_print) == 0):
+                print(f'sgd: Epoch {epoch+1:d} Error={error:.5f}')
+
+        return error_trace
+
+    def adam(self, error_f, gradient_f, fargs=[], n_epochs=100, learning_rate=0.001, verbose=True, error_convert_f=None):
+        '''
+error_f: function that requires X and T as arguments (given in fargs) and returns mean squared error.
+gradient_f: function that requires X and T as arguments (in fargs) and returns the gradient of mean squared error
+            with respect to each weight.
+error_convert_f: function that converts the standardized error from error_f to original T units.
+        '''
+
+        alpha = learning_rate  # the learning rate is called alpha in the original paper on adam
+        epsilon = 1e-8
+        error_trace = []
+        epochs_per_print = n_epochs // 10
+
+        for epoch in range(n_epochs):
+
+            error = error_f(*fargs)
+            grad = gradient_f(*fargs)
+
+            # Update the biased first and second moment estimates, and the
+            # running powers of beta1 and beta2 used for bias correction.
+            self.mt[:] = self.beta1 * self.mt + (1 - self.beta1) * grad
+            self.vt[:] = self.beta2 * self.vt + (1 - self.beta2) * grad * grad
+            self.beta1t *= self.beta1
+            self.beta2t *= self.beta2
+
+            m_hat = self.mt / (1 - self.beta1t)
+            v_hat = self.vt / (1 - self.beta2t)
+
+            # Update all weights using -= to modify their values in-place.
+            self.all_weights -= alpha * m_hat / (np.sqrt(v_hat) + epsilon)
+
+            if error_convert_f:
+                error = error_convert_f(error)
+            error_trace.append(error)
+
+            if verbose and ((epoch + 1) % max(1, epochs_per_print) == 0):
+                print(f'Adam: Epoch {epoch+1:d} Error={error:.5f}')
+
+        return error_trace
+
+
+if __name__ == '__main__':
+
+    import matplotlib.pyplot as plt
+    plt.ion()
+
+    def parabola(wmin):
+        return ((w - wmin) ** 2)[0]
+
+    def parabola_gradient(wmin):
+        return 2 * (w - wmin)
+
+    w = np.array([0.0])
+    optimizer = Optimizers(w)
+
+    wmin = 5
+    optimizer.sgd(parabola, parabola_gradient, [wmin],
+                  n_epochs=500, learning_rate=0.1)
+
+    print(f'sgd: Minimum of parabola is at {wmin}. Value found is {w}')
+
+    w = np.array([0.0])
+    optimizer = Optimizers(w)
+    optimizer.adam(parabola, parabola_gradient, [wmin],
+                   n_epochs=500, learning_rate=0.1)
+
+    print(f'adam: Minimum of parabola is at {wmin}. Value found is {w}')
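The comment in __init__ notes that mt, vt, beta1t and beta2t live on the instance so that adam can be called repeatedly, once per batch of training data, without resetting its moment estimates. The sketch below illustrates that usage on a small linear-regression problem. It is not part of this commit: the data, the error_f/gradient_f closures, the helper names, and the import path QNetwork.optimizers are assumptions for illustration only.

    # Illustrative sketch, not from the commit: reuse one Optimizers instance
    # across several batches so adam's state (mt, vt, beta1t, beta2t) carries over.
    import numpy as np
    from QNetwork.optimizers import Optimizers   # assumed package layout

    rng = np.random.default_rng(0)
    X = rng.uniform(-1, 1, size=(100, 1))
    T = 3.0 * X + 0.5 + 0.1 * rng.standard_normal((100, 1))   # noisy line, slope 3, intercept 0.5

    w = np.zeros(2)                  # [slope, intercept], updated in place by the optimizer

    def error_f(Xb, Tb):
        Y = Xb * w[0] + w[1]
        return np.mean((Tb - Y) ** 2)

    def gradient_f(Xb, Tb):
        d = Xb * w[0] + w[1] - Tb
        return np.array([2 * np.mean(d * Xb), 2 * np.mean(d)])

    optimizer = Optimizers(w)        # one instance, reused for every batch
    for Xb, Tb in zip(np.array_split(X, 5), np.array_split(T, 5)):
        optimizer.adam(error_f, gradient_f, [Xb, Tb],
                       n_epochs=200, learning_rate=0.01, verbose=False)

    print(f'Fitted slope {w[0]:.2f} and intercept {w[1]:.2f}; true values are 3.0 and 0.5')

Because the same Optimizers instance is reused, the bias-correction factors beta1t and beta2t keep accumulating from one batch to the next rather than restarting at 1, which is exactly the behavior the __init__ comment describes.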