path: root/QNetwork/optimizers.py
Diffstat (limited to 'QNetwork/optimizers.py')
-rw-r--r--  QNetwork/optimizers.py  116
1 file changed, 116 insertions, 0 deletions
diff --git a/QNetwork/optimizers.py b/QNetwork/optimizers.py
new file mode 100644
index 0000000..7d28f92
--- /dev/null
+++ b/QNetwork/optimizers.py
@@ -0,0 +1,116 @@
+import numpy as np
+
+######################################################################
+## class Optimizers()
+######################################################################
+
+class Optimizers():
+
+ def __init__(self, all_weights):
+ '''all_weights is a vector of all of a neural network's weights concatenated into a one-dimensional vector'''
+
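+ # all_weights is stored by reference, not copied: sgd and adam update it
+ # in place with -=, so any views a network holds into this vector see the
+ # new weight values immediately after each epoch.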
+ self.all_weights = all_weights
+
+ # The following state is used only by adam.
+ # Initializing mt, vt, beta1t and beta2t here, rather than inside adam, allows repeated calls
+ # to adam to continue training with different subsets (batches) of the training data.
+ self.mt = np.zeros_like(all_weights)
+ self.vt = np.zeros_like(all_weights)
+ self.beta1 = 0.9
+ self.beta2 = 0.999
+ self.beta1t = 1
+ self.beta2t = 1
+
+
+ def sgd(self, error_f, gradient_f, fargs=[], n_epochs=100, learning_rate=0.001, verbose=True, error_convert_f=None):
+ '''
+error_f: function that requires X and T as arguments (given in fargs) and returns mean squared error.
+gradient_f: function that requires X and T as arguments (in fargs) and returns gradient of mean squared error
+ with respect to each weight.
+error_convert_f: optional function that converts the standardized error from error_f to original T units.
+Returns a list of the error recorded at each epoch.
+ '''
+
+ error_trace = []
+ epochs_per_print = n_epochs // 10
+
+ for epoch in range(n_epochs):
+
+ error = error_f(*fargs)
+ grad = gradient_f(*fargs)
+
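+ # Plain gradient descent: step each weight against its gradient,
+ # scaled by the learning rate.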
+ # Update all weights using -= to modify their values in-place.
+ self.all_weights -= learning_rate * grad
+
+ if error_convert_f:
+ error = error_convert_f(error)
+ error_trace.append(error)
+
+ if verbose and ((epoch + 1) % max(1, epochs_per_print) == 0):
+ print(f'sgd: Epoch {epoch+1:d} Error={error:.5f}')
+
+ return error_trace
+
+ def adam(self, error_f, gradient_f, fargs=[], n_epochs=100, learning_rate=0.001, verbose=True, error_convert_f=None):
+ '''
+error_f: function that requires X and T as arguments (given in fargs) and returns mean squared error.
+gradient_f: function that requires X and T as arguments (in fargs) and returns gradient of mean squared error
+ with respect to each weight.
+error_convert_f: optional function that converts the standardized error from error_f to original T units.
+Returns a list of the error recorded at each epoch.
+ '''
+
+ alpha = learning_rate # the learning rate is called alpha in the original Adam paper (Kingma and Ba, 2014)
+ epsilon = 1e-8
+ error_trace = []
+ epochs_per_print = n_epochs // 10
+
+ for epoch in range(n_epochs):
+
+ error = error_f(*fargs)
+ grad = gradient_f(*fargs)
+
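+ # Exponentially decaying averages of the gradient (mt, first moment) and of
+ # the squared gradient (vt, second moment), updated in place.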
+ self.mt[:] = self.beta1 * self.mt + (1 - self.beta1) * grad
+ self.vt[:] = self.beta2 * self.vt + (1 - self.beta2) * grad * grad
+ self.beta1t *= self.beta1
+ self.beta2t *= self.beta2
+
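+ # Bias-corrected moment estimates. Dividing by (1 - beta^t) compensates for
+ # mt and vt being initialized to zero, which matters most in early epochs.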
+ m_hat = self.mt / (1 - self.beta1t)
+ v_hat = self.vt / (1 - self.beta2t)
+
+ # Update all weights using -= to modify their values in-place.
+ self.all_weights -= alpha * m_hat / (np.sqrt(v_hat) + epsilon)
+
+ if error_convert_f:
+ error = error_convert_f(error)
+ error_trace.append(error)
+
+ if verbose and ((epoch + 1) % max(1, epochs_per_print) == 0):
+ print(f'adam: Epoch {epoch+1:d} Error={error:.5f}')
+
+ return error_trace
+
+if __name__ == '__main__':
+
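+ # parabola and parabola_gradient read the module-level weight vector w,
+ # which the optimizer updates in place, so w converges toward wmin.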
+ def parabola(wmin):
+ return ((w - wmin) ** 2)[0]
+
+ def parabola_gradient(wmin):
+ return 2 * (w - wmin)
+
+ w = np.array([0.0])
+ optimizer = Optimizers(w)
+
+ wmin = 5
+ optimizer.sgd(parabola, parabola_gradient, [wmin],
+ n_epochs=500, learning_rate=0.1)
+
+ print(f'sgd: Minimum of parabola is at {wmin}. Value found is {w}')
+
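+ # Reset w to zero and build a fresh Optimizers instance so adam starts
+ # with zeroed moment estimates.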
+ w = np.array([0.0])
+ optimizer = Optimizers(w)
+ optimizer.adam(parabola, parabola_gradient, [wmin],
+ n_epochs=500, learning_rate=0.1)
+
+ print(f'adam: Minimum of parabola is at {wmin}. Value found is {w}')
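
Usage sketch (not part of the commit above): one way to drive Optimizers with data passed through fargs, assuming the file is importable as QNetwork.optimizers. The two-parameter linear model, the generated data, and the error_f/gradient_f closures below are illustrative assumptions, not code from this repository.

import numpy as np
from QNetwork.optimizers import Optimizers  # assumes the package layout shown above

# Hypothetical data for a line y = 3 + 2x plus a little noise.
rng = np.random.default_rng(0)
X = rng.uniform(-1, 1, size=(100, 1))
T = 3.0 + 2.0 * X + rng.normal(0.0, 0.1, size=(100, 1))

weights = np.zeros(2)  # [intercept, slope]; shared with, and updated in place by, the optimizer

def error_f(X, T):
    Y = weights[0] + weights[1] * X
    return np.mean((T - Y) ** 2)

def gradient_f(X, T):
    Y = weights[0] + weights[1] * X
    residual = T - Y
    # Gradient of the mean squared error with respect to [intercept, slope].
    return np.array([-2 * np.mean(residual), -2 * np.mean(residual * X)])

optimizer = Optimizers(weights)
error_trace = optimizer.adam(error_f, gradient_f, fargs=[X, T],
                             n_epochs=500, learning_rate=0.1, verbose=False)
print(f'Final MSE {error_trace[-1]:.5f}, weights {weights}')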