Diffstat (limited to 'QNetwork/optimizers.py')
-rw-r--r-- | QNetwork/optimizers.py | 116 |
1 file changed, 116 insertions, 0 deletions
diff --git a/QNetwork/optimizers.py b/QNetwork/optimizers.py
new file mode 100644
index 0000000..7d28f92
--- /dev/null
+++ b/QNetwork/optimizers.py
@@ -0,0 +1,116 @@
+import numpy as np
+
+######################################################################
+## class Optimizers()
+######################################################################
+
+class Optimizers():
+
+    def __init__(self, all_weights):
+        '''all_weights is a one-dimensional vector of all of a neural network's weights concatenated together'''
+
+        self.all_weights = all_weights
+
+        # The following initializations are only used by adam.
+        # Initializing mt, vt, beta1t and beta2t here allows multiple calls to adam to handle training
+        # with multiple subsets (batches) of training data.
+        self.mt = np.zeros_like(all_weights)
+        self.vt = np.zeros_like(all_weights)
+        self.beta1 = 0.9
+        self.beta2 = 0.999
+        self.beta1t = 1
+        self.beta2t = 1
+
+    def sgd(self, error_f, gradient_f, fargs=[], n_epochs=100, learning_rate=0.001, verbose=True, error_convert_f=None):
+        '''
+error_f: function that requires X and T as arguments (given in fargs) and returns mean squared error.
+gradient_f: function that requires X and T as arguments (in fargs) and returns the gradient of mean squared error
+            with respect to each weight.
+error_convert_f: function that converts the standardized error from error_f to original T units.
+        '''
+
+        error_trace = []
+        epochs_per_print = n_epochs // 10
+
+        for epoch in range(n_epochs):
+
+            error = error_f(*fargs)
+            grad = gradient_f(*fargs)
+
+            # Update all weights using -= to modify their values in-place.
+            self.all_weights -= learning_rate * grad
+
+            if error_convert_f:
+                error = error_convert_f(error)
+            error_trace.append(error)
+
+            if verbose and ((epoch + 1) % max(1, epochs_per_print) == 0):
+                print(f'sgd: Epoch {epoch+1:d} Error={error:.5f}')
+
+        return error_trace
+
+    def adam(self, error_f, gradient_f, fargs=[], n_epochs=100, learning_rate=0.001, verbose=True, error_convert_f=None):
+        '''
+error_f: function that requires X and T as arguments (given in fargs) and returns mean squared error.
+gradient_f: function that requires X and T as arguments (in fargs) and returns the gradient of mean squared error
+            with respect to each weight.
+error_convert_f: function that converts the standardized error from error_f to original T units.
+        '''
+
+        alpha = learning_rate  # the learning rate is called alpha in the original paper on adam
+        epsilon = 1e-8
+        error_trace = []
+        epochs_per_print = n_epochs // 10
+
+        for epoch in range(n_epochs):
+
+            error = error_f(*fargs)
+            grad = gradient_f(*fargs)
+
+            # Update the biased first and second moment estimates, and the
+            # running powers of beta1 and beta2 used for bias correction.
+            self.mt[:] = self.beta1 * self.mt + (1 - self.beta1) * grad
+            self.vt[:] = self.beta2 * self.vt + (1 - self.beta2) * grad * grad
+            self.beta1t *= self.beta1
+            self.beta2t *= self.beta2
+
+            m_hat = self.mt / (1 - self.beta1t)
+            v_hat = self.vt / (1 - self.beta2t)
+
+            # Update all weights using -= to modify their values in-place.
+            self.all_weights -= alpha * m_hat / (np.sqrt(v_hat) + epsilon)
+
+            if error_convert_f:
+                error = error_convert_f(error)
+            error_trace.append(error)
+
+            if verbose and ((epoch + 1) % max(1, epochs_per_print) == 0):
+                print(f'Adam: Epoch {epoch+1:d} Error={error:.5f}')
+
+        return error_trace
+
+
+if __name__ == '__main__':
+
+    import matplotlib.pyplot as plt
+    plt.ion()
+
+    def parabola(wmin):
+        return ((w - wmin) ** 2)[0]
+
+    def parabola_gradient(wmin):
+        return 2 * (w - wmin)
+
+    w = np.array([0.0])
+    optimizer = Optimizers(w)
+
+    wmin = 5
+    optimizer.sgd(parabola, parabola_gradient, [wmin],
+                  n_epochs=500, learning_rate=0.1)
+
+    print(f'sgd: Minimum of parabola is at {wmin}. Value found is {w}')
+
+    w = np.array([0.0])
+    optimizer = Optimizers(w)
+    optimizer.adam(parabola, parabola_gradient, [wmin],
+                   n_epochs=500, learning_rate=0.1)
+
+    print(f'adam: Minimum of parabola is at {wmin}. Value found is {w}')
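The comment in __init__ notes that mt, vt, beta1t and beta2t live on the instance so that adam can be called repeatedly, once per batch of training data, without resetting its moment estimates. The sketch below illustrates that usage on a small linear-regression problem. It is not part of this commit: the data, the error_f/gradient_f closures, the helper names, and the import path QNetwork.optimizers are assumptions for illustration only.

    # Illustrative sketch, not from the commit: reuse one Optimizers instance
    # across several batches so adam's state (mt, vt, beta1t, beta2t) carries over.
    import numpy as np
    from QNetwork.optimizers import Optimizers   # assumed package layout

    rng = np.random.default_rng(0)
    X = rng.uniform(-1, 1, size=(100, 1))
    T = 3.0 * X + 0.5 + 0.1 * rng.standard_normal((100, 1))   # noisy line, slope 3, intercept 0.5

    w = np.zeros(2)                  # [slope, intercept], updated in place by the optimizer

    def error_f(Xb, Tb):
        Y = Xb * w[0] + w[1]
        return np.mean((Tb - Y) ** 2)

    def gradient_f(Xb, Tb):
        d = Xb * w[0] + w[1] - Tb
        return np.array([2 * np.mean(d * Xb), 2 * np.mean(d)])

    optimizer = Optimizers(w)        # one instance, reused for every batch
    for Xb, Tb in zip(np.array_split(X, 5), np.array_split(T, 5)):
        optimizer.adam(error_f, gradient_f, [Xb, Tb],
                       n_epochs=200, learning_rate=0.01, verbose=False)

    print(f'Fitted slope {w[0]:.2f} and intercept {w[1]:.2f}; true values are 3.0 and 0.5')

Because the same Optimizers instance is reused, the bias-correction factors beta1t and beta2t keep accumulating from one batch to the next rather than restarting at 1, which is exactly the behavior the __init__ comment describes.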