path: root/one_revised_snake_q_table.ipynb
Diffstat (limited to 'one_revised_snake_q_table.ipynb')
-rw-r--r--  one_revised_snake_q_table.ipynb  40
1 files changed, 21 insertions, 19 deletions
diff --git a/one_revised_snake_q_table.ipynb b/one_revised_snake_q_table.ipynb
index e2f7bb7..e827ae4 100644
--- a/one_revised_snake_q_table.ipynb
+++ b/one_revised_snake_q_table.ipynb
@@ -45,8 +45,8 @@
"outputs": [],
"source": [
"# defines game window size and block size, in pixels\n",
- "WINDOW_WIDTH = 640\n",
- "WINDOW_HEIGHT = 480\n",
+ "WINDOW_WIDTH = 480\n",
+ "WINDOW_HEIGHT = 320\n",
"GAME_UNITS = 80"
]
},
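With the new values the playfield is 480 / 80 = 6 blocks wide by 320 / 80 = 4 blocks tall (the old 640 x 480 window gave an 8 x 6 grid). A one-line sanity check of that arithmetic:

    WINDOW_WIDTH, WINDOW_HEIGHT, GAME_UNITS = 480, 320, 80
    print(WINDOW_WIDTH // GAME_UNITS, WINDOW_HEIGHT // GAME_UNITS)   # 6 4 -> a 24-cell grid
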
@@ -60,7 +60,7 @@
"game_engine = multiplayer.Playfield(window_width=WINDOW_WIDTH,\n",
" window_height=WINDOW_HEIGHT,\n",
" units=GAME_UNITS,\n",
- " g_speed=35,\n",
+ " g_speed=45,\n",
" s_size=1)"
]
},
@@ -143,7 +143,7 @@
{
"data": {
"text/plain": [
- "[<CollisionType.NONE: 2>]"
+ "[<CollisionType.GOAL: 1>]"
]
},
"execution_count": 6,
@@ -263,7 +263,9 @@
{
"data": {
"text/plain": [
- "([Point(x=320, y=160)], [Point(x=320, y=160)], Point(x=0, y=400))"
+ "([Point(x=160, y=80)],\n",
+ " [Point(x=160, y=80), Point(x=160, y=0)],\n",
+ " Point(x=0, y=0))"
]
},
"execution_count": 9,
@@ -552,7 +554,7 @@
"source": [
"reward = -1\n",
"\n",
- "def update_q(q, old_X, new_X, outcome, lr=0.05):\n",
+ "def update_q(q, old_X, new_X, outcome, lr=0.07):\n",
" if outcome == multiplayer.CollisionType.GOAL:\n",
" q[new_X.state, new_X.action] = 0\n",
" else:\n",
@@ -565,7 +567,7 @@
"id": "01b21e01-174e-4fdd-ad70-dcc1e6483fb2",
"metadata": {},
"source": [
- "Now all that is needed is the training loop. I have high expectations for this agent, so I will only allow it 1500 moves to train itself! Here is where the outputs of pick_greedy_action come in handy, because they can be used as a direct index into Q:"
+ "Now all that is needed is the training loop. I have high expectations for this agent, so I will only allow it 2000 moves to train itself! Here is where the outputs of pick_greedy_action come in handy, because they can be used as a direct index into Q:"
]
},
{
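The cell above leans on pick_greedy_action returning something that can index Q directly. A hedged sketch of what such a helper could look like, assuming an epsilon-greedy rule over an (n_states, n_actions) table; the X namedtuple and the player.state attribute are illustrative assumptions, not the notebook's exact code:

    import numpy as np
    from collections import namedtuple

    rng = np.random.default_rng()
    X = namedtuple("X", ["state", "action"])   # assumed return type: a direct index into Q

    def pick_greedy_action(q, player, epsilon):
        # Epsilon-greedy over the row of Q for the player's current state.
        # How the notebook maps a player onto a discrete state index is not
        # shown in this diff; a player.state attribute is assumed here.
        state = player.state
        if rng.random() < epsilon:
            action = int(rng.integers(q.shape[1]))   # explore: uniform random action
        else:
            action = int(np.argmax(q[state]))        # exploit: best-known action
        return X(state=state, action=action)

    # The returned pair indexes the table directly, as the cell above describes:
    #   x = pick_greedy_action(q, player, epsilon); q[x.state, x.action]
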
@@ -575,9 +577,9 @@
"metadata": {},
"outputs": [],
"source": [
- "n_steps = 1500\n",
+ "n_steps = 2000\n",
"epsilon = 1\n",
- "final_epsilon = 0.001\n",
+ "final_epsilon = 0.003\n",
"epsilon_decay = np.exp(np.log(final_epsilon) / (n_steps))"
]
},
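The decay constant is chosen so that multiplying epsilon by it once per step carries it from 1 down to final_epsilon over exactly n_steps steps, since epsilon_decay ** n_steps = exp(log(final_epsilon)) = final_epsilon. A quick check with the values above:

    import numpy as np

    n_steps = 2000
    epsilon = 1
    final_epsilon = 0.003
    epsilon_decay = np.exp(np.log(final_epsilon) / n_steps)

    print(epsilon_decay)                          # ~0.99710, the per-step multiplier
    print(epsilon * epsilon_decay ** n_steps)     # ~0.003, i.e. final_epsilon
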
@@ -618,14 +620,14 @@
{
"data": {
"text/plain": [
- "array([[-2.35070513, -1.09149338, -0.16535442, -0.56686264],\n",
- " [-9.89365379, -3.92409428, -5.34681609, -1.89000207],\n",
- " [-0.11260587, -4.74939046, -9.9192565 , -0.43756285],\n",
- " [-1.40606955, -3.09973297, -1.00213181, -0.59918486],\n",
- " [ 0. , -0.48427898, -3.18961465, -0.15631095],\n",
- " [-1.23097846, -1.83318991, -3.93160124, -1.29734622],\n",
- " [-0.85699409, -0.19789849, -0.82954074, -5.23579225],\n",
- " [-4.3355246 , -1.17885939, -0.53698344, -2.4048526 ]])"
+ "array([[-7.98101961, -2.63365542, -0.40046043, -1.66295111],\n",
+ " [-6.79790104, -8.97312148, -2.76629639, -2.01841064],\n",
+ " [-2.5668375 , -7.76913115, -5.25510457, -1.60454875],\n",
+ " [-2.53858287, -4.87135085, -7.27897488, -3.55392954],\n",
+ " [-1.11332388, -1.16738974, -8.00673287, -3.76512078],\n",
+ " [-4.80299325, -1.82240999, -4.36261659, -7.78143806],\n",
+ " [-3.74031239, -1.81917483, -2.55794318, -8.59533619],\n",
+ " [-7.27706114, -5.22216365, -2.79252452, -3.34047701]])"
]
},
"execution_count": 22,
@@ -700,10 +702,10 @@
"outputs": [],
"source": [
"for step in range(n_steps):\n",
- " # p1\n",
+ " # p1 (YELLOW)\n",
" p1_X = pick_greedy_action(set_q, p1, epsilon)\n",
"\n",
- " # p2\n",
+ " # p2 (RED)\n",
" p2_X = pick_greedy_action(q, p2, epsilon) # state, action\n",
" \n",
" game_engine.player_advance([p1_X.action, p2_X.action])\n",