diff options
Diffstat (limited to 'one_revised_snake_q_table.ipynb')
-rw-r--r-- | one_revised_snake_q_table.ipynb | 40 |
1 file changed, 21 insertions, 19 deletions
diff --git a/one_revised_snake_q_table.ipynb b/one_revised_snake_q_table.ipynb index e2f7bb7..e827ae4 100644 --- a/one_revised_snake_q_table.ipynb +++ b/one_revised_snake_q_table.ipynb @@ -45,8 +45,8 @@ "outputs": [], "source": [ "# defines game window size and block size, in pixels\n", - "WINDOW_WIDTH = 640\n", - "WINDOW_HEIGHT = 480\n", + "WINDOW_WIDTH = 480\n", + "WINDOW_HEIGHT = 320\n", "GAME_UNITS = 80" ] }, @@ -60,7 +60,7 @@ "game_engine = multiplayer.Playfield(window_width=WINDOW_WIDTH,\n", " window_height=WINDOW_HEIGHT,\n", " units=GAME_UNITS,\n", - " g_speed=35,\n", + " g_speed=45,\n", " s_size=1)" ] }, @@ -143,7 +143,7 @@ { "data": { "text/plain": [ - "[<CollisionType.NONE: 2>]" + "[<CollisionType.GOAL: 1>]" ] }, "execution_count": 6, @@ -263,7 +263,9 @@ { "data": { "text/plain": [ - "([Point(x=320, y=160)], [Point(x=320, y=160)], Point(x=0, y=400))" + "([Point(x=160, y=80)],\n", + " [Point(x=160, y=80), Point(x=160, y=0)],\n", + " Point(x=0, y=0))" ] }, "execution_count": 9, @@ -552,7 +554,7 @@ "source": [ "reward = -1\n", "\n", - "def update_q(q, old_X, new_X, outcome, lr=0.05):\n", + "def update_q(q, old_X, new_X, outcome, lr=0.07):\n", " if outcome == multiplayer.CollisionType.GOAL:\n", " q[new_X.state, new_X.action] = 0\n", " else:\n", @@ -565,7 +567,7 @@ "id": "01b21e01-174e-4fdd-ad70-dcc1e6483fb2", "metadata": {}, "source": [ - "Now all that is needed is the training loop. I have high expectations for this agent, so I will only allow it 1500 moves to train itself! Here is where the outputs of pick_greedy_action come in handy, because they can be used as a direct index into Q:" + "Now all that is needed is the training loop. I have high expectations for this agent, so I will only allow it 2000 moves to train itself! 
Here is where the outputs of pick_greedy_action come in handy, because they can be used as a direct index into Q:" ] }, { @@ -575,9 +577,9 @@ "metadata": {}, "outputs": [], "source": [ - "n_steps = 1500\n", + "n_steps = 2000\n", "epsilon = 1\n", - "final_epsilon = 0.001\n", + "final_epsilon = 0.003\n", "epsilon_decay = np.exp(np.log(final_epsilon) / (n_steps))" ] }, @@ -618,14 +620,14 @@ { "data": { "text/plain": [ - "array([[-2.35070513, -1.09149338, -0.16535442, -0.56686264],\n", - " [-9.89365379, -3.92409428, -5.34681609, -1.89000207],\n", - " [-0.11260587, -4.74939046, -9.9192565 , -0.43756285],\n", - " [-1.40606955, -3.09973297, -1.00213181, -0.59918486],\n", - " [ 0. , -0.48427898, -3.18961465, -0.15631095],\n", - " [-1.23097846, -1.83318991, -3.93160124, -1.29734622],\n", - " [-0.85699409, -0.19789849, -0.82954074, -5.23579225],\n", - " [-4.3355246 , -1.17885939, -0.53698344, -2.4048526 ]])" + "array([[-7.98101961, -2.63365542, -0.40046043, -1.66295111],\n", + " [-6.79790104, -8.97312148, -2.76629639, -2.01841064],\n", + " [-2.5668375 , -7.76913115, -5.25510457, -1.60454875],\n", + " [-2.53858287, -4.87135085, -7.27897488, -3.55392954],\n", + " [-1.11332388, -1.16738974, -8.00673287, -3.76512078],\n", + " [-4.80299325, -1.82240999, -4.36261659, -7.78143806],\n", + " [-3.74031239, -1.81917483, -2.55794318, -8.59533619],\n", + " [-7.27706114, -5.22216365, -2.79252452, -3.34047701]])" ] }, "execution_count": 22, @@ -700,10 +702,10 @@ "outputs": [], "source": [ "for step in range(n_steps):\n", - " # p1\n", + " # p1 (YELLOW)\n", " p1_X = pick_greedy_action(set_q, p1, epsilon)\n", "\n", - " # p2\n", + " # p2 (RED)\n", " p2_X = pick_greedy_action(q, p2, epsilon) # state, action\n", " \n", " game_engine.player_advance([p1_X.action, p2_X.action])\n", |