path: root/one_revised_snake_q_table.ipynb
Diffstat (limited to 'one_revised_snake_q_table.ipynb')
-rw-r--r--  one_revised_snake_q_table.ipynb  40
1 files changed, 21 insertions, 19 deletions
diff --git a/one_revised_snake_q_table.ipynb b/one_revised_snake_q_table.ipynb
index e2f7bb7..e827ae4 100644
--- a/one_revised_snake_q_table.ipynb
+++ b/one_revised_snake_q_table.ipynb
@@ -45,8 +45,8 @@
"outputs": [],
"source": [
"# defines game window size and block size, in pixels\n",
- "WINDOW_WIDTH = 640\n",
- "WINDOW_HEIGHT = 480\n",
+ "WINDOW_WIDTH = 480\n",
+ "WINDOW_HEIGHT = 320\n",
"GAME_UNITS = 80"
]
},
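With the new values the playfield is 480 / 80 = 6 blocks wide by 320 / 80 = 4 blocks tall (the old 640 x 480 window gave an 8 x 6 grid). A one-line sanity check of that arithmetic:

    WINDOW_WIDTH, WINDOW_HEIGHT, GAME_UNITS = 480, 320, 80
    print(WINDOW_WIDTH // GAME_UNITS, WINDOW_HEIGHT // GAME_UNITS)   # 6 4 -> a 24-cell grid
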
@@ -60,7 +60,7 @@
"game_engine = multiplayer.Playfield(window_width=WINDOW_WIDTH,\n",
" window_height=WINDOW_HEIGHT,\n",
" units=GAME_UNITS,\n",
- " g_speed=35,\n",
+ " g_speed=45,\n",
" s_size=1)"
]
},
@@ -143,7 +143,7 @@
{
"data": {
"text/plain": [
- "[<CollisionType.NONE: 2>]"
+ "[<CollisionType.GOAL: 1>]"
]
},
"execution_count": 6,
@@ -263,7 +263,9 @@
{
"data": {
"text/plain": [
- "([Point(x=320, y=160)], [Point(x=320, y=160)], Point(x=0, y=400))"
+ "([Point(x=160, y=80)],\n",
+ " [Point(x=160, y=80), Point(x=160, y=0)],\n",
+ " Point(x=0, y=0))"
]
},
"execution_count": 9,
@@ -552,7 +554,7 @@
"source": [
"reward = -1\n",
"\n",
- "def update_q(q, old_X, new_X, outcome, lr=0.05):\n",
+ "def update_q(q, old_X, new_X, outcome, lr=0.07):\n",
" if outcome == multiplayer.CollisionType.GOAL:\n",
" q[new_X.state, new_X.action] = 0\n",
" else:\n",
@@ -565,7 +567,7 @@
"id": "01b21e01-174e-4fdd-ad70-dcc1e6483fb2",
"metadata": {},
"source": [
- "Now all that is needed is the training loop. I have high expectations for this agent, so I will only allow it 1500 moves to train itself! Here is where the outputs of pick_greedy_action come in handy, because they can be used as a direct index into Q:"
+ "Now all that is needed is the training loop. I have high expectations for this agent, so I will only allow it 2000 moves to train itself! Here is where the outputs of pick_greedy_action come in handy, because they can be used as a direct index into Q:"
]
},
{
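The cell above leans on pick_greedy_action returning something that can index Q directly. A hedged sketch of what such a helper could look like, assuming an epsilon-greedy rule over an (n_states, n_actions) table; the X namedtuple and the player.state attribute are illustrative assumptions, not the notebook's exact code:

    import numpy as np
    from collections import namedtuple

    rng = np.random.default_rng()
    X = namedtuple("X", ["state", "action"])   # assumed return type: a direct index into Q

    def pick_greedy_action(q, player, epsilon):
        # Epsilon-greedy over the row of Q for the player's current state.
        # How the notebook maps a player onto a discrete state index is not
        # shown in this diff; a player.state attribute is assumed here.
        state = player.state
        if rng.random() < epsilon:
            action = int(rng.integers(q.shape[1]))   # explore: uniform random action
        else:
            action = int(np.argmax(q[state]))        # exploit: best-known action
        return X(state=state, action=action)

    # The returned pair indexes the table directly, as the cell above describes:
    #   x = pick_greedy_action(q, player, epsilon); q[x.state, x.action]
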
@@ -575,9 +577,9 @@
"metadata": {},
"outputs": [],
"source": [
- "n_steps = 1500\n",
+ "n_steps = 2000\n",
"epsilon = 1\n",
- "final_epsilon = 0.001\n",
+ "final_epsilon = 0.003\n",
"epsilon_decay = np.exp(np.log(final_epsilon) / (n_steps))"
]
},
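The decay constant is chosen so that multiplying epsilon by it once per step carries it from 1 down to final_epsilon over exactly n_steps steps, since epsilon_decay ** n_steps = exp(log(final_epsilon)) = final_epsilon. A quick check with the values above:

    import numpy as np

    n_steps = 2000
    epsilon = 1
    final_epsilon = 0.003
    epsilon_decay = np.exp(np.log(final_epsilon) / n_steps)

    print(epsilon_decay)                          # ~0.99710, the per-step multiplier
    print(epsilon * epsilon_decay ** n_steps)     # ~0.003, i.e. final_epsilon
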
@@ -618,14 +620,14 @@
{
"data": {
"text/plain": [
- "array([[-2.35070513, -1.09149338, -0.16535442, -0.56686264],\n",
- " [-9.89365379, -3.92409428, -5.34681609, -1.89000207],\n",
- " [-0.11260587, -4.74939046, -9.9192565 , -0.43756285],\n",
- " [-1.40606955, -3.09973297, -1.00213181, -0.59918486],\n",
- " [ 0. , -0.48427898, -3.18961465, -0.15631095],\n",
- " [-1.23097846, -1.83318991, -3.93160124, -1.29734622],\n",
- " [-0.85699409, -0.19789849, -0.82954074, -5.23579225],\n",
- " [-4.3355246 , -1.17885939, -0.53698344, -2.4048526 ]])"
+ "array([[-7.98101961, -2.63365542, -0.40046043, -1.66295111],\n",
+ " [-6.79790104, -8.97312148, -2.76629639, -2.01841064],\n",
+ " [-2.5668375 , -7.76913115, -5.25510457, -1.60454875],\n",
+ " [-2.53858287, -4.87135085, -7.27897488, -3.55392954],\n",
+ " [-1.11332388, -1.16738974, -8.00673287, -3.76512078],\n",
+ " [-4.80299325, -1.82240999, -4.36261659, -7.78143806],\n",
+ " [-3.74031239, -1.81917483, -2.55794318, -8.59533619],\n",
+ " [-7.27706114, -5.22216365, -2.79252452, -3.34047701]])"
]
},
"execution_count": 22,
@@ -700,10 +702,10 @@
"outputs": [],
"source": [
"for step in range(n_steps):\n",
- " # p1\n",
+ " # p1 (YELLOW)\n",
" p1_X = pick_greedy_action(set_q, p1, epsilon)\n",
"\n",
- " # p2\n",
+ " # p2 (RED)\n",
" p2_X = pick_greedy_action(q, p2, epsilon) # state, action\n",
" \n",
" game_engine.player_advance([p1_X.action, p2_X.action])\n",