Fix bug in the replay loop

simoninithomas · web-flow · commit de9fe540fd4d · 2018-07-08T18:12:22.000+02:00
diff --git a/Deep Q Learning/Doom/Deep Q learning with Doom.ipynb b/Deep Q Learning/Doom/Deep Q learning with Doom.ipynb
@@ -102,7 +102,9 @@
   {
    "cell_type": "code",
    "execution_count": 1,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "import tensorflow as tf      # Deep Learning library\n",
@@ -150,7 +152,9 @@
   {
    "cell_type": "code",
    "execution_count": 2,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "\"\"\"\n",
@@ -208,7 +212,9 @@
   {
    "cell_type": "code",
    "execution_count": 3,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "game, possible_actions = create_environment()"
@@ -232,7 +238,9 @@
   {
    "cell_type": "code",
    "execution_count": 4,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "\"\"\"\n",
@@ -299,7 +307,9 @@
   {
    "cell_type": "code",
    "execution_count": 5,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "stack_size = 4 # We stack 4 frames\n",
@@ -348,7 +358,9 @@
   {
    "cell_type": "code",
    "execution_count": 6,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "### MODEL HYPERPARAMETERS\n",
@@ -397,7 +409,9 @@
   {
    "cell_type": "code",
    "execution_count": 7,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "class DQNetwork:\n",
@@ -517,7 +531,9 @@
   {
    "cell_type": "code",
    "execution_count": 8,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# Reset the graph\n",
@@ -541,7 +557,9 @@
   {
    "cell_type": "code",
    "execution_count": 9,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "class Memory():\n",
@@ -570,7 +588,9 @@
   {
    "cell_type": "code",
    "execution_count": 10,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# Instantiate memory\n",
@@ -636,7 +656,9 @@
   {
    "cell_type": "code",
    "execution_count": 11,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "# Setup TensorBoard Writer\n",
@@ -683,7 +705,9 @@
   {
    "cell_type": "code",
    "execution_count": 12,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "\"\"\"\n",
@@ -885,7 +909,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "collapsed": true
+   },
    "outputs": [],
    "source": [
     "with tf.Session() as sess:\n",
@@ -894,32 +920,48 @@
     "    \n",
     "    totalScore = 0\n",
     "    \n",
-    "   \n",
     "    # Load the model\n",
     "    saver.restore(sess, \"./models/model.ckpt\")\n",
     "    game.init()\n",
     "    for i in range(1):\n",
     "        \n",
+    "        done = False\n",
+    "        \n",
     "        game.new_episode()\n",
+    "        \n",
+    "        state = game.get_state().screen_buffer\n",
+    "        state, stacked_frames = stack_frames(stacked_frames, state, True)\n",
+    "            \n",
     "        while not game.is_episode_finished():\n",
-    "            frame = game.get_state().screen_buffer\n",
-    "            state = stack_frames(stacked_frames, frame)\n",
     "            # Take the biggest Q value (= the best action)\n",
     "            Qs = sess.run(DQNetwork.output, feed_dict = {DQNetwork.inputs_: state.reshape((1, *state.shape))})\n",
-    "            action = np.argmax(Qs)\n",
-    "            action = possible_actions[int(action)]\n",
-    "            game.make_action(action)        \n",
+    "            \n",
+    "            # Take the biggest Q value (= the best action)\n",
+    "            choice = np.argmax(Qs)\n",
+    "            action = possible_actions[int(choice)]\n",
+    "            \n",
+    "            game.make_action(action)\n",
+    "            done = game.is_episode_finished()\n",
     "            score = game.get_total_reward()\n",
+    "            \n",
+    "            if done:\n",
+    "                break  \n",
+    "                \n",
+    "            else:\n",
+    "                print(\"else\")\n",
+    "                next_state = game.get_state().screen_buffer\n",
+    "                next_state, stacked_frames = stack_frames(stacked_frames, next_state, False)\n",
+    "                state = next_state\n",
+    "                \n",
+    "        score = game.get_total_reward()\n",
     "        print(\"Score: \", score)\n",
-    "        totalScore += score\n",
-    "    print(\"TOTAL_SCORE\", totalScore/100.0)\n",
     "    game.close()"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [default]",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },