completed DDPG
p-christ committed Dec 17, 2018
1 parent 8ccb911 commit bdb4b20
Showing 4 changed files with 8 additions and 10 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -6,4 +6,5 @@
drlnd/
*Banana.app/
*deep-reinforcement-learning/
*venv/
*venv/
*playground_test_runs.py
8 changes: 2 additions & 6 deletions Agents/Actor_Critic_Agents/DDPG_Agent.py
@@ -2,14 +2,11 @@
import torch
from torch import optim
from DQN_Agents.DQN_Agent_With_Fixed_Q_Targets import DQN_Agent_With_Fixed_Q_Targets
from DQN_Agents.DQN_Agent import DQN_Agent
from Model import Model
from Utilities.OU_Noise import OU_Noise
import numpy as np

""" WIP, Not Finished Yet """
# TODO the noise should act as a multiplier not an addition, otherwise the scale of the actions matters a lot
# TODO use batch normalisation
# TODO add batch normalisation
# TODO currently the critic takes the state and action choice in at layer 1 rather than concatenating them later in the network
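
The last TODO describes the usual DDPG critic layout, where the action is concatenated into a later hidden layer rather than being fed in next to the state at layer 1. Below is a minimal PyTorch sketch of that layout; the class name, layer sizes and attribute names are illustrative assumptions, not the repository's Model class.

import torch
import torch.nn as nn
import torch.nn.functional as F

class CriticSketch(nn.Module):
    """Illustrative DDPG critic: the state passes through one hidden layer first,
    and the action is concatenated afterwards, as the TODO above suggests."""
    def __init__(self, state_size, action_size, hidden_size=256):
        super().__init__()
        self.state_layer = nn.Linear(state_size, hidden_size)                 # state-only layer 1
        self.joint_layer = nn.Linear(hidden_size + action_size, hidden_size)  # action joins here
        self.q_out = nn.Linear(hidden_size, 1)                                # scalar Q-value

    def forward(self, state, action):
        x = F.relu(self.state_layer(state))
        x = torch.cat([x, action], dim=1)  # concatenate the action after layer 1
        x = F.relu(self.joint_layer(x))
        return self.q_out(x)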

class DDPG_Agent(DQN_Agent_With_Fixed_Q_Targets):
@@ -65,7 +62,7 @@ def step(self):
self.state = self.next_state #this is to set the state for the next iteration

def pick_action(self):

"""Picks an action using the actor network and then adds some noise to it to ensure exploration"""
state = torch.from_numpy(self.state).float().to(self.device)

self.actor_local.eval()
@@ -75,7 +72,6 @@ def pick_action(self):

action += self.noise.sample()


return action
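
Pieced together, pick_action in this style of agent usually looks something like the sketch below: run the actor in eval mode without tracking gradients, add OU noise for exploration, then switch back to train mode. This is a hedged reconstruction built around the attribute names visible in the diff (self.state, self.device, self.actor_local, self.noise); the eval/no_grad/train bookkeeping and the multiplicative-noise comment are assumptions, not the exact code in this commit.

def pick_action(self):
    """Picks an action with the actor network and adds OU noise to encourage exploration (sketch)."""
    state = torch.from_numpy(self.state).float().to(self.device)
    self.actor_local.eval()                  # no batch-norm/dropout updates while acting
    with torch.no_grad():
        action = self.actor_local(state).cpu().numpy()
    self.actor_local.train()
    action += self.noise.sample()            # additive OU noise, as in the diff
    # action *= (1 + self.noise.sample())    # multiplicative variant the TODO above suggests
    return action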

def compute_q_values_for_next_states(self, next_states):
3 changes: 2 additions & 1 deletion README.md
@@ -13,8 +13,9 @@ This repository contains PyTorch implementations of deep reinforcement learning
1. REINFORCE
1. Hill Climbing
7. Genetic Evolution
1. DDPG

I plan to include PPO, DDPG and A2C soon.
I also plan to include PPO and A2C soon.

### Usage ###

4 changes: 2 additions & 2 deletions requirements.txt
@@ -102,7 +102,7 @@ nbformat==4.4.0
networkx==2.1
nltk==3.3
nose==1.3.7
notebook==5.6.0
notebook==5.7.2
numba==0.39.0
numexpr==2.6.8
numpy==1.15.1
@@ -160,7 +160,7 @@ pyzmq==17.1.2
QtAwesome==0.4.4
qtconsole==4.4.1
QtPy==1.5.0
requests==2.19.1
requests==2.20.0
rope==0.11.0
ruamel-yaml==0.15.46
scikit-image==0.14.0
