[bug-fix] Update the gail config for the new steps in 0.14.0 (#3475)

Ervin T · anupambhatnagar · commit 76a33ec85f55 · 2020-02-25T16:33:16.000-08:00
diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
@@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 ### Bug Fixes
 - Fixed an issue which caused self-play training sessions to consume a lot of memory. (#3451)
 - Fixed an IndexError when using GAIL or behavioral cloning with demonstrations recorded with 0.14.0 or later (#3464)
+- Updated `gail_config.yaml` so that `max_steps`, `summary_freq`, and the behavioral cloning `steps` values account for steps being counted per Agent (#3475)
 
 
 ## [0.14.0-preview] - 2020-02-13
diff --git a/config/gail_config.yaml b/config/gail_config.yaml
@@ -14,27 +14,27 @@ default:
     num_layers: 2
     time_horizon: 64
     sequence_length: 64
-    summary_freq: 1000
+    summary_freq: 10000
     use_recurrent: false
     reward_signals:
         extrinsic:
             strength: 1.0
             gamma: 0.99
 
 Pyramids:
-    summary_freq: 2000
+    summary_freq: 30000
     time_horizon: 128
     batch_size: 128
     buffer_size: 2048
     hidden_units: 512
     num_layers: 2
     beta: 1.0e-2
-    max_steps: 5.0e5
+    max_steps: 1.0e7
     num_epoch: 3
     behavioral_cloning:
         demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo
         strength: 0.5
-        steps: 10000
+        steps: 150000
     reward_signals:
         extrinsic:
             strength: 1.0
@@ -55,14 +55,14 @@ CrawlerStatic:
     time_horizon: 1000
     batch_size: 2024
     buffer_size: 20240
-    max_steps: 1e6
-    summary_freq: 3000
+    max_steps: 1e7
+    summary_freq: 30000
     num_layers: 3
     hidden_units: 512
     behavioral_cloning:
         demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
         strength: 0.5
-        steps: 5000
+        steps: 50000
     reward_signals:
         gail:
             strength: 1.0
@@ -71,20 +71,20 @@ CrawlerStatic:
             demo_path: Project/Assets/ML-Agents/Examples/Crawler/Demos/ExpertCrawlerSta.demo
 
 PushBlock:
-    max_steps: 5.0e4
+    max_steps: 1.5e7
     batch_size: 128
     buffer_size: 2048
     beta: 1.0e-2
     hidden_units: 256
-    summary_freq: 2000
+    summary_freq: 60000
     time_horizon: 64
     num_layers: 2
     reward_signals:
         gail:
             strength: 1.0
             gamma: 0.99
             encoding_size: 128
             demo_path: Project/Assets/ML-Agents/Examples/PushBlock/Demos/ExpertPush.demo
 
 Hallway:
     use_recurrent: true
@@ -96,8 +96,8 @@ Hallway:
     num_epoch: 3
     buffer_size: 1024
     batch_size: 128
-    max_steps: 5.0e5
-    summary_freq: 1000
+    max_steps: 1.0e7
+    summary_freq: 10000
     time_horizon: 64
     reward_signals:
         extrinsic:
@@ -111,8 +111,7 @@ Hallway:
 
 FoodCollector:
     batch_size: 64
-    summary_freq: 1000
-    max_steps: 5.0e4
+    max_steps: 2.0e6
     use_recurrent: false
     hidden_units: 128
     learning_rate: 3.0e-4