This repository was archived by the owner on Dec 28, 2023. It is now read-only.

Commit 219b5dd
Author: Omegastick
Move observation normalizer update to after network update
1 parent: 348f705

File tree: 1 file changed, +6 −6 lines


src/algorithms/ppo.cpp

Lines changed: 6 additions & 6 deletions
@@ -47,12 +47,6 @@ std::vector<UpdateDatum> PPO::update(RolloutStorage &rollouts, float decay_level
     float clip_param = original_clip_param * decay_level;
     optimizer->options.learning_rate_ = original_learning_rate * decay_level;
 
-    // Update observation normalizer
-    if (policy->using_observation_normalizer())
-    {
-        policy->update_observation_normalizer(rollouts.get_observations());
-    }
-
     // Calculate advantages
     auto returns = rollouts.get_returns();
     auto value_preds = rollouts.get_value_predictions();
@@ -145,6 +139,12 @@ std::vector<UpdateDatum> PPO::update(RolloutStorage &rollouts, float decay_level
     }
 
 finish_update:
+    // Update observation normalizer
+    if (policy->using_observation_normalizer())
+    {
+        policy->update_observation_normalizer(rollouts.get_observations());
+    }
+
     total_value_loss /= num_updates;
     total_action_loss /= num_updates;
     total_entropy /= num_updates;

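For context: after this reordering, the running observation statistics are only refreshed once the PPO gradient epochs have finished (at the finish_update: label), so every mini-batch within a single PPO::update call is normalized with the same statistics that were in effect when the rollout was collected. The sketch below is a minimal, hypothetical illustration of a Welford-style running mean/variance normalizer of the kind update_observation_normalizer might maintain; the class name, members, and plain std::vector types are illustrative assumptions, not the repository's actual implementation.

// Hypothetical sketch (not the repository's code): a running mean/variance
// observation normalizer that would be updated only after the network update,
// mirroring the ordering introduced by this commit.
#include <cmath>
#include <cstddef>
#include <vector>

class RunningObservationNormalizer
{
  public:
    explicit RunningObservationNormalizer(std::size_t size)
        : count(0), mean(size, 0.0), m2(size, 0.0) {}

    // Fold a batch of observations into the running statistics (Welford's algorithm).
    void update(const std::vector<std::vector<double>> &observations)
    {
        for (const auto &obs : observations)
        {
            ++count;
            for (std::size_t i = 0; i < obs.size(); ++i)
            {
                const double delta = obs[i] - mean[i];
                mean[i] += delta / static_cast<double>(count);
                m2[i] += delta * (obs[i] - mean[i]);
            }
        }
    }

    // Normalize one observation with the current statistics.
    std::vector<double> normalize(const std::vector<double> &obs) const
    {
        std::vector<double> out(obs.size());
        for (std::size_t i = 0; i < obs.size(); ++i)
        {
            const double variance =
                count > 1 ? m2[i] / static_cast<double>(count - 1) : 1.0;
            out[i] = (obs[i] - mean[i]) / std::sqrt(variance + 1e-8);
        }
        return out;
    }

  private:
    std::size_t count;
    std::vector<double> mean;
    std::vector<double> m2;
};

Under that assumption, calling update() with the rollout observations after the optimizer steps, rather than before them, keeps the inputs seen during the gradient epochs on the same scale as the inputs the policy saw while collecting the rollout.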