ray-project · sven1977 · Mar 18, 2024 · Mar 18, 2024 · sven1977 · Mar 18, 2024
@@ -90,6 +90,10 @@ def _compute_values(self, batch, device=None):
 
         # Separate vf-encoder.
         if hasattr(self.encoder, "critic_encoder"):
+            if self.is_stateful():
+                # The recurrent encoders expect a `(state_in, h)` key in the
+                # input dict, while the key returned is `(state_in, critic, h)`.
+                batch[Columns.STATE_IN] = batch[Columns.STATE_IN][CRITIC]
             encoder_outs = self.encoder.critic_encoder(batch)[ENCODER_OUT]
         # Shared encoder.
         else:

@@ -716,7 +716,9 @@ def _batch_slice(self, slice_: slice) -> "SampleBatch":
 
         # Exclude INFOs from regular array slicing as the data under this column might
         # be a list (not good for `tree.map_structure` call).
-        infos = self.get(SampleBatch.INFOS)
+        # Furthermore, slicing does not work when the data in the column is a
+        # single object rather than a list or array.
+        infos = self.pop(SampleBatch.INFOS, None)
         data = tree.map_structure(lambda value: value[start:stop], self)
         if infos is not None:
             data[SampleBatch.INFOS] = infos[start:stop]