Implement Real-Time Action Chunking (RTC) for SmolVLA #1521

Draft: wants to merge 64 commits into base: main
Commits (64)
416a8b8
Merge together proto files and refactor Async inference
helper2424 Jul 10, 2025
09c7f34
Fixup for Async inference
helper2424 Jul 10, 2025
e50a2fc
Drop not reuqired changes
helper2424 Jul 10, 2025
10c688f
Fix tests
helper2424 Jul 10, 2025
e060896
Drop old async files
helper2424 Jul 10, 2025
6b6727f
Merge branch 'main' into user/helper2424/updated_merge_proto
michel-aractingi Jul 11, 2025
58a82d3
Drop chunk_size param
helper2424 Jul 15, 2025
2c8c73b
Merge branch 'main' of https://github.com/huggingface/lerobot into us…
helper2424 Jul 15, 2025
ef232a6
Merge branch 'main' of https://github.com/huggingface/lerobot into us…
helper2424 Jul 16, 2025
baa0fcd
Merge branch 'main' of https://github.com/huggingface/lerobot into us…
helper2424 Jul 18, 2025
60ea278
Fix versions
helper2424 Jul 18, 2025
77158e6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 18, 2025
53ba25b
Fix wrong fix
helper2424 Jul 19, 2025
c87951a
Refactor Async architecture
helper2424 Jul 11, 2025
7639101
Update client architecture
helper2424 Jul 11, 2025
a10dcb0
Fix sleeping logic
ben-z Jul 13, 2025
f325329
Add more types and debug runaway queue update
ben-z Jul 13, 2025
e0a6fb2
Debug robot client
ben-z Jul 13, 2025
2bf3252
Get observations in the policy client thread
ben-z Jul 13, 2025
b0f2082
Make the client logging more consistent
ben-z Jul 14, 2025
b43ccd7
Prepare for RTC implementation
ben-z Jul 16, 2025
5b95e6b
Implement RTC denoising with static d and s
ben-z Jul 17, 2025
acfa524
Remove debugging statements and move denoising to no_grad context to …
ben-z Jul 17, 2025
a65575c
Minor comment fix
ben-z Jul 17, 2025
c50bfb0
Hard-code RTC parameters for testing
ben-z Jul 17, 2025
f892e6f
Decrease logging in robot_client
ben-z Jul 17, 2025
b77fee0
Enable policy server to populate rtc_s and rtc_d
ben-z Jul 17, 2025
f4acc31
Optimize async client latency by using compression and tuning message…
ben-z Jul 17, 2025
eaf10c3
Improve latency by enabling compression in policy server
ben-z Jul 17, 2025
27f64da
Add option for model compilation to speed up inference
ben-z Jul 17, 2025
d5d4a7a
Reduce rtc_d now that we have lower latency
ben-z Jul 17, 2025
f6f1417
Tune end_s for chunk size 100
ben-z Jul 17, 2025
f215140
Set s_end to 75
ben-z Jul 18, 2025
7d033f1
Fix | None syntax
ben-z Jul 18, 2025
a21afa1
Add back chunk_size argument to send_bytes_in_chunks so different pro…
ben-z Jul 18, 2025
c8a888d
Make policy server work again after rebase
ben-z Jul 19, 2025
c500e92
Add inference_rtc_d and inference_rtc_soft_mask_length to SmolVLA par…
ben-z Jul 20, 2025
7c1e73e
Remove uneeded functionality
ben-z Jul 20, 2025
a5fea73
Rename steps_since_chunk_start to steps_since_last_chunk_start
ben-z Jul 20, 2025
5b160d0
Pass inference async stats to policies
ben-z Jul 20, 2025
8cd3060
Use async stats in smolvla
ben-z Jul 20, 2025
d9d374c
Fix policy server asyncstats import
ben-z Jul 20, 2025
4d97e2b
Fix smolvla emm
ben-z Jul 20, 2025
a79a96f
Use configurable soft mask length
ben-z Jul 20, 2025
f727ebb
Update log to show softmask length
ben-z Jul 20, 2025
ec78758
Use soft_mask instead of softmask in logging
ben-z Jul 20, 2025
49a05c9
Log soft mask length properly
ben-z Jul 20, 2025
9a055f1
Refactor smolvla to use the notation in the paper (t,d,s)
ben-z Jul 20, 2025
0b378e4
Fix bad merge
ben-z Jul 20, 2025
5c128ae
Use quotes to work around type issue
ben-z Jul 20, 2025
dca5ecd
Make all policies compatible with the new predict_action_chunk and se…
ben-z Jul 20, 2025
524d52b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 20, 2025
5fd21bc
Add docs for filter_args_recursive
ben-z Jul 20, 2025
3497da2
Rename s_end to s
ben-z Jul 20, 2025
2b930fd
Fix variable naming conflict
ben-z Jul 20, 2025
5fa967b
Fix softmask logic and use policy parameters properly
ben-z Jul 20, 2025
d1f66e5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 20, 2025
5b0ec8a
Fix log prefix for policy server receive_bytes_in_chunks
ben-z Jul 20, 2025
c81a65e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 20, 2025
c06a46d
Add back warning for A_tau_d_err too high
ben-z Jul 20, 2025
ed5bd24
Add inference_rtc_debug flag for debug printing
ben-z Jul 20, 2025
1ca8fbe
Address ruff errors
ben-z Jul 20, 2025
ddbcc49
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jul 20, 2025
cf28ede
Rename logged variable to reduce confusion
ben-z Jul 20, 2025
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -95,7 +95,7 @@ dependencies = [
pygame-dep = ["pygame>=2.5.1"]
placo-dep = ["placo>=0.9.6"]
transformers-dep = ["transformers>=4.50.3,<4.52.0"] # TODO: Bumb dependency
grpcio-dep = ["grpcio==1.71.0"]
grpcio-dep = ["grpcio==1.73.1", "protobuf==6.31.0"]

# Motors
feetech = ["feetech-servo-sdk>=1.0.0"]
@@ -119,14 +119,14 @@ intelrealsense = [
# Policies
pi0 = ["lerobot[transformers-dep]"]
smolvla = ["lerobot[transformers-dep]", "num2words>=0.5.14", "accelerate>=1.7.0", "safetensors>=0.4.3"]
hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.9", "protobuf>=5.29.3", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]
hilserl = ["lerobot[transformers-dep]", "gym-hil>=0.1.9", "lerobot[grpcio-dep]", "lerobot[placo-dep]"]

# Features
async = ["lerobot[grpcio-dep]", "matplotlib>=3.10.3"]

# Development
docs = ["hf-doc-builder @ git+https://github.com/huggingface/doc-builder.git@main", "watchdog >= 6.0.0"]
dev = ["pre-commit>=3.7.0", "debugpy>=1.8.1", "grpcio-tools==1.71.0"]
dev = ["pre-commit>=3.7.0", "debugpy>=1.8.1", "lerobot[grpcio-dep]", "grpcio-tools==1.73.1"]
test = ["pytest>=8.1.0", "pytest-timeout>=2.4.0", "pytest-cov>=5.0.0", "mock-serial>=0.0.1 ; sys_platform != 'win32'"]
video_benchmark = ["scikit-image>=0.23.2", "pandas>=2.2.2"]

76 changes: 57 additions & 19 deletions src/lerobot/configs/parser.py
@@ -19,11 +19,14 @@
from collections.abc import Sequence
from functools import wraps
from pathlib import Path
from typing import TypeVar

import draccus

from lerobot.utils.utils import has_method

T = TypeVar("T")

PATH_KEY = "path"
PLUGIN_DISCOVERY_SUFFIX = "discover_packages_path"

@@ -151,6 +154,32 @@ def filter_arg(field_to_filter: str, args: Sequence[str] | None = None) -> list[
return [arg for arg in args if not arg.startswith(f"--{field_to_filter}=")]


def filter_args_recursive(field_name: str, args: Sequence[str] | None = None) -> tuple[list[str], list[str]]:
"""
Filters arguments for a given field and all its subfields.

Args:
field_name (str): The name of the field to filter arguments for.
args (Sequence[str] | None): The sequence of command-line arguments to be filtered.
Defaults to None.

Returns:
tuple[list[str], list[str]]: A tuple containing two lists:
- The first list contains arguments that start with the field name or subfield name.
- The second list contains arguments that do not start with the field name or subfield name.
"""
with_field = []
without_field = []

for arg in args:
if arg.startswith(f"--{field_name}.") or arg.startswith(f"--{field_name}="):
with_field.append(arg)
else:
without_field.append(arg)

return with_field, without_field


def filter_path_args(fields_to_filter: str | list[str], args: Sequence[str] | None = None) -> list[str]:
"""
Filters command-line arguments related to fields with specific path arguments.
@@ -184,7 +213,11 @@ def filter_path_args(fields_to_filter: str | list[str], args: Sequence[str] | No
return filtered_args


def wrap(config_path: Path | None = None):
def parse(
config_class: type[T],
config_path: Path | str | None = None,
args: Sequence[str] | None = None,
) -> T:
"""
HACK: Similar to draccus.wrap but does three additional things:
- Will remove '.path' arguments from CLI in order to process them later on.
@@ -194,7 +227,29 @@ def wrap(config_path: Path | None = None):
their own subclasses of config classes, so that draccus can find the right class to instantiate
from the CLI '.type' arguments
"""
cli_args = args or sys.argv[1:]
plugin_args = parse_plugin_args(PLUGIN_DISCOVERY_SUFFIX, cli_args)
for plugin_cli_arg, plugin_path in plugin_args.items():
try:
load_plugin(plugin_path)
except PluginLoadError as e:
# add the relevant CLI arg to the error message
raise PluginLoadError(f"{e}\nFailed plugin CLI Arg: {plugin_cli_arg}") from e
cli_args = filter_arg(plugin_cli_arg, cli_args)
config_path_cli = parse_arg("config_path", cli_args)
if has_method(config_class, "__get_path_fields__"):
path_fields = config_class.__get_path_fields__()
cli_args = filter_path_args(path_fields, cli_args)
if has_method(config_class, "from_pretrained") and config_path_cli:
cli_args = filter_arg("config_path", cli_args)
cfg = config_class.from_pretrained(config_path_cli, cli_args=cli_args)
else:
cfg = draccus.parse(config_class=config_class, config_path=config_path, args=cli_args)

return cfg


def wrap(config_path: Path | None = None):
def wrapper_outer(fn):
@wraps(fn)
def wrapper_inner(*args, **kwargs):
@@ -204,24 +259,7 @@ def wrapper_inner(*args, **kwargs):
cfg = args[0]
args = args[1:]
else:
cli_args = sys.argv[1:]
plugin_args = parse_plugin_args(PLUGIN_DISCOVERY_SUFFIX, cli_args)
for plugin_cli_arg, plugin_path in plugin_args.items():
try:
load_plugin(plugin_path)
except PluginLoadError as e:
# add the relevant CLI arg to the error message
raise PluginLoadError(f"{e}\nFailed plugin CLI Arg: {plugin_cli_arg}") from e
cli_args = filter_arg(plugin_cli_arg, cli_args)
config_path_cli = parse_arg("config_path", cli_args)
if has_method(argtype, "__get_path_fields__"):
path_fields = argtype.__get_path_fields__()
cli_args = filter_path_args(path_fields, cli_args)
if has_method(argtype, "from_pretrained") and config_path_cli:
cli_args = filter_arg("config_path", cli_args)
cfg = argtype.from_pretrained(config_path_cli, cli_args=cli_args)
else:
cfg = draccus.parse(config_class=argtype, config_path=config_path, args=cli_args)
cfg = parse(config_class=argtype, config_path=config_path)
response = fn(cfg, *args, **kwargs)
return response

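Note (illustrative): the refactor extracts the body of the `@wrap()` decorator into a standalone `parse()` helper and adds `filter_args_recursive()` for splitting CLI args by field. A minimal usage sketch, with hypothetical argument values, might look like this:

```python
# Sketch only; argument values and SomeConfigClass are hypothetical, not taken from this diff.
from lerobot.configs.parser import filter_args_recursive, parse

# Split CLI args into those addressed to `policy` (including subfields) and the rest.
args = ["--policy.type=smolvla", "--policy.inference_enable_rtc=true", "--fps=30"]
policy_args, other_args = filter_args_recursive("policy", args)
# policy_args == ["--policy.type=smolvla", "--policy.inference_enable_rtc=true"]
# other_args  == ["--fps=30"]

# parse() is the programmatic counterpart of the @wrap() decorator: it loads any
# CLI-declared plugins, handles `.path`/`config_path` arguments, then defers to
# draccus.parse to build the config instance.
# cfg = parse(SomeConfigClass, args=other_args)
```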
8 changes: 8 additions & 0 deletions src/lerobot/configs/types.py
@@ -40,3 +40,11 @@ def __getitem__(self, key: Any) -> Any: ...
class PolicyFeature:
type: FeatureType
shape: tuple


@dataclass
class AsyncStats:
# the number of ticks executed since the beginning of the last action chunk
steps_since_last_chunk_start: int
# round-trip inference latency in ticks.
inference_latency_steps: int
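Note (illustrative): `AsyncStats` packages the timing information measured by the async stack so the policy can reason about inference delay. A minimal construction sketch, assuming the server forwards it to the policy through the new keyword arguments (the keyword name below is an assumption):

```python
# Sketch only; the `async_stats` keyword name is an assumption based on the commit
# history ("Pass inference async stats to policies"), not read off this diff.
from lerobot.configs.types import AsyncStats

stats = AsyncStats(
    steps_since_last_chunk_start=12,  # ticks elapsed since the current chunk began executing
    inference_latency_steps=5,        # measured round-trip inference latency, in ticks
)

# The policy server would then pass these stats through to the policy, e.g.:
# action_chunk = policy.predict_action_chunk(batch, async_stats=stats)
```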
4 changes: 2 additions & 2 deletions src/lerobot/policies/act/modeling_act.py
@@ -108,7 +108,7 @@ def reset(self):
self._action_queue = deque([], maxlen=self.config.n_action_steps)

@torch.no_grad()
def select_action(self, batch: dict[str, Tensor]) -> Tensor:
def select_action(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Select a single action given environment observations.

This method wraps `select_actions` in order to return one action at a time for execution in the
@@ -133,7 +133,7 @@ def select_action(self, batch: dict[str, Tensor]) -> Tensor:
return self._action_queue.popleft()

@torch.no_grad()
def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Predict a chunk of actions given environment observations."""
self.eval()

4 changes: 2 additions & 2 deletions src/lerobot/policies/diffusion/modeling_diffusion.py
@@ -100,7 +100,7 @@ def reset(self):
self._queues["observation.environment_state"] = deque(maxlen=self.config.n_obs_steps)

@torch.no_grad()
def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Predict a chunk of actions given environment observations."""
# stack n latest observations from the queue
batch = {k: torch.stack(list(self._queues[k]), dim=1) for k in batch if k in self._queues}
@@ -112,7 +112,7 @@ def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
return actions

@torch.no_grad()
def select_action(self, batch: dict[str, Tensor]) -> Tensor:
def select_action(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Select a single action given environment observations.

This method handles caching a history of observations and an action trajectory generated by the
4 changes: 2 additions & 2 deletions src/lerobot/policies/pi0/modeling_pi0.py
@@ -261,12 +261,12 @@ def get_optim_params(self) -> dict:
return self.parameters()

@torch.no_grad()
def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Predict a chunk of actions given environment observations."""
raise NotImplementedError("Currently not implemented for PI0")

@torch.no_grad()
def select_action(self, batch: dict[str, Tensor], noise: Tensor | None = None) -> Tensor:
def select_action(self, batch: dict[str, Tensor], noise: Tensor | None = None, **kwargs) -> Tensor:
"""Select a single action given environment observations.

This method wraps `select_actions` in order to return one action at a time for execution in the
4 changes: 2 additions & 2 deletions src/lerobot/policies/pi0fast/modeling_pi0fast.py
@@ -193,12 +193,12 @@ def _pi_aloha_encode_actions_inv(self, actions):
return actions

@torch.no_grad()
def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Predict a chunk of actions given environment observations."""
raise NotImplementedError("Currently not implemented for PI0FAST")

@torch.no_grad()
def select_action(self, batch: dict[str, Tensor]) -> Tensor:
def select_action(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Select a single action given environment observations.

This method wraps `select_actions` in order to return one action at a time for execution in the
4 changes: 2 additions & 2 deletions src/lerobot/policies/pretrained.py
@@ -172,7 +172,7 @@ def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict | None]:
raise NotImplementedError

@abc.abstractmethod
def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Returns the action chunk (for action chunking policies) for a given observation, potentially in batch mode.

Child classes using action chunking should use this method within `select_action` to form the action chunk
@@ -181,7 +181,7 @@ def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
raise NotImplementedError

@abc.abstractmethod
def select_action(self, batch: dict[str, Tensor]) -> Tensor:
def select_action(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Return one action to run in the environment (potentially in batch mode).

When the model uses a history of observations, or outputs a sequence of actions, this method deals
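Note (illustrative): widening every `select_action` / `predict_action_chunk` signature with `**kwargs` lets the async policy server pass extra inference-time context (such as `AsyncStats`) without breaking policies that ignore it. A self-contained sketch of the pattern, using a toy policy:

```python
# Toy policy, for illustration only; the `async_stats` keyword is an assumption.
import torch
from torch import Tensor


class ToyChunkingPolicy:
    def __init__(self, chunk_size: int = 50, action_dim: int = 6):
        self.chunk_size = chunk_size
        self.action_dim = action_dim

    @torch.no_grad()
    def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
        async_stats = kwargs.get("async_stats")  # None for synchronous callers
        if async_stats is not None:
            # A real policy could use async_stats.inference_latency_steps to pick
            # the RTC inference delay d; this toy policy ignores it.
            pass
        batch_size = next(iter(batch.values())).shape[0]
        # A real policy would run its model here; the toy returns zeros.
        return torch.zeros(batch_size, self.chunk_size, self.action_dim)
```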
4 changes: 2 additions & 2 deletions src/lerobot/policies/sac/modeling_sac.py
@@ -78,12 +78,12 @@ def reset(self):
pass

@torch.no_grad()
def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Predict a chunk of actions given environment observations."""
raise NotImplementedError("SACPolicy does not support action chunking. It returns single actions!")

@torch.no_grad()
def select_action(self, batch: dict[str, Tensor]) -> Tensor:
def select_action(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""Select action for inference/evaluation"""

observations_features = None
4 changes: 2 additions & 2 deletions src/lerobot/policies/sac/reward_model/modeling_classifier.py
@@ -301,14 +301,14 @@ def get_optim_params(self):
"""Return optimizer parameters for the policy."""
return self.parameters()

def select_action(self, batch: dict[str, Tensor]) -> Tensor:
def select_action(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""
This method is required by PreTrainedPolicy but not used for reward classifiers.
The reward classifier is not an actor and does not select actions.
"""
raise NotImplementedError("Reward classifiers do not select actions")

def predict_action_chunk(self, batch: dict[str, Tensor]) -> Tensor:
def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
"""
This method is required by PreTrainedPolicy but not used for reward classifiers.
The reward classifier is not an actor and does not produce action chunks.
12 changes: 12 additions & 0 deletions src/lerobot/policies/smolvla/configuration_smolvla.py
@@ -57,6 +57,9 @@ class SmolVLAConfig(PreTrainedConfig):
# Gripper dimensions will remain in absolute values.
use_delta_joint_actions_aloha: bool = False

# Whether to compile parts of the model using torch.compile. Improves inference speed but increases memory usage and startup time.
compile_model: bool = False

# Tokenizer
tokenizer_max_length: int = 48

@@ -101,6 +104,15 @@ class SmolVLAConfig(PreTrainedConfig):
min_period: float = 4e-3 # sensitivity range for the timestep used in sine-cosine positional encoding
max_period: float = 4.0

# Inference settings
inference_enable_rtc: bool = False # Whether to enable real-time action chunking (RTC): https://www.physicalintelligence.company/research/real_time_chunking
inference_rtc_d: int = (
-1
) # Inference delay (in action steps). If -1, it is set automatically based on roundtrip inference time.
inference_rtc_soft_mask_length: int = -1 # The length of the soft mask for RTC (in action steps). If -1, it is set automatically to chunk_size - d - t
inference_rtc_beta: float = 5.0 # RTC maximum guidance weight.
inference_rtc_debug: bool = False # Whether to enable debug mode for RTC. Will print debug information for RTC. RTC denoising will be slower.

def __post_init__(self):
super().__post_init__()

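Note (illustrative): the new `inference_rtc_*` options control how a freshly denoised chunk is reconciled with the remainder of the chunk currently being executed: the first `d` steps are already committed, the next `soft_mask_length` steps are softly constrained toward the old chunk, and the remaining steps are free, with `beta` capping the guidance strength. The sketch below is one plausible reading of that weighting (hard prefix, linear decay over the soft mask); the exact schedule used during denoising in this PR and in the RTC paper may differ:

```python
# Illustrative guidance-weight schedule; an assumption, not the PR's implementation.
import torch


def rtc_guidance_weights(chunk_size: int, d: int, soft_mask_length: int, beta: float) -> torch.Tensor:
    """Return a (chunk_size,) tensor of per-step guidance weights.

    - steps [0, d): already executing -> full weight beta (effectively frozen)
    - steps [d, d + soft_mask_length): soft mask, decaying linearly from beta toward 0
    - remaining steps: unconstrained -> weight 0
    """
    weights = torch.zeros(chunk_size)
    weights[:d] = beta
    s = min(soft_mask_length, max(0, chunk_size - d))
    if s > 0:
        decay = torch.linspace(1.0, 0.0, steps=s + 2)[1:-1]  # s values strictly in (0, 1)
        weights[d : d + s] = beta * decay
    return weights


# Values loosely inspired by the commit history (chunk_size 100, soft mask 75); d=10 is hypothetical.
w = rtc_guidance_weights(chunk_size=100, d=10, soft_mask_length=75, beta=5.0)
```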