diff --git a/metadrive/component/sensors/base_camera.py b/metadrive/component/sensors/base_camera.py index 0971cdbf8..61e2ec1e5 100644 --- a/metadrive/component/sensors/base_camera.py +++ b/metadrive/component/sensors/base_camera.py @@ -1,7 +1,9 @@ -import numpy as np from typing import Union -from panda3d.core import NodePath + import cv2 +import numpy as np +from panda3d.core import NodePath + from metadrive.component.sensors.base_sensor import BaseSensor from metadrive.utils.cuda import check_cudart_err @@ -17,6 +19,7 @@ from panda3d.core import Vec3 from metadrive.engine.core.image_buffer import ImageBuffer +from metadrive import constants class BaseCamera(ImageBuffer, BaseSensor): @@ -118,7 +121,7 @@ def perceive( self, to_float=True, new_parent_node: Union[NodePath, None] = None, position=None, hpr=None ) -> np.ndarray: """ - When clip is set to False, the image will be represented by unit8 with component value ranging from [0-255]. + When to_float is set to False, the image will be represented by uint8 with component value ranging from [0-255]. Otherwise, it will be float type with component value ranging from [0.-1.]. By default, the reset parameters are all None. In this case, the camera will render the result with poses and position set by track() function. @@ -127,15 +130,29 @@ def perceive( camera to capture a new image and return the camera to the owner. This usually happens when using one camera to render multiple times from different positions and poses. - new_parent_node should be a NodePath like object.origin and vehicle.origin or self.engine.origin, which - means the world origin. When new_parent_node is set, both position and hpr have to be set as well. The position + new_parent_node should be a NodePath like object.origin or vehicle.origin or self.engine.origin, which + means the world origin. When new_parent_node is set, both position and hpr have to be set as well. 
The position and hpr are all 3-dim vector representing: 1) the relative position to the reparent node 2) the heading/pitch/roll of the sensor - """ + Args: + to_float: When to_float is set to False, the image will be represented by uint8 with component value ranging + from [0-255]. Otherwise, it will be float type with component value ranging from [0.-1.]. + new_parent_node: new_parent_node should be a NodePath like object.origin or vehicle.origin or + self.engine.origin, which means the world origin. When new_parent_node is set, both position and hpr + have to be set as well. The position and hpr are all 3-dim vector representing: + position: the relative position to the reparent node + hpr: the heading/pitch/roll of the sensor + + Return: + Array representing the image. + """ if new_parent_node: - assert position and hpr, "When new_parent_node is set, both position and hpr should be set as well" + if position is None: + position = constants.DEFAULT_SENSOR_OFFSET + if hpr is None: + hpr = constants.DEFAULT_SENSOR_HPR # return camera to original state original_object = self.cam.getParent() diff --git a/metadrive/constants.py b/metadrive/constants.py index 4c306d80b..00f5cc5bd 100644 --- a/metadrive/constants.py +++ b/metadrive/constants.py @@ -542,3 +542,7 @@ class CameraTagStateKey: RGB = "rgb" Depth = "depth" Semantic = "semantic" + + +DEFAULT_SENSOR_OFFSET = (0., 0.8, 1.5) +DEFAULT_SENSOR_HPR = (0., 0.0, 0.0) diff --git a/metadrive/engine/core/main_camera.py b/metadrive/engine/core/main_camera.py index f07bb42e9..46fc453bb 100644 --- a/metadrive/engine/core/main_camera.py +++ b/metadrive/engine/core/main_camera.py @@ -23,6 +23,7 @@ except ImportError: _cuda_enable = False from metadrive.component.sensors.base_sensor import BaseSensor +from metadrive import constants class MainCamera(BaseSensor): @@ -451,7 +452,7 @@ def perceive( self, to_float=True, new_parent_node: Union[NodePath, None] = None, position=None, hpr=None ) -> np.ndarray: """ - When clip is 
set to False, the image will be represented by unit8 with component value ranging from [0-255]. + When to_float is set to False, the image will be represented by uint8 with component value ranging from [0-255]. Otherwise, it will be float type with component value ranging from [0.-1.]. By default, the reset parameters are all None. In this case, the camera will render the result with poses and position set by track() function. @@ -460,15 +461,30 @@ def perceive( camera to capture a new image and return the camera to the owner. This usually happens when using one camera to render multiple times from different positions and poses. - new_parent_node should be a NodePath like object.origin and vehicle.origin or self.engine.origin, which + new_parent_node should be a NodePath like object.origin or vehicle.origin or self.engine.origin, which means the world origin. When new_parent_node is set, both position and hpr have to be set as well. The position and hpr are all 3-dim vector representing: 1) the relative position to the reparent node 2) the heading/pitch/roll of the sensor + + Args: + to_float: When to_float is set to False, the image will be represented by uint8 with component value ranging + from [0-255]. Otherwise, it will be float type with component value ranging from [0.-1.]. + new_parent_node: new_parent_node should be a NodePath like object.origin or vehicle.origin or + self.engine.origin, which means the world origin. When new_parent_node is set, both position and hpr + have to be set as well. The position and hpr are all 3-dim vector representing: + position: the relative position to the reparent node + hpr: the heading/pitch/roll of the sensor + + Return: + Array representing the image. 
""" if new_parent_node: - assert position and hpr, "When new_parent_node is set, both position and hpr should be set as well" + if position is None: + position = constants.DEFAULT_SENSOR_OFFSET + if hpr is None: + position = constants.DEFAULT_SENSOR_HPR # return camera to original state original_object = self.camera.getParent() diff --git a/metadrive/envs/base_env.py b/metadrive/envs/base_env.py index 47e47f188..8846238d9 100644 --- a/metadrive/envs/base_env.py +++ b/metadrive/envs/base_env.py @@ -1,6 +1,4 @@ import logging -from metadrive.obs.image_obs import ImageStateObservation -from metadrive.obs.state_obs import LidarStateObservation import time from collections import defaultdict from typing import Union, Dict, AnyStr, Optional, Tuple, Callable @@ -9,6 +7,7 @@ import numpy as np from panda3d.core import PNMImage +from metadrive import constants from metadrive.component.sensors.base_camera import BaseCamera from metadrive.component.sensors.dashboard import DashBoard from metadrive.component.sensors.distance_detector import LaneLineDetector, SideDetector @@ -22,8 +21,10 @@ from metadrive.manager.agent_manager import VehicleAgentManager from metadrive.manager.record_manager import RecordManager from metadrive.manager.replay_manager import ReplayManager -from metadrive.obs.observation_base import DummyObservation +from metadrive.obs.image_obs import ImageStateObservation from metadrive.obs.observation_base import BaseObservation +from metadrive.obs.observation_base import DummyObservation +from metadrive.obs.state_obs import LidarStateObservation from metadrive.policy.env_input_policy import EnvInputPolicy from metadrive.scenario.utils import convert_recorded_scenario_exported from metadrive.utils import Config, merge_dicts, get_np_random, concat_step_infos @@ -434,7 +435,7 @@ def step(self, actions: Union[Union[np.ndarray, list], Dict[AnyStr, Union[list, return self._get_step_return(actions, engine_info=engine_info) # collect observation, reward, termination 
def _preprocess_actions(self, actions: Union[np.ndarray, Dict[AnyStr, np.ndarray], int]) \ - -> Union[np.ndarray, Dict[AnyStr, np.ndarray], int]: + -> Union[np.ndarray, Dict[AnyStr, np.ndarray], int]: if not self.is_multi_agent: actions = {v_id: actions for v_id in self.agents.keys()} else: @@ -887,7 +888,8 @@ def switch_to_third_person_view(self): self.main_camera.track(current_track_agent) for name, sensor in self.engine.sensors.items(): if hasattr(sensor, "track") and name != "main_camera": - sensor.track(current_track_agent.origin, [0., 0.8, 1.5], [0, 0.59681, 0]) + camera_video_posture = [0, 0.59681, 0] + sensor.track(current_track_agent.origin, constants.DEFAULT_SENSOR_OFFSET, camera_video_posture) return def next_seed_reset(self):