added comments

JelinR · Feb 4, 2025 · 4ef1b12 · 4ef1b12
1 parent 1f275a6
commit 4ef1b12
Show file tree

Hide file tree

Showing 15 changed files with 56 additions and 17 deletions.
diff --git a/config/experiments/vlfm_objectnav_hm3d.yaml b/config/experiments/vlfm_objectnav_hm3d.yaml
@@ -28,6 +28,7 @@ habitat:
       base_explorer:
         turn_angle: 30
 
+
 habitat_baselines:
   evaluate: True
   eval_ckpt_path_dir: data/dummy_policy.pth
@@ -39,7 +40,7 @@ habitat_baselines:
   video_dir: "video_dir"
   test_episode_count: -1
   checkpoint_folder: "data/new_checkpoints"
-  trainer_name: "vlfm"
+  trainer_name: "vlfm"                        # See vlfm/utils/vlfm_trainer.py
   num_updates: 270000
   log_interval: 10
   num_checkpoints: 100

diff --git a/scripts/launch_vlm_servers.sh b/scripts/launch_vlm_servers.sh
@@ -28,10 +28,10 @@ tmux split-window -h -t ${session_name}:0.0
 tmux split-window -h -t ${session_name}:0.2
 
 # Run commands in each pane
-tmux send-keys -t ${session_name}:0.0 "${VLFM_PYTHON} -m vlfm.vlm.grounding_dino --port ${GROUNDING_DINO_PORT}" C-m
-tmux send-keys -t ${session_name}:0.1 "${VLFM_PYTHON} -m vlfm.vlm.blip2itm --port ${BLIP2ITM_PORT}" C-m
-tmux send-keys -t ${session_name}:0.2 "${VLFM_PYTHON} -m vlfm.vlm.sam --port ${SAM_PORT}" C-m
-tmux send-keys -t ${session_name}:0.3 "${VLFM_PYTHON} -m vlfm.vlm.yolov7 --port ${YOLOV7_PORT}" C-m
+tmux send-keys -t ${session_name}:0.0 "${VLFM_PYTHON} -m vlfm.vlm.grounding_dino --port ${GROUNDING_DINO_PORT}" C-m     # Issue encountered: needed a local installation
+tmux send-keys -t ${session_name}:0.1 "${VLFM_PYTHON} -m vlfm.vlm.blip2itm --port ${BLIP2ITM_PORT}" C-m                 # Issue encountered: the port was already occupied
+tmux send-keys -t ${session_name}:0.2 "${VLFM_PYTHON} -m vlfm.vlm.sam --port ${SAM_PORT}" C-m                           # Issue encountered: needed a local installation
+tmux send-keys -t ${session_name}:0.3 "${VLFM_PYTHON} -m vlfm.vlm.yolov7 --port ${YOLOV7_PORT}" C-m                     # Issue encountered: attempt_load could not be loaded because seaborn was not installed
 
 # Attach to the tmux session to view the windows
 echo "Created tmux session '${session_name}'. You must wait up to 90 seconds for the model weights to finish being loaded."

diff --git a/vlfm/__init__.py b/vlfm/__init__.py
@@ -1 +1 @@
-# Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved.
+# Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved.
diff --git a/vlfm/mapping/object_point_cloud_map.py b/vlfm/mapping/object_point_cloud_map.py
@@ -53,6 +53,8 @@ def update_map(
         else:
             # Mark all points of local_cloud whose distance from the camera is too far
             # as being out of range
+
+            # TODO: Should the range check below use (max_depth - min_depth) instead of max_depth alone?
             within_range = (local_cloud[:, 0] <= max_depth * 0.95) * 1.0  # 5% margin
             # All values of 1 in within_range will be considered within range, and all
             # values of 0 will be considered out of range; these 0s need to be

diff --git a/vlfm/mapping/obstacle_map.py b/vlfm/mapping/obstacle_map.py
@@ -5,7 +5,6 @@
 import cv2
 import numpy as np
 
-#TODO: Where to get these from?
 # These involve getting frontiers and updating explored areas
 from frontier_exploration.frontier_detection import detect_frontier_waypoints
 from frontier_exploration.utils.fog_of_war import reveal_fog_of_war
@@ -36,9 +35,9 @@ def __init__(
         pixels_per_meter: int = 20,
     ):
         super().__init__(size, pixels_per_meter)
-        self.explored_area = np.zeros((size, size), dtype=bool)
-        self._map = np.zeros((size, size), dtype=bool)
-        self._navigable_map = np.zeros((size, size), dtype=bool)
+        self.explored_area = np.zeros((size, size), dtype=bool)     # Explored areas
+        self._map = np.zeros((size, size), dtype=bool)              # Obstacles
+        self._navigable_map = np.zeros((size, size), dtype=bool)    # Navigable space (inverse of the obstacle map, also accounting for the robot radius)
         self._min_height = min_height
         self._max_height = max_height
         self._area_thresh_in_pixels = area_thresh * (self.pixels_per_meter**2)

diff --git a/vlfm/mapping/value_map.py b/vlfm/mapping/value_map.py
@@ -51,7 +51,7 @@ def __init__(
     ) -> None:
         """
         Args:
-            value_channels: The number of channels in the value map.
+            value_channels: The number of channels in the value map. (TODO: Clarify what the channels correspond to. Are they the two channels for semantic scores and confidence scores?)
             size: The size of the value map in pixels.
             use_max_confidence: Whether to use the maximum confidence value in the value
                 map or a weighted average confidence value.
@@ -198,13 +198,15 @@ def visualize(
         if obstacle_map is not None:
             reduced_map[obstacle_map.explored_area == 0] = 0
         map_img = np.flipud(reduced_map)
+
         # Make all 0s in the value map equal to the max value, so they don't throw off
         # the color mapping (will revert later)
         zero_mask = map_img == 0
         map_img[zero_mask] = np.max(map_img)
         map_img = monochannel_to_inferno_rgb(map_img)
         # Revert all values that were originally zero to white
         map_img[zero_mask] = (255, 255, 255)
+
         if len(self._camera_positions) > 0:
             self._traj_vis.draw_trajectory(
                 map_img,

diff --git a/vlfm/policy/base_objectnav_policy.py b/vlfm/policy/base_objectnav_policy.py
@@ -31,6 +31,7 @@ class BasePolicy:  # type: ignore
         pass
 
 
+
 class BaseObjectNavPolicy(BasePolicy):
     _target_object: str = ""
     _policy_info: Dict[str, Any] = {}
@@ -64,9 +65,11 @@ def __init__(
         self._object_detector = GroundingDINOClient(port=int(os.environ.get("GROUNDING_DINO_PORT", "12181")))
         self._coco_object_detector = YOLOv7Client(port=int(os.environ.get("YOLOV7_PORT", "12184")))
         self._mobile_sam = MobileSAMClient(port=int(os.environ.get("SAM_PORT", "12183")))
+
         self._use_vqa = use_vqa
         if use_vqa:
             self._vqa = BLIP2Client(port=int(os.environ.get("BLIP2_PORT", "12185")))
+
         self._pointnav_policy = WrappedPointNavResNetPolicy(pointnav_policy_path)
         self._object_map: ObjectPointCloudMap = ObjectPointCloudMap(erosion_size=object_map_erosion_size)
         self._depth_image_shape = tuple(depth_image_shape)
@@ -154,6 +157,7 @@ def _pre_step(self, observations: "TensorDict", masks: Tensor) -> None:
         if not self._did_reset and masks[0] == 0:
             self._reset()
             self._target_object = observations["objectgoal"]
+
         try:
             self._cache_observations(observations)
         except IndexError as e:
@@ -258,6 +262,7 @@ def _pointnav(self, goal: np.ndarray, stop: bool = False) -> Tensor:
                 self._pointnav_policy.reset()
                 masks = torch.zeros_like(masks)
             self._last_goal = goal
+
         robot_xy = self._observations_cache["robot_xy"]
         heading = self._observations_cache["robot_heading"]
         rho, theta = rho_theta(robot_xy, heading, goal)
@@ -308,7 +313,21 @@ def _update_object_map(
         Returns:
             ObjectDetections: The object detections from the object detector.
         """
-        detections = self._get_object_detections(rgb)
+        #detections = self._get_object_detections(rgb)
+
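+        # TODO: Temporary change: the detector call above is commented out and an empty ObjectDetections is used in its place.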
+        print('here')
+        from vlfm.vlm.detections import ObjectDetections
+        import torch
+
+        detections = ObjectDetections(
+            image_source = rgb,
+            boxes = torch.tensor([]),
+            logits = torch.tensor([]),
+            phrases = [],
+            fmt='xyxy'
+        )
+
         height, width = rgb.shape[:2]
         self._object_masks = np.zeros((height, width), dtype=np.uint8)
         if np.array_equal(depth, np.ones_like(depth)) and detections.num_detections > 0:

diff --git a/vlfm/policy/base_policy.py b/vlfm/policy/base_policy.py
@@ -10,6 +10,7 @@
 from habitat_baselines.rl.ppo.policy import PolicyActionData
 
 
+
 @baseline_registry.register_policy
 class BasePolicy(Policy):
     """The bare minimum needed to load a policy for evaluation using ppo_trainer.py"""

diff --git a/vlfm/policy/habitat_policies.py b/vlfm/policy/habitat_policies.py
@@ -118,6 +118,7 @@ def from_config(cls, config: DictConfig, *args_unused: Any, **kwargs_unused: Any
 
         return cls(**kwargs)
 
+
     def act(
         self: Union["HabitatMixin", BaseObjectNavPolicy],
         observations: TensorDict,
@@ -129,13 +130,15 @@ def act(
         """Converts object ID to string name, returns action as PolicyActionData"""
         object_id: int = observations[ObjectGoalSensor.cls_uuid][0].item()
         obs_dict = observations.to_tree()
+
         if self._dataset_type == "hm3d":
             obs_dict[ObjectGoalSensor.cls_uuid] = HM3D_ID_TO_NAME[object_id]
         elif self._dataset_type == "mp3d":
             obs_dict[ObjectGoalSensor.cls_uuid] = MP3D_ID_TO_NAME[object_id]
             self._non_coco_caption = " . ".join(MP3D_ID_TO_NAME).replace("|", " . ") + " ."
         else:
             raise ValueError(f"Dataset type {self._dataset_type} not recognized")
+
         parent_cls: BaseObjectNavPolicy = super()  # type: ignore
         try:
             action, rnn_hidden_states = parent_cls.act(obs_dict, rnn_hidden_states, prev_actions, masks, deterministic)
@@ -182,6 +185,7 @@ def _cache_observations(self: Union["HabitatMixin", BaseObjectNavPolicy], observ
         depth = observations["depth"][0].cpu().numpy()
         x, y = observations["gps"][0].cpu().numpy()
         camera_yaw = observations["compass"][0].cpu().item()
+
         depth = filter_depth(depth.reshape(depth.shape[:2]), blur_type=None)
         # Habitat GPS makes west negative, so flip y
         camera_position = np.array([x, -y, self._camera_height])

diff --git a/vlfm/policy/itm_policy.py b/vlfm/policy/itm_policy.py
@@ -23,6 +23,7 @@
 PROMPT_SEPARATOR = "|"
 
 
+
 class BaseITMPolicy(BaseObjectNavPolicy):
     _target_object_color: Tuple[int, int, int] = (0, 255, 0)
     _selected__frontier_color: Tuple[int, int, int] = (0, 255, 255)

diff --git a/vlfm/run.py b/vlfm/run.py
@@ -30,10 +30,10 @@ class HabitatConfigPlugin(SearchPathPlugin):
     def manipulate_search_path(self, search_path: ConfigSearchPath) -> None:
         search_path.append(provider="habitat", path="config/")
 
-
+#Registers habitat config dir path to the ConfigSearchPath globally
 register_hydra_plugin(HabitatConfigPlugin)
 
-
+#Loads the VLFM config dir locally in this script
 @hydra.main(
     version_base=None,
     config_path="../config",
@@ -52,6 +52,7 @@ def main(cfg: DictConfig) -> None:
             cfg.habitat.simulator.agents.main_agent.sim_sensors.pop("semantic_sensor")
         except KeyError:
             pass
+
     execute_exp(cfg, "eval" if cfg.habitat_baselines.evaluate else "train")
 
 

diff --git a/vlfm/utils/habitat_visualizer.py b/vlfm/utils/habitat_visualizer.py
@@ -49,6 +49,7 @@ def collect_data(
     ) -> None:
         assert len(infos) == 1, "Only support one environment for now"
 
+        #Obtain depth observation
         if "annotated_depth" in policy_info[0]:
             depth = policy_info[0]["annotated_depth"]
             self.using_annotated_depth = True
@@ -58,6 +59,7 @@ def collect_data(
         depth = overlay_frame(depth, infos[0])
         self.depth.append(depth)
 
+        #Obtain RGB Observation
         if "annotated_rgb" in policy_info[0]:
             rgb = policy_info[0]["annotated_rgb"]
             self.using_annotated_rgb = True
@@ -68,8 +70,10 @@ def collect_data(
         # Visualize target point cloud on the map
         color_point_cloud_on_map(infos, policy_info)
 
+        #Get Top Down Map for the Habitat Scene
         map = maps.colorize_draw_agent_and_fit_to_height(infos[0]["top_down_map"], self.depth[0].shape[0])
         self.maps.append(map)
+
         vis_map_imgs = [
             self._reorient_rescale_habitat_map(infos, policy_info[0][vkey])
             for vkey in ["obstacle_map", "value_map"]

diff --git a/vlfm/utils/vlfm_trainer.py b/vlfm/utils/vlfm_trainer.py
@@ -96,9 +96,12 @@ def _eval_checkpoint(
         if config.habitat_baselines.verbose:
             logger.info(f"env config: {OmegaConf.to_yaml(config)}")
 
+
         self._init_envs(config, is_eval=True)
 
-        self._agent = self._create_agent(None)
+        #TODO: Checkpoint - habitat_baselines -> rl -> ppo -> single_agent_access_mgr.py
+        #TODO: Checkpoint - habitat_baselines -> rl -> multi-agent -> pop_play_wrappers.py
+        self._agent = self._create_agent(None) 
         action_shape, discrete_actions = get_action_space_info(self._agent.policy_action_space)
 
         if self._agent.actor_critic.should_load_agent_state:
@@ -154,7 +157,7 @@ def _eval_checkpoint(
         assert number_of_eval_episodes > 0, "You must specify a number of evaluation episodes with test_episode_count"
 
         pbar = tqdm.tqdm(total=number_of_eval_episodes * evals_per_ep)
-        self._agent.eval()
+        self._agent.eval()  # TODO: Confirm what eval() does here (presumably puts the agent in evaluation mode)
 
         from vlfm.utils.habitat_visualizer import HabitatVis
 
@@ -165,7 +168,7 @@ def _eval_checkpoint(
             current_episodes_info = self.envs.current_episodes()
 
             with inference_mode():
-                action_data = self._agent.actor_critic.act(
+                action_data = self._agent.actor_critic.act(     #The output action_data should contain the policy_info
                     batch,
                     test_recurrent_hidden_states,
                     prev_actions,

diff --git a/vlfm/vlm/blip2itm.py b/vlfm/vlm/blip2itm.py
@@ -67,6 +67,7 @@ def cosine(self, image: np.ndarray, txt: str) -> float:
 if __name__ == "__main__":
     import argparse
 
+
     parser = argparse.ArgumentParser()
     parser.add_argument("--port", type=int, default=12182)
     args = parser.parse_args()

diff --git a/vlfm/vlm/yolov7.py b/vlfm/vlm/yolov7.py
@@ -12,6 +12,7 @@
 
 from .server_wrapper import ServerMixin, host_model, send_request, str_to_image
 
+
 sys.path.insert(0, "yolov7/")
 try:
     from models.experimental import attempt_load  # noqa: E402
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		# Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved.
		# Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved.
-Original file line number
+Diff line change
@@ Expand Up / @@ -12,6 +12,7 @@ @@
     from .server_wrapper import ServerMixin, host_model, send_request, str_to_image
     sys.path.insert(0, "yolov7/")
     try:
         from models.experimental import attempt_load  # noqa: E402
@@ Expand Down @@