Skip to content

Commit

Permalink
added comments
Browse files Browse the repository at this point in the history
  • Loading branch information
JelinR committed Feb 4, 2025
1 parent 1f275a6 commit 4ef1b12
Show file tree
Hide file tree
Showing 15 changed files with 56 additions and 17 deletions.
3 changes: 2 additions & 1 deletion config/experiments/vlfm_objectnav_hm3d.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ habitat:
base_explorer:
turn_angle: 30


habitat_baselines:
evaluate: True
eval_ckpt_path_dir: data/dummy_policy.pth
Expand All @@ -39,7 +40,7 @@ habitat_baselines:
video_dir: "video_dir"
test_episode_count: -1
checkpoint_folder: "data/new_checkpoints"
trainer_name: "vlfm"
trainer_name: "vlfm"  # See vlfm/utils/vlfm_trainer.py
num_updates: 270000
log_interval: 10
num_checkpoints: 100
Expand Down
8 changes: 4 additions & 4 deletions scripts/launch_vlm_servers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ tmux split-window -h -t ${session_name}:0.0
tmux split-window -h -t ${session_name}:0.2

# Run commands in each pane
tmux send-keys -t ${session_name}:0.0 "${VLFM_PYTHON} -m vlfm.vlm.grounding_dino --port ${GROUNDING_DINO_PORT}" C-m
tmux send-keys -t ${session_name}:0.1 "${VLFM_PYTHON} -m vlfm.vlm.blip2itm --port ${BLIP2ITM_PORT}" C-m
tmux send-keys -t ${session_name}:0.2 "${VLFM_PYTHON} -m vlfm.vlm.sam --port ${SAM_PORT}" C-m
tmux send-keys -t ${session_name}:0.3 "${VLFM_PYTHON} -m vlfm.vlm.yolov7 --port ${YOLOV7_PORT}" C-m
tmux send-keys -t ${session_name}:0.0 "${VLFM_PYTHON} -m vlfm.vlm.grounding_dino --port ${GROUNDING_DINO_PORT}" C-m # NOTE: needed a local installation (presumably of GroundingDINO) before this would run
tmux send-keys -t ${session_name}:0.1 "${VLFM_PYTHON} -m vlfm.vlm.blip2itm --port ${BLIP2ITM_PORT}" C-m # NOTE: fails if the port is already in use
tmux send-keys -t ${session_name}:0.2 "${VLFM_PYTHON} -m vlfm.vlm.sam --port ${SAM_PORT}" C-m # NOTE: needed a local installation (presumably of SAM) before this would run
tmux send-keys -t ${session_name}:0.3 "${VLFM_PYTHON} -m vlfm.vlm.yolov7 --port ${YOLOV7_PORT}" C-m # NOTE: attempt_load could not be imported until seaborn was installed

# Attach to the tmux session to view the windows
echo "Created tmux session '${session_name}'. You must wait up to 90 seconds for the model weights to finish being loaded."
Expand Down
2 changes: 1 addition & 1 deletion vlfm/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved.
# Copyright (c) 2023 Boston Dynamics AI Institute LLC. All rights reserved.
2 changes: 2 additions & 0 deletions vlfm/mapping/object_point_cloud_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ def update_map(
else:
# Mark all points of local_cloud whose distance from the camera is too far
# as being out of range

# TODO: Should the range threshold be based on (max_depth - min_depth) rather than max_depth alone?
within_range = (local_cloud[:, 0] <= max_depth * 0.95) * 1.0 # 5% margin
# All values of 1 in within_range will be considered within range, and all
# values of 0 will be considered out of range; these 0s need to be
Expand Down
7 changes: 3 additions & 4 deletions vlfm/mapping/obstacle_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import cv2
import numpy as np

#TODO: Where to get these from?
# These involve getting frontiers and updating explored areas
from frontier_exploration.frontier_detection import detect_frontier_waypoints
from frontier_exploration.utils.fog_of_war import reveal_fog_of_war
Expand Down Expand Up @@ -36,9 +35,9 @@ def __init__(
pixels_per_meter: int = 20,
):
super().__init__(size, pixels_per_meter)
self.explored_area = np.zeros((size, size), dtype=bool)
self._map = np.zeros((size, size), dtype=bool)
self._navigable_map = np.zeros((size, size), dtype=bool)
self.explored_area = np.zeros((size, size), dtype=bool)  # Explored areas
self._map = np.zeros((size, size), dtype=bool)  # Obstacles
self._navigable_map = np.zeros((size, size), dtype=bool)  # Navigable space (inverse of the obstacle map, also accounting for the robot radius)
self._min_height = min_height
self._max_height = max_height
self._area_thresh_in_pixels = area_thresh * (self.pixels_per_meter**2)
Expand Down
4 changes: 3 additions & 1 deletion vlfm/mapping/value_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def __init__(
) -> None:
"""
Args:
value_channels: The number of channels in the value map.
value_channels: The number of channels in the value map. TODO: clarify what the channels correspond to (possibly one for semantic scores and one for confidence scores?).
size: The size of the value map in pixels.
use_max_confidence: Whether to use the maximum confidence value in the value
map or a weighted average confidence value.
Expand Down Expand Up @@ -198,13 +198,15 @@ def visualize(
if obstacle_map is not None:
reduced_map[obstacle_map.explored_area == 0] = 0
map_img = np.flipud(reduced_map)

# Make all 0s in the value map equal to the max value, so they don't throw off
# the color mapping (will revert later)
zero_mask = map_img == 0
map_img[zero_mask] = np.max(map_img)
map_img = monochannel_to_inferno_rgb(map_img)
# Revert all values that were originally zero to white
map_img[zero_mask] = (255, 255, 255)

if len(self._camera_positions) > 0:
self._traj_vis.draw_trajectory(
map_img,
Expand Down
21 changes: 20 additions & 1 deletion vlfm/policy/base_objectnav_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class BasePolicy: # type: ignore
pass



class BaseObjectNavPolicy(BasePolicy):
_target_object: str = ""
_policy_info: Dict[str, Any] = {}
Expand Down Expand Up @@ -64,9 +65,11 @@ def __init__(
self._object_detector = GroundingDINOClient(port=int(os.environ.get("GROUNDING_DINO_PORT", "12181")))
self._coco_object_detector = YOLOv7Client(port=int(os.environ.get("YOLOV7_PORT", "12184")))
self._mobile_sam = MobileSAMClient(port=int(os.environ.get("SAM_PORT", "12183")))

self._use_vqa = use_vqa
if use_vqa:
self._vqa = BLIP2Client(port=int(os.environ.get("BLIP2_PORT", "12185")))

self._pointnav_policy = WrappedPointNavResNetPolicy(pointnav_policy_path)
self._object_map: ObjectPointCloudMap = ObjectPointCloudMap(erosion_size=object_map_erosion_size)
self._depth_image_shape = tuple(depth_image_shape)
Expand Down Expand Up @@ -154,6 +157,7 @@ def _pre_step(self, observations: "TensorDict", masks: Tensor) -> None:
if not self._did_reset and masks[0] == 0:
self._reset()
self._target_object = observations["objectgoal"]

try:
self._cache_observations(observations)
except IndexError as e:
Expand Down Expand Up @@ -258,6 +262,7 @@ def _pointnav(self, goal: np.ndarray, stop: bool = False) -> Tensor:
self._pointnav_policy.reset()
masks = torch.zeros_like(masks)
self._last_goal = goal

robot_xy = self._observations_cache["robot_xy"]
heading = self._observations_cache["robot_heading"]
rho, theta = rho_theta(robot_xy, heading, goal)
Expand Down Expand Up @@ -308,7 +313,21 @@ def _update_object_map(
Returns:
ObjectDetections: The object detections from the object detector.
"""
detections = self._get_object_detections(rgb)
#detections = self._get_object_detections(rgb)

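# TODO: Temporary debug stub. The object detector call is bypassed and an empty
# ObjectDetections is used instead, so no objects are ever detected. Restore
# self._get_object_detections(rgb) and remove the print before relying on this.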
print('here')
from vlfm.vlm.detections import ObjectDetections
import torch

detections = ObjectDetections(
image_source = rgb,
boxes = torch.tensor([]),
logits = torch.tensor([]),
phrases = [],
fmt='xyxy'
)

height, width = rgb.shape[:2]
self._object_masks = np.zeros((height, width), dtype=np.uint8)
if np.array_equal(depth, np.ones_like(depth)) and detections.num_detections > 0:
Expand Down
1 change: 1 addition & 0 deletions vlfm/policy/base_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from habitat_baselines.rl.ppo.policy import PolicyActionData



@baseline_registry.register_policy
class BasePolicy(Policy):
"""The bare minimum needed to load a policy for evaluation using ppo_trainer.py"""
Expand Down
4 changes: 4 additions & 0 deletions vlfm/policy/habitat_policies.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def from_config(cls, config: DictConfig, *args_unused: Any, **kwargs_unused: Any

return cls(**kwargs)


def act(
self: Union["HabitatMixin", BaseObjectNavPolicy],
observations: TensorDict,
Expand All @@ -129,13 +130,15 @@ def act(
"""Converts object ID to string name, returns action as PolicyActionData"""
object_id: int = observations[ObjectGoalSensor.cls_uuid][0].item()
obs_dict = observations.to_tree()

if self._dataset_type == "hm3d":
obs_dict[ObjectGoalSensor.cls_uuid] = HM3D_ID_TO_NAME[object_id]
elif self._dataset_type == "mp3d":
obs_dict[ObjectGoalSensor.cls_uuid] = MP3D_ID_TO_NAME[object_id]
self._non_coco_caption = " . ".join(MP3D_ID_TO_NAME).replace("|", " . ") + " ."
else:
raise ValueError(f"Dataset type {self._dataset_type} not recognized")

parent_cls: BaseObjectNavPolicy = super() # type: ignore
try:
action, rnn_hidden_states = parent_cls.act(obs_dict, rnn_hidden_states, prev_actions, masks, deterministic)
Expand Down Expand Up @@ -182,6 +185,7 @@ def _cache_observations(self: Union["HabitatMixin", BaseObjectNavPolicy], observ
depth = observations["depth"][0].cpu().numpy()
x, y = observations["gps"][0].cpu().numpy()
camera_yaw = observations["compass"][0].cpu().item()

depth = filter_depth(depth.reshape(depth.shape[:2]), blur_type=None)
# Habitat GPS makes west negative, so flip y
camera_position = np.array([x, -y, self._camera_height])
Expand Down
1 change: 1 addition & 0 deletions vlfm/policy/itm_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
PROMPT_SEPARATOR = "|"



class BaseITMPolicy(BaseObjectNavPolicy):
_target_object_color: Tuple[int, int, int] = (0, 255, 0)
_selected__frontier_color: Tuple[int, int, int] = (0, 255, 255)
Expand Down
5 changes: 3 additions & 2 deletions vlfm/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ class HabitatConfigPlugin(SearchPathPlugin):
def manipulate_search_path(self, search_path: ConfigSearchPath) -> None:
search_path.append(provider="habitat", path="config/")


# Register the Habitat config directory with the global ConfigSearchPath
register_hydra_plugin(HabitatConfigPlugin)


# Load the VLFM config directory locally for this script
@hydra.main(
version_base=None,
config_path="../config",
Expand All @@ -52,6 +52,7 @@ def main(cfg: DictConfig) -> None:
cfg.habitat.simulator.agents.main_agent.sim_sensors.pop("semantic_sensor")
except KeyError:
pass

execute_exp(cfg, "eval" if cfg.habitat_baselines.evaluate else "train")


Expand Down
4 changes: 4 additions & 0 deletions vlfm/utils/habitat_visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def collect_data(
) -> None:
assert len(infos) == 1, "Only support one environment for now"

#Obtain depth observation
if "annotated_depth" in policy_info[0]:
depth = policy_info[0]["annotated_depth"]
self.using_annotated_depth = True
Expand All @@ -58,6 +59,7 @@ def collect_data(
depth = overlay_frame(depth, infos[0])
self.depth.append(depth)

#Obtain RGB Observation
if "annotated_rgb" in policy_info[0]:
rgb = policy_info[0]["annotated_rgb"]
self.using_annotated_rgb = True
Expand All @@ -68,8 +70,10 @@ def collect_data(
# Visualize target point cloud on the map
color_point_cloud_on_map(infos, policy_info)

#Get Top Down Map for the Habitat Scene
map = maps.colorize_draw_agent_and_fit_to_height(infos[0]["top_down_map"], self.depth[0].shape[0])
self.maps.append(map)

vis_map_imgs = [
self._reorient_rescale_habitat_map(infos, policy_info[0][vkey])
for vkey in ["obstacle_map", "value_map"]
Expand Down
9 changes: 6 additions & 3 deletions vlfm/utils/vlfm_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,12 @@ def _eval_checkpoint(
if config.habitat_baselines.verbose:
logger.info(f"env config: {OmegaConf.to_yaml(config)}")


self._init_envs(config, is_eval=True)

self._agent = self._create_agent(None)
#TODO: Checkpoint - habitat_baselines -> rl -> ppo -> single_agent_access_mgr.py
#TODO: Checkpoint - habitat_baselines -> rl -> multi-agent -> pop_play_wrappers.py
self._agent = self._create_agent(None)
action_shape, discrete_actions = get_action_space_info(self._agent.policy_action_space)

if self._agent.actor_critic.should_load_agent_state:
Expand Down Expand Up @@ -154,7 +157,7 @@ def _eval_checkpoint(
assert number_of_eval_episodes > 0, "You must specify a number of evaluation episodes with test_episode_count"

pbar = tqdm.tqdm(total=number_of_eval_episodes * evals_per_ep)
self._agent.eval()
self._agent.eval()  # TODO: confirm what eval() does here (presumably switches the agent to evaluation mode)

from vlfm.utils.habitat_visualizer import HabitatVis

Expand All @@ -165,7 +168,7 @@ def _eval_checkpoint(
current_episodes_info = self.envs.current_episodes()

with inference_mode():
action_data = self._agent.actor_critic.act(
action_data = self._agent.actor_critic.act( #The output action_data should contain the policy_info
batch,
test_recurrent_hidden_states,
prev_actions,
Expand Down
1 change: 1 addition & 0 deletions vlfm/vlm/blip2itm.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def cosine(self, image: np.ndarray, txt: str) -> float:
if __name__ == "__main__":
import argparse


parser = argparse.ArgumentParser()
parser.add_argument("--port", type=int, default=12182)
args = parser.parse_args()
Expand Down
1 change: 1 addition & 0 deletions vlfm/vlm/yolov7.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from .server_wrapper import ServerMixin, host_model, send_request, str_to_image


sys.path.insert(0, "yolov7/")
try:
from models.experimental import attempt_load # noqa: E402
Expand Down

0 comments on commit 4ef1b12

Please sign in to comment.