Install fix under mac, plus some small improvements.
luigifreda committed Dec 16, 2024
1 parent 36158b4 commit 0c8aa8f
Showing 10 changed files with 134 additions and 81 deletions.
145 changes: 81 additions & 64 deletions depth_estimator_crestereo.py
@@ -22,6 +22,7 @@
 import numpy as np
 import os
 import sys
+import platform
 
 import config
 config.cfg.set_lib('crestereo')
@@ -41,6 +42,7 @@
 
 
 def enforce_megengine_linking():
+    # Experimental: trying to enforce correct linking
     import ctypes
     # Path to the directory containing your libraries
     site_packages_path = next(p for p in sys.path if 'site-packages' in p)
@@ -63,9 +65,11 @@ def enforce_megengine_linking():
     ctypes.CDLL(os.path.join(lib_path, "libmegengine_shared.so")) # Load the main library last
 
 
-#enforce_megengine_linking()
-import megengine as mge
-import crestereo.nets as crestereo_nets
+if platform.system() != 'Darwin':
+    # Unfortunately, megengine is not fully supported on macOS
+    #enforce_megengine_linking()
+    import megengine as mge
+    import crestereo.nets as crestereo_nets
 
 
 # Stereo depth prediction using the Crestereo model.
@@ -87,70 +91,83 @@ def __init__(self, device=None, camera:Camera=None,
                          dataset_env_type=dataset_env_type, precision=None)
 
     def load_model(self, model_path=kCrestereoModelPath):
-        print("DepthEstimatorCrestereo: Loading model:", os.path.abspath(model_path))
-        if not os.path.exists(model_path):
-            raise FileNotFoundError(f"Model file not found: {model_path}")
-        pretrained_dict = mge.load(model_path)
-        model = crestereo_nets.Model(max_disp=self.max_disparity, mixed_precision=False, test_mode=True)
-        model.load_state_dict(pretrained_dict["state_dict"], strict=True)
-        model.eval()
-        return model
-
-    def infer(self, image, image_right=None):
-        if image_right is None:
-            message = 'Image right is None. Are you using a stereo dataset?'
-            Printer.red(message)
-            raise ValueError(message)
+        if platform.system() != 'Darwin':
+
+            print("DepthEstimatorCrestereo: Loading model:", os.path.abspath(model_path))
+            if not os.path.exists(model_path):
+                raise FileNotFoundError(f"Model file not found: {model_path}")
+            pretrained_dict = mge.load(model_path)
+            model = crestereo_nets.Model(max_disp=self.max_disparity, mixed_precision=False, test_mode=True)
+            model.load_state_dict(pretrained_dict["state_dict"], strict=True)
+            model.eval()
+            return model
 
-        in_shape = image.shape
-        print(f'DepthEstimatorCrestereo: Running inference: {image.shape} {image_right.shape}')
-
-        # Compute disparity map
-        imgL = image.transpose(2, 0, 1)
-        imgR = image_right.transpose(2, 0, 1)
-        imgL = np.ascontiguousarray(imgL[None, :, :, :])
-        imgR = np.ascontiguousarray(imgR[None, :, :, :])
-
-        imgL = mge.tensor(imgL).astype("float32")
-        imgR = mge.tensor(imgR).astype("float32")
-
-        imgL_dw2 = mge.functional.nn.interpolate(
-            imgL,
-            size=(imgL.shape[2] // 2, imgL.shape[3] // 2),
-            mode="bilinear",
-            align_corners=True,
-        )
-        imgR_dw2 = mge.functional.nn.interpolate(
-            imgR,
-            size=(imgL.shape[2] // 2, imgL.shape[3] // 2),
-            mode="bilinear",
-            align_corners=True,
-        )
-        pred_flow_dw2 = self.model(imgL_dw2, imgR_dw2, iters=self.n_iter, flow_init=None)
-
-        pred_flow = self.model(imgL, imgR, iters=self.n_iter, flow_init=pred_flow_dw2)
-        disparity_map = mge.functional.squeeze(pred_flow[:, 0, :, :]).numpy()
+        else:
+            Printer.red("DepthEstimatorCrestereo: Not implemented for MacOS")
+            raise NotImplementedError
+        return None
+
+    def infer(self, image, image_right=None):
+        if platform.system() != 'Darwin':
+
+            if image_right is None:
+                message = 'Image right is None. Are you using a stereo dataset?'
+                Printer.red(message)
+                raise ValueError(message)
 
-        out_shape = disparity_map.shape
-        if out_shape[0] != in_shape[0] or out_shape[1] != in_shape[1]:
-            in_w = in_shape[1]
-            out_w = out_shape[1]
-            # got this formula from the original testing code
-            t = float(in_w) / float(out_w)
-            disparity_map = cv2.resize(disparity_map, (in_shape[1], in_shape[0]), interpolation=cv2.INTER_AREA)*t
-
-        self.disparity_map = disparity_map
-
-        bf = self.camera.bf if self.camera is not None else 1.0
-        if self.camera is None:
-            Printer.red('Camera is None!')
+            in_shape = image.shape
+            print(f'DepthEstimatorCrestereo: Running inference: {image.shape} {image_right.shape}')
+
+            # Compute disparity map
+            imgL = image.transpose(2, 0, 1)
+            imgR = image_right.transpose(2, 0, 1)
+            imgL = np.ascontiguousarray(imgL[None, :, :, :])
+            imgR = np.ascontiguousarray(imgR[None, :, :, :])
+
+            imgL = mge.tensor(imgL).astype("float32")
+            imgR = mge.tensor(imgR).astype("float32")
+
+            imgL_dw2 = mge.functional.nn.interpolate(
+                imgL,
+                size=(imgL.shape[2] // 2, imgL.shape[3] // 2),
+                mode="bilinear",
+                align_corners=True,
+            )
+            imgR_dw2 = mge.functional.nn.interpolate(
+                imgR,
+                size=(imgL.shape[2] // 2, imgL.shape[3] // 2),
+                mode="bilinear",
+                align_corners=True,
+            )
+            pred_flow_dw2 = self.model(imgL_dw2, imgR_dw2, iters=self.n_iter, flow_init=None)
+
+            pred_flow = self.model(imgL, imgR, iters=self.n_iter, flow_init=pred_flow_dw2)
+            disparity_map = mge.functional.squeeze(pred_flow[:, 0, :, :]).numpy()
 
-        # Compute depth map
-        abs_disparity_map = np.abs(disparity_map, dtype=float)
-        depth_map = np.where(abs_disparity_map > self.min_disparity, bf / abs_disparity_map, 0.0)
-        self.depth_map = depth_map
+            out_shape = disparity_map.shape
+            if out_shape[0] != in_shape[0] or out_shape[1] != in_shape[1]:
+                in_w = in_shape[1]
+                out_w = out_shape[1]
+                # got this formula from the original testing code
+                t = float(in_w) / float(out_w)
+                disparity_map = cv2.resize(disparity_map, (in_shape[1], in_shape[0]), interpolation=cv2.INTER_AREA)*t
+
+            self.disparity_map = disparity_map
+
+            bf = self.camera.bf if self.camera is not None else 1.0
+            if self.camera is None:
+                Printer.red('Camera is None!')
 
-        print(f'DepthEstimatorCrestereo: Depth map shape: {depth_map.shape}')
-        return depth_map
+            # Compute depth map
+            abs_disparity_map = np.abs(disparity_map, dtype=float)
+            depth_map = np.where(abs_disparity_map > self.min_disparity, bf / abs_disparity_map, 0.0)
+            self.depth_map = depth_map
+
+            print(f'DepthEstimatorCrestereo: Depth map shape: {depth_map.shape}')
+            return depth_map
+        else:
+            Printer.red("DepthEstimatorCrestereo: Not implemented for MacOS")
+            raise NotImplementedError


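Note on the depth computation in infer() above: the commit keeps the standard pinhole-stereo relation depth = bf / disparity, where bf is the stereo baseline multiplied by the focal length in pixels. A minimal self-contained sketch of this conversion (illustrative only, not part of the commit; the camera values below are made-up, KITTI-like numbers):

    import numpy as np

    def disparity_to_depth(disparity_map, bf, min_disparity=0.0):
        # depth = baseline * focal_length / disparity; 0 where disparity is invalid
        abs_disp = np.abs(disparity_map.astype(float))
        return np.where(abs_disp > min_disparity, bf / abs_disp, 0.0)

    # Hypothetical values: baseline ~0.54 m, focal ~718.86 px => bf ~388.18
    disparity = np.array([[38.8, 0.0], [19.4, 77.6]])
    print(disparity_to_depth(disparity, bf=388.18))  # ~[[10, 0], [20, 5]] meters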
9 changes: 8 additions & 1 deletion depth_estimator_depth_anything_v2.py
@@ -22,6 +22,7 @@
 import numpy as np
 import os
 import sys
+import platform
 
 import config
 config.cfg.set_lib('depth_anything_v2')
@@ -58,10 +59,16 @@ def __init__(self, device=None, camera:Camera=None,
                  encoder_name='vitl', precision=None): # or 'vits', 'vitb' we use the largest by default
         if device is None:
             device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        if device.type != 'cuda':
+            device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
         if device.type == 'cuda':
             print('DepthEstimatorDepthPro: Using CUDA')
+        elif device.type == 'mps':
+            if not torch.backends.mps.is_available(): # Should return True for MPS availability
+                raise Exception('DepthEstimatorDepthPro: MPS is not available')
+            print('DepthEstimatorDepthPro: Using MPS')
         else:
-            print('DepthEstimatorDepthPro: Using CPU')
+            print('DepthEstimatorDepthPro: Using CPU')
         transform = None
         model = DepthAnythingV2(**{**DepthEstimatorDepthAnythingV2.model_configs[encoder_name], 'max_depth': max_depth})
         dataset_name = 'vkitti' if dataset_env_type == DatasetEnvironmentType.OUTDOOR else 'hypersim'
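The device-selection change above prefers CUDA, then falls back to Apple's MPS backend, then to CPU. The same cascade as a standalone helper (an illustrative sketch, not code from the commit):

    import torch

    def pick_device() -> torch.device:
        # Prefer CUDA, then Apple MPS, then plain CPU
        if torch.cuda.is_available():
            return torch.device('cuda')
        if torch.backends.mps.is_available():
            return torch.device('mps')
        return torch.device('cpu')

    print(f'using device: {pick_device().type}')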
7 changes: 5 additions & 2 deletions depth_estimator_factory.py
@@ -25,6 +25,7 @@
 
 import config
 import torch
+import platform
 
 from camera import Camera
 from dataset import DatasetEnvironmentType
@@ -48,9 +49,9 @@ class DepthEstimatorType(SerializableEnum):
     DEPTH_SGBM = 0 # Depth SGBM from OpenCV [Stereo, it requires a stereo dataset]
     DEPTH_ANYTHING_V2 = 1 # "Depth Anything V2" [Monocular]
     DEPTH_PRO = 2 # "Depth Pro: Sharp Monocular Metric Depth in Less Than a Second" [Monocular]
-    DEPTH_RAFT_STEREO = 3 # "RAFT-Stereo: Multilevel Recurrent Field Transforms for Stereo Matching" [Stereo, it requires a stereo dataset]
+    DEPTH_RAFT_STEREO = 3 # "RAFT-Stereo: Multilevel Recurrent Field Transforms for Stereo Matching" [Stereo, it requires a stereo dataset]
     DEPTH_CRESTEREO = 4 # "Practical Stereo Matching via Cascaded Recurrent Network with Adaptive Correlation" [Stereo, it requires a stereo dataset]
-                        # Note: Unstable and with linking problems [WIP] => use DepthEstimatorType.DEPTH_CRESTEREO_PYTORCH
+                        # Note: Unstable and with linking problems under Linux, not supported under mac [WIP] => use DepthEstimatorType.DEPTH_CRESTEREO_PYTORCH
     DEPTH_CRESTEREO_PYTORCH = 5 # "Practical Stereo Matching via Cascaded Recurrent Network with Adaptive Correlation", pytorch implementation [Stereo, it requires a stereo dataset]


@@ -86,6 +87,8 @@ def depth_estimator_factory(depth_estimator_type=DepthEstimatorType.DEPTH_ANYTHING_V2,
                                        min_depth=min_depth, max_depth=max_depth,
                                        dataset_env_type=dataset_env_type)
     elif depth_estimator_type == DepthEstimatorType.DEPTH_CRESTEREO:
+        if platform.system() == 'Darwin':
+            raise ValueError('DepthEstimatorType.DEPTH_CRESTEREO is not supported on macOS. Use DepthEstimatorType.DEPTH_CRESTEREO_PYTORCH instead!')
         return DepthEstimatorCrestereo(device=device, camera=camera,
                                        min_depth=min_depth, max_depth=max_depth,
                                        dataset_env_type=dataset_env_type)
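With the new guard, requesting the megengine-based estimator on macOS fails fast with an actionable message instead of crashing inside the megengine import. A hypothetical usage sketch (not from the commit):

    from depth_estimator_factory import depth_estimator_factory, DepthEstimatorType

    try:
        estimator = depth_estimator_factory(depth_estimator_type=DepthEstimatorType.DEPTH_CRESTEREO)
    except ValueError as e:
        print(e)  # on Darwin, suggests DEPTH_CRESTEREO_PYTORCH as the replacement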
2 changes: 1 addition & 1 deletion depth_estimator_raft_stereo.py
@@ -114,7 +114,7 @@ def __init__(self, device=None, camera:Camera=None,
             raise ValueError(f'Model {restore_ckpt} does not exist')
 
         self.model = torch.nn.DataParallel(RAFTStereo(self.model_config), device_ids=[0])
-        self.model.load_state_dict(torch.load(restore_ckpt))
+        self.model.load_state_dict(torch.load(restore_ckpt, map_location='cpu'))
         self.module = self.model.module
         self.module.to(device)
         self.module.eval()
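The map_location='cpu' change matters on hosts without CUDA (including Apple-silicon Macs): a checkpoint saved from a GPU process stores tensors tagged with 'cuda:0', and torch.load would otherwise fail trying to restore them onto a missing device. A small illustration of the pattern (the path is a placeholder):

    import torch

    ckpt_path = 'models/raftstereo-middlebury.pth'  # placeholder path
    # Remapping to CPU always deserializes; the model is moved to the real device afterwards
    state_dict = torch.load(ckpt_path, map_location='cpu')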
3 changes: 2 additions & 1 deletion ground_truth.py
@@ -208,6 +208,7 @@ def getTimestampPositionAndAbsoluteScale(self, frame_id):
             abs_scale = 1
         else:
             abs_scale = np.sqrt((x - x_prev)*(x - x_prev) + (y - y_prev)*(y - y_prev) + (z - z_prev)*(z - z_prev))
+        #print(f'reading frame {frame_id}, timestamp: {timestamp:.15f}, x: {x:.15f}, y: {y:.15f}, z: {z:.15f}, scale: {abs_scale:.15f}')
         return timestamp,x,y,z,abs_scale
 
     # return timestamp, x,y,z, qx,qy,qz,qw, scale
@@ -443,7 +444,7 @@ def __init__(self, path, name, associations=None, start_frame_id=0, type = Groun
         print('base_path: ', base_path)
 
         if not os.path.isfile(self.filename):
-            error_message = f'ERROR: Groundtruth file not found: {self.filename}'
+            error_message = f'ERROR: Groundtruth file not found: {self.filename}. Use the script groundtruth/generate_euroc_groundtruths_as_tum.sh to generate these groundtruth files!'
             Printer.red(error_message)
             sys.exit(error_message)
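For context, the abs_scale computed in getTimestampPositionAndAbsoluteScale() above is just the Euclidean distance between consecutive ground-truth positions, later used as the inter-frame translation scale. Equivalently (made-up positions, for illustration):

    import numpy as np

    prev = np.array([0.0, 0.0, 0.0])
    curr = np.array([0.3, 0.0, 0.4])
    abs_scale = np.linalg.norm(curr - prev)  # sqrt(0.3**2 + 0.4**2) = 0.5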
9 changes: 6 additions & 3 deletions install_pip3_packages.sh
@@ -115,8 +115,11 @@ fi
 
 
 # crestereo
-pip install --upgrade cryptography pyOpenSSL
-python3 -m pip install megengine -f https://megengine.org.cn/whl/mge.html # This brings issues when launched in parallel processes
-#pip install megengine # This brings issues with non-supported CUDA architecture
+if [[ "$OSTYPE" != "darwin"* ]]; then
+    # Unfortunately, megengine is not supported on macOS with arm architecture
+    pip install --upgrade cryptography pyOpenSSL
+    python3 -m pip install megengine -f https://megengine.org.cn/whl/mge.html # This brings issues when launched in parallel processes
+    #pip install megengine # This brings issues with non-supported CUDA architecture on my machine
+fi
 
 pip install gdown # to download from google drive
2 changes: 2 additions & 0 deletions install_thirdparty.sh
@@ -235,6 +235,8 @@ if [ ! -d raft_stereo ]; then
     cd raft_stereo
     #git checkout 6068c1a26f84f8132de10f60b2bc0ce61568e085 # use this commit if you hit any problems
 
+    git apply ../raft_stereo.patch
+
     ./download_models.sh
 fi

6 changes: 6 additions & 0 deletions pyenv-activate.sh
@@ -17,3 +17,9 @@ export PYTHONPATH="" # clean python path => for me, remove ROS stuff
 
 # N.B.: in order to deactivate the virtual environment run:
 # $ deactivate
+
+# Check if the operating system is Darwin (macOS)
+if [[ $OSTYPE == 'darwin'* ]]; then
+    export PYTORCH_ENABLE_MPS_FALLBACK=1
+    echo "setting PYTORCH_ENABLE_MPS_FALLBACK: $PYTORCH_ENABLE_MPS_FALLBACK"
+fi
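PYTORCH_ENABLE_MPS_FALLBACK=1 tells PyTorch to run any operator the MPS backend does not implement on the CPU instead of raising an error; the variable must be set before torch is imported, which is why it is exported in the activation script. A quick check that the fallback environment is in place (an illustrative sketch, not part of the commit):

    import os
    os.environ.setdefault('PYTORCH_ENABLE_MPS_FALLBACK', '1')  # must precede the torch import

    import torch
    if torch.backends.mps.is_available():
        x = torch.ones(3, device='mps')
        print(x * 2)  # ops missing on MPS would transparently run on CPU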
13 changes: 13 additions & 0 deletions thirdparty/raft_stereo.patch
@@ -0,0 +1,13 @@
diff --git a/download_models.sh b/download_models.sh
index 81582c2..489ba6a 100755
--- a/download_models.sh
+++ b/download_models.sh
@@ -3,4 +3,6 @@ mkdir models -p
cd models
wget https://www.dropbox.com/s/ftveifyqcomiwaq/models.zip
unzip models.zip
-rm models.zip -f
+if [ -f models.zip ]; then
+ rm -f models.zip
+fi
\ No newline at end of file
19 changes: 10 additions & 9 deletions visual_odometry.py
@@ -25,30 +25,31 @@
 from feature_tracker import FeatureTrackerTypes, FeatureTrackingResult, FeatureTracker
 from utils_geom import poseRt, is_rotation_matrix, closest_rotation_matrix
 from timer import TimerFps
+from ground_truth import GroundTruth
 
 class VoStage(Enum):
     NO_IMAGES_YET = 0   # no image received
     GOT_FIRST_IMAGE = 1 # got first image, we can proceed in a normal way (match current image with previous image)
 
 kVerbose=True
 kMinNumFeature = 2000
-kRansacThresholdNormalized = 0.0004 # metric threshold used for normalized image coordinates (originally 0.0003)
-kRansacThresholdPixels = 0.1 # pixel threshold used for image coordinates
-kAbsoluteScaleThresholdKitti = 0.1 # absolute translation scale; it is also the minimum translation norm for an accepted motion
-kAbsoluteScaleThresholdIndoor = 0.01 # absolute translation scale; it is also the minimum translation norm for an accepted motion
-kUseEssentialMatrixEstimation = True # using the essential matrix fitting algorithm is more robust RANSAC given five-point algorithm solver
-kRansacProb = 0.999 # (originally 0.999)
+kRansacThresholdNormalized = 0.0004 # metric threshold used for normalized image coordinates (originally 0.0003)
+kRansacThresholdPixels = 0.1 # pixel threshold used for image coordinates
+kAbsoluteScaleThresholdKitti = 0.1 # absolute translation scale; it is also the minimum translation norm for an accepted motion
+kAbsoluteScaleThresholdIndoor = 0.01 # absolute translation scale; it is also the minimum translation norm for an accepted motion
+kUseEssentialMatrixEstimation = True # using the essential matrix fitting algorithm is more robust RANSAC given five-point algorithm solver
+kRansacProb = 0.999 # (originally 0.999)
+kMinAveragePixelShiftForMotionEstimation = 1.5 # if the average pixel shift is below this threshold, motion is considered to be small enough to be ignored
 kUseGroundTruthScale = True
 
 
 # This class is a first start to understand the basics of inter frame feature tracking and camera pose estimation.
 # It combines the simplest VO ingredients without performing any image point triangulation or
 # windowed bundle adjustment. At each step $k$, it estimates the current camera pose $C_k$ with respect to the previous one $C_{k-1}$.
 # The inter frame pose estimation returns $[R_{k-1,k},t_{k-1,k}]$ with $||t_{k-1,k}||=1$.
 # With this very basic approach, you need to use a ground truth in order to recover a correct inter-frame scale $s$ and estimate a
 # valid trajectory by composing $C_k = C_{k-1} * [R_{k-1,k}, s t_{k-1,k}]$.
 class VisualOdometry(object):
-    def __init__(self, cam, groundtruth, feature_tracker : FeatureTracker):
+    def __init__(self, cam, groundtruth: GroundTruth, feature_tracker: FeatureTracker):
         self.stage = VoStage.NO_IMAGES_YET
         self.cam = cam
 
@@ -189,7 +190,7 @@ def processFrame(self, frame_id):
         # t is estimated up to scale (i.e. the algorithm always returns ||trc||=1, we need a scale in order to recover a translation which is coherent with the previous estimated ones)
         absolute_scale = self.getAbsoluteScale(frame_id)
         # NOTE: This simplistic estimation approach provide reasonable results with Kitti where a good ground velocity provides a decent interframe parallax. It does not work with indoor datasets.
-        if(absolute_scale > self.absolute_scale_threshold and self.average_pixel_shift > 1):
+        if absolute_scale > self.absolute_scale_threshold and self.average_pixel_shift > kMinAveragePixelShiftForMotionEstimation:
             # compose absolute motion [Rwa,twa] with estimated relative motion [Rab,s*tab] (s is the scale extracted from the ground truth)
             # [Rwb,twb] = [Rwa,twa]*[Rab,tab] = [Rwa*Rab|twa + Rwa*tab]
             print('estimated t with norm |t|: ', np.linalg.norm(t), ' (just for sake of clarity)')
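The composition referenced in the comments, C_k = C_{k-1} * [R_{k-1,k}, s t_{k-1,k}], expands to R_w <- R_w R and t_w <- t_w + s (R_w t), which is what processFrame() applies once the scale and pixel-shift checks pass. A minimal numpy sketch (names are illustrative, not taken from the file):

    import numpy as np

    def compose_pose(R_w, t_w, R_rel, t_rel, scale):
        # [Rwb | twb] = [Rwa | twa] * [Rab | s*tab] = [Rwa*Rab | twa + s*Rwa*tab]
        return R_w @ R_rel, t_w + scale * (R_w @ t_rel)

    R_w, t_w = np.eye(3), np.zeros(3)
    # Identity start; unit-norm relative translation scaled by the ground-truth scale
    R_w, t_w = compose_pose(R_w, t_w, np.eye(3), np.array([0.0, 0.0, 1.0]), scale=0.5)
    print(t_w)  # [0.  0.  0.5]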
