opensfm/dense.py

import logging
import typing as t

import cv2
import numpy as np
from opensfm import io
from opensfm import log
from opensfm import pydense
from opensfm import pymap
from opensfm import tracking
from opensfm import types
from opensfm.context import parallel_map
from opensfm.dataset import UndistortedDataSet

logger = logging.getLogger(__name__)


def compute_depthmaps(
    data: UndistortedDataSet,
    graph: pymap.TracksManager,
    reconstruction: types.Reconstruction,
):
    """Compute and refine depthmaps for all shots.

    Args:
        data: an UndistortedDataset
        graph: the tracks graph
        reconstruction: the undistorted reconstruction
    """
    logger.info("Computing neighbors")
    config = data.config
    processes = config["processes"]
    num_neighbors = config["depthmap_num_neighbors"]

    neighbors = {}
    common_tracks = common_tracks_double_dict(graph)
    for shot in reconstruction.shots.values():
        neighbors[shot.id] = find_neighboring_images(
            shot, common_tracks, reconstruction, num_neighbors
        )

    arguments = []
    for shot in reconstruction.shots.values():
        if len(neighbors[shot.id]) <= 1:
            continue
        mind, maxd = compute_depth_range(graph, reconstruction, shot, config)
        arguments.append((data, neighbors[shot.id], mind, maxd, shot))
    parallel_map(compute_depthmap_catched, arguments, processes)

    arguments = []
    for shot in reconstruction.shots.values():
        if len(neighbors[shot.id]) <= 1:
            continue
        arguments.append((data, neighbors[shot.id], shot))
    parallel_map(clean_depthmap_catched, arguments, processes)

    arguments = []
    for shot in reconstruction.shots.values():
        if len(neighbors[shot.id]) <= 1:
            continue
        arguments.append((data, neighbors[shot.id], shot))
    parallel_map(prune_depthmap_catched, arguments, processes)

    point_cloud = merge_depthmaps(data, reconstruction)
    data.save_point_cloud(*point_cloud, filename="merged.ply")


def compute_depthmap_catched(arguments):
    try:
        compute_depthmap(arguments)
    except Exception as e:
        logger.error("Exception on child. Arguments: {}".format(arguments))
        logger.exception(e)


def clean_depthmap_catched(arguments):
    try:
        clean_depthmap(arguments)
    except Exception as e:
        logger.error("Exception on child. Arguments: {}".format(arguments))
        logger.exception(e)


def prune_depthmap_catched(arguments):
    try:
        prune_depthmap(arguments)
    except Exception as e:
        logger.error("Exception on child. Arguments: {}".format(arguments))
        logger.exception(e)


def compute_depthmap(arguments):
    """Compute depthmap for a single shot."""
    log.setup()

    data: UndistortedDataSet = arguments[0]
    neighbors = arguments[1]
    min_depth = arguments[2]
    max_depth = arguments[3]
    shot = arguments[4]

    method = data.config["depthmap_method"]

    if data.raw_depthmap_exists(shot.id):
        logger.info("Using precomputed raw depthmap {}".format(shot.id))
        return
    logger.info("Computing depthmap for image {0} with {1}".format(shot.id, method))

    de = pydense.DepthmapEstimator()
    de.set_depth_range(min_depth, max_depth, 100)
    de.set_patchmatch_iterations(data.config["depthmap_patchmatch_iterations"])
    de.set_patch_size(data.config["depthmap_patch_size"])
    de.set_min_patch_sd(data.config["depthmap_min_patch_sd"])
    add_views_to_depth_estimator(data, neighbors, de)

    if method == "BRUTE_FORCE":
        depth, plane, score, nghbr = de.compute_brute_force()
    elif method == "PATCH_MATCH":
        depth, plane, score, nghbr = de.compute_patch_match()
    elif method == "PATCH_MATCH_SAMPLE":
        depth, plane, score, nghbr = de.compute_patch_match_sample()
    else:
        raise ValueError(
            "Unknown depthmap method type "
            "(must be BRUTE_FORCE, PATCH_MATCH or PATCH_MATCH_SAMPLE)"
        )

    good_score = score > data.config["depthmap_min_correlation_score"]
    depth = depth * (depth < max_depth) * good_score

    # Save and display results
    neighbor_ids = [i.id for i in neighbors[1:]]
    data.save_raw_depthmap(shot.id, depth, plane, score, nghbr, neighbor_ids)

    if data.config["depthmap_save_debug_files"]:
        image = data.load_undistorted_image(shot.id)
        image = scale_down_image(image, depth.shape[1], depth.shape[0])
        ply = depthmap_to_ply(shot, depth, image)
        with io.open_wt(data.depthmap_file(shot.id, "raw.npz.ply")) as fout:
            fout.write(ply)

    if data.config.get("interactive"):
        import matplotlib.pyplot as plt

        plt.figure()
        plt.suptitle("Shot: " + shot.id + ", neighbors: " + ", ".join(neighbor_ids))
        plt.subplot(2, 3, 1)
        plt.imshow(image)
        plt.subplot(2, 3, 2)
        plt.imshow(color_plane_normals(plane))
        plt.subplot(2, 3, 3)
        plt.imshow(depth)
        plt.colorbar()
        plt.subplot(2, 3, 4)
        plt.imshow(score)
        plt.colorbar()
        plt.subplot(2, 3, 5)
        plt.imshow(nghbr)
        plt.colorbar()
        plt.show()


def clean_depthmap(arguments):
    """Clean depthmap by checking consistency with neighbors."""
    log.setup()

    data: UndistortedDataSet = arguments[0]
    neighbors = arguments[1]
    shot = arguments[2]

    if data.clean_depthmap_exists(shot.id):
        logger.info("Using precomputed clean depthmap {}".format(shot.id))
        return
    logger.info("Cleaning depthmap for image {}".format(shot.id))

    dc = pydense.DepthmapCleaner()
    dc.set_same_depth_threshold(data.config["depthmap_same_depth_threshold"])
    dc.set_min_consistent_views(data.config["depthmap_min_consistent_views"])
    add_views_to_depth_cleaner(data, neighbors, dc)
    depth = dc.clean()

    # Save and display results
    raw_depth, raw_plane, raw_score, raw_nghbr, nghbrs = data.load_raw_depthmap(shot.id)
    data.save_clean_depthmap(shot.id, depth, raw_plane, raw_score)

    if data.config["depthmap_save_debug_files"]:
        image = data.load_undistorted_image(shot.id)
        image = scale_down_image(image, depth.shape[1], depth.shape[0])
        ply = depthmap_to_ply(shot, depth, image)
        with io.open_wt(data.depthmap_file(shot.id, "clean.npz.ply")) as fout:
            fout.write(ply)

    if data.config.get("interactive"):
        import matplotlib.pyplot as plt

        plt.figure()
        plt.suptitle("Shot: " + shot.id)
        plt.subplot(2, 2, 1)
        plt.imshow(raw_depth)
        plt.colorbar()
        plt.subplot(2, 2, 2)
        plt.imshow(depth)
        plt.colorbar()
        plt.show()


def prune_depthmap(arguments):
    """Prune depthmap to remove redundant points."""
    log.setup()

    data: UndistortedDataSet = arguments[0]
    neighbors = arguments[1]
    shot = arguments[2]

    if data.pruned_depthmap_exists(shot.id):
        logger.info("Using precomputed pruned depthmap {}".format(shot.id))
        return
    logger.info("Pruning depthmap for image {}".format(shot.id))

    dp = pydense.DepthmapPruner()
    dp.set_same_depth_threshold(data.config["depthmap_same_depth_threshold"])
    add_views_to_depth_pruner(data, neighbors, dp)
    points, normals, colors, labels = dp.prune()

    # Save and display results
    data.save_pruned_depthmap(shot.id, points, normals, colors, labels)

    if data.config["depthmap_save_debug_files"]:
        data.save_point_cloud(points, normals, colors, labels, "pruned.npz.ply")


def aggregate_depthmaps(shot_ids, depthmap_provider):
    """Aggregate depthmaps by concatenation."""

    points = []
    normals = []
    colors = []
    labels = []
    for shot_id in shot_ids:
        p, n, c, l = depthmap_provider(shot_id)
        points.append(p)
        normals.append(n)
        colors.append(c)
        labels.append(l)

    return (
        np.concatenate(points),
        np.concatenate(normals),
        np.concatenate(colors),
        np.concatenate(labels),
    )


def merge_depthmaps(
    data: UndistortedDataSet, reconstruction: types.Reconstruction
) -> t.Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Merge depthmaps into a single point cloud."""
    shot_ids = [s for s in reconstruction.shots if data.pruned_depthmap_exists(s)]

    def depthmap_provider(shot_id):
        return data.load_pruned_depthmap(shot_id)

    return merge_depthmaps_from_provider(shot_ids, depthmap_provider)


def merge_depthmaps_from_provider(
    shot_ids: t.Iterable[str], depthmap_provider: t.Callable
) -> t.Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """Merge depthmaps into a single point cloud."""
    logger.info("Merging depthmaps")

    if not shot_ids:
        logger.warning("Depthmaps contain no points.  Try using more images.")
        return np.array([]), np.array([]), np.array([]), np.array([])

    return aggregate_depthmaps(shot_ids, depthmap_provider)


def add_views_to_depth_estimator(data: UndistortedDataSet, neighbors, de):
    """Add neighboring views to the DepthmapEstimator."""
    num_neighbors = data.config["depthmap_num_matching_views"]
    for shot in neighbors[: num_neighbors + 1]:
        assert shot.camera.projection_type == "perspective"
        color_image = data.load_undistorted_image(shot.id)
        mask = load_combined_mask(data, shot)
        gray_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2GRAY)
        original_height, original_width = gray_image.shape
        width = min(original_width, int(data.config["depthmap_resolution"]))
        height = width * original_height // original_width
        image = scale_down_image(gray_image, width, height)
        mask = scale_image(mask, image.shape[1], image.shape[0], cv2.INTER_NEAREST)
        K = shot.camera.get_K_in_pixel_coordinates(width, height)
        R = shot.pose.get_rotation_matrix()
        t = shot.pose.translation
        de.add_view(K, R, t, image, mask)


def add_views_to_depth_cleaner(data: UndistortedDataSet, neighbors, dc):
    for shot in neighbors:
        if not data.raw_depthmap_exists(shot.id):
            continue
        depth, plane, score, nghbr, nghbrs = data.load_raw_depthmap(shot.id)
        height, width = depth.shape
        K = shot.camera.get_K_in_pixel_coordinates(width, height)
        R = shot.pose.get_rotation_matrix()
        t = shot.pose.translation
        dc.add_view(K, R, t, depth)


def load_combined_mask(data: UndistortedDataSet, shot):
    """Load the undistorted mask.

    If no mask exists return an array of ones.
    """
    mask = data.load_undistorted_combined_mask(shot.id)
    if mask is None:
        size = int(shot.camera.height), int(shot.camera.width)
        return np.ones(size, dtype=np.uint8)
    else:
        return mask


def load_segmentation_labels(data: UndistortedDataSet, shot):
    """Load the undistorted segmentation labels.

    If no segmentation exists return an array of zeros.
    """
    if data.undistorted_segmentation_exists(shot.id):
        return data.load_undistorted_segmentation(shot.id)
    else:
        size = shot.camera.height, shot.camera.width
        return np.zeros(size, dtype=np.uint8)


def add_views_to_depth_pruner(data: UndistortedDataSet, neighbors, dp):
    for shot in neighbors:
        if not data.clean_depthmap_exists(shot.id):
            continue
        depth, plane, score = data.load_clean_depthmap(shot.id)
        height, width = depth.shape
        color_image = data.load_undistorted_image(shot.id)
        labels = load_segmentation_labels(data, shot)
        height, width = depth.shape
        image = scale_down_image(color_image, width, height)
        labels = scale_image(labels, image.shape[1], image.shape[0], cv2.INTER_NEAREST)
        K = shot.camera.get_K_in_pixel_coordinates(width, height)
        R = shot.pose.get_rotation_matrix()
        t = shot.pose.translation
        dp.add_view(K, R, t, depth, plane, image, labels)


def compute_depth_range(tracks_manager, reconstruction, shot, config):
    """Compute min and max depth based on reconstruction points."""
    depths = []
    for track in tracks_manager.get_shot_observations(shot.id):
        if track in reconstruction.points:
            p = reconstruction.points[track].coordinates
            z = shot.pose.transform(p)[2]
            depths.append(z)
    min_depth = np.percentile(depths, 10) * 0.9
    max_depth = np.percentile(depths, 90) * 1.1

    config_min_depth = config["depthmap_min_depth"]
    config_max_depth = config["depthmap_max_depth"]

    return config_min_depth or min_depth, config_max_depth or max_depth


def common_tracks_double_dict(
    tracks_manager: pymap.TracksManager,
) -> t.Dict[str, t.Dict[str, t.List[str]]]:
    """List of track ids observed by each image pair.

    Return a dict, ``res``, such that ``res[im1][im2]`` is the list of
    common tracks between ``im1`` and ``im2``.
    """
    common_tracks_per_pair = tracking.all_common_tracks_without_features(tracks_manager)
    res = {image: {} for image in tracks_manager.get_shot_ids()}
    for (im1, im2), v in common_tracks_per_pair.items():
        res[im1][im2] = v
        res[im2][im1] = v
    return res


def find_neighboring_images(
    shot: pymap.Shot,
    common_tracks: t.Dict[str, t.Dict[str, t.List[str]]],
    reconstruction: types.Reconstruction,
    num_neighbors: int,
):
    """Find neighboring images based on common tracks."""
    theta_min = np.pi / 60
    theta_max = np.pi / 6
    ns = []
    C1 = shot.pose.get_origin()
    for other_id, tracks in common_tracks.get(shot.id, {}).items():
        if other_id not in reconstruction.shots:
            continue
        other = reconstruction.shots[other_id]
        score = 0
        C2 = other.pose.get_origin()
        for track in tracks:
            if track in reconstruction.points:
                p = reconstruction.points[track].coordinates
                theta = angle_between_points(p, C1, C2)
                if theta > theta_min and theta < theta_max:
                    score += 1
        if score > 20:
            ns.append((other, score))

    ns.sort(key=lambda ns: ns[1], reverse=True)
    return [shot] + [n for n, s in ns[:num_neighbors]]


def angle_between_points(origin, p1, p2):
    a0 = p1[0] - origin[0]
    a1 = p1[1] - origin[1]
    a2 = p1[2] - origin[2]
    b0 = p2[0] - origin[0]
    b1 = p2[1] - origin[1]
    b2 = p2[2] - origin[2]
    dot = a0 * b0 + a1 * b1 + a2 * b2
    la = a0 * a0 + a1 * a1 + a2 * a2
    lb = b0 * b0 + b1 * b1 + b2 * b2
    return np.arccos(dot / np.sqrt(la * lb))


def distance_between_shots(shot, other):
    o1 = shot.pose.get_origin()
    o2 = other.pose.get_origin()
    d = o2 - o1
    return np.sqrt(np.sum(d ** 2))


def scale_image(
    image: np.ndarray, width: int, height: int, interpolation: int
) -> np.ndarray:
    return cv2.resize(image, (width, height), interpolation=interpolation)


def scale_down_image(
    image: np.ndarray, width: int, height: int, interpolation=cv2.INTER_AREA
) -> np.ndarray:
    width = min(width, image.shape[1])
    height = min(height, image.shape[0])
    return scale_image(image, width, height, interpolation)


def depthmap_to_ply(shot, depth, image):
    """Export depthmap points as a PLY string"""
    height, width = depth.shape
    K = shot.camera.get_K_in_pixel_coordinates(width, height)
    R = shot.pose.get_rotation_matrix()
    t = shot.pose.translation
    y, x = np.mgrid[:height, :width]
    v = np.vstack((x.ravel(), y.ravel(), np.ones(width * height)))
    camera_coords = depth.reshape((1, -1)) * np.linalg.inv(K).dot(v)
    points = R.T.dot(camera_coords - t.reshape(3, 1))

    vertices = []
    for p, c, d in zip(points.T, image.reshape(-1, 3), depth.reshape(-1, 1)):
        if d != 0:  # ignore points with zero depth
            s = "{} {} {} {} {} {}".format(p[0], p[1], p[2], c[0], c[1], c[2])
            vertices.append(s)

    return io.points_to_ply_string(vertices)


def color_plane_normals(plane):
    norm = np.linalg.norm(plane, axis=2)
    normal = plane / norm[..., np.newaxis]
    normal[..., 1] *= -1  # Reverse Y because it points down
    normal[..., 2] *= -1  # Reverse Z because standard colormap does so
    return ((normal + 1) * 128).astype(np.uint8)