diff --git a/theseus/tests/test_dlm_perturbation.py b/theseus/tests/test_dlm_perturbation.py
new file mode 100644
index 000000000..5377ee022
--- /dev/null
+++ b/theseus/tests/test_dlm_perturbation.py
@@ -0,0 +1,104 @@
+import numpy as np
+import torch
+
+import theseus as th
+from theseus.theseus_layer import _DLMPerturbation
+from theseus.utils import numeric_jacobian
+
+
+def _original_dlm_perturbation(optim_vars, aux_vars):
+    v = optim_vars[0]
+    g = aux_vars[0]
+    epsilon = aux_vars[1]
+    return epsilon.data * v.data - 0.5 * g.data
+
+
+def test_dlm_perturbation_jacobian():
+    generator = torch.Generator()
+    generator.manual_seed(0)
+    rng = np.random.default_rng(0)
+    dtype = torch.float64
+    for _ in range(100):
+        group_cls = rng.choice([th.Vector, th.SE3, th.SE2, th.SO2, th.SO3])
+        for batch_size in [1, 10, 100]:
+            epsilon = th.Variable(
+                data=torch.randn(batch_size, 1, dtype=dtype, generator=generator)
+            )
+
+            if group_cls == th.Vector:
+                dof = rng.choice([1, 2])
+                var = group_cls.randn(batch_size, dof, dtype=dtype, generator=generator)
+                grad = group_cls.randn(
+                    batch_size, dof, dtype=dtype, generator=generator
+                )
+            else:
+                var = group_cls.randn(batch_size, dtype=dtype, generator=generator)
+                grad = group_cls.randn(batch_size, dtype=dtype, generator=generator)
+
+            w = th.ScaleCostWeight(1.0).to(dtype=dtype)
+            cf = _DLMPerturbation(var, epsilon, grad, w)
+
+            def new_error_fn(vars):
+                new_cost_function = _DLMPerturbation(vars[0], epsilon, grad, w)
+                return th.Vector(data=new_cost_function.error())
+
+            expected_jacs = numeric_jacobian(
+                new_error_fn,
+                [var],
+                function_dim=np.prod(var.shape[1:]),
+                delta_mag=1e-6,
+            )
+            jacobians, error_jac = cf.jacobians()
+            error = cf.error()
+            assert error.allclose(error_jac)
+            assert jacobians[0].allclose(expected_jacs[0], atol=1e-5)
+
+            if group_cls in [th.Vector, th.SO2, th.SE2]:
+                # Original cf didn't work for SO3 or SE3
+                original_cf = th.AutoDiffCostFunction(
+                    [var],
+                    _original_dlm_perturbation,
+                    var.shape[1],
+                    aux_vars=[grad, epsilon],
+                )
+                original_jac, original_err = original_cf.jacobians()
+                assert error.allclose(original_err)
+                assert jacobians[0].allclose(original_jac[0], atol=1e-5)
+
+
+def test_backward_pass_se3_runs():
+    generator = torch.Generator()
+    generator.manual_seed(0)
+    dtype = torch.float64
+    batch_size = 10
+    var = th.rand_se3(batch_size, generator=generator)
+    var.name = "v1"
+    target = th.rand_se3(batch_size, generator=generator)
+    target.name = "target"
+
+    objective = th.Objective()
+    objective.add(th.Difference(var, th.ScaleCostWeight(1.0), target))
+    objective.to(dtype=dtype)
+    optimizer = th.GaussNewton(objective)
+    layer = th.TheseusLayer(optimizer)
+
+    target_data = torch.nn.Parameter(th.rand_se3(batch_size, dtype=dtype).data)
+    adam = torch.optim.Adam([target_data], lr=0.01)
+    loss0 = None
+    for _ in range(5):
+        adam.zero_grad()
+        with th.enable_lie_tangent():
+            out, _ = layer.forward(
+                {"target": target_data},
+                optimizer_kwargs={
+                    "backward_mode": th.BackwardMode.DLM,
+                    "verbose": False,
+                },
+            )
+
+        loss = out["v1"].norm()
+        if loss0 is None:
+            loss0 = loss.item()
+        loss.backward()
+        adam.step()
+    assert loss.item() < loss0
diff --git a/theseus/theseus_layer.py b/theseus/theseus_layer.py
index f2df0db1a..d6708e496 100644
--- a/theseus/theseus_layer.py
+++ b/theseus/theseus_layer.py
@@ -3,13 +3,22 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.

-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple

+import numpy as np
 import torch
 import torch.nn as nn
 from torch.autograd.function import once_differentiable

-from theseus.core import AutoDiffCostFunction, Variable, Vectorize
+from theseus.core import (
+    CostFunction,
+    CostWeight,
+    Objective,
+    ScaleCostWeight,
+    Variable,
+    Vectorize,
+)
+from theseus.geometry import LieGroup, Manifold
 from theseus.optimizer import Optimizer, OptimizerInfo
 from theseus.optimizer.linear import LinearSolver
 from theseus.optimizer.nonlinear import BackwardMode, GaussNewton
@@ -38,7 +47,14 @@ def forward(
            )
         optimizer_kwargs = optimizer_kwargs or {}
         backward_mode = optimizer_kwargs.get("backward_mode", None)
-        dlm_epsilon = optimizer_kwargs.get(TheseusLayerDLMForward._dlm_epsilon, 1e-2)
+        dlm_epsilon = optimizer_kwargs.get(
+            TheseusLayerDLMForward._DLM_EPSILON_STR, 1e-2
+        )
+        if not isinstance(dlm_epsilon, float):
+            raise ValueError(
+                f"{TheseusLayerDLMForward._DLM_EPSILON_STR} must be a float "
+                f"but {type(dlm_epsilon)} was given."
+            )
         if backward_mode == BackwardMode.DLM:
             if self._dlm_bwd_objective is None:
@@ -135,8 +151,8 @@ class TheseusLayerDLMForward(torch.autograd.Function):
     but computes the direct loss minimization in the backward pass.
     """

-    _dlm_epsilon = "dlm_epsilon"
-    _grad_suffix = "_grad"
+    _DLM_EPSILON_STR = "dlm_epsilon"
+    _GRAD_SUFFIX = "_grad"

     @staticmethod
     def forward(
@@ -204,12 +220,12 @@ def backward(ctx, *grad_outputs):

         # Add in gradient values.
         grad_data = {
-            TheseusLayerDLMForward._dlm_epsilon: torch.tensor(epsilon)
+            TheseusLayerDLMForward._DLM_EPSILON_STR: torch.tensor(epsilon)
             .to(grad_outputs[0])
             .reshape(1, 1)
         }
         for i, name in enumerate(bwd_objective.optim_vars.keys()):
-            grad_data[name + TheseusLayerDLMForward._grad_suffix] = grad_outputs[i]
+            grad_data[name + TheseusLayerDLMForward._GRAD_SUFFIX] = grad_outputs[i]
         bwd_data.update(grad_data)

         # Solve backward objective.
@@ -237,27 +253,70 @@ def backward(ctx, *grad_outputs):
         return (None, None, None, None, None, None, None, *nones, *grads)


-def _dlm_perturbation(optim_vars, aux_vars):
-    v = optim_vars[0]
-    g = aux_vars[0]
-    epsilon = aux_vars[1]
-    return epsilon.data * v.data - 0.5 * g.data
+class _DLMPerturbation(CostFunction):
+    def __init__(
+        self,
+        var: Manifold,
+        epsilon: Variable,
+        grad: Variable,
+        cost_weight: CostWeight,
+        name: Optional[str] = None,
+    ):
+        if not isinstance(var, LieGroup):
+            raise ValueError(
+                f"DLM requires LieGroup-type variables, but "
+                f"{var.name} has type {var.__class__.__name__}"
+            )
+        super().__init__(cost_weight, name=name)
+        assert epsilon.ndim == 2 and epsilon.shape[1] == 1
+        self.var = var
+        self.epsilon = epsilon
+        self.grad = grad
+        self.register_optim_var("var")
+        self.register_aux_vars(["epsilon", "grad"])
+
+    def error(self) -> torch.Tensor:
+        err = (
+            self.epsilon.data.view((-1,) + (1,) * (self.var.ndim - 1)) * self.var.data
+            - 0.5 * self.grad.data
+        )
+        return err.flatten(start_dim=1)
+
+    def jacobians(self) -> Tuple[List[torch.Tensor], torch.Tensor]:
+        d = self.dim()
+        aux = torch.eye(d).unsqueeze(0).expand(self.var.shape[0], d, d)
+        euclidean_grad_flat = self.epsilon.data.view(-1, 1, 1) * aux
+        euclidean_grad = euclidean_grad_flat.unflatten(2, self.var.shape[1:])
+        return [self.var.project(euclidean_grad, is_sparse=True)], self.error()
+
+    def dim(self) -> int:
+        return np.prod(self.var.data.shape[1:])
+
+    def _copy_impl(self, new_name: Optional[str] = None) -> "CostFunction":
+        return _DLMPerturbation(
+            self.var.copy(),
+            self.epsilon.copy(),
+            self.grad.copy(),
+            self.weight.copy(),
+            name=new_name,
+        )


-def _instantiate_dlm_bwd_objective(objective):
+def _instantiate_dlm_bwd_objective(objective: Objective):
     bwd_objective = objective.copy()
-    epsilon_var = Variable(torch.ones(1, 1), name=TheseusLayerDLMForward._dlm_epsilon)
+    epsilon_var = Variable(
+        torch.ones(1, 1, dtype=bwd_objective.dtype, device=bwd_objective.device),
+        name=TheseusLayerDLMForward._DLM_EPSILON_STR,
+    )
+    unit_weight = ScaleCostWeight(1.0)
+    unit_weight.to(dtype=objective.dtype, device=objective.device)
     for name, var in bwd_objective.optim_vars.items():
         grad_var = Variable(
-            torch.zeros_like(var.data), name=name + TheseusLayerDLMForward._grad_suffix
+            torch.zeros_like(var.data), name=name + TheseusLayerDLMForward._GRAD_SUFFIX
         )
         bwd_objective.add(
-            AutoDiffCostFunction(
-                [var],
-                _dlm_perturbation,
-                var.shape[1],
-                aux_vars=[grad_var, epsilon_var],
-                name="dlm_perturbation_" + name,
+            _DLMPerturbation(
+                var, epsilon_var, grad_var, unit_weight, name="dlm_perturbation" + name
             )
         )
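
Usage note (editor's sketch, not part of the patch): the new DLM backward mode is selected through optimizer_kwargs on TheseusLayer.forward, and the perturbation magnitude is passed under the "dlm_epsilon" key (default 1e-2; it must be a Python float or forward now raises a ValueError). The snippet below mirrors test_backward_pass_se3_runs above; the objective, variable names, and default float32 dtype are illustrative choices, not part of the patch.

import torch
import theseus as th

# Small SE3 objective, mirroring the setup in the new test.
batch_size = 10
var = th.rand_se3(batch_size)
var.name = "v1"
target = th.rand_se3(batch_size)
target.name = "target"
objective = th.Objective()
objective.add(th.Difference(var, th.ScaleCostWeight(1.0), target))
layer = th.TheseusLayer(th.GaussNewton(objective))

# Outer parameter whose gradient flows through the DLM backward objective.
target_data = torch.nn.Parameter(th.rand_se3(batch_size).data)
with th.enable_lie_tangent():
    out, _ = layer.forward(
        {"target": target_data},
        optimizer_kwargs={
            "backward_mode": th.BackwardMode.DLM,
            "dlm_epsilon": 1e-2,  # must be a float, per the new isinstance check
        },
    )
out["v1"].norm().backward()  # populates target_data.grad via the DLM backward pass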