Set vectorization off by default when using optimizers w/o TheseusLayer (#350)

* Set vectorization to be off by default when using optimizers without TheseusLayer.
luisenp authored Nov 4, 2022
1 parent b93f3fd commit 7d7257c
Showing 14 changed files with 73 additions and 31 deletions.
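In short: constructing `GaussNewton`, `LevenbergMarquardt`, or any other optimizer directly now defaults to `vectorize=False`, and calling `optimize()` outside a `TheseusLayer` emits a warning. A minimal sketch of the new behavior (the toy variable and cost function below are illustrative, not part of this commit):

```python
import torch
import theseus as th

# Toy setup: a single 1-D variable pulled toward zero.
x = th.Vector(1, name="x")

def err_fn(optim_vars, aux_vars):
    return optim_vars[0].tensor  # residual is x itself

objective = th.Objective()
objective.add(th.AutoDiffCostFunction(
    [x], err_fn, 1, cost_weight=th.ScaleCostWeight(1.0)))
objective.update({"x": torch.ones(1, 1)})

# After this commit, standalone optimizers default to vectorize=False,
# and a direct optimize() call warns that vectorization is off.
optimizer = th.GaussNewton(objective)
optimizer.optimize()  # UserWarning: "Vectorization is off by default ..."

# Recommended path: wrap the optimizer in a TheseusLayer; its forward()
# injects the private token, so no warning is emitted.
layer = th.TheseusLayer(th.GaussNewton(objective))
values, info = layer.forward({"x": torch.ones(1, 1)})
```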
2 changes: 1 addition & 1 deletion README.md
@@ -133,7 +133,7 @@ skipped when its extlib is not compiled.
## Examples
- [Simple example](https://github.com/facebookresearch/theseus/blob/main/examples/simple_example.py). This example is fitting the curve $y$ to a dataset of $N$ observations $(x,y) \sim D$. This is modeled as an `Objective` with a single `CostFunction` that computes the residual $y - v e^x$. The `Objective` and the `GaussNewton` optimizer are encapsulated into a `TheseusLayer`. With `RMSprop` and MSE loss, $x$ is learned by differentiating through the `TheseusLayer`.
+ [Simple example](https://github.com/facebookresearch/theseus/blob/main/examples/simple_example.py). This example is fitting the curve $y$ to a dataset of $N$ observations $(x,y) \sim D$. This is modeled as an `Objective` with a single `CostFunction` that computes the residual $y - v e^x$. The `Objective` and the `GaussNewton` optimizer are encapsulated into a `TheseusLayer`. With `Adam` and MSE loss, $x$ is learned by differentiating through the `TheseusLayer`.
```python
import torch
```
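For context, a condensed sketch of the full README example this paragraph describes — solving for $v$ in the residual $y - v e^x$ and learning $x$ with `Adam` by differentiating through the layer. The data generation, hyperparameters, and variable names here are illustrative, not copied from the repository:

```python
import torch
import theseus as th

N = 100
x_true = torch.rand(1, N)
v_true = 0.5 * torch.ones(1, 1)
y_true = v_true * torch.exp(x_true)

x = th.Variable(torch.randn(1, N), name="x")  # auxiliary, learned outside
y = th.Variable(y_true, name="y")             # auxiliary, observed
v = th.Vector(1, name="v")                    # optimization variable

def error_fn(optim_vars, aux_vars):           # residual: y - v * exp(x)
    x, y = aux_vars
    return y.tensor - optim_vars[0].tensor * torch.exp(x.tensor)

objective = th.Objective()
objective.add(th.AutoDiffCostFunction(
    [v], error_fn, N, aux_vars=[x, y], cost_weight=th.ScaleCostWeight(1.0)))
layer = th.TheseusLayer(th.GaussNewton(objective, max_iterations=10))

# Outer loop: learn x with Adam by differentiating through the layer.
phi = torch.nn.Parameter(x_true + 0.1 * torch.randn_like(x_true))
outer_optimizer = torch.optim.Adam([phi], lr=1e-3)
for _ in range(20):
    outer_optimizer.zero_grad()
    solution, info = layer.forward({"x": phi.clone(), "v": torch.ones(1, 1)})
    loss = torch.nn.functional.mse_loss(solution["v"], v_true)
    loss.backward()
    outer_optimizer.step()
```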
2 changes: 2 additions & 0 deletions theseus/constants.py
@@ -11,6 +11,8 @@
EPS = 1e-10
PI = math.pi

+ __FROM_THESEUS_LAYER_TOKEN__ = "__FROM_THESEUS_LAYER_TOKEN__"


def _CHECK_DTYPE_SUPPORTED(dtype):
if dtype not in [torch.float32, torch.float64]:
2 changes: 1 addition & 1 deletion theseus/optimizer/linear/linear_optimizer.py
@@ -28,7 +28,7 @@ def __init__(
objective: Objective,
linear_solver_cls: Type[LinearSolver],
*args,
- vectorize: bool = True,
+ vectorize: bool = False,
linearization_cls: Optional[Type[Linearization]] = None,
linearization_kwargs: Optional[Dict[str, Any]] = None,
linear_solver_kwargs: Optional[Dict[str, Any]] = None,
2 changes: 1 addition & 1 deletion theseus/optimizer/nonlinear/gauss_newton.py
@@ -19,7 +19,7 @@ def __init__(
self,
objective: Objective,
linear_solver_cls: Optional[Type[LinearSolver]] = None,
- vectorize: bool = True,
+ vectorize: bool = False,
linearization_cls: Optional[Type[Linearization]] = None,
linearization_kwargs: Optional[Dict[str, Any]] = None,
linear_solver_kwargs: Optional[Dict[str, Any]] = None,
2 changes: 1 addition & 1 deletion theseus/optimizer/nonlinear/levenberg_marquardt.py
@@ -56,7 +56,7 @@ def __init__(
self,
objective: Objective,
linear_solver_cls: Optional[Type[LinearSolver]] = None,
- vectorize: bool = True,
+ vectorize: bool = False,
linearization_cls: Optional[Type[Linearization]] = None,
linearization_kwargs: Optional[Dict[str, Any]] = None,
linear_solver_kwargs: Optional[Dict[str, Any]] = None,
2 changes: 1 addition & 1 deletion theseus/optimizer/nonlinear/nonlinear_least_squares.py
@@ -22,7 +22,7 @@ def __init__(
objective: Objective,
*args,
linear_solver_cls: Optional[Type[LinearSolver]] = None,
- vectorize: bool = True,
+ vectorize: bool = False,
linearization_cls: Optional[Type[Linearization]] = None,
linearization_kwargs: Optional[Dict[str, Any]] = None,
linear_solver_kwargs: Optional[Dict[str, Any]] = None,
2 changes: 1 addition & 1 deletion theseus/optimizer/nonlinear/nonlinear_optimizer.py
@@ -86,7 +86,7 @@ def __init__(
objective: Objective,
linear_solver_cls: Type[LinearSolver],
*args,
- vectorize: bool = True,
+ vectorize: bool = False,
linearization_cls: Optional[Type[Linearization]] = None,
linearization_kwargs: Optional[Dict[str, Any]] = None,
linear_solver_kwargs: Optional[Dict[str, Any]] = None,
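The preceding hunks flip the same `vectorize` default across the optimizer class hierarchy. Users who run an optimizer standalone and want the old behavior can opt back in explicitly; a sketch under the same toy setup as above:

```python
import torch
import theseus as th

x = th.Vector(1, name="x")
objective = th.Objective()
objective.add(th.AutoDiffCostFunction(
    [x], lambda optim_vars, aux_vars: optim_vars[0].tensor, 1,
    cost_weight=th.ScaleCostWeight(1.0)))
objective.update({"x": torch.ones(1, 1)})

# vectorize now defaults to False in all of the optimizers above; passing
# vectorize=True restores the old standalone behavior. Since the objective
# is then vectorized, the new warning in optimize() is also skipped.
optimizer = th.LevenbergMarquardt(objective, vectorize=True)
optimizer.optimize()
```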
5 changes: 4 additions & 1 deletion theseus/optimizer/nonlinear/tests/test_gauss_newton.py
@@ -7,8 +7,11 @@

import theseus as th

+ from theseus.constants import __FROM_THESEUS_LAYER_TOKEN__
from .common import run_nonlinear_least_squares_check


def test_gauss_newton():
- run_nonlinear_least_squares_check(th.GaussNewton, {})
+ run_nonlinear_least_squares_check(
+ th.GaussNewton, {__FROM_THESEUS_LAYER_TOKEN__: True}
+ )
22 changes: 16 additions & 6 deletions theseus/optimizer/nonlinear/tests/test_levenberg_marquardt.py
@@ -8,6 +8,7 @@

import theseus as th

+ from theseus.constants import __FROM_THESEUS_LAYER_TOKEN__
from theseus.optimizer.nonlinear.tests.common import run_nonlinear_least_squares_check


@@ -31,6 +32,7 @@ def test_levenberg_marquardt(damping, ellipsoidal_damping, adaptive_damping):
"ellipsoidal_damping": ellipsoidal_damping,
"adaptive_damping": adaptive_damping,
"damping_eps": 0.0,
+ __FROM_THESEUS_LAYER_TOKEN__: True,
},
singular_check=damping < 0.001,
)
@@ -40,15 +42,21 @@ def test_ellipsoidal_damping_compatibility(mock_objective):
mock_objective.update({"v1": torch.ones(1, 1), "v2": torch.zeros(1, 1)})
for lsc in [th.LUDenseSolver, th.CholeskyDenseSolver]:
optimizer = th.LevenbergMarquardt(mock_objective, lsc)
- optimizer.optimize(ellipsoidal_damping=True)
- optimizer.optimize(damping_eps=0.1)
+ optimizer.optimize(
+ **{"ellipsoidal_damping": True, __FROM_THESEUS_LAYER_TOKEN__: True}
+ )
+ optimizer.optimize(**{"damping_eps": 0.1, __FROM_THESEUS_LAYER_TOKEN__: True})

for lsc in [th.CholmodSparseSolver]:
optimizer = th.LevenbergMarquardt(mock_objective, lsc)
with pytest.raises(RuntimeError):
- optimizer.optimize(ellipsoidal_damping=True)
+ optimizer.optimize(
+ **{"ellipsoidal_damping": True, __FROM_THESEUS_LAYER_TOKEN__: True}
+ )
with pytest.raises(RuntimeError):
- optimizer.optimize(damping_eps=0.1)
+ optimizer.optimize(
+ **{"damping_eps": 0.1, __FROM_THESEUS_LAYER_TOKEN__: True}
+ )


@pytest.mark.cudaext
@@ -67,5 +75,7 @@ def test_ellipsoidal_damping_compatibility_cuda(mock_objective):
optimizer = th.LevenbergMarquardt(
mock_objective, lsc, linear_solver_kwargs={"batch_size": batch_size}
)
- optimizer.optimize(ellipsoidal_damping=True)
- optimizer.optimize(damping_eps=0.1)
+ optimizer.optimize(
+ **{"ellipsoidal_damping": True, __FROM_THESEUS_LAYER_TOKEN__: True}
+ )
+ optimizer.optimize(**{"damping_eps": 0.1, __FROM_THESEUS_LAYER_TOKEN__: True})
3 changes: 2 additions & 1 deletion theseus/optimizer/nonlinear/tests/test_state_history.py
@@ -25,8 +25,9 @@ def test_state_history(var_type, batch_size):

max_iters = 10
optimizer = th.GaussNewton(objective, max_iterations=max_iters)
+ layer = th.TheseusLayer(optimizer)

- info = optimizer.optimize(track_state_history=True)
+ _, info = layer.forward(optimizer_kwargs={"track_state_history": True})

for var in objective.optim_vars.values():
assert var.name in info.state_history
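The updated test reflects the intended usage pattern: options such as `track_state_history`, previously passed to `Optimizer.optimize()`, now travel through `TheseusLayer.forward`'s `optimizer_kwargs`. A sketch using the same toy objective as before:

```python
import torch
import theseus as th

x = th.Vector(1, name="x")
objective = th.Objective()
objective.add(th.AutoDiffCostFunction(
    [x], lambda optim_vars, aux_vars: optim_vars[0].tensor, 1,
    cost_weight=th.ScaleCostWeight(1.0)))

layer = th.TheseusLayer(th.GaussNewton(objective, max_iterations=10))
_, info = layer.forward(
    input_tensors={"x": torch.ones(1, 1)},
    optimizer_kwargs={"track_state_history": True},
)
print(info.state_history["x"])  # per-iteration values of variable "x"
```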
10 changes: 9 additions & 1 deletion theseus/optimizer/optimizer.py
@@ -4,12 +4,14 @@
# LICENSE file in the root directory of this source tree.

import abc
+ import warnings
from dataclasses import dataclass
from typing import Dict, Optional

import numpy as np
import torch

+ from theseus.constants import __FROM_THESEUS_LAYER_TOKEN__
from theseus.core import Objective, Vectorize


@@ -22,7 +24,7 @@ class OptimizerInfo:


class Optimizer(abc.ABC):
- def __init__(self, objective: Objective, *args, vectorize: bool = True, **kwargs):
+ def __init__(self, objective: Objective, *args, vectorize: bool = False, **kwargs):
self.objective = objective
if vectorize:
Vectorize(
@@ -35,6 +37,12 @@ def _optimize_impl(self, **kwargs) -> OptimizerInfo:
pass

def optimize(self, **kwargs) -> OptimizerInfo:
+ from_theseus_layer = kwargs.get(__FROM_THESEUS_LAYER_TOKEN__, False)
+ if not from_theseus_layer and not self.objective.vectorized:
+ warnings.warn(
+ "Vectorization is off by default when not running from TheseusLayer. "
+ "Using TheseusLayer is the recommended way to run our optimizers."
+ )
if self._objectives_version != self.objective.current_version:
raise RuntimeError(
"The objective was modified after optimizer construction, which is "
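The hunk above is the heart of the change: `optimize()` warns only when it is called without the private token and the objective has not been vectorized. A sketch of the two call paths (toy objective as before; the warning text is quoted from the diff):

```python
import warnings
import torch
import theseus as th

x = th.Vector(1, name="x")
objective = th.Objective()
objective.add(th.AutoDiffCostFunction(
    [x], lambda optim_vars, aux_vars: optim_vars[0].tensor, 1,
    cost_weight=th.ScaleCostWeight(1.0)))
objective.update({"x": torch.ones(1, 1)})

# Path 1: direct call, no token, objective not vectorized -> warning.
optimizer = th.GaussNewton(objective)
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    optimizer.optimize()
assert any("Vectorization is off" in str(w.message) for w in caught)

# Path 2: TheseusLayer._forward injects the token, so no warning fires.
layer = th.TheseusLayer(th.GaussNewton(objective))
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    layer.forward({"x": torch.ones(1, 1)})
assert not any("Vectorization is off" in str(w.message) for w in caught)
```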
3 changes: 2 additions & 1 deletion theseus/tests/test_theseus_layer.py
@@ -13,6 +13,7 @@

import theseus as th
import theseus.utils as thutils
+ from theseus.constants import __FROM_THESEUS_LAYER_TOKEN__
from theseus.core.tests.common import (
MockCostFunction,
MockCostWeight,
@@ -432,7 +433,7 @@ def _do_check(layer_, optimizer_):
with pytest.raises(RuntimeError):
layer_.forward({})
with pytest.raises(RuntimeError):
- optimizer_.optimize()
+ optimizer_.optimize(**{__FROM_THESEUS_LAYER_TOKEN__: True})

# Check for adding a factor
new_cost = MockCostFunction(
9 changes: 8 additions & 1 deletion theseus/theseus_layer.py
@@ -18,6 +18,7 @@
Variable,
Vectorize,
)
+ from theseus.constants import __FROM_THESEUS_LAYER_TOKEN__
from theseus.geometry import LieGroup, Manifold
from theseus.optimizer import Optimizer, OptimizerInfo
from theseus.optimizer.linear import LinearSolver
@@ -146,8 +147,14 @@ def dtype(self) -> torch.dtype:
return self.objective.dtype


- def _forward(objective, optimizer, optimizer_kwargs, input_tensors):
+ def _forward(
+ objective: Objective,
+ optimizer: Optimizer,
+ optimizer_kwargs: Dict[str, Any],
+ input_tensors: Dict[str, torch.Tensor],
+ ):
objective.update(input_tensors)
+ optimizer_kwargs[__FROM_THESEUS_LAYER_TOKEN__] = True
info = optimizer.optimize(**optimizer_kwargs)
vars = [var.tensor for var in objective.optim_vars.values()]
return vars, info
38 changes: 24 additions & 14 deletions tutorials/00_introduction.ipynb
@@ -51,8 +51,8 @@
"output_type": "stream",
"text": [
"x: Named variable with 3-D data of batch size 2:\n",
" Variable(tensor=tensor([[ 1.1093, 1.2648, 1.7268],\n",
" [ 0.4393, -0.1344, 2.8048]]), name=x)\n",
" Variable(tensor=tensor([[-0.5966, 0.7318, 2.2279],\n",
" [ 0.6040, 0.3843, -2.0580]]), name=x)\n",
"\n",
"y: Un-named variable:\n",
" Variable(tensor=tensor([[0.]]), name=Variable__1)\n",
@@ -96,8 +96,8 @@
"output_type": "stream",
"text": [
"Example usage of `update`: \n",
" Original variable: Variable(tensor=tensor([[ 1.1093, 1.2648, 1.7268],\n",
" [ 0.4393, -0.1344, 2.8048]]), name=x)\n",
" Original variable: Variable(tensor=tensor([[-0.5966, 0.7318, 2.2279],\n",
" [ 0.6040, 0.3843, -2.0580]]), name=x)\n",
" Updated variable: Variable(tensor=tensor([[1., 1., 1.],\n",
" [1., 1., 1.]]), name=x)\n",
"\n",
@@ -449,6 +449,14 @@
"y: tensor([[2.]]) vs b: tensor([[2.]])\n",
"Objective after optimization: tensor([0.])\n"
]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/private/home/lep/code/theseus/theseus/optimizer/optimizer.py:42: UserWarning: Vectorization is off by default when not running from TheseusLayer. Using TheseusLayer is the recommended way to run our optimizers.\n",
+ " warnings.warn(\n"
+ ]
+ }
],
"source": [
@@ -478,7 +486,7 @@
"source": [
"## 6. TheseusLayer\n",
"\n",
"The `TheseusLayer` provides an interface between `torch` code upstream/downstream, and Theseus objectives and optimizers. The `forward()` method combines the functionality of `Objective.update()` and `Optimizer.optimizer()` into a single call. It receives an update dictionary as input, and returns a dictionary with the torch data of optimization variables after optimization, as well as the optimizer's output info."
"As the warning above indicates, the recommended way to run our optimizers is via `TheseusLayer`. The `TheseusLayer` provides an interface between `torch` code upstream/downstream, and Theseus objectives and optimizers. The `forward()` method combines the functionality of `Objective.update()` and `Optimizer.optimizer()` into a single call. It receives an update dictionary as input, and returns a dictionary with the torch data of optimization variables after optimization, as well as the optimizer's output info."
]
},
{
Expand All @@ -491,9 +499,9 @@
"output_type": "stream",
"text": [
"After calling TheseusLayer's forward():\n",
" Values: {'x': tensor([[1.0000]]), 'y': tensor([[2.]])}\n",
" Info: OptimizerInfo(best_solution={'x': tensor([[1.0000]]), 'y': tensor([[2.]])}, status=array([<LinearOptimizerStatus.CONVERGED: 1>], dtype=object))\n",
" Optimized objective: tensor([1.4211e-14])\n"
" Values: {'x': tensor([[1.]]), 'y': tensor([[2.]])}\n",
" Info: OptimizerInfo(best_solution={'x': tensor([[1.]]), 'y': tensor([[2.]])}, status=array([<LinearOptimizerStatus.CONVERGED: 1>], dtype=object))\n",
" Optimized objective: tensor([0.])\n"
]
}
],
@@ -525,13 +533,10 @@
}
],
"metadata": {
"interpreter": {
"hash": "cc5406e9a0deef8e8d80dfeae7f152b84172dd1229ee5c42b512f2c6ec6850e3"
},
"kernelspec": {
"display_name": "Theseus",
"display_name": "Python 3.9.13 ('theseus_test')",
"language": "python",
"name": "theseus_test"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -543,7 +548,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
"version": "3.9.13"
},
"vscode": {
"interpreter": {
"hash": "55bd317ee24fef6b327817e02b2800e769f57ba3484ea1f59774090950883652"
}
}
},
"nbformat": 4,