ESS: Allowing diagonal covariance root with fixed indices (#2283)

SebastianAment · facebook-github-bot · commit fb437751d2ac · 2024-04-05T13:12:44.000-07:00
Summary:

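This commit adds support for passing a diagonal covariance root together with fixed indices to the linear elliptical slice sampler (ESS). Combining a covariance root with fixed indices is not supported in general, because the root of the covariance restricted to the non-fixed dimensions would have to be re-factorized. When the root is diagonal (a `DiagLinearOperator`), the restricted root is obtained by sub-selecting the diagonal entries of the non-fixed dimensions, so no re-factorization is needed.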

Differential Revision: D55808235
diff --git a/botorch/utils/probability/lin_ess.py b/botorch/utils/probability/lin_ess.py
@@ -37,6 +37,7 @@
 
 import torch
 from botorch.utils.sampling import PolytopeSampler
+from linear_operator.operators import DiagLinearOperator, LinearOperator
 from torch import Tensor
 
 _twopi = 2.0 * math.pi
@@ -58,8 +59,8 @@ def __init__(
         interior_point: Optional[Tensor] = None,
         fixed_indices: Optional[Union[List[int], Tensor]] = None,
         mean: Optional[Tensor] = None,
-        covariance_matrix: Optional[Tensor] = None,
-        covariance_root: Optional[Tensor] = None,
+        covariance_matrix: Optional[Union[Tensor, LinearOperator]] = None,
+        covariance_root: Optional[Union[Tensor, LinearOperator]] = None,
         check_feasibility: bool = False,
         burnin: int = 0,
         thinning: int = 0,
@@ -88,7 +89,10 @@ def __init__(
                 distribution (if omitted, use the identity).
             covariance_root: A `d x d`-dim root of the covariance matrix such that
                 covariance_root @ covariance_root.T = covariance_matrix. NOTE: This
-                matrix is assumed to be lower triangular.
+                matrix is assumed to be lower triangular. covariance_root can only be
+                passed in conjunction with fixed_indices if covariance_root is a
+                DiagLinearOperator. Otherwise the factorization would need to be
+                recomputed, since `_standardize` solves a linear system with the root.
             check_feasibility: If True, raise an error if the sampling results in an
                 infeasible sample. This creates some overhead and so is switched off
                 by default.
@@ -123,14 +127,16 @@ def __init__(
         self._Az, self._bz = A, b
         self._is_fixed, self._not_fixed = None, None
         if fixed_indices is not None:
-            mean, covariance_matrix = self._fixed_features_initialization(
-                A=A,
-                b=b,
-                interior_point=interior_point,
-                fixed_indices=fixed_indices,
-                mean=mean,
-                covariance_matrix=covariance_matrix,
-                covariance_root=covariance_root,
+            mean, covariance_matrix, covariance_root = (
+                self._fixed_features_initialization(
+                    A=A,
+                    b=b,
+                    interior_point=interior_point,
+                    fixed_indices=fixed_indices,
+                    mean=mean,
+                    covariance_matrix=covariance_matrix,
+                    covariance_root=covariance_root,
+                )
             )
 
         self._mean = mean
@@ -185,7 +191,8 @@ def _fixed_features_initialization(
                 "If `fixed_indices` are provided, an interior point must also be "
                 "provided in order to infer feasible values of the fixed features."
             )
-        if covariance_root is not None:
+        root_is_diag = isinstance(covariance_root, DiagLinearOperator)
+        if covariance_root is not None and not root_is_diag:
             raise ValueError(
                 "Provide either covariance_root or fixed_indices, not both."
             )
@@ -205,7 +212,10 @@ def _fixed_features_initialization(
             covariance_matrix = covariance_matrix[
                 not_fixed.unsqueeze(-1), not_fixed.unsqueeze(0)
             ]
-        return mean, covariance_matrix
+        if root_is_diag:  # in the special case of diagonal root, can subselect
+            covariance_root = DiagLinearOperator(covariance_root.diagonal()[not_fixed])
+
+        return mean, covariance_matrix, covariance_root
 
     def _standardization_initialization(self) -> None:
         """For non-standard mean and covariance, we're going to rewrite the problem as
@@ -482,8 +492,10 @@ def _standardize(self, x: Tensor) -> Tensor:
         z = x
         if self._mean is not None:
             z = z - self._mean
-        if self._covariance_root is not None:
-            z = torch.linalg.solve_triangular(self._covariance_root, z, upper=False)
+        root = self._covariance_root
+        if root is not None:
+            z = torch.linalg.solve_triangular(root, z, upper=False)
+
         return z
 
     def _unstandardize(self, z: Tensor) -> Tensor:
diff --git a/test/utils/probability/test_lin_ess.py b/test/utils/probability/test_lin_ess.py
@@ -17,6 +17,7 @@
 from botorch.utils.constraints import get_monotonicity_constraints
 from botorch.utils.probability.lin_ess import LinearEllipticalSliceSampler
 from botorch.utils.testing import BotorchTestCase
+from linear_operator.operators import DiagLinearOperator
 from torch import Tensor
 
 
@@ -431,6 +432,19 @@ def test_multivariate(self):
                     covariance_root=torch.eye(d, **tkwargs),
                 )
 
+            # providing a diagonal covariance_root should work with fixed indices
+            torch.manual_seed(1234)
+            sampler = LinearEllipticalSliceSampler(
+                inequality_constraints=(A, b),
+                interior_point=interior_point,
+                fixed_indices=[0],
+                covariance_root=DiagLinearOperator(torch.full((d,), 100, **tkwargs)),
+            )
+            num_samples = 16
+            X_fixed = sampler.draw(n=num_samples)
+            self.assertTrue((X_fixed[:, 0] == interior_point[0]).all())
+            self.assertGreater(X_fixed.std().item(), 10.0)  # false if sigma = 1
+
             # high dimensional test case
             # Encodes order constraints on all d variables: Ax < b <-> x[i] < x[i + 1]
             d = 128