Commit b4040b5

Added fantasizing to fully Bayesian models, expanded the tests to cover fantasization and repeated conditioning, and allowed conditioning on data without a batch shape (the batch shape is inferred in sensible cases)
1 parent f61c430 commit b4040b5

3 files changed: +121 additions, -35 deletions

botorch/models/fully_bayesian.py

Lines changed: 24 additions & 15 deletions
@@ -39,6 +39,7 @@
 import torch
 from botorch.acquisition.objective import PosteriorTransform
 from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel
+from botorch.models.model import FantasizeMixin
 from botorch.models.transforms.input import InputTransform
 from botorch.models.transforms.outcome import OutcomeTransform
 from botorch.models.utils import validate_input_scaling
@@ -309,7 +310,9 @@ def load_mcmc_samples(
     return mean_module, covar_module, likelihood


-class SaasFullyBayesianSingleTaskGP(ExactGP, BatchedMultiOutputGPyTorchModel):
+class SaasFullyBayesianSingleTaskGP(
+    ExactGP, BatchedMultiOutputGPyTorchModel, FantasizeMixin
+):
     r"""A fully Bayesian single-task GP model with the SAAS prior.

     This model assumes that the inputs have been normalized to [0, 1]^d and that
@@ -549,24 +552,30 @@ def condition_on_observations(
         identical across models or unique per-model).

         Args:
-            X (Tensor): A `(batch_shape) x num_samples x d`-dim Tensor, where `d` is
+            X: A `batch_shape x num_samples x d`-dim Tensor, where `d` is
                 the dimension of the feature space and `batch_shape` is the number of
-                sampled models.
-            Y (Tensor): A `(batch_shape) x num_samples x 1`-dim Tensor, where `d` is
+                sampled models.
+            Y: A `batch_shape x num_samples x 1`-dim Tensor, where `d` is
                 the dimension of the feature space and `batch_shape` is the number of
-                sampled models.
+                sampled models.

         Returns:
-            BatchedMultiOutputGPyTorchModel: _description_
+            BatchedMultiOutputGPyTorchModel: A fully Bayesian model conditioned on
+                the given observations. The returned model has `batch_shape` copies of
+                the training data in case of identical observations (and `batch_shape`
+                training datasets otherwise).
         """
-        if X.ndim < 3 or Y.ndim < 3:
-            # This can either be thrown here or in GPyTorch, when the inference of the
-            # batch dimension fails since the training data by default does not have
-            # a batch shape.
-            raise ValueError(
-                "Conditioning in fully Bayesian models must contain a batch dimension. "
-                "Add a batch dimension (the leading dim) with length matching the "
-                "number of hyperparameter sets to the conditioned data."
-            )
+        if X.ndim == 2 and Y.ndim == 2:
+            # To avoid an error in GPyTorch when inferring the batch dimension, we add
+            # the explicit batch shape here. The result is that the conditioned model
+            # will have `batch_shape` copies of the training data.
+            X = X.repeat(self.batch_shape + (1, 1))
+            Y = Y.repeat(self.batch_shape + (1, 1))
+
+        elif X.ndim < Y.ndim:
+            # This happens when fantasizing: one set of training data and multiple Y.
+            # We need to duplicate the training data to enable correct batch
+            # size inference in GPyTorch.
+            X = X.repeat(*(Y.shape[:-2] + (1, 1)))

         return super().condition_on_observations(X, Y, **kwargs)
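Both branches are pure shape bookkeeping. Here is a minimal sketch of what they do with plain tensors; the concrete sizes (3 models, 2 points, 19 fantasies) are illustrative assumptions, not values from the commit:

import torch

batch_shape = torch.Size([3])  # e.g. three MCMC hyperparameter samples
X = torch.rand(2, 4)           # num_cond x d, no batch dimension
Y = torch.rand(2, 1)           # num_cond x 1, no batch dimension

# Branch 1: un-batched X and Y are replicated across all sampled models.
X_cond = X.repeat(batch_shape + (1, 1))  # -> 3 x 2 x 4
Y_cond = Y.repeat(batch_shape + (1, 1))  # -> 3 x 2 x 1

# Branch 2 (fantasizing): one X, a batch of sampled Y; X is tiled to match.
Y_fant = torch.rand(19, 3, 2, 1)  # fantasy_size x num_models x num_cond x 1
X_fant = X.repeat(*(Y_fant.shape[:-2] + (1, 1)))  # -> 19 x 3 x 2 x 4

assert X_cond.shape == torch.Size([3, 2, 4])
assert X_fant.shape == torch.Size([19, 3, 2, 4])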

botorch/models/gpytorch.py

Lines changed: 3 additions & 7 deletions
@@ -223,12 +223,7 @@ def condition_on_observations(self, X: Tensor, Y: Tensor, **kwargs: Any) -> Mode
         >>> new_Y = torch.sin(new_X[:, 0]) + torch.cos(new_X[:, 1])
         >>> model = model.condition_on_observations(X=new_X, Y=new_Y)
         """
-        Yvar = kwargs.get("noise", None)
-
-        # for fully Bayesian models, the keyword argument "noise": None
-        # throws an error in LinearOperator related to inferring batch dims
-        if "noise" in kwargs and kwargs["noise"] is None:
-            del kwargs["noise"]
+        Yvar = kwargs.pop("noise", None)

         if hasattr(self, "outcome_transform"):
             # pass the transformed data to get_fantasy_model below
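The simplification works because `dict.pop` with a default both returns the value (or the default) and removes the key, so a `"noise": None` entry can no longer leak through to LinearOperator's batch-dim inference. A quick illustration of the equivalence:

kwargs = {"noise": None}
Yvar = kwargs.pop("noise", None)  # returns None and removes the key
assert Yvar is None and "noise" not in kwargs

kwargs = {}
Yvar = kwargs.pop("noise", None)  # key absent: returns the default, no KeyError
assert Yvar is None and kwargs == {}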
@@ -496,7 +491,8 @@ def condition_on_observations(
         fantasy_model._input_batch_shape = fantasy_model.train_targets.shape[
             : (-1 if self._num_outputs == 1 else -2)
         ]
-
+        if not self._is_fully_bayesian:
+            fantasy_model._aug_batch_shape = fantasy_model.train_targets.shape[:-1]
         return fantasy_model

     def subset_output(self, idcs: List[int]) -> BatchedMultiOutputGPyTorchModel:
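For intuition, a rough sketch of the slicing on hypothetical fantasy-model target shapes (the sizes below are made up for illustration); the guard presumably reflects that a fully Bayesian model's leading batch dim holds MCMC samples rather than outputs:

import torch

targets_single = torch.zeros(19, 3, 12)    # fantasy_size x num_models x n
targets_multi = torch.zeros(19, 3, 2, 12)  # ... x num_outputs x n

# _input_batch_shape drops the data dim, plus the output dim if there is one.
print(targets_single.shape[:-1])  # torch.Size([19, 3])
print(targets_multi.shape[:-2])   # torch.Size([19, 3])

# _aug_batch_shape keeps the output dim; per the diff it is only set when the
# model is not fully Bayesian.
print(targets_multi.shape[:-1])   # torch.Size([19, 3, 2])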

test/models/test_fully_bayesian.py

Lines changed: 94 additions & 13 deletions
@@ -52,6 +52,7 @@
 from botorch.models.transforms import Normalize, Standardize
 from botorch.posteriors.fully_bayesian import batched_bisect, GaussianMixturePosterior
 from botorch.sampling.get_sampler import get_sampler
+from botorch.sampling.normal import SobolQMCNormalSampler
 from botorch.utils.datasets import SupervisedDataset
 from botorch.utils.multi_objective.box_decompositions.non_dominated import (
     NondominatedPartitioning,
@@ -125,15 +126,28 @@ def _get_unnormalized_data(self, infer_noise: bool, **tkwargs):
         return train_X, train_Y, train_Yvar, test_X

     def _get_unnormalized_condition_data(
-        self, num_models: int, infer_noise: bool, **tkwargs
+        self, num_models: int, num_cond: int, infer_noise: bool, **tkwargs
     ):
         with torch.random.fork_rng():
             torch.manual_seed(0)
-            cond_X = 5 + 5 * torch.rand(num_models, 2, 4, **tkwargs)
+            cond_X = 5 + 5 * torch.rand(num_models, num_cond, 4, **tkwargs)
             cond_Y = 10 + torch.sin(cond_X[..., :1])
-            cond_Yvar = None if infer_noise else 0.1 * torch.ones(cond_Y.shape)
+            cond_Yvar = (
+                None if infer_noise else 0.1 * torch.ones(cond_Y.shape, **tkwargs)
+            )
         return cond_X, cond_Y, cond_Yvar

+    def _get_unnormalized_fantasy_data(
+        self, num_cond: int, infer_noise: bool, **tkwargs
+    ):
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            fantasy_X = 5 + 5 * torch.rand(num_cond, 4, **tkwargs)
+            fantasy_Yvar = (
+                None if infer_noise else 0.1 * torch.ones((num_cond, 1), **tkwargs)
+            )
+        return fantasy_X, fantasy_Yvar
+
     def _get_mcmc_samples(
         self, num_samples: int, dim: int, infer_noise: bool, **tkwargs
     ):
@@ -671,7 +685,7 @@ def test_condition_on_observation(self):
         num_models = 3
         num_cond = 2
         for infer_noise, dtype in itertools.product(
-            (True,), (torch.float, torch.double)
+            (True, False), (torch.float, torch.double)
         ):
             tkwargs = {"device": self.device, "dtype": dtype}
             train_X, train_Y, train_Yvar, test_X = self._get_unnormalized_data(
@@ -681,7 +695,10 @@ def test_condition_on_observation(self):
             # condition on different observations per model to obtain num_models sets
             # of training data
             cond_X, cond_Y, cond_Yvar = self._get_unnormalized_condition_data(
-                num_models=num_models, infer_noise=infer_noise, **tkwargs
+                num_models=num_models,
+                num_cond=num_cond,
+                infer_noise=infer_noise,
+                **tkwargs
             )
             model = SaasFullyBayesianSingleTaskGP(
                 train_X=train_X,
@@ -712,8 +729,12 @@ def test_condition_on_observation(self):
                 cond_model.train_inputs[0].shape,
                 torch.Size([num_models, num_train + num_cond, num_dims]),
             )
+
+            # the batch shape of the conditioned model is added during conditioning
+            self.assertEqual(cond_model.batch_shape, torch.Size([num_models]))
+
             # condition on identical sets of data (i.e. one set) for all models,
-            # i.e. with no batch shape. This should not work.
+            # i.e. with no batch shape. This infers the batch shape.
             cond_X_nobatch, cond_Y_nobatch = cond_X[0], cond_Y[0]
             model = SaasFullyBayesianSingleTaskGP(
                 train_X=train_X,
@@ -728,14 +749,74 @@ def test_condition_on_observation(self):
             )
             model.load_mcmc_samples(mcmc_samples)

-            # This should __NOT__ work - conditioning must have a batch size for the
-            # conditioned point and is not supported (the training data by default
-            # does not have a batch size)
+            # conditioning without a batch size - the resulting conditioned model
+            # will still have a batch size
             model.posterior(train_X)
-            with self.assertRaises(ValueError):
-                model.condition_on_observations(
-                    cond_X_nobatch, cond_Y_nobatch, noise=cond_Yvar
-                )
+            cond_model = model.condition_on_observations(
+                cond_X_nobatch, cond_Y_nobatch, noise=cond_Yvar
+            )
+            self.assertEqual(
+                cond_model.train_inputs[0].shape,
+                torch.Size([num_models, num_train + num_cond, num_dims]),
+            )
+
+            # test repeated conditioning
+            repeat_cond_X = cond_X + 5
+            repeat_cond_model = cond_model.condition_on_observations(
+                repeat_cond_X, cond_Y, noise=cond_Yvar
+            )
+            self.assertEqual(
+                repeat_cond_model.train_inputs[0].shape,
+                torch.Size([num_models, num_train + 2 * num_cond, num_dims]),
+            )
+
+            # test repeated conditioning without a batch size
+            repeat_cond_X_nobatch = cond_X_nobatch + 10
+            repeat_cond_model2 = repeat_cond_model.condition_on_observations(
+                repeat_cond_X_nobatch, cond_Y_nobatch, noise=cond_Yvar
+            )
+            self.assertEqual(
+                repeat_cond_model2.train_inputs[0].shape,
+                torch.Size([num_models, num_train + 3 * num_cond, num_dims]),
+            )
+
+    def test_fantasize(self):
+        num_models = 3
+        fantasy_size = 19
+        num_cond = 2
+        for infer_noise, dtype in itertools.product(
+            (True, False), (torch.float, torch.double)
+        ):
+            tkwargs = {"device": self.device, "dtype": dtype}
+            train_X, train_Y, train_Yvar, _ = self._get_unnormalized_data(
+                infer_noise=infer_noise, **tkwargs
+            )
+            num_train, num_dims = train_X.shape
+
+            # the fantasized X should not have a batch dim
+            fantasy_X, fantasy_Yvar = self._get_unnormalized_fantasy_data(
+                infer_noise=infer_noise, num_cond=num_cond, **tkwargs
+            )
+            model = SaasFullyBayesianSingleTaskGP(
+                train_X=train_X,
+                train_Y=train_Y,
+                train_Yvar=train_Yvar,
+            )
+            mcmc_samples = self._get_mcmc_samples(
+                num_samples=num_models,
+                dim=train_X.shape[-1],
+                infer_noise=infer_noise,
+                **tkwargs
+            )
+            model.load_mcmc_samples(mcmc_samples)
+            sampler = SobolQMCNormalSampler(torch.Size([fantasy_size]))
+            fantasy_model = model.fantasize(
+                fantasy_X, sampler, observation_noise=fantasy_Yvar
+            )
+            self.assertEqual(
+                fantasy_model.train_inputs[0].shape,
+                torch.Size([fantasy_size, num_models, num_train + num_cond, num_dims]),
+            )

     def test_bisect(self):
         def f(x):
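Taken together, the new test exercises a flow like the following minimal sketch. The NUTS settings are deliberately tiny illustrative assumptions (real use needs far more samples), and the exact fantasize signature may vary across BoTorch versions:

import torch
from botorch.fit import fit_fully_bayesian_model_nuts
from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP
from botorch.sampling.normal import SobolQMCNormalSampler

train_X = torch.rand(10, 4, dtype=torch.double)
train_Y = torch.sin(train_X[:, :1])

model = SaasFullyBayesianSingleTaskGP(train_X=train_X, train_Y=train_Y)
fit_fully_bayesian_model_nuts(
    model, warmup_steps=32, num_samples=16, thinning=8, disable_progbar=True
)
num_models = model.batch_shape[0]  # number of retained MCMC samples

# Un-batched fantasy points: the model batch shape is inferred internally.
fantasy_X = torch.rand(2, 4, dtype=torch.double)
sampler = SobolQMCNormalSampler(torch.Size([19]))
fantasy_model = model.fantasize(fantasy_X, sampler)

# fantasy_size x num_models x (num_train + num_fantasy) x d
assert fantasy_model.train_inputs[0].shape == torch.Size([19, num_models, 12, 4])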
