
Commit 059549b

Selective rollback of fantasize in Fully Bayesian GPs
1 parent f61c430 · commit 059549b

3 files changed: +71 −35 lines

botorch/models/fully_bayesian.py (19 additions, 14 deletions)

@@ -549,24 +549,29 @@ def condition_on_observations(
             identical across models or unique per-model).
 
         Args:
-            X: (Tensor): A `(batch_shape) x num_samples x d`-dim Tensor, where `d` is
+            X: A `batch_shape x num_samples x d`-dim Tensor, where `d` is
                 the dimension of the feature space and `batch_shape` is the number of
-                sampled models.
-            Y (Tensor): A `(batch_shape) x num_samples x 1`-dim Tensor, where `d` is
+                sampled models.
+            Y: A `batch_shape x num_samples x 1`-dim Tensor, where `d` is
                 the dimension of the feature space and `batch_shape` is the number of
-                sampled models.
+                sampled models.
 
         Returns:
-            BatchedMultiOutputGPyTorchModel: _description_
+            BatchedMultiOutputGPyTorchModel: A fully bayesian model conditioned on
+                given observations. The returned model has `batch_shape` copies of the
+                training data in case of identical observations (and `batch_shape`
+                training datasets otherwise).
         """
-        if X.ndim < 3 or Y.ndim < 3:
-            # The can either be thrown here or in GPyTorch, when the inference of the
-            # batch dimension fails since the training data by default does not have
-            # a batch shape.
-            raise ValueError(
-                "Conditioning in fully Bayesian models must contain a batch dimension."
-                "Add a batch dimension (the leading dim) with length matching the "
-                "number of hyperparameter sets to the conditioned data."
-            )
+        if X.ndim == 2 and Y.ndim == 2:
+            # To avoid an error in GPyTorch when inferring the batch dimension, we add
+            # the explicit batch shape here. The result is that the conditioned model
+            # will have 'batch_shape' copies of the training data.
+            X = X.repeat(self.batch_shape + (1, 1))
+            Y = Y.repeat(self.batch_shape + (1, 1))
+
+        elif X.ndim < Y.ndim:
+            # We need to duplicate the training data to enable correct batch
+            # size inference in gpytorch.
+            X = X.repeat(*(Y.shape[:-2] + (1, 1)))
 
         return super().condition_on_observations(X, Y, **kwargs)
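The two new branches only tile unbatched conditioning data across the leading model-batch dimension so that GPyTorch can infer batch sizes downstream. A minimal sketch of that shape promotion on plain tensors (the sizes here are illustrative, not taken from the commit):

import torch

batch_shape = torch.Size([3])   # e.g. 3 MCMC-sampled hyperparameter sets
X = torch.rand(5, 2)            # unbatched inputs: n x d
Y = torch.rand(5, 1)            # unbatched outcomes: n x 1

if X.ndim == 2 and Y.ndim == 2:
    # same observations for every model: add an explicit batch dimension
    X = X.repeat(batch_shape + (1, 1))  # -> 3 x 5 x 2
    Y = Y.repeat(batch_shape + (1, 1))  # -> 3 x 5 x 1
elif X.ndim < Y.ndim:
    # shared X but per-model Y: tile X to match Y's batch dimensions
    X = X.repeat(*(Y.shape[:-2] + (1, 1)))

print(X.shape, Y.shape)  # torch.Size([3, 5, 2]) torch.Size([3, 5, 1])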

botorch/models/gpytorch.py (3 additions, 7 deletions)

@@ -223,12 +223,7 @@ def condition_on_observations(self, X: Tensor, Y: Tensor, **kwargs: Any) -> Model:
         >>> new_Y = torch.sin(new_X[:, 0]) + torch.cos(new_X[:, 1])
         >>> model = model.condition_on_observations(X=new_X, Y=new_Y)
         """
-        Yvar = kwargs.get("noise", None)
-
-        # for fully bayesian models, the keyword argument "noise": None
-        # throws an error in LinearOperator releted to inferring batch dims
-        if "noise" in kwargs and kwargs["noise"] is None:
-            del kwargs["noise"]
+        Yvar = kwargs.pop("noise", None)
 
         if hasattr(self, "outcome_transform"):
             # pass the transformed data to get_fantasy_model below

@@ -496,7 +491,8 @@ def condition_on_observations(
         fantasy_model._input_batch_shape = fantasy_model.train_targets.shape[
             : (-1 if self._num_outputs == 1 else -2)
         ]
-
+        if not self._is_fully_bayesian:
+            fantasy_model._aug_batch_shape = fantasy_model.train_targets.shape[:-1]
         return fantasy_model
 
     def subset_output(self, idcs: List[int]) -> BatchedMultiOutputGPyTorchModel:
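The first hunk collapses the get/check/delete dance into a single dict.pop, which returns the value and removes the key in one step, so a "noise": None entry can no longer leak through **kwargs into LinearOperator. A small standalone illustration of the equivalence (not code from the commit):

kwargs = {"noise": None, "other": 1}

# before: fetch, then delete only when the value is None
Yvar = kwargs.get("noise", None)
if "noise" in kwargs and kwargs["noise"] is None:
    del kwargs["noise"]

# after: fetch and remove in one step; "noise" never stays behind
kwargs = {"noise": None, "other": 1}
Yvar = kwargs.pop("noise", None)
assert "noise" not in kwargs

One subtle difference: pop also removes a non-None noise from kwargs, whereas the old code left it in place; presumably the downstream calls receive it via Yvar instead.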

test/models/test_fully_bayesian.py (49 additions, 14 deletions)

@@ -125,13 +125,15 @@ def _get_unnormalized_data(self, infer_noise: bool, **tkwargs):
         return train_X, train_Y, train_Yvar, test_X
 
     def _get_unnormalized_condition_data(
-        self, num_models: int, infer_noise: bool, **tkwargs
+        self, num_models: int, num_cond: int, infer_noise: bool, **tkwargs
     ):
         with torch.random.fork_rng():
             torch.manual_seed(0)
-            cond_X = 5 + 5 * torch.rand(num_models, 2, 4, **tkwargs)
+            cond_X = 5 + 5 * torch.rand(num_models, num_cond, 4, **tkwargs)
             cond_Y = 10 + torch.sin(cond_X[..., :1])
-            cond_Yvar = None if infer_noise else 0.1 * torch.ones(cond_Y.shape)
+            cond_Yvar = (
+                None if infer_noise else 0.1 * torch.ones(cond_Y.shape, **tkwargs)
+            )
         return cond_X, cond_Y, cond_Yvar
 
     def _get_mcmc_samples(

@@ -667,11 +669,15 @@ def test_custom_pyro_model(self):
         )
 
     def test_condition_on_observation(self):
-
+        # The following conditioned data shapes should work; the output describes
+        # the training data shape after conditioning (batch shape is req. in gpytorch):
+        # X: num_models x n x d, Y: num_models x n x 1 --> num_models x n x d
+        # X: n x d, Y: n x 1 --> num_models x n x d
+        # X: n x d, Y: num_models x n x 1 --> num_models x n x d
         num_models = 3
         num_cond = 2
         for infer_noise, dtype in itertools.product(
-            (True,), (torch.float, torch.double)
+            (True, False), (torch.float, torch.double)
         ):
             tkwargs = {"device": self.device, "dtype": dtype}
             train_X, train_Y, train_Yvar, test_X = self._get_unnormalized_data(

@@ -681,7 +687,10 @@ def test_condition_on_observation(self):
             # condition on different observations per model to obtain num_models sets
             # of training data
             cond_X, cond_Y, cond_Yvar = self._get_unnormalized_condition_data(
-                num_models=num_models, infer_noise=infer_noise, **tkwargs
+                num_models=num_models,
+                num_cond=num_cond,
+                infer_noise=infer_noise,
+                **tkwargs
             )
             model = SaasFullyBayesianSingleTaskGP(
                 train_X=train_X,

@@ -712,8 +721,12 @@ def test_condition_on_observation(self):
                 cond_model.train_inputs[0].shape,
                 torch.Size([num_models, num_train + num_cond, num_dims]),
             )
+
+            # the batch shape of the conditioned model is added during conditioning
+            self.assertEqual(cond_model.batch_shape, torch.Size([num_models]))
+
             # condition on identical sets of data (i.e. one set) for all models
-            # i.e, with no batch shape. This should not work.
+            # i.e., with no batch shape. This infers the batch shape.
             cond_X_nobatch, cond_Y_nobatch = cond_X[0], cond_Y[0]
             model = SaasFullyBayesianSingleTaskGP(
                 train_X=train_X,

@@ -728,14 +741,36 @@ def test_condition_on_observation(self):
             )
             model.load_mcmc_samples(mcmc_samples)
 
-            # This should __NOT__ work - conditioning must have a batch size for the
-            # conditioned point and is not supported (the training data by default
-            # does not have a batch size)
+            # conditioning without a batch size - the resulting conditioned model
+            # will still have a batch size
             model.posterior(train_X)
-            with self.assertRaises(ValueError):
-                model.condition_on_observations(
-                    cond_X_nobatch, cond_Y_nobatch, noise=cond_Yvar
-                )
+            cond_model = model.condition_on_observations(
+                cond_X_nobatch, cond_Y_nobatch, noise=cond_Yvar
+            )
+            self.assertEqual(
+                cond_model.train_inputs[0].shape,
+                torch.Size([num_models, num_train + num_cond, num_dims]),
+            )
+
+            # test repeated conditioning
+            repeat_cond_X = cond_X + 5
+            repeat_cond_model = cond_model.condition_on_observations(
+                repeat_cond_X, cond_Y, noise=cond_Yvar
+            )
+            self.assertEqual(
+                repeat_cond_model.train_inputs[0].shape,
+                torch.Size([num_models, num_train + 2 * num_cond, num_dims]),
+            )
+
+            # test repeated conditioning without a batch size
+            repeat_cond_X_nobatch = cond_X_nobatch + 10
+            repeat_cond_model2 = repeat_cond_model.condition_on_observations(
+                repeat_cond_X_nobatch, cond_Y_nobatch, noise=cond_Yvar
+            )
+            self.assertEqual(
+                repeat_cond_model2.train_inputs[0].shape,
+                torch.Size([num_models, num_train + 3 * num_cond, num_dims]),
+            )
 
     def test_bisect(self):
         def f(x):
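Taken together, the tests exercise the new behavior end to end: unbatched conditioning data is now accepted and tiled across the MCMC model batch instead of raising a ValueError. A minimal usage sketch against the public API (hyperparameters and shapes are illustrative, and it assumes a BoTorch build that includes this commit):

import torch
from botorch.fit import fit_fully_bayesian_model_nuts
from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP

train_X = torch.rand(10, 4, dtype=torch.double)
train_Y = torch.sin(train_X[:, :1])

model = SaasFullyBayesianSingleTaskGP(train_X=train_X, train_Y=train_Y)
fit_fully_bayesian_model_nuts(
    model, warmup_steps=32, num_samples=16, thinning=8, disable_progbar=True
)
num_models = model.batch_shape[0]  # number of retained MCMC samples

model.posterior(train_X)  # evaluate once, as the test does before conditioning

# unbatched conditioning data: n x d and n x 1, no leading model-batch dim
cond_X = torch.rand(2, 4, dtype=torch.double)
cond_Y = torch.sin(cond_X[:, :1])

# previously raised ValueError; now the data is tiled across the model batch
cond_model = model.condition_on_observations(cond_X, cond_Y)
print(cond_model.train_inputs[0].shape)  # (num_models, 12, 4)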
