
Commit f61c430

Fixed condition_on_observations in fully Bayesian models
1 parent 8f1df5a commit f61c430

File tree

3 files changed: +121 -5 lines changed

botorch/models/fully_bayesian.py

Lines changed: 32 additions & 4 deletions
@@ -498,9 +498,8 @@ def forward(self, X: Tensor) -> MultivariateNormal:
         rest of this method will not run.
         """
         self._check_if_fitted()
-        x = X.unsqueeze(MCMC_DIM)
-        mean_x = self.mean_module(x)
-        covar_x = self.covar_module(x)
+        mean_x = self.mean_module(X)
+        covar_x = self.covar_module(X)
         return MultivariateNormal(mean_x, covar_x)

     # pyre-ignore[14]: Inconsistent override
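For orientation, MCMC_DIM marks the model-batch position (defined as -3 in this module); the hunk above removes the unsqueeze from forward because posterior (below) now batches the inputs before they reach forward. A minimal sketch of the shape effect, with illustrative sizes:

import torch

MCMC_DIM = -3  # model-batch dimension, as defined in botorch/models/fully_bayesian.py

X = torch.rand(5, 4)              # q x d test inputs (illustrative sizes)
x = X.unsqueeze(MCMC_DIM)         # 1 x q x d: broadcasts across the sampled models
assert x.shape == torch.Size([1, 5, 4])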
@@ -534,11 +533,40 @@ def posterior(
         """
         self._check_if_fitted()
         posterior = super().posterior(
-            X=X,
+            X=X.unsqueeze(MCMC_DIM),
             output_indices=output_indices,
             observation_noise=observation_noise,
             posterior_transform=posterior_transform,
             **kwargs,
         )
         posterior = GaussianMixturePosterior(distribution=posterior.distribution)
         return posterior
+
+    def condition_on_observations(
+        self, X: Tensor, Y: Tensor, **kwargs: Any
+    ) -> BatchedMultiOutputGPyTorchModel:
+        """Conditions on additional observations for a fully Bayesian model
+        (either identical across models or unique per-model).
+
+        Args:
+            X: A `(batch_shape) x num_samples x d`-dim Tensor, where `d` is the
+                dimension of the feature space and `batch_shape` is the number
+                of sampled models.
+            Y: A `(batch_shape) x num_samples x 1`-dim Tensor of observed
+                outcomes, where `batch_shape` is the number of sampled models.
+
+        Returns:
+            BatchedMultiOutputGPyTorchModel: A fully Bayesian model conditioned
+                on the additional observations.
+        """
+        if X.ndim < 3 or Y.ndim < 3:
+            # This error can either be raised here or in GPyTorch, when inference
+            # of the batch dimension fails since the training data by default
+            # does not have a batch shape.
+            raise ValueError(
+                "Conditioning in fully Bayesian models must contain a batch "
+                "dimension. Add a batch dimension (the leading dim) with length "
+                "matching the number of hyperparameter sets to the conditioned data."
+            )
+
+        return super().condition_on_observations(X, Y, **kwargs)
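For context, a minimal usage sketch of the new method under assumed shapes (the fit call and all sizes are illustrative, not part of this commit; the per-model conditioning pattern mirrors the test added below):

import torch
from botorch.fit import fit_fully_bayesian_model_nuts
from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP

train_X = torch.rand(10, 4, dtype=torch.double)  # assumed: 10 points, d=4
train_Y = torch.sin(train_X[..., :1])
model = SaasFullyBayesianSingleTaskGP(train_X=train_X, train_Y=train_Y)
fit_fully_bayesian_model_nuts(model)  # draws the hyperparameter samples

num_models = model.batch_shape[0]  # one GP per retained MCMC sample
model.posterior(train_X)           # forward pass before conditioning

# the conditioning data needs a leading batch dim of length num_models,
# here two new observations per sampled model
cond_X = torch.rand(num_models, 2, 4, dtype=torch.double)
cond_Y = torch.sin(cond_X[..., :1])
cond_model = model.condition_on_observations(cond_X, cond_Y)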

botorch/models/gpytorch.py

Lines changed: 8 additions & 1 deletion
@@ -224,6 +224,12 @@ def condition_on_observations(self, X: Tensor, Y: Tensor, **kwargs: Any) -> Model:
         >>> model = model.condition_on_observations(X=new_X, Y=new_Y)
         """
         Yvar = kwargs.get("noise", None)
+
+        # for fully Bayesian models, the keyword argument "noise": None
+        # throws an error in LinearOperator related to inferring batch dims
+        if "noise" in kwargs and kwargs["noise"] is None:
+            del kwargs["noise"]
+
         if hasattr(self, "outcome_transform"):
             # pass the transformed data to get_fantasy_model below
             # (unless we've already transformed if BatchedMultiOutputGPyTorchModel)
@@ -239,6 +245,7 @@ def condition_on_observations(self, X: Tensor, Y: Tensor, **kwargs: Any) -> Model:
             kwargs.update({"noise": Yvar.squeeze(-1)})
         # get_fantasy_model will properly copy any existing outcome transforms
         # (since it deepcopies the original model)
+
         return self.get_fantasy_model(inputs=X, targets=Y, **kwargs)

@@ -489,7 +496,7 @@ def condition_on_observations(
         fantasy_model._input_batch_shape = fantasy_model.train_targets.shape[
             : (-1 if self._num_outputs == 1 else -2)
         ]
-        fantasy_model._aug_batch_shape = fantasy_model.train_targets.shape[:-1]
+
         return fantasy_model

     def subset_output(self, idcs: List[int]) -> BatchedMultiOutputGPyTorchModel:
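The stripped kwarg matters because callers may pass noise=None unconditionally in the inferred-noise case (the test below does so via noise=cond_Yvar). A sketch of the two call patterns this enables, continuing the usage sketch above:

# inferred noise: cond_Yvar is None, and the "noise" kwarg is now dropped
# before it reaches get_fantasy_model
cond_model = model.condition_on_observations(cond_X, cond_Y, noise=None)

# observed noise: the variance tensor is forwarded as before
cond_model = model.condition_on_observations(
    cond_X, cond_Y, noise=0.1 * torch.ones_like(cond_Y)
)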

test/models/test_fully_bayesian.py

Lines changed: 81 additions & 0 deletions
@@ -124,6 +124,16 @@ def _get_unnormalized_data(self, infer_noise: bool, **tkwargs):
         )
         return train_X, train_Y, train_Yvar, test_X

+    def _get_unnormalized_condition_data(
+        self, num_models: int, infer_noise: bool, **tkwargs
+    ):
+        with torch.random.fork_rng():
+            torch.manual_seed(0)
+            cond_X = 5 + 5 * torch.rand(num_models, 2, 4, **tkwargs)
+            cond_Y = 10 + torch.sin(cond_X[..., :1])
+            cond_Yvar = None if infer_noise else 0.1 * torch.ones(cond_Y.shape)
+        return cond_X, cond_Y, cond_Yvar
+
     def _get_mcmc_samples(
         self, num_samples: int, dim: int, infer_noise: bool, **tkwargs
     ):
@@ -656,6 +666,77 @@ def test_custom_pyro_model(self):
                 atol=5e-4,
             )

+    def test_condition_on_observation(self):
+        num_models = 3
+        num_cond = 2
+        for infer_noise, dtype in itertools.product(
+            (True,), (torch.float, torch.double)
+        ):
+            tkwargs = {"device": self.device, "dtype": dtype}
+            train_X, train_Y, train_Yvar, test_X = self._get_unnormalized_data(
+                infer_noise=infer_noise, **tkwargs
+            )
+            num_train, num_dims = train_X.shape
+            # condition on different observations per model to obtain num_models
+            # sets of training data
+            cond_X, cond_Y, cond_Yvar = self._get_unnormalized_condition_data(
+                num_models=num_models, infer_noise=infer_noise, **tkwargs
+            )
+            model = SaasFullyBayesianSingleTaskGP(
+                train_X=train_X,
+                train_Y=train_Y,
+                train_Yvar=train_Yvar,
+            )
+            mcmc_samples = self._get_mcmc_samples(
+                num_samples=num_models,
+                dim=train_X.shape[-1],
+                infer_noise=infer_noise,
+                **tkwargs,
+            )
+            model.load_mcmc_samples(mcmc_samples)
+
+            # need to do a forward pass before conditioning
+            model.posterior(train_X)
+            cond_model = model.condition_on_observations(
+                cond_X, cond_Y, noise=cond_Yvar
+            )
+            posterior = cond_model.posterior(test_X)
+            self.assertEqual(
+                posterior.mean.shape, torch.Size([num_models, len(test_X), 1])
+            )
+
+            # since the conditioning data differs per model, a batch dimension
+            # is added to the training data
+            self.assertEqual(
+                cond_model.train_inputs[0].shape,
+                torch.Size([num_models, num_train + num_cond, num_dims]),
+            )
+            # condition on an identical set of data (i.e. one set) for all
+            # models, i.e. with no batch shape. This should not work.
+            cond_X_nobatch, cond_Y_nobatch = cond_X[0], cond_Y[0]
+            model = SaasFullyBayesianSingleTaskGP(
+                train_X=train_X,
+                train_Y=train_Y,
+                train_Yvar=train_Yvar,
+            )
+            mcmc_samples = self._get_mcmc_samples(
+                num_samples=num_models,
+                dim=train_X.shape[-1],
+                infer_noise=infer_noise,
+                **tkwargs,
+            )
+            model.load_mcmc_samples(mcmc_samples)
+
+            # This should __NOT__ work - the conditioning data must have a batch
+            # dimension, since the training data by default does not have a
+            # batch shape from which one could be inferred
+            model.posterior(train_X)
+            with self.assertRaises(ValueError):
+                model.condition_on_observations(
+                    cond_X_nobatch, cond_Y_nobatch, noise=cond_Yvar
+                )
+
     def test_bisect(self):
         def f(x):
             return 1 + x