meta-pytorch
diff --git a/‎botorch/acquisition/thompson_sampling.py‎
Lines changed: 109 additions & 22 deletions b/‎botorch/acquisition/thompson_sampling.py‎
Lines changed: 109 additions & 22 deletions
diff --git a/‎botorch/acquisition/utils.py‎
Lines changed: 5 additions & 1 deletion b/‎botorch/acquisition/utils.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎botorch/models/deterministic.py‎
Lines changed: 25 additions & 2 deletions b/‎botorch/models/deterministic.py‎
Lines changed: 25 additions & 2 deletions
diff --git a/‎botorch/sampling/pathwise/paths.py‎
Lines changed: 53 additions & 1 deletion b/‎botorch/sampling/pathwise/paths.py‎
Lines changed: 53 additions & 1 deletion
diff --git a/‎botorch/sampling/pathwise/posterior_samplers.py‎
Lines changed: 10 additions & 2 deletions b/‎botorch/sampling/pathwise/posterior_samplers.py‎
Lines changed: 10 additions & 2 deletions
@@ -6,10 +6,16 @@
 
 import torch
 from botorch.acquisition.analytic import AcquisitionFunction
-from botorch.acquisition.objective import PosteriorTransform
+from botorch.acquisition.objective import (
+    IdentityMCObjective,
+    MCAcquisitionObjective,
+    PosteriorTransform,
+)
+from botorch.exceptions.errors import UnsupportedError
+from botorch.models.deterministic import GenericDeterministicModel
 from botorch.models.model import Model
 from botorch.sampling.pathwise.posterior_samplers import get_matheron_path_model
-from botorch.utils.transforms import t_batch_mode_transform
+from botorch.utils.transforms import is_ensemble, t_batch_mode_transform
 from torch import Tensor
 
 
@@ -32,7 +38,9 @@ class PathwiseThompsonSampling(AcquisitionFunction):
     def __init__(
         self,
         model: Model,
+        objective: MCAcquisitionObjective | None = None,
         posterior_transform: PosteriorTransform | None = None,
+        samples: GenericDeterministicModel | None = None,
     ) -> None:
         r"""Single-outcome TS.
 
@@ -41,46 +49,125 @@ def __init__(
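+            objective: The MCAcquisitionObjective applied to the values of the
+                posterior sample paths. If omitted, `IdentityMCObjective()` is used.
             posterior_transform: A PosteriorTransform. If using a multi-output model,
                 a PosteriorTransform that transforms the multi-output posterior into a
                 single-output posterior is required.
+            samples: A GenericDeterministicModel that evaluates a set of posterior
+                sample paths. If omitted, the paths are drawn on the first evaluation.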
         """
-        if model._is_fully_bayesian:
-            raise NotImplementedError(
-                "PathwiseThompsonSampling is not supported for fully Bayesian models",
-            )
 
         super().__init__(model=model)
-        self.batch_size: int | None = None
-
-    def redraw(self) -> None:
+        # `batch_size` is the number of sample paths and is compared against the int
+        # `X.shape[-2]` in `_pathwise_forward`, so it must itself be an int. Storing
+        # `samples.batch_shape` (a `torch.Size` or None) directly would never compare
+        # equal and every evaluation with user-provided samples would raise.
+        self.batch_size: int | None = None
+        if samples is not None and samples.batch_shape:
+            # Path models from `get_matheron_path_model` carry the batch shape
+            # `sample_shape + model.batch_shape`; this assumes a one-dimensional
+            # `sample_shape` (as used by `redraw`), so the leading entry is the
+            # number of paths.
+            self.batch_size = samples.batch_shape[0]
+
+        # NOTE: This conditional block is copied from MCAcquisitionFunction, we should
+        # consider inheriting from it and e.g. getting the X_pending logic as well.
+        if objective is None and model.num_outputs != 1:
+            if posterior_transform is None:
+                raise UnsupportedError(
+                    "Must specify an objective or a posterior transform when using "
+                    "a multi-output model."
+                )
+            elif not posterior_transform.scalarize:
+                raise UnsupportedError(
+                    "If using a multi-output model without an objective, "
+                    "posterior_transform must scalarize the output."
+                )
+        if objective is None:
+            objective = IdentityMCObjective()
+        self.objective = objective
+        self.posterior_transform = posterior_transform
+        self.samples: GenericDeterministicModel | None = samples
+
+    def redraw(self, batch_size: int) -> None:
+        r"""Draw a fresh set of posterior sample paths.
+
+        Replaces `self.samples` with a path model of `batch_size` Matheron paths.
+        If the model is an ensemble, also draws the random `self.ensemble_indices`
+        that `select_from_ensemble_models` uses to pick an ensemble member for each
+        path and output. NOTE: `self.batch_size` is not updated here.
+
+        Args:
+            batch_size: The number of sample paths to draw.
+
+        Raises:
+            NotImplementedError: If the ensemble model has more than one batch
+                dimension.
+        """
+        sample_shape = (batch_size,)
         self.samples = get_matheron_path_model(
-            model=self.model, sample_shape=torch.Size([self.batch_size])
+            model=self.model, sample_shape=torch.Size(sample_shape)
         )
+        if is_ensemble(self.model):
+            # the ensembling dimension is assumed to be part of the batch shape
+            # could add a dedicated property to keep track of the ensembling dimension
+            # i.e. generalizing num_mcmc_samples in AbstractFullyBayesianSingleTaskGP
+            model_batch_shape = self.model.batch_shape
+            if len(model_batch_shape) > 1:
+                raise NotImplementedError(
+                    "Ensemble models with more than one ensemble dimension are not "
+                    "yet supported."
+                )
+            num_ensemble = model_batch_shape[0]
+            # one ensemble index per sample path and output; the singleton dim lines
+            # up with the ensemble dim that `torch.gather` indexes into
+            self.ensemble_indices = torch.randint(
+                0,
+                num_ensemble,
+                (*sample_shape, 1, self.model.num_outputs),
+            )
 
     @t_batch_mode_transform()
     def forward(self, X: Tensor) -> Tensor:
         r"""Evaluate the pathwise posterior sample draws on the candidate set X.
 
         Args:
-            X: A `(b1 x ... bk) x 1 x d`-dim batched tensor of `d`-dim design points.
+            X: A `batch_shape x q x d`-dim batched tensor of `d`-dim design points.
 
         Returns:
-            A `(b1 x ... bk) x [num_models for fully bayesian]`-dim tensor of
-            evaluations on the posterior sample draws.
+            A `batch_shape`-dim tensor of objective values of the posterior sample
+            draws, summed over the `q` candidates.
         """
-        batch_size = X.shape[-2]
-        q_dim = -2
+        objective_values = self._pathwise_forward(X)
+        # NOTE: can leverage batched L-BFGS computation instead of summing in the future
+        # sum over the `q` dim (-1); the objective has already removed the `m` dim:
+        acqf_vals = objective_values.sum(-1)  # batch_shape
+        return acqf_vals
 
+    def _pathwise_forward(self, X: Tensor) -> Tensor:
+        r"""Evaluate the objective of the posterior sample paths at `X`.
+
+        The sample paths are drawn on the first call (when `self.samples` is None)
+        and reused afterwards, so later calls must pass the same `q`.
+
+        Args:
+            X: A `batch_shape x q x d`-dim tensor of `d`-dim design points.
+
+        Returns:
+            A `batch_shape x q`-dim tensor of objective values.
+
+        Raises:
+            ValueError: If `q` differs from the stored `self.batch_size`.
+        """
+        batch_size = X.shape[-2]
         # batch_shape x q x 1 x d
         X = X.unsqueeze(-2)
-        if self.batch_size is None:
+        if self.samples is None:
             self.batch_size = batch_size
-            self.redraw()
-        elif self.batch_size != batch_size:
+            self.redraw(batch_size=batch_size)
+
+        if self.batch_size != batch_size:
             raise ValueError(
                 BATCH_SIZE_CHANGE_ERROR.format(self.batch_size, batch_size)
             )
+        # batch_shape x q [x num_ensembles] x 1 x m
+        posterior_values = self.samples(X)
+        # batch_shape x q [x num_ensembles] x m
+        posterior_values = posterior_values.squeeze(-2)
 
-        # posterior_values.shape post-squeeze:
         # batch_shape x q x m
-        posterior_values = self.samples(X).squeeze(-2)
-        # sum over batch dim and squeeze num_objectives dim (-1)
-        return posterior_values.sum(q_dim).squeeze(-1)
+        posterior_values = self.select_from_ensemble_models(values=posterior_values)
+
+        if self.posterior_transform:
+            posterior_values = self.posterior_transform.evaluate(posterior_values)
+        # problem with this currently is that we could still have an `m` dimension,
+        # ideally that would be packed into a batch dimension instead
+        # objective removes the `m` dimension:
+        objective_values = self.objective(posterior_values)  # batch_shape x q
+        return objective_values
+
+    def select_from_ensemble_models(self, values: Tensor) -> Tensor:
+        r"""Select the value of a single ensemble member for each path and output.
+
+        NOTE: uses `self.model` and `is_ensemble` to determine whether or not an
+        ensembling dimension is present, and reads `self.ensemble_indices`, which is
+        set by `redraw`.
+
+        Args:
+            values: A `batch_shape x q [x num_ensemble] x m`-dim Tensor.
+
+        Returns:
+            A `batch_shape x q x m`-dim Tensor. For ensemble models, each element is
+            taken from the ensemble member given by `self.ensemble_indices`; for
+            all other models `values` is returned unchanged.
+        """
+        if not is_ensemble(self.model):
+            return values
+
+        ensemble_dim = -2
+        # `ensemble_indices` are fixed so that the acquisition function becomes
+        # deterministic for the same input and can be optimized with LBFGS.
+        # ensemble indices have shape q x 1 x m
+        self.ensemble_indices = self.ensemble_indices.to(device=values.device)
+        index = self.ensemble_indices
+        # prepend the input batch dims so that `index` has as many dims as `values`
+        input_batch_shape = values.shape[:-3]
+        index = index.expand(*input_batch_shape, *index.shape)
+        # values is batch_shape x q x num_ensemble x m
+        values_wo_ensemble = torch.gather(values, dim=ensemble_dim, index=index)
+        # removing the (now singleton) ensemble dimension
+        return values_wo_ensemble.squeeze(ensemble_dim)
@@ -575,7 +575,11 @@ def get_optimal_samples(
     else:
         sample_transform = None
 
-    paths = get_matheron_path_model(model=model, sample_shape=torch.Size([num_optima]))
+    paths = get_matheron_path_model(
+        model=model,
+        sample_shape=torch.Size([num_optima]),
+        ensemble_as_batch=True,
+    )
     optimal_inputs, optimal_outputs = optimize_posterior_samples(
         paths=paths,
         bounds=bounds,
 
@@ -64,7 +64,12 @@ class GenericDeterministicModel(DeterministicModel):
         >>> model = GenericDeterministicModel(f)
     """
 
-    def __init__(self, f: Callable[[Tensor], Tensor], num_outputs: int = 1) -> None:
+    def __init__(
+        self,
+        f: Callable[[Tensor], Tensor],
+        num_outputs: int = 1,
+        batch_shape: torch.Size | None = None,
+    ) -> None:
         r"""
         Args:
             f: A callable mapping a `batch_shape x n x d`-dim input tensor `X`
@@ -75,6 +80,12 @@ def __init__(self, f: Callable[[Tensor], Tensor], num_outputs: int = 1) -> None:
         super().__init__()
         self._f = f
         self._num_outputs = num_outputs
+        self._batch_shape = batch_shape
+
+    @property
+    def batch_shape(self) -> torch.Size | None:
+        r"""The batch shape of the model, as passed at construction.
+
+        This is None if no `batch_shape` was specified.
+        """
+        return self._batch_shape
 
     def subset_output(self, idcs: list[int]) -> GenericDeterministicModel:
         r"""Subset the model along the output dimension.
@@ -100,7 +111,19 @@ def forward(self, X: Tensor) -> Tensor:
         Returns:
             A `batch_shape x n x m`-dimensional output tensor.
         """
-        return self._f(X)
+        Y = self._f(X)
+        # allowing for old behavior of not specifying the batch_shape
+        if self.batch_shape is not None:
+            # everything but the trailing `n x m` dims of the output
+            batch_shape = Y.shape[:-2]
+            try:
+                torch.broadcast_shapes(self.batch_shape, batch_shape)
+            except RuntimeError as e:
+                # chain the original error so the offending shapes stay traceable
+                raise ValueError(
+                    "GenericDeterministicModel was initialized with batch_shape="
+                    f"{self.batch_shape} but the output of f has a batch_shape="
+                    f"{batch_shape} that is not broadcastable with it."
+                ) from e
+        return Y
 
 
 class AffineDeterministicModel(DeterministicModel):
 
@@ -6,7 +6,7 @@
 
 from __future__ import annotations
 
-from abc import ABC
+from abc import ABC, abstractmethod
 from collections.abc import Callable, Iterable, Iterator, Mapping
 from typing import Any
 
@@ -24,6 +24,16 @@
 class SamplePath(ABC, TransformedModuleMixin, Module):
     r"""Abstract base class for Botorch sample paths."""
 
+    @abstractmethod
+    def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None:
+        """Sets whether the ensemble dimension is considered as a batch dimension.
+
+        Abstract: every concrete sample path must provide an implementation.
+
+        Args:
+            ensemble_as_batch: Whether the ensemble dimension is considered as a batch
+                dimension or not.
+        """
+        pass  # pragma: no cover
+
 
 class PathDict(SamplePath):
     r"""A dictionary of SamplePaths."""
@@ -84,6 +94,16 @@ def __getitem__(self, key: str) -> SamplePath:
     def __setitem__(self, key: str, val: SamplePath) -> None:
         self.paths[key] = val
 
+    def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None:
+        """Sets whether the ensemble dimension is considered as a batch dimension.
+
+        The flag is forwarded to every path stored in this dictionary.
+
+        Args:
+            ensemble_as_batch: Whether the ensemble dimension is considered as a batch
+                dimension or not.
+        """
+        for path in self.paths.values():
+            path.set_ensemble_as_batch(ensemble_as_batch)
+
 
 class PathList(SamplePath):
     r"""A list of SamplePaths."""
@@ -136,6 +156,16 @@ def __getitem__(self, key: int) -> SamplePath:
     def __setitem__(self, key: int, val: SamplePath) -> None:
         self.paths[key] = val
 
+    def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None:
+        """Sets whether the ensemble dimension is considered as a batch dimension.
+
+        The flag is forwarded to every path stored in this list.
+
+        Args:
+            ensemble_as_batch: Whether the ensemble dimension is considered as a batch
+                dimension or not.
+        """
+        for path in self.paths:
+            path.set_ensemble_as_batch(ensemble_as_batch)
+
 
 class GeneralizedLinearPath(SamplePath):
     r"""A sample path in the form of a generalized linear model."""
@@ -147,6 +177,8 @@ def __init__(
         bias_module: Module | None = None,
         input_transform: TInputTransform | None = None,
         output_transform: TOutputTransform | None = None,
+        is_ensemble: bool = False,
+        ensemble_as_batch: bool = False,
     ):
         r"""Initializes a GeneralizedLinearPath instance.
 
@@ -161,6 +193,11 @@ def __init__(
             bias_module: An optional module used to define additive offsets.
             input_transform: An optional input transform for the module.
             output_transform: An optional output transform for the module.
+            is_ensemble: Whether the associated model is an ensemble model or not.
+            ensemble_as_batch: Whether the ensemble dimension is added as a batch
+                dimension or not. If `True`, the ensemble dimension is treated as a
+                batch dimension, which allows for the joint optimization of all members
+                of the ensemble.
         """
         super().__init__()
         self.feature_map = feature_map
@@ -170,8 +207,23 @@ def __init__(
         self.bias_module = bias_module
         self.input_transform = input_transform
         self.output_transform = output_transform
+        self.is_ensemble = is_ensemble
+        self.ensemble_as_batch = ensemble_as_batch
 
     def forward(self, x: Tensor, **kwargs) -> Tensor:
+        r"""Evaluate the path at `x`.
+
+        Computes the product of the features `feature_map(x)` with `weight` and
+        adds `bias_module(x)` if a bias module is set.
+
+        Args:
+            x: The input tensor. If `is_ensemble` is set and `ensemble_as_batch` is
+                not, a singleton dimension is inserted at position -3 before the
+                feature map and bias module are evaluated.
+            kwargs: Additional keyword arguments passed to the feature map.
+
+        Returns:
+            The path values at `x`.
+        """
+        if self.is_ensemble and not self.ensemble_as_batch:
+            # assuming that the ensembling dimension is added after (n, d), but
+            # before the other batch dimensions, starting from the left.
+            x = x.unsqueeze(-3)
         feat = self.feature_map(x, **kwargs)
         out = (feat @ self.weight.unsqueeze(-1)).squeeze(-1)
         return out if self.bias_module is None else out + self.bias_module(x)
+
+    def set_ensemble_as_batch(self, ensemble_as_batch: bool) -> None:
+        """Sets whether the ensemble dimension is considered as a batch dimension.
+
+        When this is False and `is_ensemble` is True, `forward` inserts a singleton
+        dimension at position -3 of its input.
+
+        Args:
+            ensemble_as_batch: Whether the ensemble dimension is considered as a batch
+                dimension or not.
+        """
+        self.ensemble_as_batch = ensemble_as_batch
@@ -87,7 +87,7 @@ def __init__(
 
 
 def get_matheron_path_model(
-    model: GP, sample_shape: Size | None = None
+    model: GP, sample_shape: Size | None = None, ensemble_as_batch: bool = False
 ) -> GenericDeterministicModel:
     r"""Generates a deterministic model using a single Matheron path drawn
     from the model's posterior.
@@ -108,6 +108,9 @@ def get_matheron_path_model(
     """
     sample_shape = Size() if sample_shape is None else sample_shape
     path = draw_matheron_paths(model, sample_shape=sample_shape)
+    # propagate the flag through the path container to each of its paths
+    path.set_ensemble_as_batch(ensemble_as_batch)
     num_outputs = model.num_outputs
     if isinstance(model, ModelList) and len(model.models) != num_outputs:
         raise UnsupportedError("A model-list of multi-output models is not supported.")
@@ -137,7 +140,12 @@ def f(X: Tensor) -> Tensor:
             res = path(X.unsqueeze(-3)).transpose(-1, -2)
         return res
 
-    path_model = GenericDeterministicModel(f=f, num_outputs=num_outputs)
+    path_model = GenericDeterministicModel(
+        f=f,
+        num_outputs=num_outputs,
+        batch_shape=sample_shape + model.batch_shape,
+    )
+    # Do we need the len(sample_shape) > 0?
     path_model._is_ensemble = is_ensemble(model) or len(sample_shape) > 0
     return path_model