 import warnings
 from collections.abc import Callable
 from functools import partial
-from typing import Any, NoReturn
+from typing import Any, Mapping, NoReturn
 
 import numpy as np
 import numpy.typing as npt
@@ -64,7 +64,7 @@ def gen_candidates_scipy(
     equality_constraints: list[tuple[Tensor, Tensor, float]] | None = None,
     nonlinear_inequality_constraints: list[tuple[Callable, bool]] | None = None,
     options: dict[str, Any] | None = None,
-    fixed_features: dict[int, float | None] | None = None,
+    fixed_features: Mapping[int, float | Tensor] | None = None,
     timeout_sec: float | None = None,
     use_parallel_mode: bool | None = None,
 ) -> tuple[Tensor, Tensor]:
@@ -107,11 +107,11 @@ def gen_candidates_scipy(
             and SLSQP if inequality or equality constraints are present. If
             `with_grad=False`, then we use a two-point finite difference estimate
             of the gradient.
-        fixed_features: This is a dictionary of feature indices to values, where
+        fixed_features: A mapping of feature indices to values, where
             all generated candidates will have features fixed to these values.
-            If the dictionary value is None, then that feature will just be
-            fixed to the clamped value and not optimized. Assumes values to be
-            compatible with lower_bounds and upper_bounds!
+            If passing tensors as values, they should have either shape `b` or
+            `b x q` to fix the same feature to different values in the batch.
+            Assumes values to be compatible with lower_bounds and upper_bounds!
         timeout_sec: Timeout (in seconds) for `scipy.optimize.minimize` routine -
             if provided, optimization will stop after this many seconds and return
             the best solution found so far.
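
A minimal usage sketch of the batch-shaped fixed features described above (not part of the diff; the quadratic `acqf` is a stand-in for a real acquisition function):

```python
import torch
from botorch.generation.gen import gen_candidates_scipy

def acqf(X: torch.Tensor) -> torch.Tensor:
    # Stand-in acquisition function: one value per b-batch element.
    return -(X - 0.3).pow(2).sum(dim=(-1, -2))

# b=3 batches of q=2 candidates in d=4 dimensions.
ics = torch.rand(3, 2, 4)

candidates, acq_values = gen_candidates_scipy(
    initial_conditions=ics,
    acquisition_function=acqf,
    lower_bounds=torch.zeros(4),
    upper_bounds=torch.ones(4),
    # Feature 0 is fixed to 0.5 everywhere; feature 2 gets a different
    # value per batch element (shape `b`, broadcast across q).
    fixed_features={0: 0.5, 2: torch.tensor([0.1, 0.2, 0.3])},
)
```
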
@@ -211,18 +211,17 @@ def f(x):
             timeout_sec=timeout_sec,
         )
 
+    f_np_wrapper = _get_f_np_wrapper(
+        clamped_candidates.shape,
+        initial_conditions.device,
+        initial_conditions.dtype,
+        with_grad,
+    )
+
     if not why_not_fast_path and use_parallel_mode is not False:
         if is_constrained:
             raise RuntimeWarning("Method L-BFGS-B cannot handle constraints.")
 
-        f_np_wrapper = _get_f_np_wrapper(
-            clamped_candidates.shape,
-            initial_conditions.device,
-            initial_conditions.dtype,
-            with_grad,
-            batched=True,
-        )
-
         batched_x0 = _arrayify(clamped_candidates).reshape(len(clamped_candidates), -1)
 
         l_bfgs_b_bounds = translate_bounds_for_lbfgsb(
@@ -242,6 +241,7 @@ def f(x):
             bounds=l_bfgs_b_bounds,
             # constraints=constraints,
             callback=options.get("callback", None),
+            pass_batch_indices=True,
             **minimize_options,
         )
         for res in results:
@@ -264,21 +264,38 @@ def f(x):
         else:
             logger.debug(msg)
 
-        f_np_wrapper = _get_f_np_wrapper(
-            clamped_candidates.shape,
-            initial_conditions.device,
-            initial_conditions.dtype,
-            with_grad,
-        )
+        if (
+            fixed_features
+            and any(
+                torch.is_tensor(ff) and ff.ndim > 0 for ff in fixed_features.values()
+            )
+            and max_optimization_problem_aggregation_size != 1
+        ):
+            raise UnsupportedError(
+                "Batch-shaped fixed features are not supported "
+                "when optimizing more than one optimization "
+                "problem at a time."
+            )
 
         all_xs = []
         split_candidates = clamped_candidates.split(
             max_optimization_problem_aggregation_size
         )
-        for candidates_ in split_candidates:
-            # We optimize the candidates at hand as a single problem
+        for i, candidates_ in enumerate(split_candidates):
+            if fixed_features:
+                fixed_features_ = {
+                    # From the check above we know that each split holds a
+                    # single candidate, so index i selects its batch entry.
+                    k: ff[i : i + 1].item()
+                    if torch.is_tensor(ff) and ff.ndim > 0
+                    else ff
+                    for k, ff in fixed_features.items()
+                }
+            else:
+                fixed_features_ = None
+
             _no_fixed_features = _remove_fixed_features_from_optimization(
-                fixed_features=fixed_features,
+                fixed_features=fixed_features_,
                 acquisition_function=acquisition_function,
                 initial_conditions=None,
                 d=initial_conditions_all_features.shape[-1],
@@ -296,7 +313,7 @@ def f(x):
 
             f_np_wrapper_ = partial(
                 f_np_wrapper,
-                fixed_features=fixed_features,
+                fixed_features=fixed_features_,
             )
 
             x0 = candidates_.flatten()
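
For intuition, the per-problem extraction above reduces batch-shaped tensors to plain floats once the batch has been split into single-candidate problems. A standalone sketch with hypothetical values:

```python
import torch

fixed_features = {2: torch.tensor([0.1, 0.2, 0.3])}  # shape `b` with b=3

for i in range(3):  # one optimization problem per batch element
    fixed_features_ = {
        k: ff[i : i + 1].item() if torch.is_tensor(ff) and ff.ndim > 0 else ff
        for k, ff in fixed_features.items()
    }
    print(fixed_features_)  # {2: 0.1}, then {2: 0.2}, then {2: 0.3} (up to float precision)
```
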
@@ -363,13 +380,14 @@ def f(x):
     return clamped_candidates, batch_acquisition
 
 
-def _get_f_np_wrapper(shapeX, device, dtype, with_grad, batched=False):
+def _get_f_np_wrapper(shapeX, device, dtype, with_grad):
     if with_grad:
 
         def f_np_wrapper(
             x: npt.NDArray,
             f: Callable,
-            fixed_features: dict[int, float] | None,
+            fixed_features: Mapping[int, float | Tensor] | None,
+            batch_indices: list[int] | None = None,
         ) -> tuple[float | npt.NDArray, npt.NDArray]:
             """Given a torch callable, compute value + grad given a numpy array."""
             if np.isnan(x).any():
@@ -387,8 +405,21 @@ def f_np_wrapper(
                 .contiguous()
                 .requires_grad_(True)
             )
+            if fixed_features is not None:
+                if batch_indices is not None:
+                    this_fixed_features = {
+                        k: ff[batch_indices]
+                        if torch.is_tensor(ff) and ff.ndim > 0
+                        else ff
+                        for k, ff in fixed_features.items()
+                    }
+                else:
+                    this_fixed_features = fixed_features
+            else:
+                this_fixed_features = None
+
             X_fix = fix_features(
-                X, fixed_features=fixed_features, replace_current_value=False
+                X, fixed_features=this_fixed_features, replace_current_value=False
             )
             # we compute the loss on the whole batch, under the assumption that f
             # treats multiple inputs in the 0th dimension as independent
@@ -409,7 +440,7 @@ def f_np_wrapper(
                 raise OptimizationGradientError(msg, current_x=x)
             fval = (
                 losses.detach().view(-1).cpu().numpy()
-                if batched
+                if batch_indices is not None
                 else loss.detach().item()
             )  # the view(-1) seems necessary as f might return a single scalar
             return fval, gradf
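
The `batch_indices` plumbing can be checked in isolation; this sketch mirrors the selection logic inside the wrapper (values are hypothetical):

```python
import torch

fixed_features = {0: 0.5, 2: torch.tensor([0.1, 0.2, 0.3])}
batch_indices = [0, 2]  # sub-batch currently evaluated by L-BFGS-B

this_fixed_features = {
    k: ff[batch_indices] if torch.is_tensor(ff) and ff.ndim > 0 else ff
    for k, ff in fixed_features.items()
}
# Floats pass through unchanged; batch-shaped tensors are narrowed to the
# active sub-batch: {0: 0.5, 2: tensor([0.1000, 0.3000])}
```
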
@@ -485,7 +516,7 @@ def gen_candidates_torch(
     optimizer: type[Optimizer] = torch.optim.Adam,
     options: dict[str, float | str] | None = None,
     callback: Callable[[int, Tensor, Tensor], NoReturn] | None = None,
-    fixed_features: dict[int, float | None] | None = None,
+    fixed_features: Mapping[int, float | Tensor] | None = None,
     timeout_sec: float | None = None,
 ) -> tuple[Tensor, Tensor]:
     r"""Generate a set of candidates using a `torch.optim` optimizer.
@@ -507,9 +538,10 @@ def gen_candidates_torch(
             the loss and gradients, but before calling the optimizer.
         fixed_features: This is a dictionary of feature indices to values, where
             all generated candidates will have features fixed to these values.
-            If the dictionary value is None, then that feature will just be
-            fixed to the clamped value and not optimized. Assumes values to be
-            compatible with lower_bounds and upper_bounds!
+            Unlike `gen_candidates_scipy`, only float values are supported
+            here; tensor-valued fixed features will raise an
+            `UnsupportedError` (see the check below).
+            Assumes values to be compatible with lower_bounds and upper_bounds!
         timeout_sec: Timeout (in seconds) for optimization. If provided,
             `gen_candidates_torch` will stop after this many seconds and return
             the best solution found so far.
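
A corresponding sketch for the torch path, which accepts only float-valued fixed features (same stand-in acquisition function as above):

```python
import torch
from botorch.generation.gen import gen_candidates_torch

def acqf(X: torch.Tensor) -> torch.Tensor:
    return -(X - 0.3).pow(2).sum(dim=(-1, -2))

candidates, acq_values = gen_candidates_torch(
    initial_conditions=torch.rand(3, 2, 4),
    acquisition_function=acqf,
    lower_bounds=torch.zeros(4),
    upper_bounds=torch.ones(4),
    fixed_features={0: 0.5},  # a tensor value here raises UnsupportedError
)
```
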
@@ -533,12 +565,18 @@ def gen_candidates_torch(
                     upper_bounds=bounds[1],
                 )
     """
-    assert not fixed_features or not any(
-        torch.is_tensor(v) for v in fixed_features.values()
-    ), "`gen_candidates_torch` does not support tensor-valued fixed features."
+    if fixed_features and any(torch.is_tensor(v) for v in fixed_features.values()):
+        raise UnsupportedError(
+            "`gen_candidates_torch` does not support tensor-valued fixed features."
+        )
 
     start_time = time.monotonic()
     options = options or {}
+    # We remove max_optimization_problem_aggregation_size since it does not
+    # affect the first-order optimizers used in this method: here it makes
+    # no difference whether multiple optimization problems are combined
+    # into one or not.
+    options.pop("max_optimization_problem_aggregation_size", None)
 
     # if there are fixed features we may optimize over a domain of lower dimension
     if fixed_features:
@@ -572,7 +610,13 @@ def gen_candidates_torch(
         )
         return clamped_candidates, batch_acquisition
     _clamp = partial(columnwise_clamp, lower=lower_bounds, upper=upper_bounds)
-    clamped_candidates = _clamp(initial_conditions).requires_grad_(True)
+    clamped_candidates = _clamp(initial_conditions)
+    if fixed_features:
+        clamped_candidates = clamped_candidates[
+            ...,
+            [i for i in range(clamped_candidates.shape[-1]) if i not in fixed_features],
+        ]
+    clamped_candidates = clamped_candidates.requires_grad_(True)
     _optimizer = optimizer(params=[clamped_candidates], lr=options.get("lr", 0.025))
 
     i = 0
@@ -583,7 +627,7 @@ def gen_candidates_torch(
         with torch.no_grad():
             X = _clamp(clamped_candidates).requires_grad_(True)
 
-        loss = -acquisition_function(X).sum()
+        loss = -acquisition_function(fix_features(X, fixed_features)).sum()
         grad = torch.autograd.grad(loss, X)[0]
         if callback:
             callback(i, loss, grad)
@@ -602,6 +646,7 @@ def assign_grad():
                 logger.info(f"Optimization timed out after {runtime} seconds.")
 
     clamped_candidates = _clamp(clamped_candidates)
+    clamped_candidates = fix_features(clamped_candidates, fixed_features)
     with torch.no_grad():
         batch_acquisition = acquisition_function(clamped_candidates)
 
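
Putting it together: on the non-parallel scipy path, batch-shaped fixed features require splitting the batch into single problems, controlled by the same options key the diff pops in `gen_candidates_torch` (the exact interplay is an assumption here):

```python
import torch
from botorch.generation.gen import gen_candidates_scipy

def acqf(X: torch.Tensor) -> torch.Tensor:
    return -(X - 0.3).pow(2).sum(dim=(-1, -2))

candidates, acq_values = gen_candidates_scipy(
    initial_conditions=torch.rand(3, 2, 4),
    acquisition_function=acqf,
    lower_bounds=torch.zeros(4),
    upper_bounds=torch.ones(4),
    fixed_features={2: torch.tensor([0.1, 0.2, 0.3])},
    # Batch-shaped fixed features must be optimized one problem at a time,
    # otherwise the UnsupportedError above is raised.
    options={"max_optimization_problem_aggregation_size": 1},
)
```
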