
Commit 563cd95

Carl Hvarfner authored and facebook-github-bot committed

Remove maximize from info-theoretic acquisition functions (#2590)

Summary: Removes `maximize` from some info-theoretic acquisition functions (those that use `get_optimal_samples`).

Pull Request resolved: #2590
Reviewed By: saitcakmak
Differential Revision: D64698976
fbshipit-source-id: 4f97b38b2a89a3c0ba65c36f9aed81c3e7c57237

1 parent 04193e9 · commit 563cd95
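
For callers migrating off the removed `maximize` flag, minimization is now expressed through a negating `ScalarizedPosteriorTransform` (exactly what the updated test further down exercises). A minimal sketch, assuming a fitted single-output `model` and `optimal_inputs`/`optimal_outputs` already drawn via `get_optimal_samples`:

import torch
from botorch.acquisition.joint_entropy_search import qJointEntropySearch
from botorch.acquisition.objective import ScalarizedPosteriorTransform

# Before this commit: qJointEntropySearch(..., maximize=False)
# After: negate the objective via a posterior transform instead.
acq = qJointEntropySearch(
    model=model,  # assumed: a fitted single-output GP
    optimal_inputs=optimal_inputs,  # assumed: sampled optimizer locations
    optimal_outputs=optimal_outputs,  # assumed: sampled optimal values
    posterior_transform=ScalarizedPosteriorTransform(
        weights=-torch.ones(1, dtype=torch.double)
    ),
)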

File tree: 8 files changed, +92 −52 lines

botorch/acquisition/input_constructors.py

Lines changed: 5 additions & 0 deletions
@@ -80,6 +80,7 @@
     LearnedObjective,
     MCAcquisitionObjective,
     PosteriorTransform,
+    ScalarizedPosteriorTransform,
 )
 from botorch.acquisition.preference import (
     AnalyticExpectedUtilityOfBestOption,
@@ -1801,6 +1802,7 @@ def construct_inputs_qJES(
     bounds: list[tuple[float, float]],
     num_optima: int = 64,
     condition_noiseless: bool = True,
+    posterior_transform: ScalarizedPosteriorTransform | None = None,
     X_pending: Tensor | None = None,
     estimation_type: str = "LB",
     num_samples: int = 64,
@@ -1810,13 +1812,16 @@
         model=model,
         bounds=torch.as_tensor(bounds, dtype=dtype).T,
         num_optima=num_optima,
+        posterior_transform=posterior_transform,
+        return_transformed=True,
     )

     inputs = {
         "model": model,
         "optimal_inputs": optimal_inputs,
         "optimal_outputs": optimal_outputs,
         "condition_noiseless": condition_noiseless,
+        "posterior_transform": posterior_transform,
         "X_pending": X_pending,
         "estimation_type": estimation_type,
         "num_samples": num_samples,

botorch/acquisition/joint_entropy_search.py

Lines changed: 26 additions & 22 deletions
@@ -74,7 +74,6 @@ def __init__(
         posterior_transform: PosteriorTransform | None = None,
         X_pending: Tensor | None = None,
         estimation_type: str = "LB",
-        maximize: bool = True,
         num_samples: int = 64,
     ) -> None:
         r"""Joint entropy search acquisition function.
@@ -91,11 +90,11 @@
                 [Tu2022joint]_. These are sampled identically, so this only controls
                 the fashion in which the GP is reshaped as a result of conditioning
                 on the optimum.
+            posterior_transform: PosteriorTransform to negate or scalarize the output.
            estimation_type: estimation_type: A string to determine which entropy
                estimate is computed: Lower bound" ("LB") or "Monte Carlo" ("MC").
                Lower Bound is recommended due to the relatively high variance
                of the MC estimator.
-            maximize: If true, we consider a maximization problem.
            X_pending: A `m x d`-dim Tensor of `m` design points that have been
                submitted for function evaluation, but have not yet been evaluated.
            num_samples: The number of Monte Carlo samples used for the Monte Carlo
@@ -112,16 +111,13 @@ def __init__(
         # and three-dimensional otherwise.
         self.optimal_inputs = optimal_inputs.unsqueeze(-2)
         self.optimal_outputs = optimal_outputs.unsqueeze(-2)
+        self.optimal_output_values = (
+            posterior_transform.evaluate(self.optimal_outputs).unsqueeze(-1)
+            if posterior_transform
+            else self.optimal_outputs
+        )
         self.posterior_transform = posterior_transform
-        self.maximize = maximize
-
-        # The optima (can be maxima, can be minima) come in as the largest
-        # values if we optimize, or the smallest (likely substantially negative)
-        # if we minimize. Inside the acquisition function, however, we always
-        # want to consider MAX-values. As such, we need to flip them if
-        # we want to minimize.
-        if not self.maximize:
-            optimal_outputs = -optimal_outputs
+
         self.num_samples = optimal_inputs.shape[0]
         self.condition_noiseless = condition_noiseless
         self.initial_model = model
@@ -203,7 +199,9 @@ def _compute_lower_bound_information_gain(
             A `batch_shape`-dim Tensor of acquisition values at the given design
             points `X`.
         """
-        initial_posterior = self.initial_model.posterior(X, observation_noise=True)
+        initial_posterior = self.initial_model.posterior(
+            X, observation_noise=True, posterior_transform=self.posterior_transform
+        )
         # need to check if there is a two-dimensional batch shape -
         # the sampled optima appear in the dimension right after
         batch_shape = X.shape[:-2]
@@ -221,15 +219,17 @@

         # Compute the mixture mean and variance
         posterior_m = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=True
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
         )
         noiseless_var = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=False
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=False,
+            posterior_transform=self.posterior_transform,
         ).variance

         mean_m = posterior_m.mean
-        if not self.maximize:
-            mean_m = -mean_m
         variance_m = posterior_m.variance

         check_no_nans(variance_m)
@@ -240,7 +240,7 @@
             torch.zeros(1, device=X.device, dtype=X.dtype),
             torch.ones(1, device=X.device, dtype=X.dtype),
         )
-        normalized_mvs = (self.optimal_outputs - mean_m) / stdv
+        normalized_mvs = (self.optimal_output_values - mean_m) / stdv
         cdf_mvs = normal.cdf(normalized_mvs).clamp_min(CLAMP_LB)
         pdf_mvs = torch.exp(normal.log_prob(normalized_mvs))

@@ -294,7 +294,9 @@ def _compute_monte_carlo_information_gain(
             A `batch_shape`-dim Tensor of acquisition values at the given design
             points `X`.
         """
-        initial_posterior = self.initial_model.posterior(X, observation_noise=True)
+        initial_posterior = self.initial_model.posterior(
+            X, observation_noise=True, posterior_transform=self.posterior_transform
+        )

         batch_shape = X.shape[:-2]
         sample_dim = len(batch_shape)
@@ -311,15 +313,17 @@

         # Compute the mixture mean and variance
         posterior_m = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=True
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
         )
         noiseless_var = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=False
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=False,
+            posterior_transform=self.posterior_transform,
         ).variance

         mean_m = posterior_m.mean
-        if not self.maximize:
-            mean_m = -mean_m
         variance_m = posterior_m.variance.clamp_min(CLAMP_LB)
         conditional_samples, conditional_logprobs = self._compute_monte_carlo_variables(
             posterior_m
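
The new `optimal_output_values` buffer replaces the old sign flip: rather than negating `optimal_outputs` when `maximize=False`, the transform is applied once at construction, and every entropy computation compares against the transformed values. A standalone sketch of what that conditional expression computes, with toy tensors in place of the class internals:

import torch
from botorch.acquisition.objective import ScalarizedPosteriorTransform

# Toy stand-in for self.optimal_outputs: num_samples x 1 x 1.
optimal_outputs = torch.randn(16, 1, 1)
transform = ScalarizedPosteriorTransform(weights=-torch.ones(1))
# evaluate() computes offset + Y @ weights, collapsing the output dim;
# unsqueeze(-1) restores it, matching the untransformed branch's shape.
optimal_output_values = transform.evaluate(optimal_outputs).unsqueeze(-1)
assert torch.equal(optimal_output_values, -optimal_outputs)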

botorch_community/acquisition/input_constructors.py

Lines changed: 5 additions & 3 deletions
@@ -17,6 +17,7 @@

 import torch
 from botorch.acquisition.input_constructors import acqf_input_constructor
+from botorch.acquisition.objective import ScalarizedPosteriorTransform
 from botorch.acquisition.utils import get_optimal_samples
 from botorch.models.model import Model
 from botorch_community.acquisition.bayesian_active_learning import (
@@ -62,7 +63,7 @@ def construct_inputs_SCoreBO(
     model: Model,
     bounds: List[Tuple[float, float]],
     num_optima: int = 8,
-    maximize: bool = True,
+    posterior_transform: Optional[ScalarizedPosteriorTransform] = None,
     distance_metric: str = "hellinger",
     X_pending: Optional[Tensor] = None,
 ):
@@ -72,14 +73,15 @@ def construct_inputs_SCoreBO(
         model=model,
         bounds=torch.as_tensor(bounds, dtype=dtype).T,
         num_optima=num_optima,
+        posterior_transform=posterior_transform,
+        return_transformed=True,
     )
-
     inputs = {
         "model": model,
         "optimal_inputs": optimal_inputs,
         "optimal_outputs": optimal_outputs,
         "distance_metric": distance_metric,
-        "maximize": maximize,
+        "posterior_transform": posterior_transform,
         "X_pending": X_pending,
     }
     return inputs
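
Both constructors now forward the transform into `get_optimal_samples` together with `return_transformed=True`, so the returned `optimal_outputs` already live on the transformed (e.g. negated) scale. A self-contained sketch under that assumption, using a toy, unfitted `SingleTaskGP` purely for illustration:

import torch
from botorch.acquisition.objective import ScalarizedPosteriorTransform
from botorch.acquisition.utils import get_optimal_samples
from botorch.models import SingleTaskGP

train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = torch.randn(10, 1, dtype=torch.double)
model = SingleTaskGP(train_X, train_Y)  # toy model, not fitted

optimal_inputs, optimal_outputs = get_optimal_samples(
    model=model,
    bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double),  # 2 x d
    num_optima=8,
    # Kwargs added by this commit's changes to get_optimal_samples:
    posterior_transform=ScalarizedPosteriorTransform(
        weights=-torch.ones(1, dtype=torch.double)
    ),
    return_transformed=True,  # outputs are on the transformed scale
)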

botorch_community/acquisition/scorebo.py

Lines changed: 21 additions & 12 deletions
@@ -29,6 +29,7 @@
 from botorch.acquisition.bayesian_active_learning import (
     FullyBayesianAcquisitionFunction,
 )
+from botorch.acquisition.objective import ScalarizedPosteriorTransform
 from botorch.models.fully_bayesian import MCMC_DIM, SaasFullyBayesianSingleTaskGP
 from botorch.models.gp_regression import MIN_INFERRED_NOISE_LEVEL
 from botorch.models.utils import fantasize as fantasize_flag
@@ -50,7 +51,7 @@ def __init__(
         optimal_inputs: Optional[Tensor] = None,
         X_pending: Optional[Tensor] = None,
         distance_metric: Optional[str] = "hellinger",
-        maximize: bool = True,
+        posterior_transform: Optional[ScalarizedPosteriorTransform] = None,
     ) -> None:
         r"""Self-correcting Bayesian optimization [hvarfner2023scorebo]_ acquisition
         function. SCoreBO seeks to find accurate hyperparameters during the course
@@ -71,14 +72,14 @@
         super().__init__(model=model)
         # To enable fully bayesian GP conditioning, we need to unsqueeze
         # to get num_optima x num_gps unique GPs
-        self.maximize = maximize
-        if not self.maximize:
-            optimal_outputs = -optimal_outputs
-
-        # inputs come as num_optima_per_model x num_models x d
-        # but we want it four-dimensional to condition one per model.
-
         self.optimal_outputs = optimal_outputs.unsqueeze(-2)
+        self.optimal_output_values = (
+            posterior_transform.evaluate(self.optimal_outputs).unsqueeze(-1)
+            if posterior_transform
+            else self.optimal_outputs
+        )
+        self.posterior_transform = posterior_transform
+
         # JES-like version of SCoreBO if optimal inputs are provided
         if optimal_inputs is not None:
             with warnings.catch_warnings():
@@ -122,13 +123,19 @@ def forward(self, X: Tensor) -> Tensor:
         # since we have two MC dims (over models and optima), we need to
         # unsqueeze a second dim to accomodate the posterior pass
         prev_posterior = self.model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=True
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
         )
         noiseless_posterior = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=False
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=False,
+            posterior_transform=self.posterior_transform,
         )
         posterior = self.conditional_model.posterior(
-            X.unsqueeze(MCMC_DIM), observation_noise=True
+            X.unsqueeze(MCMC_DIM),
+            observation_noise=True,
+            posterior_transform=self.posterior_transform,
         )

         marg_mean = prev_posterior.mean.mean(dim=MCMC_DIM, keepdim=True)
@@ -139,7 +146,9 @@ def forward(self, X: Tensor) -> Tensor:
         # the mixture variance is squeezed, need it unsqueezed
         marg_covar = prev_posterior.mixture_covariance_matrix.unsqueeze(MCMC_DIM)
         noiseless_var = noiseless_posterior.variance
-        normalized_mvs = (self.optimal_outputs - cond_means) / noiseless_var.sqrt()
+        normalized_mvs = (
+            self.optimal_output_values - cond_means
+        ) / noiseless_var.sqrt()
         cdf_mvs = self.normal.cdf(normalized_mvs).clamp_min(CLAMP_LB)
         pdf_mvs = torch.exp(self.normal.log_prob(normalized_mvs))
         mean_truncated = cond_means - noiseless_var.sqrt() * pdf_mvs / cdf_mvs
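
The `normalized_mvs` change is the crux of the SCoreBO edit: the truncation threshold is now the transformed optimum rather than a conditionally sign-flipped one. A standalone numerical sketch of this truncated-Gaussian step, with made-up tensors and `CLAMP_LB` standing in for botorch's constant:

import torch
from torch.distributions import Normal

CLAMP_LB = 1e-6  # stand-in for botorch's clamp constant
normal = Normal(torch.zeros(1), torch.ones(1))

cond_means = torch.randn(4, 1)
noiseless_var = torch.rand(4, 1) + 0.1
optimal_output_values = cond_means + 2.0 * noiseless_var.sqrt()  # toy optima

normalized_mvs = (optimal_output_values - cond_means) / noiseless_var.sqrt()
cdf_mvs = normal.cdf(normalized_mvs).clamp_min(CLAMP_LB)
pdf_mvs = torch.exp(normal.log_prob(normalized_mvs))
# Mean of the Gaussian truncated above at the sampled (transformed) optimum:
mean_truncated = cond_means - noiseless_var.sqrt() * pdf_mvs / cdf_mvs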

test/acquisition/test_input_constructors.py

Lines changed: 3 additions & 0 deletions
@@ -1620,6 +1620,9 @@ def test_construct_inputs_jes(self) -> None:
             training_data=self.blockX_blockY,
             bounds=self.bounds,
             num_optima=17,
+            posterior_transform=ScalarizedPosteriorTransform(
+                torch.rand(1, dtype=self.blockX_blockY[0].Y.dtype)
+            ),
         )

         self.assertEqual(self.blockX_blockY[0].X.dtype, kwargs["optimal_inputs"].dtype)

test/acquisition/test_joint_entropy_search.py

Lines changed: 14 additions & 5 deletions
@@ -8,14 +8,15 @@

 import torch
 from botorch.acquisition.joint_entropy_search import qJointEntropySearch
+from botorch.acquisition.objective import ScalarizedPosteriorTransform
 from botorch.models.fully_bayesian import SaasFullyBayesianSingleTaskGP
 from botorch.sampling.normal import SobolQMCNormalSampler
 from botorch.utils.test_helpers import get_model
 from botorch.utils.testing import BotorchTestCase


 class TestQJointEntropySearch(BotorchTestCase):
-    def test_joint_entropy_search(self):
+    def test_singleobj_joint_entropy_search(self):
         torch.manual_seed(1)
         tkwargs = {"device": self.device}
         estimation_types = ("LB", "MC")
@@ -26,15 +27,13 @@ def test_joint_entropy_search(self):
             estimation_type,
             use_model_list,
             standardize_model,
-            maximize,
             condition_noiseless,
         ) in product(
             (torch.float, torch.double),
             estimation_types,
             (False, True),
             (False, True),
             (False, True),
-            (False, True),
         ):
             tkwargs["dtype"] = dtype
             input_dim = 2
@@ -61,7 +60,6 @@
                 num_samples=64,
                 X_pending=X_pending,
                 condition_noiseless=condition_noiseless,
-                maximize=maximize,
             )
             self.assertIsInstance(acq.sampler, SobolQMCNormalSampler)

@@ -77,6 +75,18 @@
             # assess shape
             self.assertTrue(acq_X.shape == test_Xs[j].shape[:-2])

+            acq = qJointEntropySearch(
+                model=model,
+                optimal_inputs=optimal_inputs,
+                optimal_outputs=optimal_outputs,
+                posterior_transform=ScalarizedPosteriorTransform(
+                    weights=-torch.ones(1, **tkwargs)
+                ),
+            )
+            self.assertTrue(torch.all(acq.optimal_output_values == -acq.optimal_outputs))
+            acq_X = acq(test_Xs[j])
+            self.assertTrue(acq_X.shape == test_Xs[j].shape[:-2])
+
             with self.assertRaises(ValueError):
                 acq = qJointEntropySearch(
                     model=model,
@@ -86,7 +96,6 @@
                     num_samples=64,
                     X_pending=X_pending,
                     condition_noiseless=condition_noiseless,
-                    maximize=maximize,
                 )
             acq_X = acq(test_Xs[j])


test_community/acquisition/test_input_constructors.py

Lines changed: 0 additions & 2 deletions
@@ -86,10 +86,8 @@ def test_construct_inputs_scorebo(self) -> None:
             training_data=self.blockX_blockY,
             bounds=self.bounds,
             num_optima=num_optima,
-            maximize=False,
             distance_metric="kl_divergence",
         )
-        self.assertFalse(kwargs["maximize"])
         self.assertEqual(self.blockX_blockY[0].X.dtype, kwargs["optimal_inputs"].dtype)
         self.assertEqual(len(kwargs["optimal_inputs"]), num_optima)
         self.assertEqual(len(kwargs["optimal_outputs"]), num_optima)
