
Commit f6b7530

Fixes inverse cost-weighted utility behaviour for AF values <=0 (#2297)
Summary: Instead of clamping negative AF values to 0.0 or dividing them by the cost, this changes the behaviour to multiply AF values by the cost when they are <= 0.0. One test asserting that the value of the AF had been clamped was removed, as it is no longer applicable.

## Motivation

This PR addresses the issue discussed in #2194, where dividing negative KG AF values by cost results in more expensive points of equal or lower value being regarded as having higher AF values.

### Have you read the [Contributing Guidelines on pull requests](https://github.com/pytorch/botorch/blob/main/CONTRIBUTING.md#pull-requests)?

Yes.

Pull Request resolved: #2297

Test Plan: Only minor code changes were necessary, and the existing unit tests already account for the changes. Furthermore, I have been using this version of `InverseCostWeightedUtility` in my own experiments.

## Related PRs

N/A

Reviewed By: SebastianAment

Differential Revision: D56266841

Pulled By: esantorella

fbshipit-source-id: 04b91a9b6b0a8b38957af2197605be3ad3d8c802
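To make the motivation concrete, here is a minimal sketch (toy values, not from the PR) of why plain inverse cost weighting misranks negative deltas, and how multiplying by the cost fixes the ordering:

```python
import torch

# Two candidates with the same negative delta but different costs,
# plus two positive-delta candidates for contrast (toy values).
deltas = torch.tensor([-1.0, -1.0, 0.5, 0.5])
cost = torch.tensor([1.0, 10.0, 1.0, 10.0])

# Old behaviour: dividing by cost shrinks the expensive candidate's
# negative delta toward zero, so it wrongly looks *better*.
print(deltas / cost)
# tensor([-1.0000, -0.1000,  0.5000,  0.0500])

# New behaviour: multiply by cost when delta <= 0, so a higher cost
# makes an already-negative delta strictly worse.
print(torch.where(deltas > 0, deltas / cost, deltas * cost))
# tensor([ -1.0000, -10.0000,   0.5000,   0.0500])
```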
Parent: ee8aef0

5 files changed: +124, −83 lines

botorch/acquisition/cost_aware.py

Lines changed: 16 additions & 7 deletions
@@ -94,6 +94,14 @@ class InverseCostWeightedUtility(CostAwareUtility):
     performs the inverse weighting on the sample level:
     `weighted utility = mean(u_1 / c_1, ..., u_N / c_N)`.
 
+    Where values in (u_1, ..., u_N) are negative, or for mean(U) < 0, the
+    weighted utility is instead calculated via scaling by the cost, i.e. if
+    `use_mean=True`: `weighted_utility = mean(U) * mean_cost` and if
+    `use_mean=False`:
+    `weighted utility = mean(u_1 * c_1, u_2 / c_2, u_3 * c_3, ..., u_N / c_N)`,
+    depending on whether (`u_*` >= 0), as with `u_2` and `u_N` in this case, or
+    (`u_*` < 0) as with `u_1` and `u_3`.
+
     The cost is additive across multiple elements of a q-batch.
     """
 
@@ -105,6 +113,8 @@ def __init__(
         min_cost: float = 1e-2,
     ) -> None:
         r"""Cost-aware utility that weights increase in utility by inverse cost.
+        For negative increases in utility, the utility is instead scaled by the
+        cost. See the class description for more information.
 
         Args:
             cost_model: A model of the cost of evaluating a candidate

@@ -145,7 +155,9 @@ def forward(
         X_evaluation_mask: Optional[Tensor] = None,
         **kwargs: Any,
     ) -> Tensor:
-        r"""Evaluate the cost function on the candidates and improvements.
+        r"""Evaluate the cost function on the candidates and improvements. Note
+        that negative values of `deltas` are instead scaled by the cost, and not
+        inverse-weighted. See the class description for more information.
 
         Args:
             X: A `batch_shape x q x d`-dim Tensor of with `q` `d`-dim design

@@ -201,10 +213,7 @@ def forward(
         # this will be of shape `num_fantasies x batch_shape` or `batch_shape`
         cost = cost.clamp_min(self._min_cost).sum(dim=-1)
 
-        # if we are doing inverse weighting on the sample level, clamp numerator.
-        if not self._use_mean:
-            deltas = deltas.clamp_min(0.0)
-
         # compute and return the ratio on the sample level - If `use_mean=True`
-        # this operation involves broadcasting the cost across fantasies
-        return deltas / cost
+        # this operation involves broadcasting the cost across fantasies.
+        # We multiply by the cost if the deltas are <= 0, see discussion #2914
+        return torch.where(deltas > 0, deltas / cost, deltas * cost)
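A short sketch of the sample-level rule the updated docstring describes (toy tensors assumed; `u` are utility samples, `c` their costs, using the code's `> 0` convention): positive samples are divided by cost, the rest multiplied by it, and with `use_mean=False` the weighted utility is the mean over samples.

```python
import torch

# Toy utility samples u_1..u_4 and per-sample costs (illustrative values).
u = torch.tensor([-0.3, 0.8, -0.1, 0.6])
c = torch.tensor([2.0, 2.0, 4.0, 4.0])

# Per-sample weighting: u_i / c_i if u_i > 0, else u_i * c_i.
weighted = torch.where(u > 0, u / c, u * c)
# tensor([-0.6000,  0.4000, -0.4000,  0.1500])

# With `use_mean=False`, the weighted utility is the mean over samples.
weighted_utility = weighted.mean()  # -0.1125
```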

botorch/acquisition/multi_objective/hypervolume_knowledge_gradient.py

Lines changed: 2 additions & 4 deletions
@@ -258,8 +258,7 @@ def forward(self, X: Tensor) -> Tensor:
         values = self.cost_aware_utility(
             # exclude pending points
             X=X_actual[..., :q, :],
-            # cost-weighting relies on nonnegative deltas
-            deltas=values.clamp_min(0.0),
+            deltas=values,
             sampler=self.cost_sampler,
             X_evaluation_mask=self.X_evaluation_mask,
         )

@@ -477,8 +476,7 @@ def forward(self, X: Tensor) -> Tensor:
         values = self.cost_aware_utility(
             # exclude pending points
             X=X_actual[..., :q, :],
-            # cost-weighting relies on nonnegative deltas
-            deltas=values.clamp_min(0.0),
+            deltas=values,
             sampler=self.cost_sampler,
             X_evaluation_mask=self.X_evaluation_mask,
         )
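With the clamp removed, call sites like the ones above can pass raw (possibly negative) deltas straight through. A minimal usage sketch, assuming a deterministic affine cost model; the shapes and values here are illustrative, not from this PR:

```python
import torch
from botorch.acquisition.cost_aware import InverseCostWeightedUtility
from botorch.models.cost import AffineFidelityCostModel

# Cost grows with the last input dimension (treated as a fidelity).
cost_model = AffineFidelityCostModel(fidelity_weights={-1: 1.0}, fixed_cost=5.0)
cost_aware_utility = InverseCostWeightedUtility(cost_model=cost_model)

X = torch.rand(4, 2, 3)  # batch_shape=4, q=2, d=3
values = torch.tensor([-0.2, 0.3, -0.1, 0.4])  # raw deltas, may be negative

# No `values.clamp_min(0.0)` is needed at the call site anymore.
weighted = cost_aware_utility(X=X, deltas=values)
```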

test/acquisition/multi_objective/test_hypervolume_knowledge_gradient.py

Lines changed: 0 additions & 20 deletions
@@ -304,26 +304,6 @@ def test_evaluate_q_hvkg(self):
         self.assertTrue(
             torch.equal(qHVKG.extract_candidates(X), X[..., : -n_f * num_pareto, :])
         )
-        # test that cost-weighted HVKG is clamped
-        with mock.patch.object(
-            ModelListGP, "fantasize", return_value=mfm
-        ) as patch_f:
-            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
-                mock_num_outputs.return_value = 2
-                qHVKG = acqf_class(
-                    model=model,
-                    num_fantasies=n_f,
-                    X_pending=X_pending,
-                    X_pending_evaluation_mask=X_pending_evaluation_mask,
-                    X_evaluation_mask=X_evaluation_mask,
-                    current_value=torch.tensor(1000, **tkwargs),
-                    ref_point=ref_point,
-                    num_pareto=num_pareto,
-                    cost_aware_utility=cost_aware_utility,
-                    **mf_kwargs,
-                )
-                val = qHVKG(X)
-                self.assertEqual(val.item(), 0.0)
 
         # test mfkg
         if acqf_class == qMultiFidelityHypervolumeKnowledgeGradient:

test/acquisition/test_cost_aware.py

Lines changed: 27 additions & 1 deletion
@@ -51,6 +51,9 @@ def test_InverseCostWeightedUtility(self):
 
         X = torch.randn(*batch_shape, 3, 2, device=self.device, dtype=dtype)
         deltas = torch.rand(4, *batch_shape, device=self.device, dtype=dtype)
+        neg_deltas = -torch.rand(
+            4, *batch_shape, device=self.device, dtype=dtype
+        )
 
         # test that sampler is required if use_mean=False
         icwu = InverseCostWeightedUtility(mm, use_mean=False)

@@ -66,13 +69,36 @@ def test_InverseCostWeightedUtility(self):
             any(issubclass(w.category, CostAwareWarning) for w in ws)
         )
 
-        # basic test
+        # basic test for both positive and negative delta values
        mm = MockModel(MockPosterior(mean=mean))
         icwu = InverseCostWeightedUtility(mm)
         ratios = icwu(X, deltas)
         self.assertTrue(
             torch.equal(ratios, deltas / mean.squeeze(-1).sum(dim=-1))
         )
+        neg_ratios = icwu(X, neg_deltas)
+        self.assertTrue(
+            torch.equal(neg_ratios, neg_deltas * mean.squeeze(-1).sum(dim=-1))
+        )
+
+        # test that ensures candidates of lower cost are preferred when they
+        # have equal, negative delta values
+        low_mean = 1 + torch.rand(
+            *batch_shape, 2, 1, device=self.device, dtype=dtype
+        )
+        high_mean = 2 + torch.rand(
+            *batch_shape, 2, 1, device=self.device, dtype=dtype
+        )
+        h_mm = MockModel(MockPosterior(mean=high_mean))
+        h_icwu = InverseCostWeightedUtility(h_mm)
+        # high cost ratios
+        h_ratios = h_icwu(X, neg_deltas)
+        l_mm = MockModel(MockPosterior(mean=low_mean))
+        l_icwu = InverseCostWeightedUtility(l_mm)
+        # low cost ratios
+        l_ratios = l_icwu(X, neg_deltas)
+        # assert that the low cost candidates are preferred
+        self.assertTrue(torch.all(h_ratios < l_ratios))
 
         # sampling test
         samples = 1 + torch.rand(  # event shape is q x m
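The new preference check also works as a standalone snippet (toy shapes and values assumed, mirroring the test above):

```python
import torch
from botorch.acquisition.cost_aware import InverseCostWeightedUtility
from botorch.utils.testing import MockModel, MockPosterior

# Equal negative deltas; only the (mocked) evaluation cost differs.
X = torch.randn(4, 3, 2)  # batch_shape=4, q=3, d=2
neg_deltas = -torch.rand(4)

cheap = InverseCostWeightedUtility(
    MockModel(MockPosterior(mean=torch.full((4, 3, 1), 1.0)))
)
pricey = InverseCostWeightedUtility(
    MockModel(MockPosterior(mean=torch.full((4, 3, 1), 5.0)))
)

# Under the new behaviour, the cheaper candidates receive the higher
# (less negative) weighted utility.
assert torch.all(pricey(X, neg_deltas) < cheap(X, neg_deltas))
```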
