
Commit f6b7530

Fixes inverse cost-weighted utility behaviour for AF values <=0 (#2297)
Summary: Instead of clamping negative AF values to 0.0 or dividing them by the cost, this changes the behaviour to multiply AF values by the cost when they are <= 0.0. One test asserting that the value of the AF had been clamped was removed, as it is no longer applicable.

## Motivation

This PR addresses the issue discussed in #2194, where dividing negative KG AF values by cost results in more expensive points of equal or lower value being regarded as having higher AF values.

### Have you read the [Contributing Guidelines on pull requests](https://github.com/pytorch/botorch/blob/main/CONTRIBUTING.md#pull-requests)?

Yes.

Pull Request resolved: #2297

Test Plan: Only minor code changes were necessary, and the existing unit tests already account for the changes. Furthermore, I have been using this version of `InverseCostWeightedUtility` in my own experiments.

## Related PRs

N/A

Reviewed By: SebastianAment

Differential Revision: D56266841

Pulled By: esantorella

fbshipit-source-id: 04b91a9b6b0a8b38957af2197605be3ad3d8c802
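To make the motivation concrete, here is a minimal sketch (toy values, not from the PR) of why plain inverse cost weighting misranks negative deltas, and how multiplying by the cost fixes the ordering:

```python
import torch

# Two candidates with the same negative delta but different costs,
# plus two positive-delta candidates for contrast (toy values).
deltas = torch.tensor([-1.0, -1.0, 0.5, 0.5])
cost = torch.tensor([1.0, 10.0, 1.0, 10.0])

# Old behaviour: dividing by cost shrinks the expensive candidate's
# negative delta toward zero, so it wrongly looks *better*.
print(deltas / cost)
# tensor([-1.0000, -0.1000,  0.5000,  0.0500])

# New behaviour: multiply by cost when delta <= 0, so a higher cost
# makes an already-negative delta strictly worse.
print(torch.where(deltas > 0, deltas / cost, deltas * cost))
# tensor([ -1.0000, -10.0000,   0.5000,   0.0500])
```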
Parent: ee8aef0

5 files changed: +124, −83 lines

botorch/acquisition/cost_aware.py

Lines changed: 16 additions & 7 deletions
@@ -94,6 +94,14 @@ class InverseCostWeightedUtility(CostAwareUtility):
     performs the inverse weighting on the sample level:
     `weighted utility = mean(u_1 / c_1, ..., u_N / c_N)`.
 
+    Where values in (u_1, ..., u_N) are negative, or for mean(U) < 0, the
+    weighted utility is instead calculated via scaling by the cost, i.e. if
+    `use_mean=True`: `weighted_utility = mean(U) * mean_cost` and if
+    `use_mean=False`:
+    `weighted utility = mean(u_1 * c_1, u_2 / c_2, u_3 * c_3, ..., u_N / c_N)`,
+    depending on whether (`u_*` >= 0), as with `u_2` and `u_N` in this case, or
+    (`u_*` < 0) as with `u_1` and `u_3`.
+
     The cost is additive across multiple elements of a q-batch.
     """
 
@@ -105,6 +113,8 @@ def __init__(
         min_cost: float = 1e-2,
     ) -> None:
         r"""Cost-aware utility that weights increase in utility by inverse cost.
+        For negative increases in utility, the utility is instead scaled by the
+        cost. See the class description for more information.
 
         Args:
             cost_model: A model of the cost of evaluating a candidate

@@ -145,7 +155,9 @@ def forward(
         X_evaluation_mask: Optional[Tensor] = None,
         **kwargs: Any,
     ) -> Tensor:
-        r"""Evaluate the cost function on the candidates and improvements.
+        r"""Evaluate the cost function on the candidates and improvements. Note
+        that negative values of `deltas` are instead scaled by the cost, and not
+        inverse-weighted. See the class description for more information.
 
         Args:
             X: A `batch_shape x q x d`-dim Tensor of with `q` `d`-dim design

@@ -201,10 +213,7 @@ def forward(
         # this will be of shape `num_fantasies x batch_shape` or `batch_shape`
         cost = cost.clamp_min(self._min_cost).sum(dim=-1)
 
-        # if we are doing inverse weighting on the sample level, clamp numerator.
-        if not self._use_mean:
-            deltas = deltas.clamp_min(0.0)
-
         # compute and return the ratio on the sample level - If `use_mean=True`
-        # this operation involves broadcasting the cost across fantasies
-        return deltas / cost
+        # this operation involves broadcasting the cost across fantasies.
+        # We multiply by the cost if the deltas are <= 0, see discussion #2914
+        return torch.where(deltas > 0, deltas / cost, deltas * cost)
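A short sketch of the sample-level rule the updated docstring describes (toy tensors assumed; `u` are utility samples, `c` their costs, using the code's `> 0` convention): positive samples are divided by cost, the rest multiplied by it, and with `use_mean=False` the weighted utility is the mean over samples.

```python
import torch

# Toy utility samples u_1..u_4 and per-sample costs (illustrative values).
u = torch.tensor([-0.3, 0.8, -0.1, 0.6])
c = torch.tensor([2.0, 2.0, 4.0, 4.0])

# Per-sample weighting: u_i / c_i if u_i > 0, else u_i * c_i.
weighted = torch.where(u > 0, u / c, u * c)
# tensor([-0.6000,  0.4000, -0.4000,  0.1500])

# With `use_mean=False`, the weighted utility is the mean over samples.
weighted_utility = weighted.mean()  # -0.1125
```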

botorch/acquisition/multi_objective/hypervolume_knowledge_gradient.py

Lines changed: 2 additions & 4 deletions
@@ -258,8 +258,7 @@ def forward(self, X: Tensor) -> Tensor:
         values = self.cost_aware_utility(
             # exclude pending points
             X=X_actual[..., :q, :],
-            # cost-weighting relies on nonnegative deltas
-            deltas=values.clamp_min(0.0),
+            deltas=values,
             sampler=self.cost_sampler,
             X_evaluation_mask=self.X_evaluation_mask,
         )

@@ -477,8 +476,7 @@ def forward(self, X: Tensor) -> Tensor:
         values = self.cost_aware_utility(
             # exclude pending points
             X=X_actual[..., :q, :],
-            # cost-weighting relies on nonnegative deltas
-            deltas=values.clamp_min(0.0),
+            deltas=values,
             sampler=self.cost_sampler,
             X_evaluation_mask=self.X_evaluation_mask,
         )
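With the clamp removed, call sites like the ones above can pass raw (possibly negative) deltas straight through. A minimal usage sketch, assuming a deterministic affine cost model; the shapes and values here are illustrative, not from this PR:

```python
import torch
from botorch.acquisition.cost_aware import InverseCostWeightedUtility
from botorch.models.cost import AffineFidelityCostModel

# Cost grows with the last input dimension (treated as a fidelity).
cost_model = AffineFidelityCostModel(fidelity_weights={-1: 1.0}, fixed_cost=5.0)
cost_aware_utility = InverseCostWeightedUtility(cost_model=cost_model)

X = torch.rand(4, 2, 3)  # batch_shape=4, q=2, d=3
values = torch.tensor([-0.2, 0.3, -0.1, 0.4])  # raw deltas, may be negative

# No `values.clamp_min(0.0)` is needed at the call site anymore.
weighted = cost_aware_utility(X=X, deltas=values)
```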

test/acquisition/multi_objective/test_hypervolume_knowledge_gradient.py

Lines changed: 0 additions & 20 deletions
@@ -304,26 +304,6 @@ def test_evaluate_q_hvkg(self):
         self.assertTrue(
             torch.equal(qHVKG.extract_candidates(X), X[..., : -n_f * num_pareto, :])
         )
-        # test that cost-weighted HVKG is clamped
-        with mock.patch.object(
-            ModelListGP, "fantasize", return_value=mfm
-        ) as patch_f:
-            with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
-                mock_num_outputs.return_value = 2
-                qHVKG = acqf_class(
-                    model=model,
-                    num_fantasies=n_f,
-                    X_pending=X_pending,
-                    X_pending_evaluation_mask=X_pending_evaluation_mask,
-                    X_evaluation_mask=X_evaluation_mask,
-                    current_value=torch.tensor(1000, **tkwargs),
-                    ref_point=ref_point,
-                    num_pareto=num_pareto,
-                    cost_aware_utility=cost_aware_utility,
-                    **mf_kwargs,
-                )
-                val = qHVKG(X)
-                self.assertEqual(val.item(), 0.0)
 
         # test mfkg
         if acqf_class == qMultiFidelityHypervolumeKnowledgeGradient:

test/acquisition/test_cost_aware.py

Lines changed: 27 additions & 1 deletion
@@ -51,6 +51,9 @@ def test_InverseCostWeightedUtility(self):
 
         X = torch.randn(*batch_shape, 3, 2, device=self.device, dtype=dtype)
         deltas = torch.rand(4, *batch_shape, device=self.device, dtype=dtype)
+        neg_deltas = -torch.rand(
+            4, *batch_shape, device=self.device, dtype=dtype
+        )
 
         # test that sampler is required if use_mean=False
         icwu = InverseCostWeightedUtility(mm, use_mean=False)

@@ -66,13 +69,36 @@ def test_InverseCostWeightedUtility(self):
             any(issubclass(w.category, CostAwareWarning) for w in ws)
         )
 
-        # basic test
+        # basic test for both positive and negative delta values
        mm = MockModel(MockPosterior(mean=mean))
         icwu = InverseCostWeightedUtility(mm)
         ratios = icwu(X, deltas)
         self.assertTrue(
             torch.equal(ratios, deltas / mean.squeeze(-1).sum(dim=-1))
         )
+        neg_ratios = icwu(X, neg_deltas)
+        self.assertTrue(
+            torch.equal(neg_ratios, neg_deltas * mean.squeeze(-1).sum(dim=-1))
+        )
+
+        # test that ensures candidates of lower cost are preferred when they
+        # have equal, negative delta values
+        low_mean = 1 + torch.rand(
+            *batch_shape, 2, 1, device=self.device, dtype=dtype
+        )
+        high_mean = 2 + torch.rand(
+            *batch_shape, 2, 1, device=self.device, dtype=dtype
+        )
+        h_mm = MockModel(MockPosterior(mean=high_mean))
+        h_icwu = InverseCostWeightedUtility(h_mm)
+        # high cost ratios
+        h_ratios = h_icwu(X, neg_deltas)
+        l_mm = MockModel(MockPosterior(mean=low_mean))
+        l_icwu = InverseCostWeightedUtility(l_mm)
+        # low cost ratios
+        l_ratios = l_icwu(X, neg_deltas)
+        # assert that the low cost candidates are preferred
+        self.assertTrue(torch.all(h_ratios < l_ratios))
 
         # sampling test
         samples = 1 + torch.rand(  # event shape is q x m
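The new preference check also works as a standalone snippet (toy shapes and values assumed, mirroring the test above):

```python
import torch
from botorch.acquisition.cost_aware import InverseCostWeightedUtility
from botorch.utils.testing import MockModel, MockPosterior

# Equal negative deltas; only the (mocked) evaluation cost differs.
X = torch.randn(4, 3, 2)  # batch_shape=4, q=3, d=2
neg_deltas = -torch.rand(4)

cheap = InverseCostWeightedUtility(
    MockModel(MockPosterior(mean=torch.full((4, 3, 1), 1.0)))
)
pricey = InverseCostWeightedUtility(
    MockModel(MockPosterior(mean=torch.full((4, 3, 1), 5.0)))
)

# Under the new behaviour, the cheaper candidates receive the higher
# (less negative) weighted utility.
assert torch.all(pricey(X, neg_deltas) < cheap(X, neg_deltas))
```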
