
Commit fc6fdba

pjpollot authored and facebook-github-bot committed
Add Posterior Standard Deviation acquisition function (#2060)
Summary:

## Motivation

I am a machine learning engineer actively researching Bayesian optimization solutions to apply in my company's products. Lately, I have been trying to find the right balance between exploitation and exploration by incorporating pure exploration into a sequential batch Bayesian optimization algorithm. `qNegIntegratedPosteriorVariance` was a suitable choice for this purpose, but it proved slower than the Posterior Standard Deviation acquisition function I am introducing in this pull request. The acquisition function simply returns the posterior standard deviation of the Gaussian process model, so its time complexity remains low compared to a Monte Carlo acquisition function.

### Have you read the [Contributing Guidelines on pull requests](https://github.com/pytorch/botorch/blob/main/CONTRIBUTING.md#pull-requests)?

Yes.

Pull Request resolved: #2060

Test Plan: The `PosteriorStandardDeviation` class I implemented is very similar to `PosteriorMean`, so the implementation and the unit tests also look almost the same. I made sure to define a variance for `MockPosterior` so that the unit tests run effectively. Since I have some doubts about whether the batch unit tests properly verify that my solution only applies for `q=1`, I would be glad to hear your feedback on my current implementation!

PS: There appears to be no problem with the integration of the new acquisition function in the documentation: ![doc screenshot](https://github.com/pytorch/botorch/assets/47068641/52e2cdb2-806c-4718-8c0d-63f7a0ac8efb)

## Related PRs

(If this PR adds or changes functionality, please take some time to update the docs at https://github.com/pytorch/botorch, and link to your PR here.)

Reviewed By: Balandat

Differential Revision: D50559929

Pulled By: saitcakmak

fbshipit-source-id: 2c0b98d535315cd33b38eeb67a89e50682716ff8
1 parent 58da970 commit fc6fdba
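To make the intended pure-exploration workflow concrete, here is a minimal usage sketch. The toy objective, bounds, and optimizer settings are illustrative assumptions and not part of this PR; the sketch maximizes the new acquisition function with `optimize_acqf` to select the point of highest posterior uncertainty.

```python
import torch
from botorch.acquisition import PosteriorStandardDeviation
from botorch.fit import fit_gpytorch_mll
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood

# Toy data on [0, 1]^2 (illustrative only).
train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = train_X.sum(dim=-1, keepdim=True)

# Fit a single-outcome GP.
model = SingleTaskGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_mll(mll)

# Maximize the posterior standard deviation to pick the most
# uncertain point (pure exploration, q=1).
acqf = PosteriorStandardDeviation(model)
bounds = torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double)
candidate, value = optimize_acqf(
    acqf, bounds=bounds, q=1, num_restarts=5, raw_samples=32
)
```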

File tree

3 files changed: +97, -0 lines changed

botorch/acquisition/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -20,6 +20,7 @@
     LogNoisyExpectedImprovement,
     NoisyExpectedImprovement,
     PosteriorMean,
+    PosteriorStandardDeviation,
     ProbabilityOfImprovement,
     qAnalyticProbabilityOfImprovement,
     UpperConfidenceBound,
@@ -91,6 +92,7 @@
     "PairwiseBayesianActiveLearningByDisagreement",
     "PairwiseMCPosteriorVariance",
     "PosteriorMean",
+    "PosteriorStandardDeviation",
     "PriorGuidedAcquisitionFunction",
     "ProbabilityOfImprovement",
     "ProximalAcquisitionFunction",

botorch/acquisition/analytic.py

Lines changed: 52 additions & 0 deletions

@@ -869,6 +869,58 @@ def forward(self, X: Tensor) -> Tensor:
         return self._mean_and_sigma(X, compute_sigma=False)[0] @ self.weights
 
 
+class PosteriorStandardDeviation(AnalyticAcquisitionFunction):
+    r"""Single-outcome Posterior Standard Deviation.
+
+    An acquisition function for pure exploration.
+    Only supports the case of q=1. Requires the model's posterior to have
+    `mean` and `variance` properties. The model must be either single-outcome
+    or combined with a `posterior_transform` to produce a single-output posterior.
+
+    Example:
+        >>> model = SingleTaskGP(train_X, train_Y)
+        >>> PSTD = PosteriorStandardDeviation(model)
+        >>> std = PSTD(test_X)
+    """
+
+    def __init__(
+        self,
+        model: Model,
+        posterior_transform: Optional[PosteriorTransform] = None,
+        maximize: bool = True,
+    ) -> None:
+        r"""Single-outcome Posterior Standard Deviation.
+
+        Args:
+            model: A fitted single-outcome GP model (must be in batch mode if
+                candidate sets X will be).
+            posterior_transform: A PosteriorTransform. If using a multi-output model,
+                a PosteriorTransform that transforms the multi-output posterior into a
+                single-output posterior is required.
+            maximize: If True, consider the problem a maximization problem. Note
+                that if `maximize=False`, the posterior standard deviation is negated.
+                As a consequence,
+                `optimize_acqf(PosteriorStandardDeviation(gp, maximize=False))`
+                actually returns -1 * minimum of the posterior standard deviation.
+        """
+        super().__init__(model=model, posterior_transform=posterior_transform)
+        self.maximize = maximize
+
+    @t_batch_mode_transform(expected_q=1)
+    def forward(self, X: Tensor) -> Tensor:
+        r"""Evaluate the posterior standard deviation on the candidate set X.
+
+        Args:
+            X: A `(b1 x ... bk) x 1 x d`-dim batched tensor of `d`-dim design points.
+
+        Returns:
+            A `(b1 x ... bk)`-dim tensor of posterior standard deviation values at
+            the given design points `X`.
+        """
+        _, std = self._mean_and_sigma(X)
+        return std if self.maximize else -std
+
+
 # --------------- Helper functions for analytic acquisition functions. ---------------
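For intuition, the value returned by `forward` is simply the square root of the (possibly transformed) posterior variance at `X`, negated when `maximize=False`. The following sketch checks that equivalence by hand; the toy data and shapes are illustrative assumptions, not part of this PR.

```python
import torch
from botorch.acquisition import PosteriorStandardDeviation
from botorch.models import SingleTaskGP

# Toy single-outcome GP (illustrative data only).
train_X = torch.rand(8, 2, dtype=torch.double)
train_Y = train_X.norm(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)

X = torch.rand(4, 1, 2, dtype=torch.double)  # batch x q=1 x d
acqf = PosteriorStandardDeviation(model)

# forward() returns sigma(X); computing it directly from the
# posterior should match (up to floating-point noise).
direct = model.posterior(X).variance.sqrt().view(-1)
assert torch.allclose(acqf(X), direct)

# With maximize=False the values are negated, so optimizing the
# acquisition function seeks the *least* uncertain point.
acqf_min = PosteriorStandardDeviation(model, maximize=False)
assert torch.allclose(acqf_min(X), -direct)
```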

test/acquisition/test_analytic.py

Lines changed: 43 additions & 0 deletions

@@ -21,6 +21,7 @@
     LogProbabilityOfImprovement,
     NoisyExpectedImprovement,
     PosteriorMean,
+    PosteriorStandardDeviation,
     ProbabilityOfImprovement,
     ScalarizedPosteriorMean,
     UpperConfidenceBound,
@@ -292,6 +293,48 @@ def test_posterior_mean_batch(self):
             PosteriorMean(model=mm2)
 
 
+class TestPosteriorStandardDeviation(BotorchTestCase):
+    def test_posterior_stddev(self):
+        for dtype in (torch.float, torch.double):
+            mean = torch.rand(3, 1, device=self.device, dtype=dtype)
+            std = torch.rand_like(mean)
+            mm = MockModel(MockPosterior(mean=mean, variance=std.square()))
+
+            acqf = PosteriorStandardDeviation(model=mm)
+            X = torch.rand(3, 1, 2, device=self.device, dtype=dtype)
+            pm = acqf(X)
+            self.assertTrue(torch.equal(pm, std.view(-1)))
+
+            acqf = PosteriorStandardDeviation(model=mm, maximize=False)
+            X = torch.rand(3, 1, 2, device=self.device, dtype=dtype)
+            pm = acqf(X)
+            self.assertTrue(torch.equal(pm, -std.view(-1)))
+
+            # check for proper error if multi-output model
+            mean2 = torch.rand(1, 2, device=self.device, dtype=dtype)
+            std2 = torch.rand_like(mean2)
+            mm2 = MockModel(MockPosterior(mean=mean2, variance=std2.square()))
+            with self.assertRaises(UnsupportedError):
+                PosteriorStandardDeviation(model=mm2)
+
+    def test_posterior_stddev_batch(self):
+        for dtype in (torch.float, torch.double):
+            mean = torch.rand(3, 1, 1, device=self.device, dtype=dtype)
+            std = torch.rand_like(mean)
+            mm = MockModel(MockPosterior(mean=mean, variance=std.square()))
+            acqf = PosteriorStandardDeviation(model=mm)
+            X = torch.empty(3, 1, 1, device=self.device, dtype=dtype)
+            pm = acqf(X)
+            self.assertTrue(torch.equal(pm, std.view(-1)))
+            # check for proper error if multi-output model
+            mean2 = torch.rand(3, 1, 2, device=self.device, dtype=dtype)
+            std2 = torch.rand_like(mean2)
+            mm2 = MockModel(MockPosterior(mean=mean2, variance=std2.square()))
+            msg = "Must specify a posterior transform when using a multi-output model."
+            with self.assertRaisesRegex(UnsupportedError, msg):
+                PosteriorStandardDeviation(model=mm2)
+
+
 class TestProbabilityOfImprovement(BotorchTestCase):
     def test_probability_of_improvement(self):
         for dtype in (torch.float, torch.double):