8 changes: 3 additions & 5 deletions ax/benchmark/benchmark_problem.py
@@ -206,13 +206,12 @@ class MultiObjectiveBenchmarkProblem(BenchmarkProblem):
A `BenchmarkProblem` that supports multiple objectives.

For multi-objective problems, `optimal_value` indicates the maximum
-     hypervolume attainable with the given `reference_point`.
+     hypervolume attainable with the objective thresholds provided on the
+     `optimization_config`.

-     For argument descriptions, see `BenchmarkProblem`; it additionally takes a `runner`
-     and a `reference_point`.
+     For argument descriptions, see `BenchmarkProblem`.
"""

-     reference_point: List[float]
optimization_config: MultiObjectiveOptimizationConfig


@@ -289,5 +288,4 @@ def create_multi_objective_problem_from_botorch(
observe_noise_stds=observe_noise_sd,
has_ground_truth=problem.has_ground_truth,
optimal_value=test_problem.max_hv,
-         reference_point=test_problem._ref_point,
)
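Note on the change above: with `reference_point` removed from the problem dataclass, the hypervolume reference point now lives on the objective thresholds of the `optimization_config`. A minimal sketch of recovering it (illustrative only; it mirrors the updated test in test_benchmark_problem.py further down and assumes the usual botorch import path for `BraninCurrin`):

from ax.benchmark.benchmark_problem import create_multi_objective_problem_from_botorch
from botorch.test_functions.multi_objective import BraninCurrin

# Build a multi-objective problem; the objective thresholds created here take
# over the role of the old `reference_point` field.
problem = create_multi_objective_problem_from_botorch(
    test_problem_class=BraninCurrin,
    test_problem_kwargs={},
    num_trials=4,
    observe_noise_sd=False,
)

# Recover the reference point from the thresholds on the optimization config.
reference_point = [
    threshold.bound
    for threshold in problem.optimization_config.objective_thresholds
]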
20 changes: 15 additions & 5 deletions ax/benchmark/problems/surrogate.py
@@ -4,12 +4,19 @@
# LICENSE file in the root directory of this source tree.

# pyre-strict
"""
Benchmark problems based on surrogates.

These problems might appear to function identically to their non-surrogate
counterparts, `BenchmarkProblem` and `MultiObjectiveBenchmarkProblem`, aside
from the restriction that their runners are of type `SurrogateRunner`. However,
they are treated specially within JSON storage because surrogates cannot be
easily serialized.
"""

from dataclasses import dataclass, field
- from typing import List

from ax.benchmark.benchmark_problem import BenchmarkProblem

from ax.benchmark.runners.surrogate import SurrogateRunner
from ax.core.optimization_config import MultiObjectiveOptimizationConfig

@@ -21,6 +28,8 @@ class SurrogateBenchmarkProblemBase(BenchmarkProblem):

Its `runner` is a `SurrogateRunner`, which allows for the surrogate to be
constructed lazily and datasets to be downloaded lazily.

+     For argument descriptions, see `BenchmarkProblem`.
"""

runner: SurrogateRunner = field(repr=False)
@@ -34,9 +43,10 @@ class SOOSurrogateBenchmarkProblem(SurrogateBenchmarkProblemBase):
class MOOSurrogateBenchmarkProblem(SurrogateBenchmarkProblemBase):
"""
Has the same attributes/properties as a `MultiObjectiveBenchmarkProblem`,
-     but its runner is not constructed until needed, to allow for deferring
-     constructing the surrogate and downloading data.
+     but its `runner` is a `SurrogateRunner`, which allows for the surrogate to be
+     constructed lazily and datasets to be downloaded lazily.

+     For argument descriptions, see `BenchmarkProblem`.
"""

optimization_config: MultiObjectiveOptimizationConfig
-     reference_point: List[float]
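For context, the lazy construction these classes rely on: the `SurrogateRunner` is handed a callable that builds the model bridge and its training datasets only when first needed. A rough sketch under stated assumptions (the `build_surrogate_and_datasets` helper below is hypothetical; the concrete versions appear in benchmark_stubs.py further down):

from ax.benchmark.runners.surrogate import SurrogateRunner
from ax.utils.testing.core_stubs import get_branin_experiment

experiment = get_branin_experiment(with_completed_trial=True)

def build_surrogate_and_datasets():
    """Hypothetical helper: fit a model bridge on the experiment's data and
    return (model_bridge, datasets). Nothing is fit until the runner calls it."""
    ...

runner = SurrogateRunner(
    name="branin",
    search_space=experiment.search_space,
    outcome_names=["branin"],
    # Storing the callable (rather than its result) is what defers surrogate
    # construction and dataset downloads.
    get_surrogate_and_datasets=build_surrogate_and_datasets,
    noise_stds=0.0,
)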
81 changes: 81 additions & 0 deletions ax/benchmark/tests/test_benchmark.py
@@ -9,6 +9,7 @@
from unittest.mock import patch

import numpy as np
import torch
from ax.benchmark.benchmark import (
_create_benchmark_experiment,
benchmark_multiple_problems_methods,
@@ -53,6 +54,12 @@
from botorch.models.gp_regression import SingleTaskGP
from botorch.optim.optimize import optimize_acqf
from botorch.test_functions.synthetic import Branin
from pyre_extensions import none_throws


class ThreeProblem(Branin):
def evaluate_true(self, X: torch.Tensor) -> torch.Tensor:
return torch.tensor(3.0, dtype=torch.float64)


class TestBenchmark(TestCase):
@@ -90,6 +97,80 @@ def test_batch(self) -> None:
)
self.assertEqual(mock_optimize_acqf.call_args.kwargs["q"], batch_size)

def test_noisy(self) -> None:

with self.subTest("property test"):
problem = create_single_objective_problem_from_botorch(
test_problem_class=ThreeProblem,
test_problem_kwargs={"noise_std": 1.0},
lower_is_better=True,
num_trials=5,
)

res = benchmark_replication(
problem=problem,
method=get_sobol_benchmark_method(),
seed=0,
)
self.assertTrue((res.optimization_trace == 3).all())
df = none_throws(res.experiment).fetch_data().df
self.assertTrue(
(df.loc[df["metric_name"] == "ThreeProblem", "mean"] != 3).all()
)

# characterization test
soo_problem = get_single_objective_benchmark_problem(
test_problem_kwargs={"noise_std": 1.0}
)
moo_problem = get_multi_objective_benchmark_problem(
test_problem_kwargs={"noise_std": 1.0}
)
# Surrogate construction shouldn't be random
torch.manual_seed(0)
soo_surrogate_problem = get_soo_surrogate(noise_stds=1.0)
moo_surrogate_problem = get_moo_surrogate(noise_stds=1.0)

cases = {
"SOO": (
soo_problem,
[
104.36542659448965,
90.20076516036951,
56.18382601460175,
7.5867050504430775,
],
),
"MOO": (moo_problem, [0.0, 0.0, 6.952682254445629, 6.952682254445629]),
"SOO surrogate": (
soo_surrogate_problem,
# The numbers are all close because the surrogate is contrived
# and predicts almost the same value everywhere
[
104.36542659448779,
104.36542659448779,
104.36542659448779,
104.36542659448779,
],
),
"MOO surrogate": (
moo_surrogate_problem,
[
24.999992185973763,
24.999992185973763,
24.999992185973763,
24.999992185973763,
],
),
}
for name, (problem, expected) in cases.items():
with self.subTest(name):
res = benchmark_replication(
problem=problem,
method=get_sobol_benchmark_method(),
seed=0,
)
self.assertTrue((res.optimization_trace == expected).all())

def test_storage(self) -> None:
problem = get_single_objective_benchmark_problem()
res = benchmark_replication(
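A note on why `test_noisy` can assert that the trace is exactly 3: as the assertions above rely on, the optimization trace is built from noiseless ground-truth values for problems that have a ground truth, while `experiment.fetch_data()` returns the noisy observations; `ThreeProblem`'s constant true value makes the contrast easy to check. A condensed restatement of that property, using the same helpers and imports as the test above:

# Hedged restatement of the assertions in test_noisy above; not new behavior.
problem = create_single_objective_problem_from_botorch(
    test_problem_class=ThreeProblem,
    test_problem_kwargs={"noise_std": 1.0},
    lower_is_better=True,
    num_trials=5,
)
result = benchmark_replication(
    problem=problem, method=get_sobol_benchmark_method(), seed=0
)
# The trace is computed from true (noiseless) values, so it stays at 3.0 ...
assert (result.optimization_trace == 3).all()
# ... while the fetched observations carry the injected Gaussian noise.
df = none_throws(result.experiment).fetch_data().df
assert (df.loc[df["metric_name"] == "ThreeProblem", "mean"] != 3).all()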
6 changes: 5 additions & 1 deletion ax/benchmark/tests/test_benchmark_problem.py
@@ -203,7 +203,11 @@ def test_moo_from_botorch(self) -> None:

# Test hypervolume
self.assertEqual(branin_currin_problem.optimal_value, test_problem._max_hv)
-         self.assertEqual(branin_currin_problem.reference_point, test_problem._ref_point)
+         opt_config = branin_currin_problem.optimization_config
+         reference_point = [
+             threshold.bound for threshold in opt_config.objective_thresholds
+         ]
+         self.assertEqual(reference_point, test_problem._ref_point)

def test_moo_from_botorch_constrained(self) -> None:
with self.assertRaisesRegex(
83 changes: 61 additions & 22 deletions ax/utils/testing/benchmark_stubs.py
@@ -17,24 +17,26 @@
MultiObjectiveBenchmarkProblem,
)
from ax.benchmark.benchmark_result import AggregatedBenchmarkResult, BenchmarkResult
+ from ax.benchmark.metrics.benchmark import BenchmarkMetric
from ax.benchmark.problems.surrogate import (
MOOSurrogateBenchmarkProblem,
SOOSurrogateBenchmarkProblem,
)
from ax.benchmark.runners.surrogate import SurrogateRunner
from ax.core.experiment import Experiment
from ax.core.objective import MultiObjective, Objective
from ax.core.optimization_config import (
MultiObjectiveOptimizationConfig,
OptimizationConfig,
)
+ from ax.core.outcome_constraint import ObjectiveThreshold
from ax.modelbridge.generation_strategy import GenerationStep, GenerationStrategy
from ax.modelbridge.registry import Models
from ax.modelbridge.torch import TorchModelBridge
from ax.models.torch.botorch_modular.model import BoTorchModel
from ax.models.torch.botorch_modular.surrogate import Surrogate
from ax.service.scheduler import SchedulerOptions
from ax.utils.common.constants import Keys
from ax.utils.common.typeutils import checked_cast
from ax.utils.testing.core_stubs import (
get_branin_experiment,
get_branin_experiment_with_multi_objective,
@@ -60,11 +62,13 @@ def get_single_objective_benchmark_problem(


def get_multi_objective_benchmark_problem(
-     observe_noise_sd: bool = False, num_trials: int = 4
+     observe_noise_sd: bool = False,
+     num_trials: int = 4,
+     test_problem_kwargs: Optional[Dict[str, Any]] = None,
) -> MultiObjectiveBenchmarkProblem:
return create_multi_objective_problem_from_botorch(
test_problem_class=BraninCurrin,
-         test_problem_kwargs={},
+         test_problem_kwargs={} if test_problem_kwargs is None else test_problem_kwargs,
num_trials=num_trials,
observe_noise_sd=observe_noise_sd,
)
@@ -94,61 +98,96 @@ def get_sobol_benchmark_method() -> BenchmarkMethod:
)


- def get_soo_surrogate() -> SOOSurrogateBenchmarkProblem:
+ def get_soo_surrogate(noise_stds: float = 0.0) -> SOOSurrogateBenchmarkProblem:
+     outcome_name = "branin"
+     observe_noise_stds = True
experiment = get_branin_experiment(with_completed_trial=True)
-     surrogate = TorchModelBridge(
+
+     optimization_config = OptimizationConfig(
+         objective=Objective(
+             metric=BenchmarkMetric(
+                 name=outcome_name,
+                 lower_is_better=False,
+                 observe_noise_sd=observe_noise_stds,
+             ),
+             minimize=False,
+         )
+     )
+     surrogate = Surrogate(botorch_model_class=SingleTaskGP)
+     model_bridge = TorchModelBridge(
experiment=experiment,
search_space=experiment.search_space,
-         model=BoTorchModel(surrogate=Surrogate(botorch_model_class=SingleTaskGP)),
+         model=BoTorchModel(surrogate=surrogate),
data=experiment.lookup_data(),
transforms=[],
)
+     datasets = surrogate.training_data
runner = SurrogateRunner(
name="test",
name=outcome_name,
search_space=experiment.search_space,
outcome_names=["branin"],
get_surrogate_and_datasets=lambda: (surrogate, []),
outcome_names=[outcome_name],
get_surrogate_and_datasets=lambda: (model_bridge, datasets),
noise_stds=noise_stds,
)
return SOOSurrogateBenchmarkProblem(
name="test",
search_space=experiment.search_space,
-         optimization_config=checked_cast(
-             OptimizationConfig, experiment.optimization_config
-         ),
+         optimization_config=optimization_config,
num_trials=6,
-         observe_noise_stds=True,
+         observe_noise_stds=observe_noise_stds,
optimal_value=0.0,
runner=runner,
is_noiseless=runner.is_noiseless,
)


- def get_moo_surrogate() -> MOOSurrogateBenchmarkProblem:
+ def get_moo_surrogate(noise_stds: float = 0.0) -> MOOSurrogateBenchmarkProblem:
+     observe_noise_stds = True
+     outcome_names = ["branin_a", "branin_b"]
+     # set this to be easy to beat, so hypervolume computations aren't all zero
+     ref_point = [10.0, 10.0]
+     surrogate = Surrogate(botorch_model_class=SingleTaskGP)
experiment = get_branin_experiment_with_multi_objective(with_completed_trial=True)
-     surrogate = TorchModelBridge(
+     model_bridge = TorchModelBridge(
experiment=experiment,
search_space=experiment.search_space,
-         model=BoTorchModel(surrogate=Surrogate(botorch_model_class=SingleTaskGP)),
+         model=BoTorchModel(surrogate=surrogate),
data=experiment.lookup_data(),
transforms=[],
)

runner = SurrogateRunner(
name="test",
search_space=experiment.search_space,
outcome_names=["branin_a", "branin_b"],
get_surrogate_and_datasets=lambda: (surrogate, []),
outcome_names=outcome_names,
get_surrogate_and_datasets=lambda: (model_bridge, surrogate.training_data),
noise_stds=noise_stds,
)
+     metrics = [
+         BenchmarkMetric(
+             name=name,
+             lower_is_better=True,
+             observe_noise_sd=observe_noise_stds,
+         )
+         for name in outcome_names
+     ]
+     objectives = [Objective(metric=metric) for metric in metrics]
+     objective_thresholds = [
+         ObjectiveThreshold(metric=metric, bound=ref_p, relative=False)
+         for metric, ref_p in zip(metrics, ref_point)
+     ]
+     optimization_config = MultiObjectiveOptimizationConfig(
+         objective=MultiObjective(objectives=objectives),
+         objective_thresholds=objective_thresholds,
+     )

return MOOSurrogateBenchmarkProblem(
name="test",
search_space=experiment.search_space,
-         optimization_config=checked_cast(
-             MultiObjectiveOptimizationConfig, experiment.optimization_config
-         ),
+         optimization_config=optimization_config,
num_trials=10,
observe_noise_stds=True,
optimal_value=1.0,
-         reference_point=[],
runner=runner,
is_noiseless=runner.is_noiseless,
)