
Commit 35ba80e

vizier-team authored and copybara-github committed
[OSS Vizier] Leverage sequential prior transfer learning in gp_bandit.py.
PiperOrigin-RevId: 553912121
1 parent 7465af6 commit 35ba80e

3 files changed: +232 −44 lines changed


vizier/_src/algorithms/designers/gp/gp_models.py

+2 −2
@@ -213,7 +213,7 @@ def _pred_mean(
   return pred.predict_with_aux(features)[0].mean()
 
 
-def _train_stacked_residual_gp(
+def train_stacked_residual_gp(
     base_gp: GPState,
     spec: GPTrainingSpec,
     data: types.ModelData,
@@ -322,7 +322,7 @@ def train_gp(
   else:
     # Otherwise, we have a base GP to use - the GP trained on the last
     # iteration.
-    curr_gp = _train_stacked_residual_gp(
+    curr_gp = train_stacked_residual_gp(
        base_gp=curr_gp,
        spec=curr_spec,
        data=curr_data,
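
Note on the rename above: `train_stacked_residual_gp` is now module-public so that gp_bandit.py can stack the current study's GP on top of a previously trained prior GP; the stacked model predicts the base GP's mean plus a residual GP fit on the difference between the observed labels and that mean. Below is a minimal caller-side sketch of how the two entry points compose. It assumes `prior_data` and `current_data` are `types.ModelData` built elsewhere and that `ard_optimizer` is supplied by the caller; the `make_spec` helper and the spec values are illustrative, not part of this commit.

    import jax
    from vizier._src.algorithms.designers.gp import gp_models

    rng = jax.random.PRNGKey(0)
    prior_rng, curr_rng = jax.random.split(rng)

    def make_spec(data, ard_rng):
      # Hypothetical helper mirroring the fields `_create_gp_spec` passes in
      # gp_bandit.py; `linear_coef` is omitted here, assuming its default.
      return gp_models.GPTrainingSpec(
          ard_optimizer=ard_optimizer,  # assumed: an optimizer prepared by the caller
          ard_rng=ard_rng,
          coroutine=gp_models.get_vizier_gp_coroutine(features=data.features),
          ensemble_size=1,  # illustrative value
          ard_random_restarts=4,  # illustrative value
      )

    # Train the prior (base) GP first.
    prior_gp = gp_models.train_gp(
        spec=make_spec(prior_data, prior_rng), data=prior_data
    )

    # Fit the current study's GP on the residuals of the prior GP.
    curr_gp = gp_models.train_stacked_residual_gp(
        base_gp=prior_gp,
        spec=make_spec(current_data, curr_rng),
        data=current_data,
    )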

vizier/_src/algorithms/designers/gp_bandit.py

+87 −17
@@ -129,6 +129,10 @@ class VizierGPBandit(vza.Designer, vza.Predictor):
 
   _last_computed_gp: gp_models.GPState = attr.field(init=False)
 
+  # The prior GP used in transfer learning. `_last_computed_gp` is trained
+  # on the residuals of `_prior_gp`, if one is trained.
+  _prior_gp: Optional[gp_models.GPState] = attr.field(init=False, default=None)
+
   default_acquisition_optimizer_factory = vb.VectorizedOptimizerFactory(
       strategy_factory=es.VectorizedEagleStrategyFactory()
   )
@@ -205,6 +209,37 @@ def update(
     del all_active
     self._trials.extend(copy.deepcopy(completed.trials))
 
+  def set_priors(self, prior_studies: Sequence[vza.CompletedTrials]) -> None:
+    """Updates the list of prior studies for transfer learning.
+
+    Each element is treated as a new prior study, and priors are stacked in
+    the order received - i.e. the first entry trains the first GP, the second
+    entry trains a GP on the residuals of the first GP, etc.
+
+    See section 3.3 of https://dl.acm.org/doi/10.1145/3097983.3098043 for more
+    information, or see `gp/gp_models.py` and `gp/transfer_learning.py`.
+
+    Transfer learning is resilient to bad priors.
+
+    Multiple calls are permitted, but not advised. Each call will trigger
+    retraining of the prior GPs - on only the state provided to `set_priors`.
+    State is not incrementally updated.
+
+    TODO: Decide on whether this method should become part of an
+    interface.
+
+    Args:
+      prior_studies: A list of completed-trial collections, one per prior
+        study. The designer will train a prior GP for each list of prior
+        trials (for each `CompletedTrials` entry), in the order received.
+    """
+    self._rng, ard_rng = jax.random.split(self._rng)
+    prior_data = [
+        self._trials_to_data(prior_study.trials)
+        for prior_study in prior_studies
+    ]
+    self._prior_gp = self._train_prior_gp(priors=prior_data, ard_rng=ard_rng)
+
   @property
   def _metric_info(self) -> vz.MetricInformation:
     return self._problem.metric_information.item()
@@ -286,23 +321,49 @@ def _trials_to_data(self, trials: Sequence[vz.Trial]) -> types.ModelData:
     return types.ModelData(model_data.features, labels)
 
   @_experimental_override_allowed
-  def _train_gp(
+  def _create_gp_spec(
       self, data: types.ModelData, ard_rng: jax.random.KeyArray
-  ) -> gp_models.GPState:
-    """Overrideable training of a pre-computed ensemble GP."""
-    trained_gp = gp_models.train_gp(
-        spec=gp_models.GPTrainingSpec(
-            ard_optimizer=self._ard_optimizer,
-            ard_rng=ard_rng,
-            coroutine=gp_models.get_vizier_gp_coroutine(
-                features=data.features, linear_coef=self._linear_coef
-            ),
-            ensemble_size=self._ensemble_size,
-            ard_random_restarts=self._ard_random_restarts,
+  ) -> gp_models.GPTrainingSpec:
+    """Overrideable creation of a training spec for a GP model."""
+    return gp_models.GPTrainingSpec(
+        ard_optimizer=self._ard_optimizer,
+        ard_rng=ard_rng,
+        coroutine=gp_models.get_vizier_gp_coroutine(
+            features=data.features, linear_coef=self._linear_coef
         ),
-        data=data,
+        ensemble_size=self._ensemble_size,
+        ard_random_restarts=self._ard_random_restarts,
     )
-    return trained_gp
+
+  @_experimental_override_allowed
+  def _train_prior_gp(
+      self,
+      priors: Sequence[types.ModelData],
+      ard_rng: jax.random.KeyArray,
+  ):
+    """Trains a transfer-learning-enabled GP with prior studies.
+
+    Args:
+      priors: Data for each sequential prior to train for transfer learning.
+        Assumed to be in training order, i.e. priors[0] trains the
+        first GP, priors[1] trains a GP on the residuals of the GP
+        trained on priors[0], and so on.
+      ard_rng: RNG used by ARD to optimize GP parameters.
+
+    Returns:
+      A trained pre-computed ensemble GP.
+    """
+    ard_rngs = jax.random.split(ard_rng, len(priors))
+
+    # Order `specs` in training order, i.e. `specs[0]` is trained first.
+    specs = [
+        self._create_gp_spec(prior_data, ard_rngs[i])
+        for i, prior_data in enumerate(priors)
+    ]
+
+    # `train_gp` expects `specs` and `data` in training order, which is how
+    # they were prepared above.
+    return gp_models.train_gp(spec=specs, data=priors)
 
   @profiler.record_runtime
   def _update_gp(self, data: types.ModelData) -> gp_models.GPState:
@@ -312,7 +373,7 @@ def _update_gp(self, data: types.ModelData) -> gp_models.GPState:
       data: Data to go into GP.
 
     Returns:
-      GPBanditState object containing the designer's state.
+      `GPState` object containing the trained GP.
 
     1. Convert trials to features and labels.
     2. Trains a pre-computed ensemble GP.
@@ -324,8 +385,16 @@ def _update_gp(self, data: types.ModelData) -> gp_models.GPState:
       # state. The assumption is that trials can't be removed.
       return self._last_computed_gp
     self._incorporated_trials_count = len(self._trials)
+
     self._rng, ard_rng = jax.random.split(self._rng, 2)
-    self._last_computed_gp = self._train_gp(data=data, ard_rng=ard_rng)
+    spec = self._create_gp_spec(data, ard_rng)
+    if self._prior_gp:
+      self._last_computed_gp = gp_models.train_stacked_residual_gp(
+          base_gp=self._prior_gp, spec=spec, data=data
+      )
+    else:
+      self._last_computed_gp = gp_models.train_gp(spec=spec, data=data)
+
     return self._last_computed_gp
 
   @_experimental_override_allowed
@@ -437,7 +506,8 @@ def sample(
     if not trials:
      return np.zeros((num_samples, 0))
 
-    gp = self._update_gp(self._trials_to_data(self._trials))
+    data = self._trials_to_data(self._trials)
+    gp = self._update_gp(data)
     xs = self._converter.to_features(trials)
     xs = types.ModelInput(
         continuous=xs.continuous.replace_fill_value(0.0),
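
With the changes above, transfer learning is enabled on the designer by one extra call before `update`: `set_priors` trains one prior GP per prior study, and `_update_gp` then stacks the current study's GP on the residuals of that prior. A minimal usage sketch, assuming `problem` is a `vz.ProblemStatement` and `prior_trials` / `current_trials` are lists of completed `vz.Trial`s prepared elsewhere (the variable names are hypothetical; the call pattern mirrors the tests below):

    from vizier import algorithms as vza
    from vizier._src.algorithms.designers import gp_bandit

    designer = gp_bandit.VizierGPBandit(problem)

    # One prior GP is trained per entry; priors are stacked in the order given.
    designer.set_priors([vza.CompletedTrials(prior_trials)])

    # The current study's GP is fit on the prior's residuals the next time the
    # designer needs the model (e.g. on suggest/predict).
    designer.update(vza.CompletedTrials(current_trials), vza.ActiveTrials())
    suggestions = designer.suggest(count=5)

Because each call to `set_priors` retrains the prior GPs from scratch on only the data it is given, it is intended to be called once, before the first `update`.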

vizier/_src/algorithms/designers/gp_bandit_test.py

+143 −25
@@ -16,6 +16,7 @@
 
 """Tests for gp_bandit."""
 
+from typing import Callable
 from unittest import mock
 
 import jax
@@ -47,6 +48,69 @@ def _build_mock_continuous_array_specs(n):
   return [continuous_spec] * n
 
 
+def _setup_lambda_search(
+    f: Callable[[float], float], num_trials: int = 100
+) -> tuple[gp_bandit.VizierGPBandit, list[vz.Trial], vz.ProblemStatement]:
+  """Sets up a GP designer and outputs completed studies for `f`.
+
+  Args:
+    f: 1D objective to be optimized, i.e. f(x), where x is a scalar in [-5., 5.).
+    num_trials: Number of mock "evaluated" trials to return.
+
+  Returns:
+    A GP designer set up for the problem of optimizing the objective, without any
+    data updated.
+    Evaluated trials against `f`, and the problem statement.
+  """
+  assert (
+      num_trials > 0
+  ), f'Must provide a positive number of trials. Got {num_trials}.'
+
+  search_space = vz.SearchSpace()
+  search_space.root.add_float_param('x0', -5.0, 5.0)
+  problem = vz.ProblemStatement(
+      search_space=search_space,
+      metric_information=vz.MetricsConfig(
+          metrics=[
+              vz.MetricInformation('obj', goal=vz.ObjectiveMetricGoal.MAXIMIZE),
+          ]
+      ),
+  )
+
+  suggestions = quasi_random.QuasiRandomDesigner(
+      problem.search_space, seed=1
+  ).suggest(num_trials)
+
+  obs_trials = []
+  for idx, suggestion in enumerate(suggestions):
+    trial = suggestion.to_trial(idx)
+    x = suggestions[idx].parameters['x0'].value
+    trial.complete(vz.Measurement(metrics={'obj': f(x)}))
+    obs_trials.append(trial)
+
+  gp_designer = gp_bandit.VizierGPBandit(problem, ard_optimizer=ard_optimizer)
+  return gp_designer, obs_trials, problem
+
+
+def _compute_mse(
+    designer: gp_bandit.VizierGPBandit,
+    test_trials: list[vz.Trial],
+    y_test: list[float],
+) -> float:
+  """Evaluates the designer's accuracy on the test set.
+
+  Args:
+    designer: The GP bandit designer to predict from.
+    test_trials: The trials of the test set.
+    y_test: The results of the test set.
+
+  Returns:
+    The MSE of `designer` on `test_trials` and `y_test`.
+  """
+  preds = designer.predict(test_trials)
+  return np.sum(np.square(preds.mean - y_test))
+
+
 class GoogleGpBanditTest(parameterized.TestCase):
 
   @parameterized.parameters(
@@ -216,32 +280,8 @@ def test_on_flat_mixed_space(
     self.assertFalse(np.isnan(prediction.stddev).any())
 
   def test_prediction_accuracy(self):
-    search_space = vz.SearchSpace()
-    search_space.root.add_float_param('x0', -5.0, 5.0)
-    problem = vz.ProblemStatement(
-        search_space=search_space,
-        metric_information=vz.MetricsConfig(
-            metrics=[
-                vz.MetricInformation(
-                    'obj', goal=vz.ObjectiveMetricGoal.MAXIMIZE
-                ),
-            ]
-        ),
-    )
     f = lambda x: -((x - 0.5) ** 2)
-
-    suggestions = quasi_random.QuasiRandomDesigner(
-        problem.search_space, seed=1
-    ).suggest(100)
-
-    obs_trials = []
-    for idx, suggestion in enumerate(suggestions):
-      trial = suggestion.to_trial(idx)
-      x = suggestions[idx].parameters['x0'].value
-      trial.complete(vz.Measurement(metrics={'obj': f(x)}))
-      obs_trials.append(trial)
-
-    gp_designer = gp_bandit.VizierGPBandit(problem, ard_optimizer=ard_optimizer)
+    gp_designer, obs_trials, _ = _setup_lambda_search(f)
     gp_designer.update(vza.CompletedTrials(obs_trials), vza.ActiveTrials())
     pred_trial = vz.Trial({'x0': 0.0})
     pred = gp_designer.predict([pred_trial])
@@ -261,6 +301,7 @@ def test_jit_once(self, *args):
             name='metric', goal=vz.ObjectiveMetricGoal.MAXIMIZE
         )
     )
+
    def create_designer(problem):
      return gp_bandit.VizierGPBandit(
          problem=problem,
@@ -299,6 +340,83 @@ def create_runner(problem):
     create_runner(problem).run_designer(designer2)
 
 
+class GPBanditPriorsTest(parameterized.TestCase):
+
+  def test_prior_warping(self):
+    """Tests that a linear transform of the objective does not hurt transfer learning."""
+    f = lambda x: -((x - 0.5) ** 2)
+    transform_f = lambda x: -3 * ((x - 0.5) ** 2) + 10
+
+    # x is in the range defined in `_setup_lambda_search`: [-5.0, 5.0).
+    x_test = np.random.default_rng(1).uniform(-5.0, 5.0, 100)
+    y_test = [transform_f(x) for x in x_test]
+    test_trials = [vz.Trial({'x0': x}) for x in x_test]
+
+    # Create the designer with a prior and the trials to train the prior.
+    gp_designer_with_prior, obs_trials_for_prior, _ = _setup_lambda_search(
+        f=f, num_trials=100
+    )
+
+    # Set priors to the above trials.
+    gp_designer_with_prior.set_priors(
+        [vza.CompletedTrials(obs_trials_for_prior)]
+    )
+
+    # Create a no-prior designer on the transformed function `transform_f`.
+    # Also use the generated trials to update both the designer with a prior
+    # and the designer without. This tests that the prior designer is
+    # resilient to linear transforms between the prior and the top-level study.
+    gp_designer_no_prior, obs_trials, _ = _setup_lambda_search(
+        f=transform_f, num_trials=20
+    )
+
+    # Update both designers with the actual study.
+    gp_designer_no_prior.update(
+        vza.CompletedTrials(obs_trials), vza.ActiveTrials()
+    )
+    gp_designer_with_prior.update(
+        vza.CompletedTrials(obs_trials), vza.ActiveTrials()
+    )
+
+    # Evaluate the no-prior designer's accuracy on the test set.
+    mse_no_prior = _compute_mse(gp_designer_no_prior, test_trials, y_test)
+
+    # Evaluate the with-prior designer's accuracy on the test set.
+    mse_with_prior = _compute_mse(gp_designer_with_prior, test_trials, y_test)
+
+    # The designer with a prior should predict better.
+    self.assertLess(mse_with_prior, mse_no_prior)
+
+  @parameterized.parameters(
+      dict(iters=3, batch_size=5),
+      dict(iters=5, batch_size=1),
+  )
+  def test_run_with_priors(self, *, iters, batch_size):
+    f = lambda x: -((x - 0.5) ** 2)
+
+    # Create the designer with a prior and the trials to train the prior.
+    gp_designer_with_prior, obs_trials_for_prior, problem = (
+        _setup_lambda_search(f=f, num_trials=100)
+    )
+
+    # Set priors to the above trials.
+    gp_designer_with_prior.set_priors(
+        [vza.CompletedTrials(obs_trials_for_prior)]
+    )
+
+    self.assertLen(
+        test_runners.RandomMetricsRunner(
+            problem,
+            iters=iters,
+            batch_size=batch_size,
+            verbose=1,
+            validate_parameters=True,
+            seed=1,
+        ).run_designer(gp_designer_with_prior),
+        iters * batch_size,
+    )
+
+
 if __name__ == '__main__':
   # Jax disables float64 computations by default and will silently convert
   # float64s to float32s. We must explicitly enable float64.
