|
10 | 10 | These models are often a good starting point and are further documented in the |
11 | 11 | tutorials. |
12 | 12 |
|
13 | | -`SingleTaskGP` and `HeteroskedasticSingleTaskGP` are single-task exact GP models, |
14 | | -differing in how they treat noise. They use relatively strong priors on the Kernel |
15 | | -hyperparameters, which work best when covariates are normalized to the unit cube |
16 | | -and outcomes are standardized (zero mean, unit variance). By default, these models |
17 | | -use a `Standardize` outcome transform, which applies this standardization. However, |
18 | | -they do not (yet) use an input transform by default. |
19 | | -
|
20 | | -These models all work in batch mode (each batch having its own hyperparameters). |
21 | | -When the training observations include multiple outputs, these models use |
| 13 | +`SingleTaskGP` is a single-task exact GP model that uses relatively strong priors on |
| 14 | +the Kernel hyperparameters, which work best when covariates are normalized to the unit |
| 15 | +cube and outcomes are standardized (zero mean, unit variance). By default, this model |
| 16 | +uses a `Standardize` outcome transform, which applies this standardization. However, |
| 17 | +it does not (yet) use an input transform by default. |
| 18 | +
|
| 19 | +The `SingleTaskGP` model works in batch mode (each batch having its own hyperparameters).
| 20 | +When the training observations include multiple outputs, `SingleTaskGP` uses |
22 | 21 | batching to model outputs independently. |
23 | 22 |
|
24 | | -These models all support multiple outputs. However, as single-task models, |
25 | | -`SingleTaskGP` and `HeteroskedasticSingleTaskGP` should be used only when the |
26 | | -outputs are independent and all use the same training data. If outputs are |
27 | | -independent and outputs have different training data, use the `ModelListGP`. |
28 | | -When modeling correlations between outputs, use a multi-task model like `MultiTaskGP`. |
| 23 | +`SingleTaskGP` supports multiple outputs. However, as a single-task model, |
| 24 | +`SingleTaskGP` should be used only when the outputs are independent and all |
| 25 | +use the same training inputs. If the outputs are independent but have different
| 26 | +training inputs, use a `ModelListGP`. When modeling correlations between outputs,
| 27 | +use a multi-task model like `MultiTaskGP`. |
29 | 28 | """ |
30 | 29 |
|
31 | 30 | from __future__ import annotations |
32 | 31 |
|
33 | 32 | import warnings |
34 | | -from typing import NoReturn |
35 | 33 |
|
36 | 34 | import torch |
37 | 35 | from botorch.models.gpytorch import BatchedMultiOutputGPyTorchModel |
38 | 36 | from botorch.models.model import FantasizeMixin |
39 | 37 | from botorch.models.transforms.input import InputTransform |
40 | | -from botorch.models.transforms.outcome import Log, OutcomeTransform, Standardize |
| 38 | +from botorch.models.transforms.outcome import OutcomeTransform, Standardize |
41 | 39 | from botorch.models.utils import validate_input_scaling |
42 | 40 | from botorch.models.utils.gpytorch_modules import ( |
43 | 41 | get_covar_module_with_dim_scaled_prior, |
44 | 42 | get_gaussian_likelihood_with_lognormal_prior, |
45 | | - MIN_INFERRED_NOISE_LEVEL, |
46 | 43 | ) |
47 | 44 | from botorch.utils.containers import BotorchContainer |
48 | 45 | from botorch.utils.datasets import SupervisedDataset |
49 | 46 | from botorch.utils.types import _DefaultType, DEFAULT |
50 | | -from gpytorch.constraints.constraints import GreaterThan |
51 | 47 | from gpytorch.distributions.multivariate_normal import MultivariateNormal |
52 | | -from gpytorch.likelihoods.gaussian_likelihood import ( |
53 | | - _GaussianLikelihoodBase, |
54 | | - FixedNoiseGaussianLikelihood, |
55 | | - GaussianLikelihood, |
56 | | -) |
| 48 | +from gpytorch.likelihoods.gaussian_likelihood import FixedNoiseGaussianLikelihood |
57 | 49 | from gpytorch.likelihoods.likelihood import Likelihood |
58 | | -from gpytorch.likelihoods.noise_models import HeteroskedasticNoise |
59 | 50 | from gpytorch.means.constant_mean import ConstantMean |
60 | 51 | from gpytorch.means.mean import Mean |
61 | | -from gpytorch.mlls.noise_model_added_loss_term import NoiseModelAddedLossTerm |
62 | 52 | from gpytorch.models.exact_gp import ExactGP |
63 | 53 | from gpytorch.module import Module |
64 | | -from gpytorch.priors.smoothed_box_prior import SmoothedBoxPrior |
65 | 54 | from torch import Tensor |
66 | 55 |
|
67 | 56 |
|
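For readers skimming the diff, here is a minimal, hedged sketch of the usage the updated docstring describes. It is not part of the change itself; the data shapes and the explicit `Normalize`/`Standardize` transforms are illustrative assumptions.

```python
# Illustrative sketch only -- shapes and transforms are assumptions, not part
# of this diff.
import torch

from botorch.models import ModelListGP, SingleTaskGP
from botorch.models.transforms.input import Normalize
from botorch.models.transforms.outcome import Standardize

train_X = torch.rand(20, 2)  # covariates already in the unit cube
# Two outcomes with shared training inputs -> one SingleTaskGP, with the
# outputs modeled independently via batching.
train_Y = torch.stack(
    [torch.sin(train_X).sum(dim=-1), torch.cos(train_X).sum(dim=-1)], dim=-1
)

model = SingleTaskGP(
    train_X=train_X,
    train_Y=train_Y,
    # Normalize is not applied by default (yet); Standardize is the default
    # outcome transform, shown explicitly here for clarity.
    input_transform=Normalize(d=train_X.shape[-1]),
    outcome_transform=Standardize(m=train_Y.shape[-1]),
)

# Independent outputs with *different* training inputs: use a ModelListGP of
# single-output models instead.
model_list = ModelListGP(
    SingleTaskGP(train_X, train_Y[..., :1]),
    SingleTaskGP(train_X[:10], train_Y[:10, 1:]),
)
```

The two outputs in the first model share training inputs, so a single batched `SingleTaskGP` suffices; the `ModelListGP` variant is only needed when the per-output training inputs differ.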
@@ -253,107 +242,3 @@ def forward(self, x: Tensor) -> MultivariateNormal: |
253 | 242 | mean_x = self.mean_module(x) |
254 | 243 | covar_x = self.covar_module(x) |
255 | 244 | return MultivariateNormal(mean_x, covar_x) |
256 | | - |
257 | | - |
258 | | -class HeteroskedasticSingleTaskGP(BatchedMultiOutputGPyTorchModel, ExactGP): |
259 | | - r"""A single-task exact GP model using a heteroskedastic noise model. |
260 | | -
|
261 | | - This model differs from `SingleTaskGP` with observed observation noise |
262 | | - variances (`train_Yvar`) in that it can predict noise levels out of sample. |
263 | | - This is achieved by internally wrapping another GP (a `SingleTaskGP`) to model |
264 | | - the (log of) the observation noise. Noise levels must be provided to |
265 | | - `HeteroskedasticSingleTaskGP` as `train_Yvar`. |
266 | | -
|
267 | | - Examples of cases in which noise levels are known include online |
268 | | - experimentation and simulation optimization. |
269 | | -
|
270 | | - Example: |
271 | | - >>> train_X = torch.rand(20, 2) |
272 | | - >>> train_Y = torch.sin(train_X).sum(dim=1, keepdim=True) |
273 | | - >>> se = torch.linalg.norm(train_X, dim=1, keepdim=True) |
274 | | - >>> train_Yvar = 0.1 + se * torch.rand_like(train_Y) |
275 | | - >>> model = HeteroskedasticSingleTaskGP(train_X, train_Y, train_Yvar) |
276 | | - """ |
277 | | - |
278 | | - def __init__( |
279 | | - self, |
280 | | - train_X: Tensor, |
281 | | - train_Y: Tensor, |
282 | | - train_Yvar: Tensor, |
283 | | - outcome_transform: OutcomeTransform | None = None, |
284 | | - input_transform: InputTransform | None = None, |
285 | | - ) -> None: |
286 | | - r""" |
287 | | - Args: |
288 | | - train_X: A `batch_shape x n x d` tensor of training features. |
289 | | - train_Y: A `batch_shape x n x m` tensor of training observations. |
290 | | - train_Yvar: A `batch_shape x n x m` tensor of observed measurement |
291 | | - noise. |
292 | | - outcome_transform: An outcome transform that is applied to the |
293 | | - training data during instantiation and to the posterior during |
294 | | - inference (that is, the `Posterior` obtained by calling |
295 | | - `.posterior` on the model will be on the original scale). |
296 | | - Note that the noise model internally log-transforms the |
297 | | - variances, which will happen after this transform is applied. |
298 | | - input_transform: An input transfrom that is applied in the model's |
299 | | - forward pass. |
300 | | - """ |
301 | | - if outcome_transform is not None: |
302 | | - train_Y, train_Yvar = outcome_transform(train_Y, train_Yvar) |
303 | | - self._validate_tensor_args(X=train_X, Y=train_Y, Yvar=train_Yvar) |
304 | | - validate_input_scaling(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar) |
305 | | - self._set_dimensions(train_X=train_X, train_Y=train_Y) |
306 | | - noise_likelihood = GaussianLikelihood( |
307 | | - noise_prior=SmoothedBoxPrior(-3, 5, 0.5, transform=torch.log), |
308 | | - batch_shape=self._aug_batch_shape, |
309 | | - noise_constraint=GreaterThan( |
310 | | - MIN_INFERRED_NOISE_LEVEL, transform=None, initial_value=1.0 |
311 | | - ), |
312 | | - ) |
313 | | - # Likelihood will always get evaluated with transformed X, so we need to |
314 | | - # transform the training data before constructing the noise model. |
315 | | - with torch.no_grad(): |
316 | | - transformed_X = self.transform_inputs( |
317 | | - X=train_X, input_transform=input_transform |
318 | | - ) |
319 | | - noise_model = SingleTaskGP( |
320 | | - train_X=transformed_X, |
321 | | - train_Y=train_Yvar, |
322 | | - likelihood=noise_likelihood, |
323 | | - outcome_transform=Log(), |
324 | | - ) |
325 | | - likelihood = _GaussianLikelihoodBase(HeteroskedasticNoise(noise_model)) |
326 | | - # This is hacky -- this class used to inherit from SingleTaskGP, but it |
327 | | - # shouldn't so this is a quick fix to enable getting rid of that |
328 | | - # inheritance |
329 | | - SingleTaskGP.__init__( |
330 | | - # pyre-fixme[6]: Incompatible parameter type |
331 | | - self, |
332 | | - train_X=train_X, |
333 | | - train_Y=train_Y, |
334 | | - likelihood=likelihood, |
335 | | - outcome_transform=None, |
336 | | - input_transform=input_transform, |
337 | | - ) |
338 | | - self.register_added_loss_term("noise_added_loss") |
339 | | - self.update_added_loss_term( |
340 | | - "noise_added_loss", NoiseModelAddedLossTerm(noise_model) |
341 | | - ) |
342 | | - if outcome_transform is not None: |
343 | | - self.outcome_transform = outcome_transform |
344 | | - self.to(train_X) |
345 | | - |
346 | | - # pyre-fixme[15]: Inconsistent override |
347 | | - def condition_on_observations(self, *_, **__) -> NoReturn: |
348 | | - raise NotImplementedError |
349 | | - |
350 | | - # pyre-fixme[15]: Inconsistent override |
351 | | - def subset_output(self, idcs) -> NoReturn: |
352 | | - raise NotImplementedError |
353 | | - |
354 | | - def forward(self, x: Tensor) -> MultivariateNormal: |
355 | | - if self.training: |
356 | | - x = self.transform_inputs(x) |
357 | | - mean_x = self.mean_module(x) |
358 | | - covar_x = self.covar_module(x) |
359 | | - return MultivariateNormal(mean_x, covar_x) |
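The class removed above wrapped a second `SingleTaskGP` over the log noise variances so it could predict noise levels out of sample. For the simpler case of known, fixed noise levels, a hedged sketch of the remaining alternative follows; it assumes a BoTorch version in which `SingleTaskGP` accepts `train_Yvar` and builds a `FixedNoiseGaussianLikelihood` (consistent with the import kept above). Unlike `HeteroskedasticSingleTaskGP`, it does not model noise at unobserved points.

```python
# Illustrative sketch only -- assumes SingleTaskGP accepts `train_Yvar`
# (fixed, known observation noise) in this BoTorch version.
import torch

from botorch.models import SingleTaskGP

train_X = torch.rand(20, 2)
train_Y = torch.sin(train_X).sum(dim=1, keepdim=True)
# Known per-observation noise variances, as in the removed class's example.
se = torch.linalg.norm(train_X, dim=1, keepdim=True)
train_Yvar = 0.1 + se * torch.rand_like(train_Y)

# Fixed-noise model: uses the given variances directly rather than learning a
# separate GP over (log) noise levels.
model = SingleTaskGP(train_X=train_X, train_Y=train_Y, train_Yvar=train_Yvar)
```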