
Commit 3384c24

saitcakmak authored and facebook-github-bot committed
Do not normalize or standardize dimension if all values are equal (#2185)
Summary:
Pull Request resolved: #2185

Issue description with the `Normalize` transform: Suppose the train data has x0 as a constant (this can happen with few data points) while x0 is being optimized over [0, 1]. Under the current behavior, we first compute a coefficient of 0.0 for x0, then clamp it up to 1e-8. During acqf optimization, we evaluate the model with values in [0, 1], which then get normalized to [0, 1e8]. This can cause numerical issues in GPyTorch and lead to non-PSD covariance matrices, since the model was trained on constant inputs and likely learned lengthscales that do not play well with such large values.

This diff updates the behavior of `min_range` / `min_std` in the `Normalize` / `InputStandardize` transforms to skip transforming a given dimension if the range / standard deviation of that dimension is less than the minimum. This is achieved by using an offset of 0 and a coefficient of 1 for that dimension.

Reviewed By: esantorella

Differential Revision: D53213759

fbshipit-source-id: 9f738e9c6654e184f6e8a74bb8abe8a530290691
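As an illustration, here is a minimal sketch of the behavior this change targets, assuming BoTorch at this revision (the training tensor, test point, and printed values are illustrative, not taken from the diff):

```python
import torch
from botorch.models.transforms.input import Normalize

# Training data in which x0 is constant (zero range) while x1 varies.
X_train = torch.tensor([[0.5, 0.0], [0.5, 2.0]])

nlz = Normalize(d=2)  # learn_bounds defaults to True when no bounds are given
nlz(X_train)  # train-mode call learns the offset and coefficient from X_train
nlz.eval()

# Before this change: the zero range of x0 was clamped up to min_range=1e-8,
# so eval-mode inputs in [0, 1] were mapped to roughly [-5e7, 5e7] in x0.
# After this change: x0 is skipped (offset 0, coefficient 1) and passes through.
X_test = torch.tensor([[1.0, 1.0]])
print(nlz(X_test))  # expected after this change: tensor([[1.0000, 0.5000]])
```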
1 parent be7a58d commit 3384c24

File tree

2 files changed: +61 -18 lines


botorch/models/transforms/input.py

Lines changed: 35 additions & 10 deletions
@@ -21,7 +21,6 @@
 from warnings import warn
 
 import numpy as np
-
 import torch
 from botorch.exceptions.errors import BotorchTensorDimensionError
 from botorch.exceptions.warnings import UserInputWarning
@@ -513,6 +512,7 @@ def __init__(
         reverse: bool = False,
         min_range: float = 1e-8,
         learn_bounds: Optional[bool] = None,
+        almost_zero: float = 1e-12,
     ) -> None:
         r"""Normalize the inputs to the unit cube.
 
@@ -533,10 +533,28 @@ def __init__(
                 transform when called from within a `fantasize` call. Default: True.
             reverse: A boolean indicating whether the forward pass should untransform
                 the inputs.
-            min_range: Amount of noise to add to the range to ensure no division by
-                zero errors.
+            min_range: If the range of an input dimension is smaller than `min_range`,
+                that input dimension will not be normalized. This is equivalent to
+                using bounds of `[0, 1]` for this dimension, and helps avoid division
+                by zero errors and related numerical issues. See the example below.
+                NOTE: This only applies if `learn_bounds=True`.
             learn_bounds: Whether to learn the bounds in train mode. Defaults
                 to False if bounds are provided, otherwise defaults to True.
+
+        Example:
+            >>> t = Normalize(d=2)
+            >>> t(torch.tensor([[3., 2.], [3., 6.]]))
+            ... tensor([[3., 2.],
+            ...         [3., 6.]])
+            >>> t.eval()
+            ... Normalize()
+            >>> t(torch.tensor([[3.5, 2.8]]))
+            ... tensor([[3.5, 0.2]])
+            >>> t.bounds
+            ... tensor([[0., 2.],
+            ...         [1., 6.]])
+            >>> t.coefficient
+            ... tensor([[1., 4.]])
         """
         if learn_bounds is not None:
             self.learn_coefficients = learn_bounds
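To unpack the numbers in that docstring example: dimension 0 is constant in the training data (range 0 < `min_range`), so it is left untransformed, which is why `3.5` passes through unchanged and its learned bounds are `[0, 1]` with coefficient `1`; dimension 1 has offset `2` and coefficient `6 - 2 = 4`, so the eval-mode value maps as `(2.8 - 2) / 4 = 0.2`.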
@@ -601,9 +619,11 @@ def _update_coefficients(self, X) -> None:
         # Aggregate mins and ranges over extra batch and marginal dims
         batch_ndim = min(len(self.batch_shape), X.ndim - 2)  # batch rank of `X`
         reduce_dims = (*range(X.ndim - batch_ndim - 2), X.ndim - 2)
-        self._offset = torch.amin(X, dim=reduce_dims).unsqueeze(-2)
-        self._coefficient = torch.amax(X, dim=reduce_dims).unsqueeze(-2) - self.offset
-        self._coefficient.clamp_(min=self.min_range)
+        offset = torch.amin(X, dim=reduce_dims).unsqueeze(-2)
+        coefficient = torch.amax(X, dim=reduce_dims).unsqueeze(-2) - offset
+        almost_zero = coefficient < self.min_range
+        self._coefficient = torch.where(almost_zero, 1.0, coefficient)
+        self._offset = torch.where(almost_zero, 0.0, offset)
 
     def get_init_args(self) -> Dict[str, Any]:
         r"""Get the arguments necessary to construct an exact copy of the transform."""
@@ -655,8 +675,11 @@ def __init__(
                 transform in eval() mode. Default: True
             reverse: A boolean indicating whether the forward pass should untransform
                 the inputs.
-            min_std: Amount of noise to add to the standard deviation to ensure no
-                division by zero errors.
+            min_std: If the standard deviation of an input dimension is smaller than
+                `min_std`, that input dimension will not be standardized. This is
+                equivalent to using a standard deviation of 1.0 and a mean of 0.0 for
+                this dimension, and helps avoid division by zero errors and related
+                numerical issues.
         """
         transform_dimension = d if indices is None else len(indices)
         super().__init__(
@@ -688,11 +711,13 @@ def _update_coefficients(self, X: Tensor) -> None:
         # Aggregate means and standard deviations over extra batch and marginal dims
         batch_ndim = min(len(self.batch_shape), X.ndim - 2)  # batch rank of `X`
         reduce_dims = (*range(X.ndim - batch_ndim - 2), X.ndim - 2)
-        coefficient, self._offset = (
+        coefficient, offset = (
             values.unsqueeze(-2)
             for values in torch.std_mean(X, dim=reduce_dims, unbiased=True)
         )
-        self._coefficient = coefficient.clamp_(min=self.min_std)
+        almost_zero = coefficient < self.min_std
+        self._coefficient = torch.where(almost_zero, 1.0, coefficient)
+        self._offset = torch.where(almost_zero, 0.0, offset)
 
 
 class Round(InputTransform, Module):
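For completeness, a minimal sketch of the corresponding `InputStandardize` behavior, assuming BoTorch at this revision (the tensors and printed values are illustrative):

```python
import torch
from botorch.models.transforms.input import InputStandardize

# Column 0 is constant, so its (unbiased) standard deviation is 0.
X_train = torch.tensor([[1.0, 0.0], [1.0, 2.0]])

stdz = InputStandardize(d=2)
stdzd_X = stdz(X_train)  # train-mode call learns the per-dimension mean and std

# After this change, column 0 passes through unchanged (mean 0, std 1 are used),
# while column 1 is standardized with mean 1 and unbiased std sqrt(2):
# (x - 1) / sqrt(2) for the values [0, 2].
print(stdzd_X)  # expected: tensor([[1.0000, -0.7071], [1.0000, 0.7071]])
```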

test/models/transforms/test_input.py

Lines changed: 26 additions & 8 deletions
@@ -229,8 +229,8 @@ def test_normalize(self) -> None:
             nlz.to(other_dtype)
             self.assertTrue(nlz.mins.dtype == other_dtype)
             # test incompatible dimensions of specified bounds
+            bounds = torch.zeros(2, 3, device=self.device, dtype=dtype)
             with self.assertRaises(BotorchTensorDimensionError):
-                bounds = torch.zeros(2, 3, device=self.device, dtype=dtype)
                 Normalize(d=2, bounds=bounds)
 
             # test jitter
@@ -380,7 +380,25 @@ def test_normalize(self) -> None:
             self.assertIsNone(nlz.coefficient.grad_fn)
             self.assertIsNone(nlz.offset.grad_fn)
 
-    def test_standardize(self):
+            # test that zero range is not scaled.
+            nlz = Normalize(d=2)
+            X = torch.tensor([[1.0, 0.0], [1.0, 2.0]], device=self.device, dtype=dtype)
+            nlzd_X = nlz(X)
+            self.assertAllClose(
+                nlz.coefficient,
+                torch.tensor([[1.0, 2.0]], device=self.device, dtype=dtype),
+            )
+            expected_X = torch.tensor(
+                [[1.0, 0.0], [1.0, 1.0]], device=self.device, dtype=dtype
+            )
+            self.assertAllClose(nlzd_X, expected_X)
+            nlz.eval()
+            X = torch.tensor([[1.5, 1.5]], device=self.device, dtype=dtype)
+            nlzd_X = nlz(X)
+            expected_X = torch.tensor([[1.5, 0.75]], device=self.device, dtype=dtype)
+            self.assertAllClose(nlzd_X, expected_X)
+
+    def test_standardize(self) -> None:
         for dtype in (torch.float, torch.double):
             # basic init
             stdz = InputStandardize(d=2)
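In the eval-mode check added above, `[[1.5, 1.5]]` maps to `[[1.5, 0.75]]` because dimension 0 had zero range during training and is now passed through unchanged (offset 0, coefficient 1), while dimension 1 is normalized with offset 0 and coefficient 2: `(1.5 - 0) / 2 = 0.75`.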
@@ -527,7 +545,7 @@ def test_standardize(self):
             stdz8 = InputStandardize(d=3, batch_shape=batch_shape, indices=[0, 2])
             self.assertFalse(stdz7.equals(stdz8))
 
-    def test_chained_input_transform(self):
+    def test_chained_input_transform(self) -> None:
         ds = (1, 2)
         batch_shapes = (torch.Size(), torch.Size([2]))
         dtypes = (torch.float, torch.double)
@@ -1157,7 +1175,7 @@ def test_one_hot_to_numeric(self) -> None:
 
 
 class TestAppendFeatures(BotorchTestCase):
-    def test_append_features(self):
+    def test_append_features(self) -> None:
         with self.assertRaises(ValueError):
             AppendFeatures(torch.ones(1))
         with self.assertRaises(ValueError):
@@ -1198,7 +1216,7 @@ def test_append_features(self):
         self.assertEqual(transform.feature_set.device.type, "cpu")
         self.assertEqual(transform.feature_set.dtype, torch.half)
 
-    def test_w_skip_expand(self):
+    def test_w_skip_expand(self) -> None:
         for dtype in (torch.float, torch.double):
             tkwargs = {"device": self.device, "dtype": dtype}
             feature_set = torch.tensor([[0.0], [1.0]], **tkwargs)
@@ -1221,7 +1239,7 @@ def test_w_skip_expand(self):
         tf_X = append_tf(pert_tf(test_X.expand(3, 5, -1, -1)))
         self.assertAllClose(tf_X, expected_X.expand(3, 5, -1, -1))
 
-    def test_w_f(self):
+    def test_w_f(self) -> None:
         def f1(x: Tensor, n_f: int = 1) -> Tensor:
             result = torch.sum(x, dim=-1, keepdim=True).unsqueeze(-2)
             return result.expand(*result.shape[:-2], n_f, -1)
@@ -1453,7 +1471,7 @@ def f2(x: Tensor, n_f: int = 1) -> Tensor:
 
 
 class TestFilterFeatures(BotorchTestCase):
-    def test_filter_features(self):
+    def test_filter_features(self) -> None:
         with self.assertRaises(ValueError):
             FilterFeatures(torch.tensor([[1, 2]], dtype=torch.long))
         with self.assertRaises(ValueError):
@@ -1527,7 +1545,7 @@ def test_filter_features(self):
 
 
 class TestInputPerturbation(BotorchTestCase):
-    def test_input_perturbation(self):
+    def test_input_perturbation(self) -> None:
         with self.assertRaisesRegex(ValueError, "-dim tensor!"):
             InputPerturbation(torch.ones(1))
         with self.assertRaisesRegex(ValueError, "-dim tensor!"):
