[SymForce] Fix singularity in barriers

The derivative of `Pow(x, y)` wrt `x` is implemented both in sympy and symengine (simplified for a constant exponent) as effectively: ``` Pow(x, y) * y / x ``` This is a simplification of the full rule where `x` and `y` can be functions of the independent variable, e.g. from sympy: ``` def _eval_derivative(self, s): from sympy.functions.elementary.exponential import log dbase = self.base.diff(s) dexp = self.exp.diff(s) return self * (dexp * log(self.base) + dbase * self.exp/self.base) ``` SymEngine has a shortcut for when the exponent is a `Number` specifically, although SymPy achieves the same result through automatic simplification: ``` void DiffVisitor::bvisit(const Pow &self) { if (is_a_Number(*(self.get_exp()))) { apply(self.get_base()); result_ = mul( mul(self.get_exp(), pow(self.get_base(), sub(self.get_exp(), one))), result_); } else { apply(mul(self.get_exp(), log(self.get_base()))); result_ = mul(self.rcp_from_this(), result_); } } ``` Of course, the problem is that all of these require dividing by the base, and are therefore singular if the base is 0. You might want to instead formulate this derivative when the exponent is a Number, or more generally an expression whose derivative is 0, as something like `Pow(x, y - 1) * y` instead. I've tested this, and for our uses where we typically need both the value and the derivative, it's more ops and in particular more `pow`s, so we'd rather have the divide. Again, for `Number` exponents, both SymEngine and SymPy simplify this down automatically to the `Pow(x, y - 1) * y` version, it's only actual runtime floating-point Pows that are this way. So, the barrier functions that can use `Pow` this way need to handle this, specifically if the power is not a constant at codegen time. It's usually a bad idea to do this for performance reasons, but it needs to work anyway. Added a test that checks the result is nonsingular. The interesting point to test for epsilon handling would be the transition point, where the derivative doesn't always even exist, so I'm not adding an epsilon handling check. Another thing we could possibly do is have `sf.Pow` take an epsilon? Topic: sf-barrier-pow-epsilon Reviewers: bradley,nathan,philipp,hayk GitOrigin-RevId: 76297a5c73d5845b381a747460948e5d334cd23a
Krishan0507 · Nov 2, 2023 · 9ec1de7 · 9ec1de7
1 parent bdbcbf7
commit 9ec1de7
Show file tree

Hide file tree

Showing 4 changed files with 55 additions and 1 deletion.
diff --git a/symforce/opt/barrier_functions.py b/symforce/opt/barrier_functions.py
@@ -12,6 +12,7 @@ def max_power_barrier(
     error_nominal: sf.Scalar,
     dist_zero_to_nominal: sf.Scalar,
     power: sf.Scalar,
+    epsilon: sf.Scalar = sf.epsilon(),
 ) -> sf.Scalar:
     """
     A one-sided, non-symmetric scalar barrier function. The barrier passes through the points
@@ -52,9 +53,23 @@ def max_power_barrier(
         dist_zero_to_nominal: The distance from x_nominal to the region of zero error. Must be
             a positive number.
         power: The power used to describe the curve of the error tails.
+        epsilon: Used iff power is not an `sf.Number`
     """
     x_zero_error = x_nominal - dist_zero_to_nominal
-    return error_nominal * sf.Pow(sf.Max(0, x - x_zero_error) / dist_zero_to_nominal, power)
+
+    # If power is a number, then both sympy and symengine represent the derivative of Pow without a
+    # division by the base.  Otherwise, for a constant exponent, they use ``Pow(x, y) * y / x``,
+    # so it needs to be safe to divide by the base.  We still want to represent the derivative this
+    # way, instead of using ``Pow(x, y - 1) * y``, because we typically need both the value and its
+    # derivative, so the former is better for CSE.
+    if isinstance(sf.sympify(power), sf.Number):
+        base_floor = 0
+    else:
+        base_floor = epsilon
+
+    return error_nominal * sf.Pow(
+        sf.Max(base_floor, x - x_zero_error) / dist_zero_to_nominal, power
+    )
 
 
 def max_linear_barrier(
@@ -65,6 +80,7 @@ def max_linear_barrier(
     problem, this produces a quadratic cost in the optimization problem because
     cost = 1/2 * residual^2. See :func:`max_power_barrier` for more details.
     """
+    # NOTE(aaron): For power=1, epsilon is not used
     return max_power_barrier(
         x=x,
         x_nominal=x_nominal,
@@ -80,6 +96,7 @@ def min_power_barrier(
     error_nominal: sf.Scalar,
     dist_zero_to_nominal: sf.Scalar,
     power: sf.Scalar,
+    epsilon: sf.Scalar = sf.epsilon(),
 ) -> sf.Scalar:
     """
     A one-sided, non-symmetric scalar barrier function. The barrier passes through the points
@@ -112,6 +129,7 @@ def min_power_barrier(
         error_nominal=error_nominal,
         dist_zero_to_nominal=dist_zero_to_nominal,
         power=power,
+        epsilon=epsilon,
     )
 
 
@@ -123,6 +141,7 @@ def min_linear_barrier(
     problem, this produces a quadratic cost in the optimization problem because
     cost = 1/2 * residual^2. See :func:`min_power_barrier` for more details.
     """
+    # NOTE(aaron): For power=1, epsilon is not used
     return min_power_barrier(
         x=x,
         x_nominal=x_nominal,
@@ -138,6 +157,7 @@ def symmetric_power_barrier(
     error_nominal: sf.Scalar,
     dist_zero_to_nominal: sf.Scalar,
     power: sf.Scalar,
+    epsilon: sf.Scalar = sf.epsilon(),
 ) -> sf.Scalar:
     """
     A symmetric barrier centered around x = 0, meaning the error at -x is equal to the error at x.
@@ -184,6 +204,7 @@ def symmetric_power_barrier(
         error_nominal=error_nominal,
         dist_zero_to_nominal=dist_zero_to_nominal,
         power=power,
+        epsilon=epsilon,
     )
 
 
@@ -194,6 +215,7 @@ def min_max_power_barrier(
     error_nominal: sf.Scalar,
     dist_zero_to_nominal: sf.Scalar,
     power: sf.Scalar,
+    epsilon: sf.Scalar = sf.epsilon(),
 ) -> sf.Scalar:
     """
     A symmetric barrier centered between x_nominal_lower and x_nominal_upper. See
@@ -232,6 +254,7 @@ def min_max_power_barrier(
         error_nominal=error_nominal,
         dist_zero_to_nominal=dist_zero_to_nominal,
         power=power,
+        epsilon=epsilon,
     )
 
 
@@ -247,6 +270,7 @@ def min_max_linear_barrier(
     problem, this produces a quadratic cost in the optimization problem because
     cost = 1/2 * residual^2. See :func:`min_max_power_barrier` for more details.
     """
+    # NOTE(aaron): For power=1, epsilon is not used
     return min_max_power_barrier(
         x=x,
         x_nominal_lower=x_nominal_lower,
@@ -265,6 +289,7 @@ def min_max_centering_power_barrier(
     dist_zero_to_nominal: sf.Scalar,
     power: sf.Scalar,
     centering_scale: sf.Scalar,
+    epsilon: sf.Scalar = sf.epsilon(),
 ) -> sf.Scalar:
     """
     This barrier is the maximum of two power barriers which we call the "bounding" barrier
@@ -341,6 +366,7 @@ def min_max_centering_power_barrier(
         error_nominal=error_nominal,
         dist_zero_to_nominal=dist_zero_to_nominal,
         power=power,
+        epsilon=epsilon,
     )
     center = (x_nominal_lower + x_nominal_upper) / 2
     centering_barrier = min_max_power_barrier(
@@ -350,5 +376,6 @@ def min_max_centering_power_barrier(
         error_nominal=centering_scale * error_nominal,
         dist_zero_to_nominal=x_nominal_upper - center,
         power=power,
+        epsilon=epsilon,
     )
     return sf.Max(bounding_barrier, centering_barrier)
diff --git a/symforce/opt/objectives/min_max_barrier_objective.py b/symforce/opt/objectives/min_max_barrier_objective.py
@@ -51,6 +51,7 @@ def residual_at_timestep(
         params: MinMaxBarrierObjective.Params,
         power: sf.Scalar = 1,
         cost_scaling: sf.Scalar = 1,
+        epsilon: sf.Scalar = sf.epsilon(),
     ) -> ResidualBlock:
         """
         Returns the residual block for the given timestep, where the residual is computed by
@@ -76,6 +77,7 @@ def residual_at_timestep(
             error_nominal=params.error_nominal,
             dist_zero_to_nominal=params.dist_zero_to_nominal,
             power=power,
+            epsilon=epsilon,
         )
         unwhitened_residual = vector.applyfunc(barrier)
 

diff --git a/symforce/opt/objectives/norm_barrier_objective.py b/symforce/opt/objectives/norm_barrier_objective.py
@@ -67,6 +67,7 @@ def residual_at_timestep(
                 error_nominal=params.error_nominal,
                 dist_zero_to_nominal=params.dist_zero_to_nominal,
                 power=power,
+                epsilon=epsilon,
             )
         )
 

diff --git a/test/symforce_opt_barriers_test.py b/test/symforce_opt_barriers_test.py
@@ -3,6 +3,12 @@
 # This source code is under the Apache 2.0 license found in the LICENSE file.
 # ----------------------------------------------------------------------------
 
+import symforce
+
+symforce.set_epsilon_to_symbol()
+
+import symforce.symbolic as sf
+from symforce import util
 from symforce.opt.barrier_functions import max_linear_barrier
 from symforce.opt.barrier_functions import max_power_barrier
 from symforce.opt.barrier_functions import min_linear_barrier
@@ -203,6 +209,24 @@ def test_centering_barrier(self) -> None:
                     centering_barrier_helper(x_centering, power), expected_error_centering
                 )
 
+    def test_not_singular(self) -> None:
+        """
+        Tests that the x=0 singularity is handled
+        """
+
+        def f(x: sf.Scalar, y: sf.Scalar, d: sf.Scalar, epsilon: sf.Scalar) -> sf.Scalar:
+            return max_power_barrier(
+                x=x, x_nominal=2, error_nominal=1, dist_zero_to_nominal=d, power=y, epsilon=epsilon
+            )
+
+        def f_deriv(x: sf.Scalar, y: sf.Scalar, d: sf.Scalar, epsilon: sf.Scalar) -> sf.Scalar:
+            return f(x, y, d, epsilon).diff(x)
+
+        f_deriv_numeric = util.lambdify(f_deriv)
+        with self.assertRaises(ZeroDivisionError):
+            f_deriv_numeric(x=0, y=1, d=1, epsilon=0)
+        self.assertEqual(f_deriv_numeric(x=0, y=1, d=1, epsilon=sf.numeric_epsilon), 0)
+
 
 if __name__ == "__main__":
     TestCase.main()