Merge pull request #30 from zapatacomputing/fix/volodyaco/simple-gradient-descent-converts-function-to-callable-with-gradient

Simple gradient descent works for callables without gradient
VolodyaCO authored Mar 15, 2023
2 parents 76e96c9 + f5f1fa4 commit 4f3e561
Showing 3 changed files with 50 additions and 11 deletions.
@@ -36,7 +36,7 @@ def _get_bounds_like_array(
    if isinstance(bounds, ScipyBounds):
        return bounds.lb, bounds.ub
    else:
        _bounds = np.array(bounds).T
        _bounds = np.array(bounds, dtype=float).T
        return _bounds[0], _bounds[1]
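The dtype=float cast matters when bounds are supplied as integer pairs, which would otherwise produce an integer array. A minimal standalone illustration (not the library code):

import numpy as np

# Illustration only: bounds supplied as integer pairs.
bounds = [(0, 1), (0, 2)]

lb_int, ub_int = np.array(bounds).T         # inherits an integer dtype
lb, ub = np.array(bounds, dtype=float).T    # always float, as in the fix above

print(lb_int.dtype, lb.dtype)  # e.g. int64 float64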


48 changes: 43 additions & 5 deletions src/orquestra/opt/optimizers/simple_gradient_descent.py
@@ -13,6 +13,7 @@
    construct_history_info,
    optimization_result,
)
from ..gradients import finite_differences_gradient
from ..history.recorder import RecorderFactory
from ..history.recorder import recorder as _recorder
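The newly imported finite_differences_gradient(cost_function) is used further down to build a numerical gradient for a plain callable. A rough standalone sketch of that idea, using a hypothetical helper name and a forward-difference rule (the library's exact implementation and options are not shown in this diff):

import numpy as np

def finite_differences_gradient_sketch(function, step=1e-5):
    # Hypothetical helper; the library call shown in this diff is
    # finite_differences_gradient(cost_function).
    def gradient(params: np.ndarray) -> np.ndarray:
        base = function(params)
        grad = np.zeros(len(params))
        for i in range(len(params)):
            shifted = params.astype(float)  # copy so params is untouched
            shifted[i] += step
            grad[i] = (function(shifted) - base) / step  # forward difference
        return grad
    return gradient

# Gradient of sum(x**2) at [1.0, 2.0] is approximately [2.0, 4.0].
grad_fn = finite_differences_gradient_sketch(lambda x: float(np.sum(x**2)))
print(grad_fn(np.array([1.0, 2.0])))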

@@ -22,11 +23,14 @@ def __init__(
        self,
        learning_rate: float,
        number_of_iterations: int,
        patience: Optional[int] = None,
        recorder: RecorderFactory = _recorder,
    ):
        """
        Args:
            parameter_values_list: list of parameter values to evaluate
            learning_rate: learning rate.
            number_of_iterations: number of gradient descent iterations.
            patience: number of iterations to wait before early stopping.
            recorder: recorder object which defines how to store
                the optimization history.
        """
@@ -35,6 +39,7 @@ def __init__(

        assert number_of_iterations > 0
        self.number_of_iterations = number_of_iterations
        self.patience = patience

    def _minimize(
        self,
@@ -62,22 +67,55 @@ def _minimize(
                see note.
        """
        assert isinstance(cost_function, CallableWithGradient)

        # So that mypy does not complain about missing attributes:
        assert hasattr(cost_function, "gradient")
        current_parameters = copy.deepcopy(initial_params)
        for _ in range(self.number_of_iterations):
        if self.patience is not None:
            best_value = np.inf
            best_iteration = 0
        for iteration in range(self.number_of_iterations):
            gradients = cost_function.gradient(current_parameters)
            current_parameters = current_parameters - (self.learning_rate * gradients)
            if keep_history:
                final_value = cost_function(current_parameters)
            if self.patience is not None:
                if keep_history:
                    current_value = final_value
                else:
                    current_value = cost_function(current_parameters)
                improvement = best_value - current_value
                if improvement > 1e-8:
                    best_value = current_value
                    best_iteration = iteration
                elif iteration - best_iteration >= self.patience:
                    break

        if not keep_history:
            final_value = cost_function(current_parameters)

        return optimization_result(
            opt_value=final_value,
            opt_params=current_parameters,
            nit=self.number_of_iterations,
            nit=iteration + 1,
            nfev=None,
            **construct_history_info(cost_function, keep_history),  # type: ignore
        )

    def _preprocess_cost_function(
        self, cost_function: Union[CallableWithGradient, Callable]
    ) -> CallableWithGradient:
        if not isinstance(cost_function, CallableWithGradient):
            gradient_fn = finite_differences_gradient(cost_function)

            class WrappedCostFunction:
                def __init__(self, cost_function):
                    self.cost_function = cost_function

                def __call__(self, params: np.ndarray) -> float:
                    return self.cost_function(params)

                def gradient(self, params: np.ndarray) -> np.ndarray:
                    return gradient_fn(params)

            cost_function = WrappedCostFunction(cost_function=cost_function)
        return cost_function
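Taken together, the wrapper above lets the optimizer accept a callable with no gradient method. A usage sketch mirroring the updated test below (import path inferred from the file location; learning rate and iteration count chosen arbitrarily):

import numpy as np
from orquestra.opt.optimizers.simple_gradient_descent import SimpleGradientDescent

def sum_x_squared(x):
    # Plain callable: no gradient method, so the optimizer falls back to
    # the finite-differences wrapper added in this change.
    return float(np.sum(x**2))

optimizer = SimpleGradientDescent(learning_rate=0.1, number_of_iterations=100)
result = optimizer.minimize(sum_x_squared, np.array([1.0, 1.0]))
print(result.opt_value, result.opt_params)  # expected to approach 0 and [0, 0]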
11 changes: 6 additions & 5 deletions tests/orquestra/opt/optimizers/simple_gradient_descent_test.py
@@ -62,14 +62,15 @@ def test_fails_to_initialize_when_number_of_iterations_is_negative(self):
        with pytest.raises(AssertionError):
            SimpleGradientDescent(0.1, -1)

    def test_fails_to_minimize_when_cost_function_does_not_have_gradient_method(
    def test_minimize_succeeds_when_cost_function_does_not_have_gradient_method(
        self, optimizer
    ):
        def cost_function(x):
            return sum(x)
        def sum_x_squared_no_gradient(x):
            return sum(x**2)

        with pytest.raises(AssertionError):
            optimizer.minimize(cost_function, np.array([0, 0]))
        result = optimizer.minimize(sum_x_squared_no_gradient, np.array([1.0, 1.0]))
        np.testing.assert_almost_equal(result.opt_value, 0)
        np.testing.assert_almost_equal(result.opt_params, np.array([0, 0]))

    def test_history_contains_function_evaluations(self, optimizer, sum_x_squared):
        results = optimizer.minimize(sum_x_squared, np.array([1, 0]), keep_history=True)
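The patience-based early stopping added above is not exercised by the tests shown here. A hypothetical test along these lines could cover it (the test name, the optimizer arguments, and the nit attribute access are assumptions, not part of this change):

    def test_stops_early_when_patience_is_exceeded(self):
        optimizer = SimpleGradientDescent(0.1, 1000, patience=5)

        def constant_cost_function(x):
            # The cost never improves, so the loop should break after roughly
            # `patience` iterations rather than running all 1000.
            return 1.0

        result = optimizer.minimize(constant_cost_function, np.array([1.0, 1.0]))
        assert result.nit < 1000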
