Test perf only after behavior passes #9

Merged
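The gist of the diff below: behavioral (correctness) tests are run and checked first, and the performance benchmarking pass only happens once they have produced results. A rough sketch of that ordering, with stand-in types and simplified signatures (only the names that also appear in the diff are real; everything else here is illustrative):

```python
from enum import Enum


class TestingMode(Enum):
    # Stand-in for the TestingMode enum referenced in the diff.
    BEHAVIOR = "behavior"
    PERFORMANCE = "performance"


def establish_baseline_sketch(run_and_parse_tests, function_name):
    """Illustrative only -- not the real FunctionOptimizer method."""
    # 1. Behavioral (correctness) tests run first.
    behavioral_results, coverage = run_and_parse_tests(TestingMode.BEHAVIOR)

    # 2. If nothing behavioral could run, skip the function entirely,
    #    mirroring the early `return Failure(...)` added in this PR.
    if not behavioral_results:
        print(f"Couldn't run any tests for {function_name}; skipping.")
        return None

    # 3. Only now pay for the performance benchmarking pass.
    benchmarking_results, _ = run_and_parse_tests(TestingMode.PERFORMANCE)
    total_timing = benchmarking_results.total_passed_runtime()
    return behavioral_results, total_timing, coverage
```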
43 changes: 12 additions & 31 deletions codeflash/optimization/function_optimizer.py
@@ -94,7 +94,7 @@ def __init__(
aiservice_client: AiServiceClient | None = None,
args: Namespace | None = None,
) -> None:
self.project_root: str = str(test_cfg.project_root_path)
self.project_root = test_cfg.project_root_path
self.test_cfg = test_cfg
self.aiservice_client = aiservice_client if aiservice_client else AiServiceClient()
self.function_to_optimize = function_to_optimize
@@ -425,7 +425,7 @@ def determine_best_candidate(
)
speedup_ratios[candidate.optimization_id] = perf_gain

tree = Tree(f"Candidate #{candidate_index} - Sum of Minimum Runtimes")
tree = Tree(f"Candidate #{candidate_index} - Runtime Information")
if speedup_critic(
candidate_result, original_code_baseline.runtime, best_runtime_until_now
) and quantity_of_tests_critic(candidate_result):
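Side note on the relabelled node: `Tree` here appears to be Rich's rich.tree.Tree (the surrounding code already uses Rich's console.rule()). Assuming that, a minimal, self-contained illustration of the pattern, with made-up runtime values:

```python
from rich.console import Console
from rich.tree import Tree

console = Console()

# Made-up values, purely for illustration.
candidate_index = 3
tree = Tree(f"Candidate #{candidate_index} - Runtime Information")
tree.add("Original summed runtime: 412 microseconds")
tree.add("Candidate summed runtime: 198 microseconds")
console.print(tree)
```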
@@ -858,6 +858,13 @@ def establish_original_code_baseline(
self.write_code_and_helpers(
self.function_to_optimize_source_code, original_helper_code, self.function_to_optimize.file_path
)
if not behavioral_results:
logger.warning(
f"Couldn't run any tests for original function {self.function_to_optimize.function_name}. SKIPPING OPTIMIZING THIS FUNCTION."
)
console.rule()
return Failure("Failed to establish a baseline for the original code - behavioral tests failed.")

if test_framework == "pytest":
benchmarking_results, _ = self.run_and_parse_tests(
testing_type=TestingMode.PERFORMANCE,
@@ -897,19 +904,14 @@ def establish_original_code_baseline(
)
console.rule()


total_timing = benchmarking_results.total_passed_runtime() # caution: doesn't handle the loop index
functions_to_remove = [
result.id.test_function_name
for result in behavioral_results
if (result.test_type == TestType.GENERATED_REGRESSION and not result.did_pass)
]

if not behavioral_results:
logger.warning(
f"Couldn't run any tests for original function {self.function_to_optimize.function_name}. SKIPPING OPTIMIZING THIS FUNCTION."
)
console.rule()
success = False
if total_timing == 0:
logger.warning(
"The overall summed benchmark runtime of the original function is 0, couldn't run tests."
@@ -929,19 +931,7 @@
f"{humanize_runtime(total_timing)} per full loop"
)
console.rule()
logger.debug(f"Total original code summed runtime (ns): {total_timing}")
console.rule()
runtime_distribution, runtime_statistics = benchmarking_results.bayesian_nonparametric_bootstrap_analysis(
100_000
)
logger.info(
f"Bayesian Bootstrapping Nonparametric Analysis"
f"\nExpected original code summed runtime (95% Credible Interval) = ["
f"{humanize_runtime(round(runtime_statistics['credible_interval_lower_bound']))}, "
f"{humanize_runtime(round(runtime_statistics['credible_interval_upper_bound']))}], "
f"\nmedian: {humanize_runtime(round(runtime_statistics['median']))}"
)

logger.debug(f"Total original code runtime (ns): {total_timing}")
return Success(
(
OriginalCodeBaseline(
@@ -950,8 +940,6 @@
runtime=total_timing,
coverage_results=coverage_results,
),
runtime_distribution,
runtime_statistics,
functions_to_remove,
)
)
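With the bootstrap analysis dropped, the Success payload shrinks from a four-element to a two-element tuple. A hypothetical caller-side sketch of unpacking it, assuming Success/Failure come from the returns library (the handler name and prints are illustrative only):

```python
from returns.result import Failure, Result


def handle_baseline(result: Result) -> None:
    """Hypothetical caller: unpack the trimmed-down Success payload."""
    if isinstance(result, Failure):
        # e.g. the new early exit when no behavioral tests could run
        print("Baseline could not be established:", result.failure())
        return
    # Payload is now (OriginalCodeBaseline, functions_to_remove);
    # runtime_distribution and runtime_statistics are no longer part of it.
    baseline, functions_to_remove = result.unwrap()
    print("Original runtime (ns):", baseline.runtime)
    print("Generated regression tests to drop:", functions_to_remove)
```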
@@ -1117,7 +1105,7 @@ def run_and_parse_tests(
f'Error running tests in {", ".join(str(f) for f in test_files.test_files)}.\nTimeout Error'
)
return TestResults(), None
if run_result.returncode != 0:
if run_result.returncode != 0 and testing_type == TestingMode.BEHAVIOR:
logger.debug(
f'Nonzero return code {run_result.returncode} when running tests in '
f'{", ".join([str(f.instrumented_behavior_file_path) for f in test_files.test_files])}.\n'
@@ -1167,10 +1155,3 @@ def generate_and_instrument_tests(
)
]

@staticmethod
def write_code_and_helpers(original_code: str, original_helper_code: dict[Path, str], path: Path) -> None:
with path.open("w", encoding="utf8") as f:
f.write(original_code)
for module_abspath in original_helper_code:
with Path(module_abspath).open("w", encoding="utf8") as f:
f.write(original_helper_code[module_abspath])
1 change: 0 additions & 1 deletion codeflash/optimization/optimizer.py
@@ -53,7 +53,6 @@ def create_function_optimizer(
function_to_optimize_ast: ast.FunctionDef | None = None,
function_to_tests: dict[str, list[FunctionCalledInTest]] | None = None,
) -> FunctionOptimizer:
# TODO: Any validations should be done here
return FunctionOptimizer(
test_cfg=self.test_cfg,
aiservice_client=self.aiservice_client,