
Function optimizer refactor #10


Merged · 21 commits · Feb 20, 2025
13 changes: 12 additions & 1 deletion codeflash/code_utils/coverage_utils.py
@@ -25,7 +25,18 @@ def extract_dependent_function(main_function: str, code_context: CodeOptimizationContext
    if len(dependent_functions) != 1:
        return False

    return dependent_functions.pop()
    return build_fully_qualified_name(dependent_functions.pop(), code_context)


def build_fully_qualified_name(function_name: str, code_context: CodeOptimizationContext) -> str:
    full_name = function_name
    for obj_name, parents in code_context.preexisting_objects:
        if obj_name == function_name:
            for parent in parents:
                if parent.type == "ClassDef":
                    full_name = f"{parent.name}.{full_name}"
            break
    return full_name


def generate_candidates(source_code_path: Path) -> list[str]:
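Note: as a quick illustration of the new helper (not part of the diff), the snippet below mirrors its lookup logic with `SimpleNamespace` stand-ins for `CodeOptimizationContext` and `FunctionParent`; the real models in `codeflash.models.models` may differ.

```python
from types import SimpleNamespace

# Hypothetical stand-ins for the real context/parent models, for illustration only.
parent = SimpleNamespace(name="CharacterRemover", type="ClassDef")
ctx = SimpleNamespace(preexisting_objects=[("remove_control_characters", [parent])])

def build_fully_qualified_name(function_name, code_context):
    # Same logic as the helper added above: prefix each enclosing ClassDef parent.
    full_name = function_name
    for obj_name, parents in code_context.preexisting_objects:
        if obj_name == function_name:
            for p in parents:
                if p.type == "ClassDef":
                    full_name = f"{p.name}.{full_name}"
            break
    return full_name

print(build_fully_qualified_name("remove_control_characters", ctx))
# CharacterRemover.remove_control_characters
```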
21 changes: 13 additions & 8 deletions codeflash/models/models.py
@@ -16,7 +16,11 @@

from codeflash.cli_cmds.console import console, logger
from codeflash.code_utils.code_utils import validate_python_code
from codeflash.code_utils.coverage_utils import extract_dependent_function, generate_candidates
from codeflash.code_utils.coverage_utils import (
    build_fully_qualified_name,
    extract_dependent_function,
    generate_candidates,
)
from codeflash.code_utils.env_utils import is_end_to_end
from codeflash.verification.test_results import TestResults, TestType

@@ -322,18 +326,19 @@ def _fetch_function_coverages(
    coverage_data: dict[str, dict[str, Any]],
    original_cov_data: dict[str, dict[str, Any]],
) -> tuple[FunctionCoverage, Union[FunctionCoverage, None]]:
    resolved_name = build_fully_qualified_name(function_name, code_context)
    try:
        main_function_coverage = FunctionCoverage(
            name=function_name,
            coverage=coverage_data[function_name]["summary"]["percent_covered"],
            executed_lines=coverage_data[function_name]["executed_lines"],
            unexecuted_lines=coverage_data[function_name]["missing_lines"],
            executed_branches=coverage_data[function_name]["executed_branches"],
            unexecuted_branches=coverage_data[function_name]["missing_branches"],
            name=resolved_name,
            coverage=coverage_data[resolved_name]["summary"]["percent_covered"],
            executed_lines=coverage_data[resolved_name]["executed_lines"],
            unexecuted_lines=coverage_data[resolved_name]["missing_lines"],
            executed_branches=coverage_data[resolved_name]["executed_branches"],
            unexecuted_branches=coverage_data[resolved_name]["missing_branches"],
        )
    except KeyError:
        main_function_coverage = FunctionCoverage(
            name=function_name,
            name=resolved_name,
            coverage=0,
            executed_lines=[],
            unexecuted_lines=[],
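Note: the resolved name is now used as the lookup key into the coverage report, with a zeroed fallback on `KeyError`. A minimal sketch of that behavior (the report shape below is an assumption based on the keys visible in this diff, not the exact models.py code):

```python
from typing import Any

def coverage_summary(resolved_name: str, coverage_data: dict[str, dict[str, Any]]) -> dict[str, Any]:
    try:
        entry = coverage_data[resolved_name]
        return {
            "name": resolved_name,
            "coverage": entry["summary"]["percent_covered"],
            "executed_lines": entry["executed_lines"],
            "unexecuted_lines": entry["missing_lines"],
        }
    except KeyError:
        # Mirror the fallback branch: report zero coverage instead of raising.
        return {"name": resolved_name, "coverage": 0, "executed_lines": [], "unexecuted_lines": []}

report = {
    "CharacterRemover.remove_control_characters": {
        "summary": {"percent_covered": 100.0},
        "executed_lines": [14],
        "missing_lines": [],
    }
}
print(coverage_summary("CharacterRemover.remove_control_characters", report)["coverage"])  # 100.0
print(coverage_summary("missing_function", report)["coverage"])  # 0
```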
1,152 changes: 1,152 additions & 0 deletions codeflash/optimization/function_optimizer.py

Large diffs are not rendered by default.

1,203 changes: 43 additions & 1,160 deletions codeflash/optimization/optimizer.py

Large diffs are not rendered by default.
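Note: the refactored code itself is collapsed above, but the updated tests further down show how the new per-function entry point is constructed. A minimal sketch based on those tests (the file path and function name are placeholders, and the exact constructor signatures are assumptions taken from the test changes):

```python
from pathlib import Path

from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.models.models import FunctionParent
from codeflash.optimization.function_optimizer import FunctionOptimizer
from codeflash.verification.verification_utils import TestConfig

# Placeholder target; any file/function pair discovered by codeflash would work here.
file_path = Path("code_to_optimize/character_remover.py").resolve()

function_to_optimize = FunctionToOptimize(
    function_name="remove_control_characters",
    file_path=file_path,
    parents=[FunctionParent("CharacterRemover", "ClassDef")],
)
test_config = TestConfig(
    tests_root=file_path.parent,
    tests_project_rootdir=file_path.parent,
    project_root_path=file_path.parent,
    test_framework="pytest",
    pytest_cmd="pytest",
)
func_optimizer = FunctionOptimizer(function_to_optimize=function_to_optimize, test_cfg=test_config)
code_context = func_optimizer.get_code_optimization_context().unwrap()
```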

2 changes: 1 addition & 1 deletion codeflash/verification/test_results.py
@@ -168,7 +168,7 @@ def usable_runtime_data_by_test_case(self) -> dict[InvocationId, list[int]]:
    for result in self.test_results:
        if result.did_pass and not result.runtime:
            logger.debug(
                f"Ignoring test case that passed but had no runtime -> {result.id}, Loop # {result.loop_index}"
                f"Ignoring test case that passed but had no runtime -> {result.id}, Loop # {result.loop_index}, Test Type: {result.test_type}, Verification Type: {result.verification_type}"
            )
    usable_runtimes = [
        (result.id, result.runtime) for result in self.test_results if result.did_pass and result.runtime
7 changes: 6 additions & 1 deletion codeflash/verification/test_runner.py
@@ -87,20 +87,25 @@ def run_behavioral_tests(
                env=pytest_test_env,
                timeout=600,
            )
            logger.debug(results)
            logger.debug(
                f"""Result return code: {results.returncode}, {"Result stderr:" + str(results.stderr) if results.stderr else ''}""")
        else:
            results = execute_test_subprocess(
                pytest_cmd_list + common_pytest_args + result_args + test_files,
                cwd=cwd,
                env=pytest_test_env,
                timeout=600,  # TODO: Make this dynamic
            )
            logger.debug(
                f"""Result return code: {results.returncode}, {"Result stderr:" + str(results.stderr) if results.stderr else ''}""")
    elif test_framework == "unittest":
        if enable_coverage:
            raise ValueError("Coverage is not supported yet for unittest framework")
        test_env["CODEFLASH_LOOP_INDEX"] = "1"
        test_files = [file.instrumented_behavior_file_path for file in test_paths.test_files]
        result_file_path, results = run_unittest_tests(verbose, test_files, test_env, cwd)
        logger.debug(
            f"""Result return code: {results.returncode}, {"Result stderr:" + str(results.stderr) if results.stderr else ''}""")
    else:
        raise ValueError(f"Unsupported test framework: {test_framework}")
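Note: the same return-code/stderr debug message is now logged after each subprocess call. Not part of this PR, but one way the repeated f-string could be factored out, assuming a `subprocess.CompletedProcess`-style result:

```python
import subprocess

def describe_result(results: subprocess.CompletedProcess) -> str:
    # Reproduces the message format used by the new logger.debug calls.
    stderr_part = "Result stderr:" + str(results.stderr) if results.stderr else ""
    return f"Result return code: {results.returncode}, {stderr_part}"

results = subprocess.run(["python", "-c", "pass"], capture_output=True, text=True)
print(describe_result(results))  # e.g. "Result return code: 0, "
```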
1 change: 1 addition & 0 deletions pyproject.toml
@@ -119,6 +119,7 @@ types-gevent = "^24.11.0.20241230"
types-greenlet = "^3.1.0.20241221"
types-pexpect = "^4.9.0.20241208"
types-unidiff = "^0.7.0.20240505"
sqlalchemy = "^2.0.38"

[tool.poetry.build]
script = "codeflash/update_license_version.py"
7 changes: 6 additions & 1 deletion tests/scripts/end_to_end_test_init_optimization.py
@@ -1,7 +1,7 @@
import os
import pathlib

from end_to_end_test_utilities import TestConfig, run_codeflash_command, run_with_retries
from end_to_end_test_utilities import CoverageExpectation, TestConfig, run_codeflash_command, run_with_retries


def run_test(expected_improvement_pct: int) -> bool:
@@ -10,6 +10,11 @@ def run_test(expected_improvement_pct: int) -> bool:
        function_name="CharacterRemover.remove_control_characters",
        test_framework="pytest",
        min_improvement_x=1.0,
        coverage_expectations=[
            CoverageExpectation(
                function_name="CharacterRemover.remove_control_characters", expected_coverage=100.0, expected_lines=[14]
            )
        ],
    )
    cwd = (pathlib.Path(__file__).parent.parent.parent / "code_to_optimize").resolve()
    return run_codeflash_command(cwd, config, expected_improvement_pct)
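Note: `CoverageExpectation` is defined in `end_to_end_test_utilities`, which is not shown in this diff; the sketch below only infers its shape from the call site above and is illustrative, not the actual definition.

```python
from dataclasses import dataclass, field

@dataclass
class CoverageExpectation:
    # Fields inferred from the call site in this test; illustrative only.
    function_name: str
    expected_coverage: float = 100.0
    expected_lines: list[int] = field(default_factory=list)

expectation = CoverageExpectation(
    function_name="CharacterRemover.remove_control_characters",
    expected_coverage=100.0,
    expected_lines=[14],
)
```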
104 changes: 49 additions & 55 deletions tests/test_code_replacement.py
@@ -2,7 +2,6 @@

import dataclasses
import os
from argparse import Namespace
from collections import defaultdict
from pathlib import Path

@@ -14,7 +13,8 @@
)
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.models.models import FunctionParent
from codeflash.optimization.optimizer import Optimizer
from codeflash.optimization.function_optimizer import FunctionOptimizer
from codeflash.verification.verification_utils import TestConfig

os.environ["CODEFLASH_API_KEY"] = "cf-test-key"

@@ -766,24 +766,18 @@ def main_method(self):
        return HelperClass(self.name).helper_method()
"""
    file_path = Path(__file__).resolve()
    opt = Optimizer(
        Namespace(
            project_root=file_path.parent.resolve(),
            disable_telemetry=True,
            tests_root="tests",
            test_framework="pytest",
            pytest_cmd="pytest",
            experiment_id=None,
            test_project_root=file_path.parent.resolve(),
        )
    )
    func_top_optimize = FunctionToOptimize(
        function_name="main_method", file_path=file_path, parents=[FunctionParent("MainClass", "ClassDef")]
    )
    original_code = file_path.read_text()
    code_context = opt.get_code_optimization_context(
        function_to_optimize=func_top_optimize, project_root=file_path.parent, original_source_code=original_code
    ).unwrap()
    test_config = TestConfig(
        tests_root=file_path.parent,
        tests_project_rootdir=file_path.parent,
        project_root_path=file_path.parent,
        test_framework="pytest",
        pytest_cmd="pytest",
    )
    func_optimizer = FunctionOptimizer(function_to_optimize=func_top_optimize, test_cfg=test_config)
    code_context = func_optimizer.get_code_optimization_context().unwrap()
    assert code_context.code_to_optimize_with_helpers == get_code_output


@@ -1013,35 +1007,35 @@ def to_name(self) -> str:
class TestResults(BaseModel):
    def __iter__(self) -> Iterator[FunctionTestInvocation]:
        return iter(self.test_results)

    def __len__(self) -> int:
        return len(self.test_results)

    def __getitem__(self, index: int) -> FunctionTestInvocation:
        return self.test_results[index]

    def __setitem__(self, index: int, value: FunctionTestInvocation) -> None:
        self.test_results[index] = value

    def __delitem__(self, index: int) -> None:
        del self.test_results[index]

    def __contains__(self, value: FunctionTestInvocation) -> bool:
        return value in self.test_results

    def __bool__(self) -> bool:
        return bool(self.test_results)

    def __eq__(self, other: object) -> bool:
        # Unordered comparison
        if not isinstance(other, TestResults) or len(self) != len(other):
            return False

        # Increase recursion limit only if necessary
        original_recursion_limit = sys.getrecursionlimit()
        if original_recursion_limit < 5000:
            sys.setrecursionlimit(5000)

        for test_result in self:
            other_test_result = other.get_by_id(test_result.id)
            if other_test_result is None or not (
@@ -1054,10 +1048,10 @@ def __eq__(self, other: object) -> bool:
            ):
                sys.setrecursionlimit(original_recursion_limit)
                return False

        sys.setrecursionlimit(original_recursion_limit)
        return True

    def get_test_pass_fail_report_by_type(self) -> dict[TestType, dict[str, int]]:
        report = {test_type: {"passed": 0, "failed": 0} for test_type in TestType}
        for test_result in self.test_results:
@@ -1105,8 +1099,8 @@ def get_test_pass_fail_report_by_type(self) -> dict[TestType, dict[str, int]]:
    )

    assert (
        new_code
        == """from __future__ import annotations
import sys
from codeflash.verification.comparator import comparator
from enum import Enum
@@ -1245,21 +1239,21 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
    """Row-wise cosine similarity between two equal-width matrices."""
    if len(X.data) == 0 or len(Y.data) == 0:
        return np.array([])

    X_np, Y_np = np.asarray(X.data), np.asarray(Y.data)
    if X_np.shape[1] != Y_np.shape[1]:
        raise ValueError(f"Number of columns in X and Y must be the same. X has shape {X_np.shape} and Y has shape {Y_np.shape}.")
    X_norm = np.linalg.norm(X_np, axis=1, keepdims=True)
    Y_norm = np.linalg.norm(Y_np, axis=1, keepdims=True)

    norm_product = X_norm * Y_norm.T
    norm_product[norm_product == 0] = np.inf  # Prevent division by zero
    dot_product = np.dot(X_np, Y_np.T)
    similarity = dot_product / norm_product

    # Any NaN or Inf values are set to 0.0
    np.nan_to_num(similarity, copy=False)

    return similarity
def cosine_similarity_top_k(
    X: Matrix,
@@ -1270,15 +1264,15 @@ def cosine_similarity_top_k(
    """Row-wise cosine similarity with optional top-k and score threshold filtering."""
    if len(X.data) == 0 or len(Y.data) == 0:
        return [], []

    score_array = cosine_similarity(X, Y)

    sorted_idxs = np.argpartition(-score_array.flatten(), range(top_k or len(score_array.flatten())))[:(top_k or len(score_array.flatten()))]
    sorted_idxs = sorted_idxs[score_array.flatten()[sorted_idxs] > (score_threshold if score_threshold is not None else -1)]

    ret_idxs = [(x // score_array.shape[1], x % score_array.shape[1]) for x in sorted_idxs]
    scores = score_array.flatten()[sorted_idxs].tolist()

    return ret_idxs, scores
'''
    preexisting_objects: list[tuple[str, list[FunctionParent]]] = find_preexisting_objects(original_code)
@@ -1311,8 +1305,8 @@ def cosine_similarity_top_k(
        project_root_path=Path(__file__).parent.parent.resolve(),
    )
    assert (
        new_code
        == '''import numpy as np
from pydantic.dataclasses import dataclass
from typing import List, Optional, Tuple, Union
@dataclass(config=dict(arbitrary_types_allowed=True))
Expand Down Expand Up @@ -1343,15 +1337,15 @@ def cosine_similarity_top_k(
"""Row-wise cosine similarity with optional top-k and score threshold filtering."""
if len(X.data) == 0 or len(Y.data) == 0:
return [], []

score_array = cosine_similarity(X, Y)

sorted_idxs = np.argpartition(-score_array.flatten(), range(top_k or len(score_array.flatten())))[:(top_k or len(score_array.flatten()))]
sorted_idxs = sorted_idxs[score_array.flatten()[sorted_idxs] > (score_threshold if score_threshold is not None else -1)]

ret_idxs = [(x // score_array.shape[1], x % score_array.shape[1]) for x in sorted_idxs]
scores = score_array.flatten()[sorted_idxs].tolist()

return ret_idxs, scores
'''
)
@@ -1370,8 +1364,8 @@ def cosine_similarity_top_k(
    )

    assert (
        new_helper_code
        == '''import numpy as np
from pydantic.dataclasses import dataclass
from typing import List, Optional, Tuple, Union
@dataclass(config=dict(arbitrary_types_allowed=True))
@@ -1381,21 +1375,21 @@ def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
    """Row-wise cosine similarity between two equal-width matrices."""
    if len(X.data) == 0 or len(Y.data) == 0:
        return np.array([])

    X_np, Y_np = np.asarray(X.data), np.asarray(Y.data)
    if X_np.shape[1] != Y_np.shape[1]:
        raise ValueError(f"Number of columns in X and Y must be the same. X has shape {X_np.shape} and Y has shape {Y_np.shape}.")
    X_norm = np.linalg.norm(X_np, axis=1, keepdims=True)
    Y_norm = np.linalg.norm(Y_np, axis=1, keepdims=True)

    norm_product = X_norm * Y_norm.T
    norm_product[norm_product == 0] = np.inf  # Prevent division by zero
    dot_product = np.dot(X_np, Y_np.T)
    similarity = dot_product / norm_product

    # Any NaN or Inf values are set to 0.0
    np.nan_to_num(similarity, copy=False)

    return similarity
def cosine_similarity_top_k(
    X: Matrix,
@@ -1406,15 +1400,15 @@ def cosine_similarity_top_k(
    """Row-wise cosine similarity with optional top-k and score threshold filtering."""
    if len(X.data) == 0 or len(Y.data) == 0:
        return [], []

    score_array = cosine_similarity(X, Y)

    sorted_idxs = np.argpartition(-score_array.flatten(), range(top_k or len(score_array.flatten())))[:(top_k or len(score_array.flatten()))]
    sorted_idxs = sorted_idxs[score_array.flatten()[sorted_idxs] > (score_threshold if score_threshold is not None else -1)]

    ret_idxs = [(x // score_array.shape[1], x % score_array.shape[1]) for x in sorted_idxs]
    scores = score_array.flatten()[sorted_idxs].tolist()

    return ret_idxs, scores
'''
    )
@@ -1481,7 +1475,7 @@ def test_future_aliased_imports_removal() -> None:

def test_0_diff_code_replacement():
    original_code = """from __future__ import annotations

import numpy as np
def functionA():
    return np.array([1, 2, 3])