Skip to content

Commit 4b41d14

Browse files
authored
Merge branch 'main' into array-comparator
2 parents 92a6032 + e274a06 commit 4b41d14

File tree

14 files changed

+61
-39
lines changed

14 files changed

+61
-39
lines changed

.github/workflows/end-to-end-test-init-optim.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
COLUMNS: 110
2121
MAX_RETRIES: 3
2222
RETRY_DELAY: 5
23-
EXPECTED_IMPROVEMENT_PCT: 300
23+
EXPECTED_IMPROVEMENT_PCT: 30
2424
CODEFLASH_END_TO_END: 1
2525
steps:
2626
- name: 🛎️ Checkout

codeflash/api/aiservice.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22

3+
import time
4+
35
import json
46
import os
57
import platform
@@ -95,6 +97,7 @@ def optimize_python_code(
9597
- List[OptimizationCandidate]: A list of Optimization Candidates.
9698
9799
"""
100+
start_time = time.perf_counter()
98101
payload = {
99102
"source_code": source_code,
100103
"dependency_code": dependency_code,
@@ -118,6 +121,8 @@ def optimize_python_code(
118121
optimizations_json = response.json()["optimizations"]
119122
logger.info(f"Generated {len(optimizations_json)} candidates.")
120123
console.rule()
124+
end_time = time.perf_counter()
125+
logger.debug(f"Optimization took {end_time - start_time:.2f} seconds.")
121126
return [
122127
OptimizedCandidate(
123128
source_code=opt["source_code"],

codeflash/context/code_context_extractor.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,9 @@ def extract_code_string_context_from_files(
217217
continue
218218
try:
219219
qualified_helper_function_names = {func.qualified_name for func in helper_function_sources}
220-
code_without_unused_defs = remove_unused_definitions_by_function_names(original_code, qualified_helper_function_names)
220+
code_without_unused_defs = remove_unused_definitions_by_function_names(
221+
original_code, qualified_helper_function_names
222+
)
221223
code_context = parse_code_and_prune_cst(
222224
code_without_unused_defs, code_context_type, set(), qualified_helper_function_names, remove_docstrings
223225
)
@@ -325,7 +327,9 @@ def extract_code_markdown_context_from_files(
325327
continue
326328
try:
327329
qualified_helper_function_names = {func.qualified_name for func in helper_function_sources}
328-
code_without_unused_defs = remove_unused_definitions_by_function_names(original_code, qualified_helper_function_names)
330+
code_without_unused_defs = remove_unused_definitions_by_function_names(
331+
original_code, qualified_helper_function_names
332+
)
329333
code_context = parse_code_and_prune_cst(
330334
code_without_unused_defs, code_context_type, set(), qualified_helper_function_names, remove_docstrings
331335
)
@@ -403,12 +407,8 @@ def get_function_sources_from_jedi(
403407
for name in names:
404408
try:
405409
definitions: list[Name] = name.goto(follow_imports=True, follow_builtin_imports=False)
406-
except Exception as e:
407-
try:
408-
logger.exception(f"Error while getting definition for {name.full_name}: {e}")
409-
except Exception as e:
410-
# name.full_name can also throw exceptions sometimes
411-
logger.exception(f"Error while getting definition: {e}")
410+
except Exception: # noqa: BLE001
411+
logger.debug(f"Error while getting definitions for {qualified_function_name}")
412412
definitions = []
413413
if definitions:
414414
# TODO: there can be multiple definitions, see how to handle such cases
@@ -424,7 +424,12 @@ def get_function_sources_from_jedi(
424424
and not belongs_to_function_qualified(definition, qualified_function_name)
425425
and definition.full_name.startswith(definition.module_name)
426426
# Avoid nested functions or classes. Only class.function is allowed
427-
and len((qualified_name := get_qualified_name(definition.module_name, definition.full_name)).split(".")) <= 2
427+
and len(
428+
(qualified_name := get_qualified_name(definition.module_name, definition.full_name)).split(
429+
"."
430+
)
431+
)
432+
<= 2
428433
):
429434
function_source = FunctionSource(
430435
file_path=definition_path,

codeflash/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from codeflash.cli_cmds.cli import parse_args, process_pyproject_config
88
from codeflash.cli_cmds.cmd_init import CODEFLASH_LOGO, ask_run_end_to_end_test
99
from codeflash.cli_cmds.console import paneled_text
10+
from codeflash.code_utils.checkpoint import ask_should_use_checkpoint_get_functions
1011
from codeflash.code_utils.config_parser import parse_config_file
1112
from codeflash.optimization import optimizer
1213
from codeflash.telemetry import posthog_cf
@@ -35,6 +36,7 @@ def main() -> None:
3536
ask_run_end_to_end_test(args)
3637
else:
3738
args = process_pyproject_config(args)
39+
args.previous_checkpoint_functions = ask_should_use_checkpoint_get_functions(args)
3840
init_sentry(not args.disable_telemetry, exclude_errors=True)
3941
posthog_cf.initialize_posthog(not args.disable_telemetry)
4042
optimizer.run_with_args(args)

codeflash/optimization/optimizer.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from codeflash.benchmarking.utils import print_benchmark_table, validate_and_format_benchmark_table
1717
from codeflash.cli_cmds.console import console, logger, progress_bar
1818
from codeflash.code_utils import env_utils
19-
from codeflash.code_utils.checkpoint import CodeflashRunCheckpoint, ask_should_use_checkpoint_get_functions
19+
from codeflash.code_utils.checkpoint import CodeflashRunCheckpoint
2020
from codeflash.code_utils.code_replacer import normalize_code, normalize_node
2121
from codeflash.code_utils.code_utils import cleanup_paths, get_run_tmp_file
2222
from codeflash.code_utils.static_analysis import analyze_imported_modules, get_first_top_level_function_or_method_ast
@@ -85,7 +85,6 @@ def run(self) -> None:
8585
function_optimizer = None
8686
file_to_funcs_to_optimize: dict[Path, list[FunctionToOptimize]]
8787
num_optimizable_functions: int
88-
previous_checkpoint_functions = ask_should_use_checkpoint_get_functions(self.args)
8988
# discover functions
9089
(file_to_funcs_to_optimize, num_optimizable_functions) = get_functions_to_optimize(
9190
optimize_all=self.args.all,
@@ -96,7 +95,7 @@ def run(self) -> None:
9695
ignore_paths=self.args.ignore_paths,
9796
project_root=self.args.project_root,
9897
module_root=self.args.module_root,
99-
previous_checkpoint_functions=previous_checkpoint_functions,
98+
previous_checkpoint_functions=self.args.previous_checkpoint_functions,
10099
)
101100
function_benchmark_timings: dict[str, dict[BenchmarkKey, int]] = {}
102101
total_benchmark_timings: dict[BenchmarkKey, int] = {}

codeflash/verification/concolic_testing.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22

3+
import time
4+
35
import ast
46
import subprocess
57
import tempfile
@@ -20,6 +22,7 @@
2022
def generate_concolic_tests(
2123
test_cfg: TestConfig, args: Namespace, function_to_optimize: FunctionToOptimize, function_to_optimize_ast: ast.AST
2224
) -> tuple[dict[str, list[FunctionCalledInTest]], str]:
25+
start_time = time.perf_counter()
2326
function_to_concolic_tests = {}
2427
concolic_test_suite_code = ""
2528
if (
@@ -84,4 +87,6 @@ def generate_concolic_tests(
8487
else:
8588
logger.debug(f"Error running CrossHair Cover {': ' + cover_result.stderr if cover_result.stderr else '.'}")
8689
console.rule()
90+
end_time = time.perf_counter()
91+
logger.debug(f"Generated concolic tests in {end_time - start_time:.2f} seconds")
8792
return function_to_concolic_tests, concolic_test_suite_code

codeflash/verification/parse_test_output.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,13 +107,19 @@ def parse_sqlite_test_results(sqlite_file_path: Path, test_files: TestFiles, tes
107107
logger.warning(f"No test results for {sqlite_file_path} found.")
108108
console.rule()
109109
return test_results
110+
db = None
110111
try:
111112
db = sqlite3.connect(sqlite_file_path)
112113
cur = db.cursor()
113114
data = cur.execute(
114115
"SELECT test_module_path, test_class_name, test_function_name, "
115116
"function_getting_tested, loop_index, iteration_id, runtime, return_value,verification_type FROM test_results"
116117
).fetchall()
118+
except Exception as e:
119+
logger.warning(f"Failed to parse test results from {sqlite_file_path}. Exception: {e}")
120+
if db is not None:
121+
db.close()
122+
return test_results
117123
finally:
118124
db.close()
119125
for val in data:

codeflash/verification/test_runner.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@
1515
if TYPE_CHECKING:
1616
from codeflash.models.models import TestFiles
1717

18-
BEHAVIORAL_BLOCKLISTED_PLUGINS = ["benchmark"]
19-
BENCHMARKING_BLOCKLISTED_PLUGINS = ["codspeed", "cov", "benchmark", "profiling"]
18+
BEHAVIORAL_BLOCKLISTED_PLUGINS = ["benchmark", "codspeed", "xdist", "sugar"]
19+
BENCHMARKING_BLOCKLISTED_PLUGINS = ["codspeed", "cov", "benchmark", "profiling", "xdist", "sugar"]
2020

2121

2222
def execute_test_subprocess(
@@ -141,6 +141,7 @@ def run_behavioral_tests(
141141
coverage_config_file if enable_coverage else None,
142142
)
143143

144+
144145
def run_line_profile_tests(
145146
test_paths: TestFiles,
146147
pytest_cmd: str,
@@ -154,7 +155,6 @@ def run_line_profile_tests(
154155
pytest_min_loops: int = 5,
155156
pytest_max_loops: int = 100_000,
156157
line_profiler_output_file: Path | None = None,
157-
158158
) -> tuple[Path, subprocess.CompletedProcess]:
159159
if test_framework == "pytest":
160160
pytest_cmd_list = (
@@ -191,7 +191,7 @@ def run_line_profile_tests(
191191
pytest_test_env = test_env.copy()
192192
pytest_test_env["PYTEST_PLUGINS"] = "codeflash.verification.pytest_plugin"
193193
blocklist_args = [f"-p no:{plugin}" for plugin in BENCHMARKING_BLOCKLISTED_PLUGINS]
194-
pytest_test_env["LINE_PROFILE"]="1"
194+
pytest_test_env["LINE_PROFILE"] = "1"
195195
results = execute_test_subprocess(
196196
pytest_cmd_list + pytest_args + blocklist_args + result_args + test_files,
197197
cwd=cwd,
@@ -203,6 +203,7 @@ def run_line_profile_tests(
203203
raise ValueError(msg)
204204
return line_profiler_output_file, results
205205

206+
206207
def run_benchmarking_tests(
207208
test_paths: TestFiles,
208209
pytest_cmd: str,

codeflash/verification/verifier.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from __future__ import annotations
22

3+
import time
4+
35
import ast
46
from pathlib import Path
57
from typing import TYPE_CHECKING
@@ -29,6 +31,7 @@ def generate_tests(
2931
) -> tuple[str, str, Path] | None:
3032
# TODO: Sometimes this recreates the original Class definition. This overrides and messes up the original
3133
# class import. Remove the recreation of the class definition
34+
start_time = time.perf_counter()
3235
test_module_path = Path(module_name_from_file_path(test_path, test_cfg.tests_project_rootdir))
3336
response = aiservice_client.generate_regression_tests(
3437
source_code_being_tested=source_code_being_tested,
@@ -54,7 +57,8 @@ def generate_tests(
5457
else:
5558
logger.warning(f"Failed to generate and instrument tests for {function_to_optimize.function_name}")
5659
return None
57-
60+
end_time = time.perf_counter()
61+
logger.debug(f"Generated tests in {end_time - start_time:.2f} seconds")
5862
return (
5963
generated_test_source,
6064
instrumented_behavior_test_source,

docs/docs/codeflash-concepts/how-codeflash-works.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,12 @@ Codeflash currently only runs tests that directly call the target function in th
2121

2222
## Optimization Generation
2323

24-
To optimize code, Codeflash first gathers all necessary context from the codebase. It then calls our backend to generate several candidate optimizations. These are called "candidates" because their speed and correctness haven't been verified yet. Both properties will be verified in later steps.
25-
24+
To optimize code, Codeflash first gathers all necessary context from the codebase. It also line-profiles your code to identify where the bottlenecks are. It then calls our backend to generate several candidate optimizations. These are called "candidates" because their speed and correctness haven't been verified yet. Both properties will be verified in later steps.
2625
## Verification of correctness
2726

2827
![Verification](/img/codeflash_arch_diagram.gif)
2928

30-
The goal of correctness verification is to ensure that when the original code is replaced by the new code, there are no behavioral changes in the code and the rest of the system. This means the replacement should be completely safe.
29+
The goal of correctness verification is to ensure that when the new code replaces the original code, there are no behavioral changes in the code or in the rest of the system. This means the replacement should be completely safe.
3130

3231
To verify correctness, Codeflash calls the function with numerous inputs, confirming that the new function behaves identically to the original.
3332

@@ -60,4 +59,4 @@ Codeflash implements several techniques to measure code performance accurately.
6059

6160
## Creating Pull Requests
6261

63-
Once an optimization passes all checks, Codeflash creates a pull request through the Codeflash GitHub app directly in your repository. The pull request includes the new code, the speedup percentage, an explanation of the optimization, test statistics including coverage, and the test content itself. You can review and merge the new code if it meets your standards. Feel free to modify the code as needed—we welcome your improvements!
62+
Once an optimization passes all checks, Codeflash creates a pull request through the Codeflash GitHub app directly in your repository. The pull request includes the new code, the speedup percentage, an explanation of the optimization, test statistics including coverage, and the test content itself. You can review, edit, and merge the new code.

0 commit comments

Comments
 (0)