Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
c53f2f6
windows fix in vsc correct function choosing from the file
mashraf-222 Nov 28, 2025
f869b61
ensure the optimization starts for the function
mashraf-222 Nov 28, 2025
da1fca0
using pathlib
mashraf-222 Nov 28, 2025
f0b4657
formatting and linting fixes
mashraf-222 Nov 28, 2025
d62462d
fix linting errors
mashraf-222 Nov 28, 2025
8b4a5f5
FIX FAILING TEST
mashraf-222 Dec 1, 2025
d2a2e96
fix linting error
mashraf-222 Dec 1, 2025
3694c80
fixing one test after windows changes
mashraf-222 Dec 2, 2025
5987a57
update the unicode delimiter to be windows compatible
mashraf-222 Dec 4, 2025
1964abd
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Dec 4, 2025
4344374
fix tests discovery in windows for vsc
mashraf-222 Dec 4, 2025
9b78d88
fix Starting baseline establishment in windows
mashraf-222 Dec 8, 2025
784378c
adding _manual_cleanup_worktree_directory
mashraf-222 Dec 8, 2025
194bad5
Improved the manual cleanup function to handle Windows file locking a…
mashraf-222 Dec 8, 2025
5e1b108
clean and optimize the git worktree deletion failure handling
mashraf-222 Dec 8, 2025
7404d01
clean up added logs
mashraf-222 Dec 9, 2025
5b2dfc1
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Dec 9, 2025
dff190d
fix conflict
mashraf-222 Dec 9, 2025
9833272
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Dec 9, 2025
04fbab9
fix capture_mode for windows for failing tests
mashraf-222 Dec 9, 2025
c443801
fix linting
mashraf-222 Dec 9, 2025
9336f3b
fix linting
mashraf-222 Dec 9, 2025
cc742fc
fix pre-commit errors
mashraf-222 Dec 9, 2025
db8cb46
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Dec 10, 2025
f824ea5
FIX for failing test test_function_discovery.py::test_filter_functions
mashraf-222 Dec 10, 2025
e05aab2
removing git worktree logs
mashraf-222 Dec 10, 2025
a12f002
fix linting after removing logs
mashraf-222 Dec 10, 2025
52f19e4
Fixed inconsistent path resolution which can cause relative_to() chec…
mashraf-222 Dec 10, 2025
bc779ff
Updated trace_benchmarks_pytest to use the same Windows-safe subproce…
mashraf-222 Dec 10, 2025
ac35af3
pre-commit fixes
mashraf-222 Dec 10, 2025
84d056d
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
Saga4 Dec 12, 2025
1d0802d
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
Saga4 Dec 12, 2025
df29969
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mohammedahmed18 Dec 19, 2025
f5a46d7
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Dec 22, 2025
db501da
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
Saga4 Dec 23, 2025
6371ca5
addressing all Required items by claude code review
mashraf-222 Dec 24, 2025
397b741
fix linting
mashraf-222 Dec 24, 2025
207d671
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Dec 24, 2025
d896c90
Refactor path validation and error handling in discovery functions
mashraf-222 Dec 24, 2025
d0c6c42
increasing timeout and changing comment
mashraf-222 Jan 6, 2026
1973e07
revert not needed change
mashraf-222 Jan 6, 2026
2d8654e
Restor debug statements
mashraf-222 Jan 6, 2026
7732360
adding the Progress bar for test files and logger
mashraf-222 Jan 6, 2026
67fcffc
adding the removed loggers
mashraf-222 Jan 6, 2026
6be94a7
Refactor error handling in test discovery subprocess
mashraf-222 Jan 6, 2026
79719ac
revert change
mashraf-222 Jan 6, 2026
217b95a
update the comment of execute_test_subprocess
mashraf-222 Jan 6, 2026
e0d9f57
revert not needed changes for initialize_function_optimization
mashraf-222 Jan 6, 2026
4c9f51b
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Jan 6, 2026
f75e353
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Jan 6, 2026
16f176d
fix linting
mashraf-222 Jan 6, 2026
1b96f51
Merge branch 'ashraf/cf-918-fix-vsc-extension-windows-bugs' of https:…
mashraf-222 Jan 6, 2026
3fdcb35
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Jan 6, 2026
2943537
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Jan 6, 2026
cfc3363
Merge branch 'main' into ashraf/cf-918-fix-vsc-extension-windows-bugs
mashraf-222 Jan 7, 2026
3345e2c
cleaning up
mohammedahmed18 Jan 7, 2026
56ffc37
Merge branch 'ashraf/cf-918-fix-vsc-extension-windows-bugs' of https:…
mohammedahmed18 Jan 7, 2026
ba46de2
formatting and linting
mohammedahmed18 Jan 7, 2026
01a868b
beautify
mohammedahmed18 Jan 7, 2026
6849554
format
mohammedahmed18 Jan 7, 2026
fc2ea80
fix conlict and merge main
mashraf-222 Jan 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions codeflash/benchmarking/trace_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from codeflash.cli_cmds.console import logger
from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE
from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args


def trace_benchmarks_pytest(
Expand All @@ -17,20 +18,18 @@ def trace_benchmarks_pytest(
benchmark_env["PYTHONPATH"] = str(project_root)
else:
benchmark_env["PYTHONPATH"] += os.pathsep + str(project_root)
result = subprocess.run(
run_args = get_cross_platform_subprocess_run_args(
cwd=project_root, env=benchmark_env, timeout=timeout, check=False, text=True, capture_output=True
)
result = subprocess.run( # noqa: PLW1510
[
SAFE_SYS_EXECUTABLE,
Path(__file__).parent / "pytest_new_process_trace_benchmarks.py",
benchmarks_root,
tests_root,
trace_file,
],
cwd=project_root,
check=False,
capture_output=True,
text=True,
env=benchmark_env,
timeout=timeout,
**run_args,
)
if result.returncode != 0:
if "ERROR collecting" in result.stdout:
Expand Down
24 changes: 20 additions & 4 deletions codeflash/cli_cmds/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,19 @@
)


class DummyTask:
    """Placeholder task object that only carries a fixed task id."""

    def __init__(self) -> None:
        # The id is always 0; callers that need "some task id" to yield read this attribute.
        self.id = 0


class DummyProgress:
    """Inert progress object: it offers an ``advance`` method that accepts updates and does nothing."""

    def __init__(self) -> None:
        """Create the stand-in; there is no state to initialise."""

    def advance(self, task_id: TaskID, advance: int = 1) -> None:
        """Accept a progress update for ``task_id`` and discard it."""


def lsp_log(message: LspMessage) -> None:
if not is_LSP_enabled():
return
Expand Down Expand Up @@ -120,10 +133,6 @@ def progress_bar(
logger.info(message)

# Create a fake task ID since we still need to yield something
class DummyTask:
def __init__(self) -> None:
self.id = 0

yield DummyTask().id
else:
progress = Progress(
Expand All @@ -141,6 +150,13 @@ def __init__(self) -> None:
@contextmanager
def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]:
"""Progress bar for test files."""
if is_LSP_enabled():
lsp_log(LspTextMessage(text=description, takes_time=True))
dummy_progress = DummyProgress()
dummy_task = DummyTask()
yield dummy_progress, dummy_task.id
return

with Progress(
SpinnerColumn(next(spinners)),
TextColumn("[progress.description]{task.description}"),
Expand Down
2 changes: 1 addition & 1 deletion codeflash/code_utils/code_replacer.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ def replace_function_definitions_in_module(

new_code: str = replace_functions_and_add_imports(
# adding the global assignments before replacing the code, not after
# becuase of an "edge case" where the optimized code intoduced a new import and a global assignment using that import
# because of an "edge case" where the optimized code introduced a new import and a global assignment using that import
# and that import wasn't used before, so it was ignored when calling AddImportsVisitor.add_needed_import inside replace_functions_and_add_imports (because the global assignment wasn't added yet)
# this was added at https://github.com/codeflash-ai/codeflash/pull/448
add_global_assignments(code_to_apply, source_code) if should_add_global_assignments else source_code,
Expand Down
26 changes: 26 additions & 0 deletions codeflash/code_utils/shell_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import contextlib
import os
import re
import subprocess
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional

Expand All @@ -11,8 +13,11 @@
from codeflash.either import Failure, Success

if TYPE_CHECKING:
from collections.abc import Mapping

from codeflash.either import Result


# PowerShell patterns and prefixes
POWERSHELL_RC_EXPORT_PATTERN = re.compile(
r'^\$env:CODEFLASH_API_KEY\s*=\s*(?:"|\')?(cf-[^\s"\']+)(?:"|\')?\s*$', re.MULTILINE
Expand Down Expand Up @@ -231,3 +236,24 @@ def save_api_key_to_rc(api_key: str) -> Result[str, str]:
f"To ensure your Codeflash API key is automatically loaded into your environment at startup, you can create {shell_rc_path} and add the following line:{LF}"
f"{LF}{api_key_line}{LF}"
)


def get_cross_platform_subprocess_run_args(
cwd: Path | str | None = None,
env: Mapping[str, str] | None = None,
timeout: Optional[float] = None,
check: bool = False, # noqa: FBT001, FBT002
text: bool = True, # noqa: FBT001, FBT002
capture_output: bool = True, # noqa: FBT001, FBT002 (only for non-Windows)
) -> dict[str, str]:
run_args = {"cwd": cwd, "env": env, "text": text, "timeout": timeout, "check": check}
if sys.platform == "win32":
creationflags = subprocess.CREATE_NEW_PROCESS_GROUP
run_args["creationflags"] = creationflags
run_args["stdout"] = subprocess.PIPE
run_args["stderr"] = subprocess.PIPE
run_args["stdin"] = subprocess.DEVNULL
else:
run_args["capture_output"] = capture_output

return run_args
14 changes: 7 additions & 7 deletions codeflash/discovery/discover_unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

if TYPE_CHECKING:
from codeflash.discovery.functions_to_optimize import FunctionToOptimize

from pydantic.dataclasses import dataclass
from rich.panel import Panel
from rich.text import Text
Expand All @@ -29,6 +28,7 @@
module_name_from_file_path,
)
from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE, codeflash_cache_db
from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args
from codeflash.models.models import CodePosition, FunctionCalledInTest, TestsInFile, TestType

if TYPE_CHECKING:
Expand Down Expand Up @@ -331,7 +331,7 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
# Be conservative except when an alias is used (which requires exact method matching)
for target_func in fnames:
if "." in target_func:
class_name, method_name = target_func.split(".", 1)
class_name, _method_name = target_func.split(".", 1)
if aname == class_name and not alias.asname:
self.found_any_target_function = True
self.found_qualified_name = target_func
Expand Down Expand Up @@ -585,18 +585,18 @@ def discover_tests_pytest(

tmp_pickle_path = get_run_tmp_file("collected_tests.pkl")
with custom_addopts():
result = subprocess.run(
run_kwargs = get_cross_platform_subprocess_run_args(
cwd=project_root, check=False, text=True, capture_output=True
)
result = subprocess.run( # noqa: PLW1510
[
SAFE_SYS_EXECUTABLE,
Path(__file__).parent / "pytest_new_process_discovery.py",
str(project_root),
str(tests_root),
str(tmp_pickle_path),
],
cwd=project_root,
check=False,
capture_output=True,
text=True,
**run_kwargs,
)
try:
with tmp_pickle_path.open(mode="rb") as f:
Expand Down
3 changes: 2 additions & 1 deletion codeflash/discovery/functions_to_optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def qualified_name_with_modules_from_root(self, project_root_path: Path) -> str:
def get_functions_to_optimize(
optimize_all: str | None,
replay_test: list[Path] | None,
file: Path | None,
file: Path | str | None,
only_get_this_function: str | None,
test_cfg: TestConfig,
ignore_paths: list[Path],
Expand All @@ -202,6 +202,7 @@ def get_functions_to_optimize(
elif file is not None:
logger.info("!lsp|Finding all functions in the file '%s'…", file)
console.rule()
file = Path(file) if isinstance(file, str) else file
functions: dict[Path, list[FunctionToOptimize]] = find_all_functions_in_file(file)
if only_get_this_function is not None:
split_function = only_get_this_function.split(".")
Expand Down
4 changes: 2 additions & 2 deletions codeflash/lsp/beta.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def get_optimizable_functions(params: OptimizableFunctionsParams) -> dict[str, l
document_uri = params.textDocument.uri
document = server.workspace.get_text_document(document_uri)

file_path = Path(document.path)
file_path = Path(document.path).resolve()

if not server.optimizer:
return {"status": "error", "message": "optimizer not initialized"}
Expand Down Expand Up @@ -517,7 +517,7 @@ def initialize_function_optimization(params: FunctionOptimizationInitParams) ->
files = [document.path]

_, _, original_helpers = server.current_optimization_init_result
files.extend([str(helper_path) for helper_path in original_helpers])
files.extend([str(helper_path.resolve()) for helper_path in original_helpers])

return {"functionName": params.functionName, "status": "success", "files_inside_context": files}

Expand Down
2 changes: 1 addition & 1 deletion codeflash/lsp/features/perform_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def run_generate_optimizations(): # noqa: ANN202
future_tests = function_optimizer.executor.submit(ctx_tests.run, run_generate_tests)
future_optimizations = function_optimizer.executor.submit(ctx_opts.run, run_generate_optimizations)

logger.info(f"loading|Generating new tests and optimizations for function '{params.functionName}'...")
logger.info(f"loading|Generating new tests and optimizations for function '{params.functionName}'")
concurrent.futures.wait([future_tests, future_optimizations])

test_setup_result = future_tests.result()
Expand Down
4 changes: 2 additions & 2 deletions codeflash/lsp/lsp_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
json_primitive_types = (str, float, int, bool)
max_code_lines_before_collapse = 45

# \u241F is the message delimiter becuase it can be more than one message sent over the same message, so we need something to separate each message
message_delimiter = "\u241f"
# \\u241F is the message delimiter: more than one message can be sent over the same message, so we need something to separate each one
message_delimiter = "\\u241F"


# allow the client to know which message it is receiving
Expand Down
3 changes: 2 additions & 1 deletion codeflash/optimization/function_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def __init__(

def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]:
should_run_experiment = self.experiment_id is not None
logger.info(f"Function Trace ID: {self.function_trace_id}")
logger.info(f"!lsp|Function Trace ID: {self.function_trace_id}")
ph("cli-optimize-function-start", {"function_trace_id": self.function_trace_id})
self.cleanup_leftover_test_return_values()
file_name_from_test_module_name.cache_clear()
Expand Down Expand Up @@ -1934,6 +1934,7 @@ def establish_original_code_baseline(
instrument_codeflash_capture(
self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root
)

total_looping_time = TOTAL_LOOPING_TIME_EFFECTIVE
behavioral_results, coverage_results = self.run_and_parse_tests(
testing_type=TestingMode.BEHAVIOR,
Expand Down
4 changes: 2 additions & 2 deletions codeflash/optimization/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,8 +652,8 @@ def mirror_paths_for_worktree_mode(self, worktree_dir: Path) -> None:


def mirror_path(path: Path, src_root: Path, dest_root: Path) -> Path:
    """Return the location under ``dest_root`` that mirrors ``path``'s position under ``src_root``.

    Both ``path`` and ``src_root`` are resolved before the relative part is computed, so
    the two are compared in the same normalised form (``..`` segments collapsed, symlinks
    followed) and the returned path contains no leftover ``..`` segments from ``path``.

    Args:
        path: The path to mirror; after resolution it must lie under ``src_root``.
        src_root: Root that ``path`` is currently located under.
        dest_root: Root to re-anchor the relative part onto; it is used as given.

    Returns:
        ``dest_root`` joined with ``path``'s position relative to ``src_root``.

    Raises:
        ValueError: If the resolved ``path`` is not under the resolved ``src_root``
            (raised by ``Path.relative_to``).

    """
    relative_path = path.resolve().relative_to(src_root.resolve())
    return dest_root / relative_path


def run_with_args(args: Namespace) -> None:
Expand Down
32 changes: 24 additions & 8 deletions codeflash/verification/test_runner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations

import contextlib
import shlex
import subprocess
import sys
from pathlib import Path
from typing import TYPE_CHECKING

Expand All @@ -10,6 +12,7 @@
from codeflash.code_utils.compat import IS_POSIX, SAFE_SYS_EXECUTABLE
from codeflash.code_utils.config_consts import TOTAL_LOOPING_TIME_EFFECTIVE
from codeflash.code_utils.coverage_utils import prepare_coverage_files
from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args
from codeflash.models.models import TestFiles, TestType

if TYPE_CHECKING:
Expand All @@ -23,9 +26,12 @@ def execute_test_subprocess(
cmd_list: list[str], cwd: Path, env: dict[str, str] | None, timeout: int = 600
) -> subprocess.CompletedProcess:
"""Execute a subprocess with the given command list, working directory, environment variables, and timeout."""
logger.debug(f"executing test run with command: {' '.join(cmd_list)}")
with custom_addopts():
logger.debug(f"executing test run with command: {' '.join(cmd_list)}")
return subprocess.run(cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False)
run_args = get_cross_platform_subprocess_run_args(
cwd=cwd, env=env, timeout=timeout, check=False, text=True, capture_output=True
)
return subprocess.run(cmd_list, **run_args) # noqa: PLW1510


def run_behavioral_tests(
Expand All @@ -39,6 +45,7 @@ def run_behavioral_tests(
pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME_EFFECTIVE,
enable_coverage: bool = False,
) -> tuple[Path, subprocess.CompletedProcess, Path | None, Path | None]:
"""Run behavioral tests with optional coverage."""
if test_framework in {"pytest", "unittest"}:
test_files: list[str] = []
for file in test_paths.test_files:
Expand All @@ -53,12 +60,14 @@ def run_behavioral_tests(
)
else:
test_files.append(str(file.instrumented_behavior_file_path))

pytest_cmd_list = (
shlex.split(f"{SAFE_SYS_EXECUTABLE} -m pytest", posix=IS_POSIX)
if pytest_cmd == "pytest"
else [SAFE_SYS_EXECUTABLE, "-m", *shlex.split(pytest_cmd, posix=IS_POSIX)]
)
test_files = list(set(test_files)) # remove multiple calls in the same test function

common_pytest_args = [
"--capture=tee-sys",
"-q",
Expand All @@ -85,11 +94,18 @@ def run_behavioral_tests(
pytest_test_env["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=0"
pytest_test_env["TF_ENABLE_ONEDNN_OPTS"] = str(0)
pytest_test_env["JAX_DISABLE_JIT"] = str(0)
cov_erase = execute_test_subprocess(
shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env
) # this cleanup is necessary to avoid coverage data from previous runs, if there are any,
# then the current run will be appended to the previous data, which skews the results
logger.debug(cov_erase)

is_windows = sys.platform == "win32"
if is_windows:
# On Windows, delete coverage database file directly instead of using 'coverage erase', to avoid locking issues
if coverage_database_file.exists():
with contextlib.suppress(PermissionError, OSError):
coverage_database_file.unlink()
else:
cov_erase = execute_test_subprocess(
shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env, timeout=30
) # this cleanup is necessary to avoid coverage data from previous runs, if there are any, then the current run will be appended to the previous data, which skews the results
logger.debug(cov_erase)
coverage_cmd = [
SAFE_SYS_EXECUTABLE,
"-m",
Expand All @@ -105,7 +121,6 @@ def run_behavioral_tests(
coverage_cmd.extend(shlex.split(pytest_cmd, posix=IS_POSIX)[1:])

blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS if plugin != "cov"]

results = execute_test_subprocess(
coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files,
cwd=cwd,
Expand All @@ -118,6 +133,7 @@ def run_behavioral_tests(
)
else:
blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS]

results = execute_test_subprocess(
pytest_cmd_list + common_pytest_args + blocklist_args + result_args + test_files,
cwd=cwd,
Expand Down
18 changes: 11 additions & 7 deletions tests/test_function_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,13 +411,17 @@ def not_in_checkpoint_function():

discovered = find_all_functions_in_file(test_file_path)
modified_functions = {test_file_path: discovered[test_file_path]}
filtered, count = filter_functions(
modified_functions,
tests_root=Path("tests"),
ignore_paths=[],
project_root=temp_dir,
module_root=temp_dir,
)
# Use an absolute path for tests_root that won't match the temp directory
# This avoids path resolution issues in CI where the working directory might differ
tests_root_absolute = (temp_dir.parent / "nonexistent_tests_dir").resolve()
with unittest.mock.patch("codeflash.discovery.functions_to_optimize.get_blocklisted_functions", return_value={}):
filtered, count = filter_functions(
modified_functions,
tests_root=tests_root_absolute,
ignore_paths=[],
project_root=temp_dir,
module_root=temp_dir,
)
function_names = [fn.function_name for fn in filtered.get(test_file_path, [])]
assert "propagate_attributes" in function_names
assert count == 3
Expand Down
Loading