codeflash-ai · KRRT7 · Jan 8, 2026 · Nov 28, 2025 · Nov 28, 2025 · Nov 28, 2025
diff --git a/codeflash/benchmarking/trace_benchmarks.py b/codeflash/benchmarking/trace_benchmarks.py
@@ -7,6 +7,7 @@
 
 from codeflash.cli_cmds.console import logger
 from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE
+from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args
 
 
 def trace_benchmarks_pytest(
@@ -17,20 +18,18 @@ def trace_benchmarks_pytest(
         benchmark_env["PYTHONPATH"] = str(project_root)
     else:
         benchmark_env["PYTHONPATH"] += os.pathsep + str(project_root)
-    result = subprocess.run(
+    run_args = get_cross_platform_subprocess_run_args(
+        cwd=project_root, env=benchmark_env, timeout=timeout, check=False, text=True, capture_output=True
+    )
+    result = subprocess.run(  # noqa: PLW1510
         [
             SAFE_SYS_EXECUTABLE,
             Path(__file__).parent / "pytest_new_process_trace_benchmarks.py",
             benchmarks_root,
             tests_root,
             trace_file,
         ],
-        cwd=project_root,
-        check=False,
-        capture_output=True,
-        text=True,
-        env=benchmark_env,
-        timeout=timeout,
+        **run_args,
     )
     if result.returncode != 0:
         if "ERROR collecting" in result.stdout:

diff --git a/codeflash/cli_cmds/console.py b/codeflash/cli_cmds/console.py
@@ -58,6 +58,19 @@
     )
 
 
+class DummyTask:
+    def __init__(self) -> None:
+        self.id = 0
+
+
+class DummyProgress:
+    def __init__(self) -> None:
+        pass
+
+    def advance(self, task_id: TaskID, advance: int = 1) -> None:
+        pass
+
+
 def lsp_log(message: LspMessage) -> None:
     if not is_LSP_enabled():
         return
@@ -120,10 +133,6 @@ def progress_bar(
         logger.info(message)
 
         # Create a fake task ID since we still need to yield something
-        class DummyTask:
-            def __init__(self) -> None:
-                self.id = 0
-
         yield DummyTask().id
     else:
         progress = Progress(
@@ -141,6 +150,13 @@ def __init__(self) -> None:
 @contextmanager
 def test_files_progress_bar(total: int, description: str) -> Generator[tuple[Progress, TaskID], None, None]:
     """Progress bar for test files."""
+    if is_LSP_enabled():
+        lsp_log(LspTextMessage(text=description, takes_time=True))
+        dummy_progress = DummyProgress()
+        dummy_task = DummyTask()
+        yield dummy_progress, dummy_task.id
+        return
+
     with Progress(
         SpinnerColumn(next(spinners)),
         TextColumn("[progress.description]{task.description}"),

diff --git a/codeflash/code_utils/code_replacer.py b/codeflash/code_utils/code_replacer.py
@@ -447,7 +447,7 @@ def replace_function_definitions_in_module(
 
     new_code: str = replace_functions_and_add_imports(
         # adding the global assignments before replacing the code, not after
-        # becuase of an "edge case" where the optimized code intoduced a new import and a global assignment using that import
+        # because of an "edge case" where the optimized code intoduced a new import and a global assignment using that import
         # and that import wasn't used before, so it was ignored when calling AddImportsVisitor.add_needed_import inside replace_functions_and_add_imports (because the global assignment wasn't added yet)
         # this was added at https://github.com/codeflash-ai/codeflash/pull/448
         add_global_assignments(code_to_apply, source_code) if should_add_global_assignments else source_code,

diff --git a/codeflash/code_utils/shell_utils.py b/codeflash/code_utils/shell_utils.py
@@ -3,6 +3,8 @@
 import contextlib
 import os
 import re
+import subprocess
+import sys
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional
 
@@ -11,8 +13,11 @@
 from codeflash.either import Failure, Success
 
 if TYPE_CHECKING:
+    from collections.abc import Mapping
+
     from codeflash.either import Result
 
+
 # PowerShell patterns and prefixes
 POWERSHELL_RC_EXPORT_PATTERN = re.compile(
     r'^\$env:CODEFLASH_API_KEY\s*=\s*(?:"|\')?(cf-[^\s"\']+)(?:"|\')?\s*$', re.MULTILINE
@@ -231,3 +236,24 @@ def save_api_key_to_rc(api_key: str) -> Result[str, str]:
             f"To ensure your Codeflash API key is automatically loaded into your environment at startup, you can create {shell_rc_path} and add the following line:{LF}"
             f"{LF}{api_key_line}{LF}"
         )
+
+
+def get_cross_platform_subprocess_run_args(
+    cwd: Path | str | None = None,
+    env: Mapping[str, str] | None = None,
+    timeout: Optional[float] = None,
+    check: bool = False,  # noqa: FBT001, FBT002
+    text: bool = True,  # noqa: FBT001, FBT002
+    capture_output: bool = True,  # noqa: FBT001, FBT002 (only for non-Windows)
+) -> dict[str, str]:
+    run_args = {"cwd": cwd, "env": env, "text": text, "timeout": timeout, "check": check}
+    if sys.platform == "win32":
+        creationflags = subprocess.CREATE_NEW_PROCESS_GROUP
+        run_args["creationflags"] = creationflags
+        run_args["stdout"] = subprocess.PIPE
+        run_args["stderr"] = subprocess.PIPE
+        run_args["stdin"] = subprocess.DEVNULL
+    else:
+        run_args["capture_output"] = capture_output
+
+    return run_args
diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py
@@ -16,7 +16,6 @@
 
 if TYPE_CHECKING:
     from codeflash.discovery.functions_to_optimize import FunctionToOptimize
-
 from pydantic.dataclasses import dataclass
 from rich.panel import Panel
 from rich.text import Text
@@ -29,6 +28,7 @@
     module_name_from_file_path,
 )
 from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE, codeflash_cache_db
+from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args
 from codeflash.models.models import CodePosition, FunctionCalledInTest, TestsInFile, TestType
 
 if TYPE_CHECKING:
@@ -331,7 +331,7 @@ def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
             # Be conservative except when an alias is used (which requires exact method matching)
             for target_func in fnames:
                 if "." in target_func:
-                    class_name, method_name = target_func.split(".", 1)
+                    class_name, _method_name = target_func.split(".", 1)
                     if aname == class_name and not alias.asname:
                         self.found_any_target_function = True
                         self.found_qualified_name = target_func
@@ -585,18 +585,18 @@ def discover_tests_pytest(
 
     tmp_pickle_path = get_run_tmp_file("collected_tests.pkl")
     with custom_addopts():
-        result = subprocess.run(
+        run_kwargs = get_cross_platform_subprocess_run_args(
+            cwd=project_root, check=False, text=True, capture_output=True
+        )
+        result = subprocess.run(  # noqa: PLW1510
             [
                 SAFE_SYS_EXECUTABLE,
                 Path(__file__).parent / "pytest_new_process_discovery.py",
                 str(project_root),
                 str(tests_root),
                 str(tmp_pickle_path),
             ],
-            cwd=project_root,
-            check=False,
-            capture_output=True,
-            text=True,
+            **run_kwargs,
         )
     try:
         with tmp_pickle_path.open(mode="rb") as f:

diff --git a/codeflash/discovery/functions_to_optimize.py b/codeflash/discovery/functions_to_optimize.py
@@ -175,7 +175,7 @@ def qualified_name_with_modules_from_root(self, project_root_path: Path) -> str:
 def get_functions_to_optimize(
     optimize_all: str | None,
     replay_test: list[Path] | None,
-    file: Path | None,
+    file: Path | str | None,
     only_get_this_function: str | None,
     test_cfg: TestConfig,
     ignore_paths: list[Path],
@@ -202,6 +202,7 @@ def get_functions_to_optimize(
         elif file is not None:
             logger.info("!lsp|Finding all functions in the file '%s'…", file)
             console.rule()
+            file = Path(file) if isinstance(file, str) else file
             functions: dict[Path, list[FunctionToOptimize]] = find_all_functions_in_file(file)
             if only_get_this_function is not None:
                 split_function = only_get_this_function.split(".")

diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py
@@ -134,7 +134,7 @@ def get_optimizable_functions(params: OptimizableFunctionsParams) -> dict[str, l
     document_uri = params.textDocument.uri
     document = server.workspace.get_text_document(document_uri)
 
-    file_path = Path(document.path)
+    file_path = Path(document.path).resolve()
 
     if not server.optimizer:
         return {"status": "error", "message": "optimizer not initialized"}
@@ -517,7 +517,7 @@ def initialize_function_optimization(params: FunctionOptimizationInitParams) ->
         files = [document.path]
 
         _, _, original_helpers = server.current_optimization_init_result
-        files.extend([str(helper_path) for helper_path in original_helpers])
+        files.extend([str(helper_path.resolve()) for helper_path in original_helpers])
 
         return {"functionName": params.functionName, "status": "success", "files_inside_context": files}
 

diff --git a/codeflash/lsp/features/perform_optimization.py b/codeflash/lsp/features/perform_optimization.py
@@ -63,7 +63,7 @@ def run_generate_optimizations():  # noqa: ANN202
     future_tests = function_optimizer.executor.submit(ctx_tests.run, run_generate_tests)
     future_optimizations = function_optimizer.executor.submit(ctx_opts.run, run_generate_optimizations)
 
-    logger.info(f"loading|Generating new tests and optimizations for function '{params.functionName}'...")
+    logger.info(f"loading|Generating new tests and optimizations for function '{params.functionName}'")
     concurrent.futures.wait([future_tests, future_optimizations])
 
     test_setup_result = future_tests.result()

diff --git a/codeflash/lsp/lsp_message.py b/codeflash/lsp/lsp_message.py
@@ -11,8 +11,8 @@
 json_primitive_types = (str, float, int, bool)
 max_code_lines_before_collapse = 45
 
-# \u241F is the message delimiter becuase it can be more than one message sent over the same message, so we need something to separate each message
-message_delimiter = "\u241f"
+# \\u241F is the message delimiter because it can be more than one message sent over the same message, so we need something to separate each message
+message_delimiter = "\\u241F"
 
 
 # allow the client to know which message it is receiving

diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
@@ -396,7 +396,7 @@ def __init__(
 
     def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[Path, str]], str]:
         should_run_experiment = self.experiment_id is not None
-        logger.info(f"Function Trace ID: {self.function_trace_id}")
+        logger.info(f"!lsp|Function Trace ID: {self.function_trace_id}")
         ph("cli-optimize-function-start", {"function_trace_id": self.function_trace_id})
         self.cleanup_leftover_test_return_values()
         file_name_from_test_module_name.cache_clear()
@@ -1934,6 +1934,7 @@ def establish_original_code_baseline(
                 instrument_codeflash_capture(
                     self.function_to_optimize, file_path_to_helper_classes, self.test_cfg.tests_root
                 )
+
                 total_looping_time = TOTAL_LOOPING_TIME_EFFECTIVE
                 behavioral_results, coverage_results = self.run_and_parse_tests(
                     testing_type=TestingMode.BEHAVIOR,

diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py
@@ -652,8 +652,8 @@ def mirror_paths_for_worktree_mode(self, worktree_dir: Path) -> None:
 
 
 def mirror_path(path: Path, src_root: Path, dest_root: Path) -> Path:
-    relative_path = path.relative_to(src_root)
-    return dest_root / relative_path
+    relative_path = path.resolve().relative_to(src_root.resolve())
+    return Path(dest_root / relative_path)
 
 
 def run_with_args(args: Namespace) -> None:

diff --git a/codeflash/verification/test_runner.py b/codeflash/verification/test_runner.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+import contextlib
 import shlex
 import subprocess
+import sys
 from pathlib import Path
 from typing import TYPE_CHECKING
 
@@ -10,6 +12,7 @@
 from codeflash.code_utils.compat import IS_POSIX, SAFE_SYS_EXECUTABLE
 from codeflash.code_utils.config_consts import TOTAL_LOOPING_TIME_EFFECTIVE
 from codeflash.code_utils.coverage_utils import prepare_coverage_files
+from codeflash.code_utils.shell_utils import get_cross_platform_subprocess_run_args
 from codeflash.models.models import TestFiles, TestType
 
 if TYPE_CHECKING:
@@ -23,9 +26,12 @@ def execute_test_subprocess(
     cmd_list: list[str], cwd: Path, env: dict[str, str] | None, timeout: int = 600
 ) -> subprocess.CompletedProcess:
     """Execute a subprocess with the given command list, working directory, environment variables, and timeout."""
+    logger.debug(f"executing test run with command: {' '.join(cmd_list)}")
     with custom_addopts():
-        logger.debug(f"executing test run with command: {' '.join(cmd_list)}")
-        return subprocess.run(cmd_list, capture_output=True, cwd=cwd, env=env, text=True, timeout=timeout, check=False)
+        run_args = get_cross_platform_subprocess_run_args(
+            cwd=cwd, env=env, timeout=timeout, check=False, text=True, capture_output=True
+        )
+        return subprocess.run(cmd_list, **run_args)  # noqa: PLW1510
 
 
 def run_behavioral_tests(
@@ -39,6 +45,7 @@ def run_behavioral_tests(
     pytest_target_runtime_seconds: float = TOTAL_LOOPING_TIME_EFFECTIVE,
     enable_coverage: bool = False,
 ) -> tuple[Path, subprocess.CompletedProcess, Path | None, Path | None]:
+    """Run behavioral tests with optional coverage."""
     if test_framework in {"pytest", "unittest"}:
         test_files: list[str] = []
         for file in test_paths.test_files:
@@ -53,12 +60,14 @@ def run_behavioral_tests(
                     )
             else:
                 test_files.append(str(file.instrumented_behavior_file_path))
+
         pytest_cmd_list = (
             shlex.split(f"{SAFE_SYS_EXECUTABLE} -m pytest", posix=IS_POSIX)
             if pytest_cmd == "pytest"
             else [SAFE_SYS_EXECUTABLE, "-m", *shlex.split(pytest_cmd, posix=IS_POSIX)]
         )
         test_files = list(set(test_files))  # remove multiple calls in the same test function
+
         common_pytest_args = [
             "--capture=tee-sys",
             "-q",
@@ -85,11 +94,18 @@ def run_behavioral_tests(
             pytest_test_env["TF_XLA_FLAGS"] = "--tf_xla_auto_jit=0"
             pytest_test_env["TF_ENABLE_ONEDNN_OPTS"] = str(0)
             pytest_test_env["JAX_DISABLE_JIT"] = str(0)
-            cov_erase = execute_test_subprocess(
-                shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env
-            )  # this cleanup is necessary to avoid coverage data from previous runs, if there are any,
-            # then the current run will be appended to the previous data, which skews the results
-            logger.debug(cov_erase)
+
+            is_windows = sys.platform == "win32"
+            if is_windows:
+                # On Windows, delete coverage database file directly instead of using 'coverage erase', to avoid locking issues
+                if coverage_database_file.exists():
+                    with contextlib.suppress(PermissionError, OSError):
+                        coverage_database_file.unlink()
+            else:
+                cov_erase = execute_test_subprocess(
+                    shlex.split(f"{SAFE_SYS_EXECUTABLE} -m coverage erase"), cwd=cwd, env=pytest_test_env, timeout=30
+                )  # this cleanup is necessary to avoid coverage data from previous runs, if there are any, then the current run will be appended to the previous data, which skews the results
+                logger.debug(cov_erase)
             coverage_cmd = [
                 SAFE_SYS_EXECUTABLE,
                 "-m",
@@ -105,7 +121,6 @@ def run_behavioral_tests(
                 coverage_cmd.extend(shlex.split(pytest_cmd, posix=IS_POSIX)[1:])
 
             blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS if plugin != "cov"]
-
             results = execute_test_subprocess(
                 coverage_cmd + common_pytest_args + blocklist_args + result_args + test_files,
                 cwd=cwd,
@@ -118,6 +133,7 @@ def run_behavioral_tests(
             )
         else:
             blocklist_args = [f"-p no:{plugin}" for plugin in BEHAVIORAL_BLOCKLISTED_PLUGINS]
+
             results = execute_test_subprocess(
                 pytest_cmd_list + common_pytest_args + blocklist_args + result_args + test_files,
                 cwd=cwd,

diff --git a/tests/test_function_discovery.py b/tests/test_function_discovery.py
@@ -411,13 +411,17 @@ def not_in_checkpoint_function():
 
         discovered = find_all_functions_in_file(test_file_path)
         modified_functions = {test_file_path: discovered[test_file_path]}
-        filtered, count = filter_functions(
-            modified_functions,
-            tests_root=Path("tests"),
-            ignore_paths=[],
-            project_root=temp_dir,
-            module_root=temp_dir,
-        )
+        # Use an absolute path for tests_root that won't match the temp directory
+        # This avoids path resolution issues in CI where the working directory might differ
+        tests_root_absolute = (temp_dir.parent / "nonexistent_tests_dir").resolve()
+        with unittest.mock.patch("codeflash.discovery.functions_to_optimize.get_blocklisted_functions", return_value={}):
+            filtered, count = filter_functions(
+                modified_functions,
+                tests_root=tests_root_absolute,
+                ignore_paths=[],
+                project_root=temp_dir,
+                module_root=temp_dir,
+            )
         function_names = [fn.function_name for fn in filtered.get(test_file_path, [])]
         assert "propagate_attributes" in function_names
         assert count == 3