aider v1

nanjiangwill · nanjiangwill · commit 1904f275a8ae · 2024-09-12T18:36:55.000-04:00
diff --git a/baselines/baseline_utils.py b/baselines/baseline_utils.py
@@ -140,6 +140,12 @@ def get_target_edit_files_cmd_args(target_dir: str) -> str:
     # Split the output into lines and remove the base_dir prefix
     files = result.stdout.strip().split("\n")
 
+    # Remove the base_dir prefix
+    files = [file.replace(target_dir, "").lstrip("/") for file in files]
+
+    # Only keep python files
+    files = [file for file in files if file.endswith(".py")]
+
     return " ".join(files)
 
 
@@ -186,17 +192,7 @@ def get_message_to_aider(
     else:
         repo_info = ""
 
-    if aider_config.use_lint_info:
-        lint_info = (
-            f"\n{LINT_INFO_HEADER} "
-            + subprocess.run(
-                ["pre-commit", "run", "--all-files"], capture_output=True, text=True
-            ).stdout
-        )[: aider_config.max_lint_info_length]
-    else:
-        lint_info = ""
-
-    message_to_aider = prompt + reference + repo_info + unit_tests_info + lint_info
+    message_to_aider = prompt + reference + repo_info + unit_tests_info
 
     return message_to_aider
 
diff --git a/baselines/class_types.py b/baselines/class_types.py
@@ -6,6 +6,7 @@
 class Commit0Config(BaseModel):
     base_dir: str
     dataset_name: str
+    repo_split: str
 
 
 class AiderConfig(BaseModel):
diff --git a/baselines/config/aider.yaml b/baselines/config/aider.yaml
@@ -7,4 +7,5 @@ aider_config:
   use_repo_info: false
   use_unit_tests_info: false
   use_reference_info: false
-  use_lint_info: false
+  use_lint_info: true
+  pre_commit_config_path: .pre-commit-config.yaml
diff --git a/baselines/config/base.yaml b/baselines/config/base.yaml
@@ -6,6 +6,7 @@ defaults:
 commit0_config:
   base_dir: /Users/willjiang/Desktop/ai2dev/commit0/repos
   dataset_name: "wentingzhao/commit0_docstring"
+  repo_split: "lite"
 
 aider_config:
   llm_name: "claude-3-5-sonnet-20240620"
@@ -17,6 +18,7 @@ aider_config:
   max_reference_info_length: 10000
   use_lint_info: false
   max_lint_info_length: 10000
+  pre_commit_config_path: .pre-commit-config.yaml
 
 hydra:
   run:
diff --git a/baselines/run_aider.py b/baselines/run_aider.py
@@ -1,19 +1,17 @@
 import logging
 import os
 import subprocess
-from functools import partial
 
 import hydra
 from datasets import load_dataset
 from omegaconf import OmegaConf
-from tqdm.contrib.concurrent import thread_map
 import tarfile
 from baselines.baseline_utils import (
     get_message_to_aider,
     get_target_edit_files_cmd_args,
 )
 from baselines.class_types import AiderConfig, BaselineConfig, Commit0Config
-
+from commit0.harness.constants import SPLIT
 # from aider.run_aider import get_aider_cmd
 
 logging.basicConfig(
@@ -27,11 +25,16 @@ def get_aider_cmd(
     files: str,
     message_to_aider: str,
     test_cmd: str,
+    lint_cmd: str,
 ) -> str:
     """Get the Aider command based on the given context."""
-    aider_cmd = f"aider --model {model} --file {files} --message \"{message_to_aider}\" --auto-test --test --test-cmd '{test_cmd}' --yes"
-
-    return aider_cmd
+    base_cmd = f'aider --model {model} --file {files} --message "{message_to_aider}"'
+    if lint_cmd:
+        base_cmd += f" --auto-lint --lint-cmd '{lint_cmd}'"
+    if test_cmd:
+        base_cmd += f" --auto-test --test --test-cmd '{test_cmd}'"
+    base_cmd += " --yes"
+    return base_cmd
 
 
 def run_aider_for_repo(
@@ -61,29 +64,39 @@ def run_aider_for_repo(
 
     repo_path = os.path.join(commit0_config.base_dir, repo_name)
 
+    os.chdir(repo_path)
+
     target_edit_files_cmd_args = get_target_edit_files_cmd_args(repo_path)
 
     message_to_aider = get_message_to_aider(
         aider_config, target_edit_files_cmd_args, repo_path, ds
     )
 
-    test_files = test_files[:1]
+    if aider_config.use_lint_info:
+        lint_cmd = "pre-commit run --config ../../.pre-commit-config.yaml --files"
+    else:
+        lint_cmd = ""
+
     for test_file in test_files:
-        test_cmd = f"uv run commit0 test-reference {repo_name} {test_file}"
+        test_cmd = f"python -m commit0 test {repo_name} {test_file}"
 
         aider_cmd = get_aider_cmd(
             aider_config.llm_name,
             target_edit_files_cmd_args,
             message_to_aider,
             test_cmd,
+            lint_cmd,
         )
 
-        print(aider_cmd)
-
         try:
-            process = subprocess.Popen(aider_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+            process = subprocess.Popen(
+                aider_cmd,
+                shell=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                universal_newlines=True,
+            )
             stdout, stderr = process.communicate()
-            results = process.returncode
             logger.info(f"STDOUT: {stdout}")
             logger.info(f"STDERR: {stderr}")
         except subprocess.CalledProcessError as e:
@@ -97,6 +110,7 @@ def run_aider_for_repo(
                 logger.error(f"Command: {''.join(aider_cmd)}")
             else:
                 logger.error(f"OSError occurred: {e}")
+        asdf
 
 
 @hydra.main(version_base=None, config_path="config", config_name="aider")
@@ -114,13 +128,15 @@ def main(config: BaselineConfig) -> None:
 
     dataset = load_dataset(commit0_config.dataset_name, split="test")
 
-    dataset = [dataset[3]]
-    thread_map(
-        partial(run_aider_for_repo, commit0_config, aider_config),
-        dataset,
-        desc="Running aider for repos",
-        max_workers=10,
-    )
+    filtered_dataset = [
+        example
+        for example in dataset
+        if commit0_config.repo_split == "all"
+        or example["repo"].split("/")[-1] in SPLIT.get(commit0_config.repo_split, [])
+    ]
+
+    for example in filtered_dataset:
+        run_aider_for_repo(commit0_config, aider_config, example)
 
 
 if __name__ == "__main__":