Skip to content

Commit

Permalink
Fix file name extraction after matplotlib block added
Browse files Browse the repository at this point in the history
  • Loading branch information
pseudotensor committed Sep 13, 2024
1 parent c792746 commit 6de12f2
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 2 deletions.
1 change: 1 addition & 0 deletions openai_server/agent_prompting.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def agent_system_prompt(agent_code_writer_system_message, agent_system_site_pack
* Check the execution result returned by the user.
* Ensure python code blocks contain valid python code, and shell code blocks contain valid shell code.
* Every python or shell code block MUST be marked whether it is for execution with a comment that shows if execution is true or false, e.g. # execution: true
* If a python code is marked for execution, do not generate a shell script to execute that python code file, because that would execute the python code twice.
* You can assume that any files (python scripts, shell scripts, images, csv files, etc.) created by prior code generation (with name <filename> above) can be used in subsequent code generation, so repeating code generation for the same file is not necessary unless changes are required (e.g. a python code of some name can be run with a short sh code).
* When you need to collect info, generate code to output the info you need.
* Ensure you provide well-commented code, so the user can understand what the code does.
Expand Down
35 changes: 34 additions & 1 deletion openai_server/autogen_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ def __init__(
self.autogen_code_restrictions_level = autogen_code_restrictions_level
self.stream_output = stream_output

self.filename_patterns: List[re.Pattern] = [
re.compile(r"^<!--\s*filename:\s*([\w.-/]+)\s*-->$"),
re.compile(r"^/\*\s*filename:\s*([\w.-/]+)\s*\*/$"),
re.compile(r"^//\s*filename:\s*([\w.-/]+)\s*$"),
re.compile(r"^#\s*filename:\s*([\w.-/]+)\s*$"),
]

@staticmethod
def remove_comments_strings(code: str, lang: str) -> str:
if verbose:
Expand Down Expand Up @@ -181,6 +188,32 @@ def sanitize_command(lang: str, code: str) -> None:
if match.group(f"pat{i}"):
raise ValueError(f"{danger_mark}: {patterns[pattern]}\n\n{cleaned_code}")

def _get_file_name_from_content(self, code: str, workspace_path: Path) -> Optional[str]:
    """Return the workspace-relative file name declared in a ``filename:`` comment.

    Every line of ``code`` is stripped and tried against each compiled regex in
    ``self.filename_patterns``; the first capture that validates and resolves
    to a location inside ``workspace_path`` wins.

    Args:
        code: Source text of a code block; it is scanned line by line.
        workspace_path: Directory that the declared file must live under.

    Returns:
        The declared path relative to the workspace as a string, or ``None``
        when no line declares a usable file name (no match, disallowed
        characters, or a path that resolves outside the workspace).
    """
    # Resolve the workspace once so both sides of relative_to() are in the
    # same (absolute, symlink-free) form.  Comparing a resolved candidate
    # against an unresolved workspace raises ValueError for every relative or
    # symlinked workspace, which made every declaration look "outside".
    workspace_root = workspace_path.resolve()

    for raw_line in code.split("\n"):
        line = raw_line.strip()
        for pattern in self.filename_patterns:
            matches = pattern.match(line)
            if matches is None:
                continue

            filename = matches.group(1).strip()

            # Allowed characters: word characters, '.', '/' and '-'.  The
            # hyphen is listed last so it is a literal; written as `.-/` it is
            # a range that covers only '.' and '/', silently rejecting '-'.
            # NOTE: the capture groups in self.filename_patterns must admit
            # '-' as well for hyphenated names to reach this check.
            if not re.fullmatch(r"[\w./-]+", filename):
                continue  # Invalid filename, try next pattern

            # Absolute names and '..' segments resolve outside the workspace
            # and make relative_to() raise ValueError; such names are skipped.
            try:
                relative = (workspace_root / filename).resolve().relative_to(workspace_root)
            except ValueError:
                continue
            return str(relative)

    return None

def __execute_code_dont_check_setup(self, code_blocks: List[CodeBlock]) -> CommandLineCodeResult:
# nearly identical to parent, but with control over guardrails via self.sanitize_command
logs_all = ""
Expand Down Expand Up @@ -211,7 +244,7 @@ def __execute_code_dont_check_setup(self, code_blocks: List[CodeBlock]) -> Comma
execute_code = self.execution_policies.get(lang, False)
try:
# Check if there is a filename comment
filename = _get_file_name_from_content(code, self._work_dir)
filename = self._get_file_name_from_content(code, self._work_dir)
except ValueError:
return CommandLineCodeResult(exit_code=1, output="Filename is not in the workspace")

Expand Down
76 changes: 76 additions & 0 deletions openai_server/test_autogen_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import re
from pathlib import Path

import pytest

from openai_server.autogen_utils import H2OLocalCommandLineCodeExecutor, bad_output_mark, danger_mark
Expand Down Expand Up @@ -382,3 +384,77 @@ def fetch_content(url):
pass
else:
raise ValueError("Should not reach here")


@pytest.fixture
def workspace_path():
    """Provide a ``(workspace directory, executor instance)`` pair for the tests."""
    root = Path("/tmp/workspace")
    executor = H2OLocalCommandLineCodeExecutor()
    return root, executor


def test_basic_filename_extraction(workspace_path):
    """A ``# filename:`` comment on the first line yields that file name."""
    root, executor = workspace_path
    snippet = "# filename: test.py\nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted == "test.py"


def test_filename_with_path(workspace_path):
    """A declared name with a sub-directory is returned with that sub-directory."""
    root, executor = workspace_path
    snippet = "# filename: subfolder/test.py\nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted == "subfolder/test.py"


def test_filename_with_different_comment_styles(workspace_path):
    """HTML, block (``/* */``) and ``//`` comment markers are all recognised."""
    root, executor = workspace_path
    cases = [
        ("<!-- filename: test.html -->\n<html></html>", "test.html"),
        ("/* filename: test.css */\nbody {}", "test.css"),
        ("// filename: test.js\nconsole.log('Hello');", "test.js"),
    ]
    for snippet, expected in cases:
        assert executor._get_file_name_from_content(snippet, root) == expected


def test_filename_not_on_first_line(workspace_path):
    """The declaration is still found when another line precedes it."""
    root, executor = workspace_path
    snippet = "import os\n# filename: test.py\nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted == "test.py"


def test_no_filename_specified(workspace_path):
    """Code without any ``filename:`` comment yields ``None``."""
    root, executor = workspace_path
    snippet = "print('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted is None


def test_invalid_filename(workspace_path):
    """A declared name containing spaces is rejected and yields ``None``."""
    root, executor = workspace_path
    snippet = "# filename: invalid file name.py\nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted is None


def test_filename_outside_workspace(workspace_path):
    """An absolute path outside the workspace directory yields ``None``."""
    root, executor = workspace_path
    snippet = "# filename: /etc/passwd\nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted is None


def test_filename_with_colon(workspace_path):
    """The ``filename:`` form (with the colon) is accepted."""
    root, executor = workspace_path
    snippet = "# filename: test.py\nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted == "test.py"


def test_filename_without_colon(workspace_path):
    """Omitting the colon after ``filename`` means no name is extracted."""
    root, executor = workspace_path
    snippet = "# filename test.py\nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted is None


def test_multiple_filenames(workspace_path):
    """When two declarations are present, the first one is returned."""
    root, executor = workspace_path
    snippet = "# filename: first.py\n# filename: second.py\nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted == "first.py"


def test_commented_out_filename(workspace_path):
    """A declaration behind a second ``#`` marker is not treated as a file name."""
    root, executor = workspace_path
    snippet = "# # filename: test.py\nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted is None


def test_filename_with_spaces_around(workspace_path):
    """Whitespace after the declared name does not end up in the result."""
    root, executor = workspace_path
    snippet = "# filename: test.py \nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted == "test.py"


def test_filename_with_extension_containing_dot(workspace_path):
    """A multi-dot name such as ``test.tar.gz`` is returned unchanged."""
    root, executor = workspace_path
    snippet = "# filename: test.tar.gz\nprint('Hello, World!')"
    extracted = executor._get_file_name_from_content(snippet, root)
    assert extracted == "test.tar.gz"
2 changes: 1 addition & 1 deletion src/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "b8d4aeb0bee08bb9f1893642c2dd692fcc1cd3d4"
__version__ = "c792746151d399c265614dcc4360506312ebf432"

0 comments on commit 6de12f2

Please sign in to comment.