Skip to content

Commit

Permalink
Add "Simple web server" challenge (Significant-Gravitas#74)
Browse files Browse the repository at this point in the history
Co-authored-by: Silen Naihin <silen.naihin@gmail.com>
  • Loading branch information
waynehamadi and SilenNaihin authored Jul 11, 2023
1 parent 30ba515 commit 437e066
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 30 deletions.
15 changes: 14 additions & 1 deletion agbenchmark/RegressionManager.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from typing import Union


class RegressionManager:
Expand All @@ -15,7 +16,9 @@ def load(self) -> None:
f.read().strip()
) # read the content and remove any leading/trailing whitespace
if file_content: # if file is not empty, load the json
self.tests = json.loads(file_content)
data = json.loads(file_content)
self.tests = {k: data[k] for k in sorted(data)}
data = self.replace_backslash(data)
else: # if file is empty, assign an empty dictionary
self.tests = {}
except FileNotFoundError:
Expand All @@ -36,3 +39,13 @@ def remove_test(self, test_name: str) -> None:
if test_name in self.tests:
del self.tests[test_name]
self.save()

def replace_backslash(self, value: str) -> Union[str, list[str], dict]:
    """Recursively normalise backslash path separators to forward slashes.

    Strings get every backslash replaced with "/". Lists are processed
    element by element and dicts value by value (dict keys are left
    untouched). Any other kind of value is returned unchanged.

    Args:
        value: A string, list, dict, or any other JSON-decoded value.

    Returns:
        A new object of the same shape with backslashes replaced in every
        string it contains; non-container, non-string values are returned
        as-is.
    """
    if isinstance(value, str):
        # Collapse doubled backslashes first so each pair yields a single
        # "/", then convert the remaining single backslashes. Paths that
        # come out of json.loads contain single backslashes, which the
        # doubled pattern alone never matches.
        return value.replace("\\\\", "/").replace("\\", "/")
    if isinstance(value, list):
        return [self.replace_backslash(item) for item in value]
    if isinstance(value, dict):
        return {key: self.replace_backslash(item) for key, item in value.items()}
    return value
27 changes: 27 additions & 0 deletions agbenchmark/challenges/code/d3/custom_python/api_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import os
from typing import Any, Dict
from unittest.mock import Mock, patch

import requests


def make_assertion() -> None:
    """Run the health-check assertion, optionally against a mocked response.

    When the MOCK_TEST environment variable equals "true" (case-insensitive),
    requests.get is patched to return a canned 200 response whose JSON body
    is {"status": "OK"}; otherwise the real request is issued.
    """
    use_mock = os.environ.get("MOCK_TEST", "False").lower() == "true"
    if not use_mock:
        make_request_and_assert()
        return

    # Build a stand-in response shaped like requests.Response.
    fake_response = Mock(requests.Response)
    fake_response.status_code = 200
    fake_response.json.return_value = {"status": "OK"}

    with patch("requests.get", return_value=fake_response):
        make_request_and_assert()


def make_request_and_assert() -> Dict[str, Any]:
    """Query the local health endpoint and return its decoded JSON body.

    Sends a GET request to http://localhost:8079/health.

    Returns:
        The JSON payload of the response.

    Raises:
        AssertionError: If the response status code is not 200.

    Connection failures and timeouts propagate as exceptions raised by
    ``requests``.
    """
    # Without a timeout the call can block forever if something accepts the
    # connection but never answers; bound the wait so the check fails fast.
    response = requests.get("http://localhost:8079/health", timeout=10)
    if response.status_code != 200:
        raise AssertionError(
            f"Expected status code 200, but got {response.status_code}"
        )

    return response.json()
18 changes: 18 additions & 0 deletions agbenchmark/challenges/code/d3/data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"name": "TestCreateSimpleWebServer",
"category": ["code"],
"task": "Build a basic web server that responds to a GET localhost:8079/health with a 200 OK. Deploy this web server locally at the port 8079. ",
"dependencies": ["TestDebugSimpleTypoWithGuidance"],
"ground": {
"answer": "GET localhost:8079/health responds with a 200 OK",
"should_contain": [],
"should_not_contain": [],
"files": [],
"type": "custom_python"
},
"info": {
"difficulty": "medium",
"description": "Tests ability for the agent to build a simple web server locally",
"side_effects": []
}
}
25 changes: 24 additions & 1 deletion agbenchmark/challenges/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import importlib
import json
import os
import pkgutil
import sys
import types
from pathlib import Path
from typing import Any, Dict
Expand Down Expand Up @@ -47,6 +49,19 @@ def generate_tests() -> None:
class_name = data.get("name", "")

challenge_location = get_test_path(json_file)
if data["ground"]["type"] == "custom_python":
custom_python_location = (
f"{CURRENT_DIRECTORY}/../{challenge_location}/custom_python"
)
sys.path.append(str(custom_python_location))

for (module_loader, name, ispkg) in pkgutil.iter_modules(
[str(custom_python_location)]
):
module = importlib.import_module(name)

if hasattr(module, "make_assertion"):
make_assertion = getattr(module, "make_assertion")

# Define test class dynamically
challenge_class = types.new_class(class_name, (Challenge,))
Expand All @@ -58,7 +73,15 @@ def test_method(self, config: Dict[str, Any]) -> None: # type: ignore
self.setup_challenge(config)

scores = self.get_scores(config)
assert 1 in scores

# Check if make_assertion is defined and use it
if "make_assertion" in locals():
try:
make_assertion()
except AssertionError as error:
print(error) # Or handle this in another way
else:
assert 1 in scores

# Parametrize the method here
test_method = pytest.mark.parametrize(
Expand Down
31 changes: 29 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pydantic = "^1.10.9"
pytest-depends = "^1.0.1"
python-dotenv = "^0.21.0"
click = "^8.1.3"
types-requests = "^2.31.0.1"

[tool.poetry.group.dev.dependencies]
flake8 = "^3.9.2"
Expand Down
73 changes: 47 additions & 26 deletions regression_tests.json
Original file line number Diff line number Diff line change
@@ -1,69 +1,90 @@
{
"TestWriteFile": {
"TestBasicMemory": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark\\challenges\\interface\\write_file"
"dependencies": [
"TestReadFile",
"TestWriteFile"
],
"test": "agbenchmark/challenges/memory/m1"
},
"TestReadFile": {
"TestBasicRetrieval": {
"difficulty": "basic",
"dependencies": [
"TestWriteFile"
"TestWriteFile",
"TestSearch"
],
"test": "agbenchmark\\challenges\\interface\\read_file"
"test": "agbenchmark/challenges/retrieval/r1"
},
"TestBasicMemory": {
"TestCreateSimpleWebServer": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/challenges/code/d3"
},
"TestDebugSimpleTypoWithGuidance": {
"difficulty": "basic",
"dependencies": [
"TestReadFile",
"TestWriteFile"
],
"test": "agbenchmark\\challenges\\memory\\m1"
"test": "agbenchmark/challenges/code/d1"
},
"TestBasicRetrieval": {
"TestDebugSimpleTypoWithoutGuidance": {
"difficulty": "medium",
"dependencies": [
"TestDebugSimpleTypoWithGuidance"
],
"test": "agbenchmark/challenges/code/d2"
},
"TestReadFile": {
"difficulty": "basic",
"dependencies": [
"TestWriteFile"
],
"test": "agbenchmark\\challenges\\retrieval\\r1"
"test": "agbenchmark/challenges/interface/read_file"
},
"TestRememberMultipleIds": {
"difficulty": "basic",
"dependencies": [
"TestBasicMemory"
],
"test": "agbenchmark\\challenges\\memory\\m2"
"test": "agbenchmark/challenges/memory/m2"
},
"TestRetrieval2": {
"difficulty": "basic",
"TestRememberMultipleIdsWithNoise": {
"difficulty": "medium",
"dependencies": [
"TestBasicRetrieval"
"TestRememberMultipleIds"
],
"test": "agbenchmark\\challenges\\retrieval\\r2"
"test": "agbenchmark/challenges/memory/m3"
},
"TestRememberMultipleIdsWithNoise": {
"TestRememberMultiplePhrasesWithNoise": {
"difficulty": "medium",
"dependencies": [
"TestRememberMultipleIds"
"TestRememberMultipleIdsWithNoise"
],
"test": "agbenchmark\\challenges\\memory\\m3"
"test": "agbenchmark/challenges/memory/m4"
},
"TestRetrieval3": {
"TestRetrieval2": {
"difficulty": "basic",
"dependencies": [
"TestRetrieval2"
"TestBasicRetrieval"
],
"test": "agbenchmark\\challenges\\retrieval\\r3"
"test": "agbenchmark/challenges/retrieval/r2"
},
"TestRememberMultiplePhrasesWithNoise": {
"difficulty": "medium",
"TestRetrieval3": {
"difficulty": "basic",
"dependencies": [
"TestRememberMultipleIdsWithNoise"
"TestRetrieval2"
],
"test": "agbenchmark\\challenges\\memory\\m4"
"test": "agbenchmark/challenges/retrieval/r3"
},
"TestSearch": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark\\challenges\\interface\\search"
"test": "agbenchmark/challenges/interface/search"
},
"TestWriteFile": {
"difficulty": "basic",
"dependencies": [],
"test": "agbenchmark/challenges/interface/write_file"
}
}

0 comments on commit 437e066

Please sign in to comment.