From 437e066a66c4f3d6aeba26f79fe1c3d8e4ea5743 Mon Sep 17 00:00:00 2001
From: merwanehamadi <merwanehamadi@gmail.com>
Date: Mon, 10 Jul 2023 17:46:03 -0700
Subject: [PATCH] Add "Simple web server" challenge (#74)

Co-authored-by: Silen Naihin <silen.naihin@gmail.com>
---
 agbenchmark/RegressionManager.py              | 15 +++-
 .../code/d3/custom_python/api_tests.py        | 27 +++++++
 agbenchmark/challenges/code/d3/data.json      | 18 +++++
 agbenchmark/challenges/test_all.py            | 25 ++++++-
 poetry.lock                                   | 31 +++++++-
 pyproject.toml                                |  1 +
 regression_tests.json                         | 73 ++++++++++++-------
 7 files changed, 160 insertions(+), 30 deletions(-)
 create mode 100644 agbenchmark/challenges/code/d3/custom_python/api_tests.py
 create mode 100644 agbenchmark/challenges/code/d3/data.json

diff --git a/agbenchmark/RegressionManager.py b/agbenchmark/RegressionManager.py
index e289a4787802..ac9efc69693e 100644
--- a/agbenchmark/RegressionManager.py
+++ b/agbenchmark/RegressionManager.py
@@ -1,4 +1,5 @@
 import json
+from typing import Union
 
 
 class RegressionManager:
@@ -15,7 +16,9 @@ def load(self) -> None:
                     f.read().strip()
                 )  # read the content and remove any leading/trailing whitespace
                 if file_content:  # if file is not empty, load the json
-                    self.tests = json.loads(file_content)
+                    data = json.loads(file_content)
+                    self.tests = {k: data[k] for k in sorted(data)}
+                    data = self.replace_backslash(data)
                 else:  # if file is empty, assign an empty dictionary
                     self.tests = {}
         except FileNotFoundError:
@@ -36,3 +39,13 @@ def remove_test(self, test_name: str) -> None:
         if test_name in self.tests:
             del self.tests[test_name]
             self.save()
+
+    def replace_backslash(self, value: str) -> Union[str, list[str], dict]:
+        """Recursively replace backslashes (doubled or single) with "/" in strings."""
+        if isinstance(value, str):
+            return value.replace("\\\\", "/").replace("\\", "/")
+        if isinstance(value, list):
+            return [self.replace_backslash(i) for i in value]
+        if isinstance(value, dict):
+            return {k: self.replace_backslash(v) for k, v in value.items()}
+        return value  # any other type is passed through unchanged
diff --git a/agbenchmark/challenges/code/d3/custom_python/api_tests.py b/agbenchmark/challenges/code/d3/custom_python/api_tests.py
new file mode 100644
index 000000000000..1d6255ebd155
--- /dev/null
+++ b/agbenchmark/challenges/code/d3/custom_python/api_tests.py
@@ -0,0 +1,27 @@
+import os
+from typing import Any, Dict
+from unittest.mock import Mock, patch
+
+import requests
+
+
+def make_assertion() -> None:
+    """Run the health check, mocking requests.get if MOCK_TEST=true (any case)."""
+    if os.environ.get("MOCK_TEST", "False").lower() != "true":
+        make_request_and_assert()
+        return
+    # Canned 200 response so the check can run without a live server.
+    fake = Mock(requests.Response)
+    fake.status_code, fake.json.return_value = 200, {"status": "OK"}
+    with patch("requests.get", return_value=fake):
+        make_request_and_assert()
+
+
+def make_request_and_assert() -> Dict[str, Any]:
+    """GET localhost:8079/health; return the JSON body or raise AssertionError."""
+    # Timeout (seconds) keeps an unresponsive server from hanging the run forever.
+    response = requests.get("http://localhost:8079/health", timeout=10)
+    if response.status_code != 200:
+        msg = f"Expected status code 200, but got {response.status_code}"
+        raise AssertionError(msg)
+    return response.json()
diff --git a/agbenchmark/challenges/code/d3/data.json b/agbenchmark/challenges/code/d3/data.json
new file mode 100644
index 000000000000..07d607f5fc73
--- /dev/null
+++ b/agbenchmark/challenges/code/d3/data.json
@@ -0,0 +1,18 @@
+{
+  "name": "TestCreateSimpleWebServer",
+  "category": ["code"],
+  "task": "Build a basic web server that responds to a GET localhost:8079/health with a 200 OK. Deploy this web server locally at the port 8079. ",
+  "dependencies": ["TestDebugSimpleTypoWithGuidance"],
+  "ground": {
+    "answer": "GET localhost:8079/health responds with a 200 OK",
+    "should_contain": [],
+    "should_not_contain": [],
+    "files": [],
+    "type": "custom_python"
+  },
+  "info": {
+    "difficulty": "medium",
+    "description": "Tests ability for the agent to build a simple web server locally",
+    "side_effects": []
+  }
+}
diff --git a/agbenchmark/challenges/test_all.py b/agbenchmark/challenges/test_all.py
index e7fe99e738e7..7dee0b2ab82e 100644
--- a/agbenchmark/challenges/test_all.py
+++ b/agbenchmark/challenges/test_all.py
@@ -2,6 +2,8 @@
 import importlib
 import json
 import os
+import pkgutil
+import sys
 import types
 from pathlib import Path
 from typing import Any, Dict
@@ -47,6 +49,19 @@ def generate_tests() -> None:
             class_name = data.get("name", "")
 
         challenge_location = get_test_path(json_file)
+        if data["ground"]["type"] == "custom_python":
+            custom_python_location = (
+                f"{CURRENT_DIRECTORY}/../{challenge_location}/custom_python"
+            )
+            sys.path.append(str(custom_python_location))
+
+            for (module_loader, name, ispkg) in pkgutil.iter_modules(
+                [str(custom_python_location)]
+            ):
+                module = importlib.import_module(name)
+
+                if hasattr(module, "make_assertion"):
+                    make_assertion = getattr(module, "make_assertion")
 
         # Define test class dynamically
         challenge_class = types.new_class(class_name, (Challenge,))
@@ -58,7 +73,15 @@ def test_method(self, config: Dict[str, Any]) -> None:  # type: ignore
             self.setup_challenge(config)
 
             scores = self.get_scores(config)
-            assert 1 in scores
+
+            # Check if make_assertion is defined and use it
+            if "make_assertion" in locals():
+                try:
+                    make_assertion()
+                except AssertionError as error:
+                    print(error)  # Or handle this in another way
+            else:
+                assert 1 in scores
 
         # Parametrize the method here
         test_method = pytest.mark.parametrize(
diff --git a/poetry.lock b/poetry.lock
index 4eae340b677a..5526da16b9cc 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -961,6 +961,33 @@ notebook = ["ipywidgets (>=6)"]
 slack = ["slack-sdk"]
 telegram = ["requests"]
 
+[[package]]
+name = "types-requests"
+version = "2.31.0.1"
+description = "Typing stubs for requests"
+category = "main"
+optional = false
+python-versions = "*"
+files = [
+    {file = "types-requests-2.31.0.1.tar.gz", hash = "sha256:3de667cffa123ce698591de0ad7db034a5317457a596eb0b4944e5a9d9e8d1ac"},
+    {file = "types_requests-2.31.0.1-py3-none-any.whl", hash = "sha256:afb06ef8f25ba83d59a1d424bd7a5a939082f94b94e90ab5e6116bd2559deaa3"},
+]
+
+[package.dependencies]
+types-urllib3 = "*"
+
+[[package]]
+name = "types-urllib3"
+version = "1.26.25.13"
+description = "Typing stubs for urllib3"
+category = "main"
+optional = false
+python-versions = "*"
+files = [
+    {file = "types-urllib3-1.26.25.13.tar.gz", hash = "sha256:3300538c9dc11dad32eae4827ac313f5d986b8b21494801f1bf97a1ac6c03ae5"},
+    {file = "types_urllib3-1.26.25.13-py3-none-any.whl", hash = "sha256:5dbd1d2bef14efee43f5318b5d36d805a489f6600252bb53626d4bfafd95e27c"},
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.7.1"
@@ -1082,4 +1109,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "44b5789494e73f3cb8bcb9d25daa62143e59352a246fd7724fdb3ad58c2560ae"
+content-hash = "81b84bbe08d4a09fb6a4f99c7fb018e0c0fcd879fa368c388b0af20c7c9a3f31"
diff --git a/pyproject.toml b/pyproject.toml
index a8f4f8dee5f6..1a96a51de188 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,7 @@ pydantic = "^1.10.9"
 pytest-depends = "^1.0.1"
 python-dotenv = "^0.21.0"
 click = "^8.1.3"
+types-requests = "^2.31.0.1"
 
 [tool.poetry.group.dev.dependencies]
 flake8 = "^3.9.2"
diff --git a/regression_tests.json b/regression_tests.json
index 10a6e11bf64a..0cf2d5f30ae3 100644
--- a/regression_tests.json
+++ b/regression_tests.json
@@ -1,69 +1,90 @@
 {
-    "TestWriteFile": {
+    "TestBasicMemory": {
         "difficulty": "basic",
-        "dependencies": [],
-        "test": "agbenchmark\\challenges\\interface\\write_file"
+        "dependencies": [
+            "TestReadFile",
+            "TestWriteFile"
+        ],
+        "test": "agbenchmark/challenges/memory/m1"
     },
-    "TestReadFile": {
+    "TestBasicRetrieval": {
         "difficulty": "basic",
         "dependencies": [
-            "TestWriteFile"
+            "TestWriteFile",
+            "TestSearch"
         ],
-        "test": "agbenchmark\\challenges\\interface\\read_file"
+        "test": "agbenchmark/challenges/retrieval/r1"
     },
-    "TestBasicMemory": {
+    "TestCreateSimpleWebServer": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/challenges/code/d3"
+    },
+    "TestDebugSimpleTypoWithGuidance": {
         "difficulty": "basic",
         "dependencies": [
             "TestReadFile",
             "TestWriteFile"
         ],
-        "test": "agbenchmark\\challenges\\memory\\m1"
+        "test": "agbenchmark/challenges/code/d1"
     },
-    "TestBasicRetrieval": {
+    "TestDebugSimpleTypoWithoutGuidance": {
+        "difficulty": "medium",
+        "dependencies": [
+            "TestDebugSimpleTypoWithGuidance"
+        ],
+        "test": "agbenchmark/challenges/code/d2"
+    },
+    "TestReadFile": {
         "difficulty": "basic",
         "dependencies": [
             "TestWriteFile"
         ],
-        "test": "agbenchmark\\challenges\\retrieval\\r1"
+        "test": "agbenchmark/challenges/interface/read_file"
     },
     "TestRememberMultipleIds": {
         "difficulty": "basic",
         "dependencies": [
             "TestBasicMemory"
         ],
-        "test": "agbenchmark\\challenges\\memory\\m2"
+        "test": "agbenchmark/challenges/memory/m2"
     },
-    "TestRetrieval2": {
-        "difficulty": "basic",
+    "TestRememberMultipleIdsWithNoise": {
+        "difficulty": "medium",
         "dependencies": [
-            "TestBasicRetrieval"
+            "TestRememberMultipleIds"
         ],
-        "test": "agbenchmark\\challenges\\retrieval\\r2"
+        "test": "agbenchmark/challenges/memory/m3"
     },
-    "TestRememberMultipleIdsWithNoise": {
+    "TestRememberMultiplePhrasesWithNoise": {
         "difficulty": "medium",
         "dependencies": [
-            "TestRememberMultipleIds"
+            "TestRememberMultipleIdsWithNoise"
         ],
-        "test": "agbenchmark\\challenges\\memory\\m3"
+        "test": "agbenchmark/challenges/memory/m4"
     },
-    "TestRetrieval3": {
+    "TestRetrieval2": {
         "difficulty": "basic",
         "dependencies": [
-            "TestRetrieval2"
+            "TestBasicRetrieval"
         ],
-        "test": "agbenchmark\\challenges\\retrieval\\r3"
+        "test": "agbenchmark/challenges/retrieval/r2"
     },
-    "TestRememberMultiplePhrasesWithNoise": {
-        "difficulty": "medium",
+    "TestRetrieval3": {
+        "difficulty": "basic",
         "dependencies": [
-            "TestRememberMultipleIdsWithNoise"
+            "TestRetrieval2"
         ],
-        "test": "agbenchmark\\challenges\\memory\\m4"
+        "test": "agbenchmark/challenges/retrieval/r3"
     },
     "TestSearch": {
         "difficulty": "basic",
         "dependencies": [],
-        "test": "agbenchmark\\challenges\\interface\\search"
+        "test": "agbenchmark/challenges/interface/search"
+    },
+    "TestWriteFile": {
+        "difficulty": "basic",
+        "dependencies": [],
+        "test": "agbenchmark/challenges/interface/write_file"
     }
 }
\ No newline at end of file