refactor for clarity (AntonOsika#585)
* add comments and refactor to more intuitive names

* merge generate and use_qa prompts, refactor for clarity

* black formatting

* ruff reformatting

---------

Co-authored-by: Anton Osika <anton.osika@gmail.com>
talkingtoaj and AntonOsika authored Aug 16, 2023
1 parent fd27ad9 commit 6646d19
Showing 9 changed files with 98 additions and 66 deletions.
4 changes: 4 additions & 0 deletions gpt_engineer/ai.py
@@ -72,6 +72,10 @@ def next(
*,
step_name: str,
) -> List[Message]:
"""
Advances the conversation by sending message history
to LLM and updating with the response.
"""
if prompt:
messages.append(self.fuser(prompt))

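For context on the new docstring: `next` is the single entry point the steps use to advance a conversation. Below is a minimal sketch of a call site; it mirrors how steps.py calls `ai.next(messages, dbs.preprompts["generate"], step_name=curr_fn())` elsewhere in this diff, while the constructor arguments are assumed rather than taken from the commit.

```python
# Illustrative sketch only; constructor arguments are assumptions.
from gpt_engineer.ai import AI

ai = AI(model="gpt-4", temperature=0.1)
messages = [ai.fsystem("You are a helpful coding assistant.")]

# Append a user prompt, send the whole history to the LLM, and get the
# updated history back; the model's reply is messages[-1].
messages = ai.next(messages, "Write a hello world script.", step_name="demo_step")
print(messages[-1].content)
```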
7 changes: 6 additions & 1 deletion gpt_engineer/chat_to_files.py
@@ -1,8 +1,13 @@
import os
import re
from typing import List, Tuple


def parse_chat(chat): # -> List[Tuple[str, str]]:
def parse_chat(chat) -> List[Tuple[str, str]]:
"""
Extracts all code blocks from a chat and returns them
as a list of (filename, codeblock) tuples
"""
# Get all ``` blocks and preceding filenames
regex = r"(\S+)\n\s*```[^\n]*\n(.+?)```"
matches = re.finditer(regex, chat, re.DOTALL)
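To make the regex above concrete, here is a small self-contained illustration (not the full `parse_chat` implementation, which is truncated in this hunk): it pulls `(filename, codeblock)` pairs out of a chat transcript.

```python
import re

# Same pattern as the hunk above: a filename-like token, then a fenced block.
regex = r"(\S+)\n\s*```[^\n]*\n(.+?)```"

chat = (
    "Sure, here are the files:\n\n"
    "main.py\n"
    "```python\n"
    "print('hello world')\n"
    "```\n"
)

matches = re.finditer(regex, chat, re.DOTALL)
files = [(m.group(1), m.group(2)) for m in matches]
print(files)  # [('main.py', "print('hello world')\n")]
```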
5 changes: 3 additions & 2 deletions gpt_engineer/learning.py
@@ -51,7 +51,7 @@ class Learning:
)


def human_input() -> Review:
def human_review_input() -> Review:
print()
print(
colored("To help gpt-engineer learn, please answer 3 questions:", "light_green")
@@ -175,7 +175,8 @@ def extract_learning(
return learning


def get_session():
def get_session() -> str:
"""Returns a unique user id for the current user project (session)"""
path = Path(tempfile.gettempdir()) / "gpt_engineer_user_id.txt"

try:
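The body of `get_session` is truncated above, so the following is only a plausible sketch of such a getter: the uuid-based id and the fallback branch are assumptions for illustration, not the committed code.

```python
import tempfile
import uuid
from pathlib import Path


def get_session() -> str:
    """Plausible sketch: return a stable per-user id, creating it on first use."""
    path = Path(tempfile.gettempdir()) / "gpt_engineer_user_id.txt"
    try:
        if path.exists():
            return path.read_text().strip()
        user_id = uuid.uuid4().hex  # assumption: any unique token would do
        path.write_text(user_id)
        return user_id
    except IOError:
        # If the temp dir is not writable, fall back to a throwaway id.
        return uuid.uuid4().hex
```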
9 changes: 4 additions & 5 deletions gpt_engineer/main.py
@@ -1,11 +1,8 @@
import logging
import os

from pathlib import Path

import openai
import typer

from dotenv import load_dotenv

from gpt_engineer.ai import AI
@@ -14,7 +11,7 @@
from gpt_engineer.learning import collect_consent
from gpt_engineer.steps import STEPS, Config as StepsConfig

app = typer.Typer()
app = typer.Typer() # creates a CLI app


def load_env_if_needed():
@@ -65,7 +62,9 @@ def main(
logs=DB(memory_path / "logs"),
input=DB(input_path),
workspace=DB(workspace_path),
preprompts=DB(Path(__file__).parent / "preprompts"),
preprompts=DB(
Path(__file__).parent / "preprompts"
), # Loads preprompts from the preprompts directory
archive=DB(archive_path),
)

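For readers unfamiliar with the `DB` wrapper being wired up here: the rest of the diff treats it as a dict keyed by filename (`dbs.preprompts["generate"]`, `"prompt" in dbs.input`, `dbs.memory["review"] = ...`). The sketch below is an assumption about that behaviour, named `FileDB` to make clear it is not the actual `gpt_engineer.db` code.

```python
from pathlib import Path


class FileDB:
    """Sketch of a dict-like store where each key maps to a file in a directory."""

    def __init__(self, path):
        self.path = Path(path).absolute()
        self.path.mkdir(parents=True, exist_ok=True)

    def __getitem__(self, key: str) -> str:
        return (self.path / key).read_text()

    def __setitem__(self, key: str, value: str) -> None:
        (self.path / key).write_text(value)

    def __contains__(self, key: str) -> bool:
        return (self.path / key).is_file()


# e.g. preprompts["generate"] would return the text of preprompts/generate.
```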
9 changes: 3 additions & 6 deletions gpt_engineer/preprompts/generate
@@ -1,8 +1,5 @@
You will get instructions for code to write.
You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.

Think step by step and reason yourself to the right decisions to make sure we get it right.
You will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.
First lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.

Then you will output the content of each file including ALL code.
Each file must strictly follow a markdown code block format, where the following tokens must be replaced such that
@@ -14,13 +11,13 @@ FILENAME
CODE
```

Do not comment on what every file does
Do not comment on what every file does. Please note that the code should be fully functional. No placeholders.

You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on.
Please note that the code should be fully functional. No placeholders.

Follow a language and framework appropriate best practice file naming convention.
Make sure that files contain all imports, types etc. Make sure that code in different files are compatible with each other.
Make sure that files contain all imports, types etc. The code should be fully functional. Make sure that code in different files are compatible with each other.
Ensure to implement all code, if you are unsure, write a plausible implementation.
Include module dependency or package manager dependency definition file.
Before you finish, double check that all parts of the architecture is present in the files.
2 changes: 2 additions & 0 deletions gpt_engineer/preprompts/roadmap
@@ -0,0 +1,2 @@
You will get instructions for code to write.
You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.
21 changes: 0 additions & 21 deletions gpt_engineer/preprompts/use_qa

This file was deleted.

97 changes: 69 additions & 28 deletions gpt_engineer/steps.py
@@ -1,13 +1,10 @@
import inspect
import re
import subprocess

from enum import Enum
from typing import List, Union

from langchain.schema import AIMessage, HumanMessage, SystemMessage
from termcolor import colored

from gpt_engineer.ai import AI
from gpt_engineer.chat_to_files import (
format_file_to_input,
@@ -16,15 +13,22 @@
to_files,
)
from gpt_engineer.db import DBs
from gpt_engineer.learning import human_review_input
from gpt_engineer.file_selector import ask_for_files
from gpt_engineer.learning import human_input

Message = Union[AIMessage, HumanMessage, SystemMessage]


def setup_sys_prompt(dbs: DBs) -> str:
"""
Primes the AI with instructions as to how it should
generate code and the philosophy to follow
"""
return (
dbs.preprompts["generate"] + "\nUseful to know:\n" + dbs.preprompts["philosophy"]
dbs.preprompts["roadmap"]
+ dbs.preprompts["generate"]
+ "\nUseful to know:\n"
+ dbs.preprompts["philosophy"]
)


@@ -40,7 +44,10 @@ def setup_sys_prompt_existing_code(dbs: DBs) -> str:


def get_prompt(dbs: DBs) -> str:
"""While we migrate we have this fallback getter"""
"""
Loads the user's prompt for the project from prompt file
(While we migrate we have this fallback getter)
"""
assert (
"prompt" in dbs.input or "main_prompt" in dbs.input
), "Please put your prompt in the file `prompt` in the project directory"
@@ -56,7 +63,12 @@ def get_prompt(dbs: DBs) -> str:


def curr_fn() -> str:
"""Get the name of the current function"""
"""
Get the name of the current function
NOTE: This will be the name of the function that called this function,
so it serves to ensure we don't hardcode the function name in the step,
but allow the step names to be refactored
"""
return inspect.stack()[1].function
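A quick, runnable demonstration of the behaviour the new docstring describes: `curr_fn()` reports its caller's name, which is why each step can pass `step_name=curr_fn()` without hardcoding its own name. The step name below is illustrative.

```python
import inspect


def curr_fn() -> str:
    # Frame 0 is curr_fn itself; frame 1 is whoever called it.
    return inspect.stack()[1].function


def gen_spec_demo():
    return curr_fn()


print(gen_spec_demo())  # -> "gen_spec_demo"
```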


@@ -132,6 +144,7 @@ def gen_spec(ai: AI, dbs: DBs) -> List[Message]:


def respec(ai: AI, dbs: DBs) -> List[Message]:
"""Asks the LLM to review the specs so far and reiterate them if necessary"""
messages = AI.deserialize_messages(dbs.logs[gen_spec.__name__])
messages += [ai.fsystem(dbs.preprompts["respec"])]

@@ -177,22 +190,24 @@ def gen_clarified_code(ai: AI, dbs: DBs) -> List[dict]:

messages = [
ai.fsystem(setup_sys_prompt(dbs)),
] + messages[1:]
messages = ai.next(messages, dbs.preprompts["use_qa"], step_name=curr_fn())
] + messages[
1:
] # skip the first clarify message, which was the original clarify priming prompt
messages = ai.next(messages, dbs.preprompts["generate"], step_name=curr_fn())

to_files(messages[-1].content.strip(), dbs.workspace)
return messages


def gen_code(ai: AI, dbs: DBs) -> List[dict]:
# get the messages from previous step
def gen_code_after_unit_tests(ai: AI, dbs: DBs) -> List[dict]:
"""Generates project code after unit tests have been produced"""
messages = [
ai.fsystem(setup_sys_prompt(dbs)),
ai.fuser(f"Instructions: {dbs.input['prompt']}"),
ai.fuser(f"Specification:\n\n{dbs.memory['specification']}"),
ai.fuser(f"Unit tests:\n\n{dbs.memory['unit_tests']}"),
]
messages = ai.next(messages, dbs.preprompts["use_qa"], step_name=curr_fn())
messages = ai.next(messages, dbs.preprompts["generate"], step_name=curr_fn())
to_files(messages[-1].content.strip(), dbs.workspace)
return messages

@@ -264,12 +279,20 @@ def use_feedback(ai: AI, dbs: DBs):
messages = [
ai.fsystem(setup_sys_prompt(dbs)),
ai.fuser(f"Instructions: {dbs.input['prompt']}"),
ai.fassistant(dbs.workspace["all_output.txt"]),
ai.fsystem(dbs.preprompts["use_feedback"]),
ai.fassistant(
dbs.workspace["all_output.txt"]
), # reload previously generated code
]
messages = ai.next(messages, dbs.input["feedback"], step_name=curr_fn())
to_files(messages[-1].content.strip(), dbs.workspace)
return messages
if dbs.input["feedback"]:
messages = ai.next(messages, dbs.input["feedback"], step_name=curr_fn())
to_files(messages[-1].content.strip(), dbs.workspace)
return messages
else:
print(
"No feedback was found in the input folder. Please create a file "
+ "called 'feedback' in the same folder as the prompt file."
)
exit(1)


def improve_existing_code(ai: AI, dbs: DBs):
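Regarding the new guard in `use_feedback` above: the step now exits unless a `feedback` file sits next to the `prompt` file in the project's input folder. A minimal illustration of preparing that folder; the project path and file contents are hypothetical.

```python
from pathlib import Path

project = Path("projects/my-project")  # hypothetical project directory
project.mkdir(parents=True, exist_ok=True)
(project / "prompt").write_text("Build a CLI todo app.")
(project / "feedback").write_text("Good start, but persist the todos to a JSON file.")
```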
@@ -328,7 +351,7 @@ def improve_existing_code(ai: AI, dbs: DBs):


def fix_code(ai: AI, dbs: DBs):
messages = AI.deserialize_messages(dbs.logs[gen_code.__name__])
messages = AI.deserialize_messages(dbs.logs[gen_code_after_unit_tests.__name__])
code_output = messages[-1].content.strip()
messages = [
ai.fsystem(setup_sys_prompt(dbs)),
@@ -344,7 +367,8 @@


def human_review(ai: AI, dbs: DBs):
review = human_input()
"""Collects and stores human review of the code"""
review = human_review_input()
dbs.memory["review"] = review.to_json() # type: ignore
return []

@@ -363,7 +387,7 @@ class Config(str, Enum):
IMPROVE_CODE = "improve_code"


# Different configs of what steps to run
# Define the steps to run for different configs
STEPS = {
Config.DEFAULT: [
clarify,
@@ -372,20 +396,27 @@ class Config(str, Enum):
execute_entrypoint,
human_review,
],
Config.BENCHMARK: [simple_gen, gen_entrypoint],
Config.SIMPLE: [simple_gen, gen_entrypoint, execute_entrypoint],
Config.BENCHMARK: [
simple_gen,
gen_entrypoint,
],
Config.SIMPLE: [
simple_gen,
gen_entrypoint,
execute_entrypoint,
],
Config.TDD: [
gen_spec,
gen_unit_tests,
gen_code,
gen_code_after_unit_tests,
gen_entrypoint,
execute_entrypoint,
human_review,
],
Config.TDD_PLUS: [
gen_spec,
gen_unit_tests,
gen_code,
gen_code_after_unit_tests,
fix_code,
gen_entrypoint,
execute_entrypoint,
@@ -402,15 +433,25 @@ class Config(str, Enum):
gen_spec,
respec,
gen_unit_tests,
gen_code,
gen_code_after_unit_tests,
fix_code,
gen_entrypoint,
execute_entrypoint,
human_review,
],
Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint, human_review],
Config.EXECUTE_ONLY: [execute_entrypoint],
Config.EVALUATE: [execute_entrypoint, human_review],
Config.USE_FEEDBACK: [
use_feedback,
gen_entrypoint,
execute_entrypoint,
human_review,
],
Config.EXECUTE_ONLY: [
execute_entrypoint,
],
Config.EVALUATE: [
execute_entrypoint,
human_review,
],
Config.IMPROVE_CODE: [improve_existing_code],
}

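To see how the reshaped STEPS table is consumed: the driver resolves a config to one of these lists and runs each step in order, logging its messages under the step's own name, which is why `fix_code` can reload `dbs.logs[gen_code_after_unit_tests.__name__]`. Below is a hedged sketch of that loop; the `run_steps` name and the `AI.serialize_messages` helper are assumptions, not shown in this diff.

```python
from gpt_engineer.ai import AI
from gpt_engineer.steps import STEPS


def run_steps(ai, dbs, steps_config):
    """Sketch of the assumed driver loop; not the literal main.py code."""
    for step in STEPS[steps_config]:
        messages = step(ai, dbs)
        # Persist each step's conversation so later steps (e.g. fix_code)
        # can reload it via AI.deserialize_messages(dbs.logs[step.__name__]).
        dbs.logs[step.__name__] = AI.serialize_messages(messages)  # helper assumed
```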
10 changes: 7 additions & 3 deletions tests/test_collect.py
@@ -9,7 +9,7 @@
from gpt_engineer.collect import collect_learnings, steps_file_hash
from gpt_engineer.db import DB, DBs
from gpt_engineer.learning import extract_learning
from gpt_engineer.steps import gen_code
from gpt_engineer.steps import gen_code_after_unit_tests


def test_collect_learnings(monkeypatch):
@@ -18,14 +18,18 @@ def test_collect_learnings(monkeypatch):

model = "test_model"
temperature = 0.5
steps = [gen_code]
steps = [gen_code_after_unit_tests]
dbs = DBs(DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"), DB("/tmp"))
dbs.input = {
"prompt": "test prompt\n with newlines",
"feedback": "test feedback",
}
code = "this is output\n\nit contains code"
dbs.logs = {gen_code.__name__: json.dumps([{"role": "system", "content": code}])}
dbs.logs = {
gen_code_after_unit_tests.__name__: json.dumps(
[{"role": "system", "content": code}]
)
}
dbs.workspace = {"all_output.txt": "test workspace\n" + code}

collect_learnings(model, temperature, steps, dbs)
