Seedling of our engineer is here
AntonOsika committed Apr 29, 2023
1 parent f8f9920 commit 026ac20
Showing 4 changed files with 187 additions and 0 deletions.
33 changes: 33 additions & 0 deletions chat_to_files.py
@@ -0,0 +1,33 @@
from typing import List, Tuple
import os
import re


def parse_chat(chat: str) -> List[Tuple[str, str]]:
    # Find all ``` fenced blocks in the chat transcript
    regex = r"```(.*?)```"

    matches = re.finditer(regex, chat, re.DOTALL)

    files = []
    for match in matches:
        # The first line inside the fence is the file path
        path = match.group(1).split("\n")[0]
        # The remaining lines are the code
        code = match.group(1).split("\n")[1:]
        code = "\n".join(code)
        # Add the file to the list
        files.append((path, code))

    return files


def to_files(chat: str, path: str):
    os.makedirs(path, exist_ok=True)

    with open(os.path.join(path, "all_output.txt"), "w") as f:
        f.write(chat)

    files = parse_chat(chat)
    for file_name, file_content in files:
        file_path = os.path.join(path, file_name)
        # Create any subdirectories referenced in the file name
        os.makedirs(os.path.dirname(file_path) or path, exist_ok=True)
        with open(file_path, "w") as f:
            f.write(file_content)
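The extraction logic in parse_chat can be sanity-checked without the rest of the project by running the same regex over a minimal transcript; the sample chat below is invented for illustration:

```python
import re

# A minimal transcript in the expected format: the first line inside each
# fence is treated as the file path, the rest as the file body.
chat = (
    "Here is the code:\n"
    "```hello.py\n"
    "print('hello')\n"
    "```\n"
)

files = []
for match in re.finditer(r"```(.*?)```", chat, re.DOTALL):
    lines = match.group(1).split("\n")
    files.append((lines[0], "\n".join(lines[1:])))

print(files)  # → [('hello.py', "print('hello')\n")]
```

Note that the lazy `(.*?)` match paired with `re.DOTALL` is what keeps each fence's contents separate while still matching across newlines.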
64 changes: 64 additions & 0 deletions main.py
@@ -0,0 +1,64 @@
import os
import pathlib
from typing import Optional
import openai
from chat_to_files import to_files
import typer


app = typer.Typer()


@app.command()
def chat(
    engine: str = "gpt-4",
    temperature: float = 0.0,
    max_tokens: int = 4096,
    n: int = 1,
    stream: bool = True,
    system_prompt: str = typer.Argument("system", help="System prompt file"),
    user_prompt: str = typer.Argument("user", help="User prompt file"),
    code_to_file_path: Optional[str] = typer.Option(
        None, "--out", "-c", help="Code to file path"
    ),
):
    # If the default names are used, resolve the prompt files
    # relative to this script's directory, using __file__
    if system_prompt == "system":
        system_prompt = pathlib.Path(__file__).parent / system_prompt

    if user_prompt == "user":
        user_prompt = pathlib.Path(__file__).parent / user_prompt

    with open(system_prompt, "r") as f:
        system_prompt = f.read()
    with open(user_prompt, "r") as f:
        user_prompt = f.read()

    response = openai.ChatCompletion.create(
        model=engine,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        n=n,
        stream=stream,
        stop=None,
    )

    chat = []
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        msg = delta.get("content", "")
        print(msg, end="")
        chat.append(msg)

    if code_to_file_path is not None:
        to_files("".join(chat), code_to_file_path)


if __name__ == "__main__":
    app()
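The streaming loop above simply concatenates the content deltas as they arrive. The pattern can be exercised without an API call by substituting a stub iterator; the chunks below are invented to mirror the shape of the streaming ChatCompletion response:

```python
# Stub chunks shaped like streaming ChatCompletion events; the first
# delta carries only the role and no "content" key, mirroring the API.
fake_response = [
    {"choices": [{"delta": {"role": "assistant"}}]},
    {"choices": [{"delta": {"content": "Hello, "}}]},
    {"choices": [{"delta": {"content": "world"}}]},
]

chat = []
for chunk in fake_response:
    delta = chunk["choices"][0]["delta"]
    # A missing "content" key (e.g. the role-only first delta) defaults to ""
    msg = delta.get("content", "")
    chat.append(msg)

print("".join(chat))  # → Hello, world
```

Using `delta.get("content", "")` rather than indexing is what makes the loop tolerant of role-only and empty deltas.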
15 changes: 15 additions & 0 deletions system
@@ -0,0 +1,15 @@
You will get instructions for code to write.
You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.

You will first lay out the names of the core classes, functions, and methods that will be necessary, as well as a quick comment on their purpose.
Then you will output the content of each file, with the syntax below.
(You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on.)
Make sure that files contain all imports, types, etc. Make sure that code in different files is compatible with each other.
Implement all of the code; if you are unsure, write a plausible implementation.
Before you finish, double check that all parts of the architecture are present in the files.

File syntax:

```main_file.py
[ADD YOUR CODE HERE]
```
75 changes: 75 additions & 0 deletions user
@@ -0,0 +1,75 @@
Instructions:
We are writing a feature computation framework.

It will mainly consist of FeatureBuilder classes.

Each Feature Builder will have the methods:
- get(key, context, cache): First checks the cache, then calls dependencies to compute the feature. Returns the value and a hash of the value.
- dry_run(key, context): Checks that the "type" of the key will match the input requirements of features.
- input_type(context): Explains which dimensions the key applies to.
- output_type(context): Explains what type the output is.

It will have the class attr:
- deps: list of FeatureBuilder classes

Where it is unclear, please make assumptions and add a comment in the code about them.

Here is an example of Builders we want:

ProductEmbeddingString: takes product_id, queries the product_db and gets the title as a string
ProductEmbedding: takes a string and returns an embedding
ProductEmbeddingDB: takes just `merchant` name, uses all product_ids and returns the blob that is a database of embeddings
ProductEmbeddingSearcher: takes a string, constructs embeddingDB feature (note: all features are cached), embeds the string and searches the db
LLMProductPrompt: queries the ProductEmbeddingString, and formats a template that says "get recommendations for {title}"
LLMSuggestions: Takes product_id, looks up prompts and gets list of suggestions of product descriptions
LLMLogic: Takes the product_id, gets the LLM suggestions, embeds the suggestions, does a search, and returns a list of product_ids
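One minimal reading of the FeatureBuilder interface above can be sketched as follows; the base-class mechanics, the dict-shaped cache, and the example `product_db` contents are all assumptions, since the spec leaves them open:

```python
import hashlib


class FeatureBuilder:
    # deps: list of FeatureBuilder classes this builder depends on
    deps = []

    @classmethod
    def get(cls, key, context, cache):
        # Check the cache first; otherwise compute, hash, and store
        cache_key = (cls.__name__, key)
        if cache_key in cache:
            return cache[cache_key]
        value = cls.compute(key, context, cache)
        value_hash = hashlib.sha256(repr(value).encode()).hexdigest()
        cache[cache_key] = (value, value_hash)
        return value, value_hash

    @classmethod
    def compute(cls, key, context, cache):
        raise NotImplementedError


class ProductEmbeddingString(FeatureBuilder):
    # Assumption: context["product_db"] maps product_id -> title
    @classmethod
    def compute(cls, key, context, cache):
        return context["product_db"][key]


cache = {}
context = {"product_db": {"p1": "Red Shoe"}}
value, value_hash = ProductEmbeddingString.get("p1", context, cache)
print(value)  # → Red Shoe
```

Keying the cache on (builder name, key) is one way to satisfy "all features are cached" while letting builders share a single cache object.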


The LLMLogic is the logic_builder in a file such as this one:
```
def main(merchant, market):
cache = get_cache()
interaction_data_db = get_interaction_data_db()
product_db = get_product_db()
merchant_config = get_merchant_config(merchant)[merchant]

context = Context(
interaction_data_db=interaction_data_db,
product_db=product_db,
merchant_config=merchant_config,
)

product_ids = cache(ProductIds.get)(
key=(merchant, market),
context=context,
cache=cache,
)

for logic_builder in merchant_config['logic_builders']:
for product_id in product_ids:
key = (merchant, market, product_id)
p2p_recs = cache(logic_builder.get)(key, cache, context)
redis.set(key, p2p_recs)
```

API to product_db:
```
async def get_product_attribute_dimensions(
self,
) -> dict[AttributeId, Dimension]:
return await self.repository.get_product_attribute_dimensions(self.merchant)

async def get_products(
self,
attribute_ids: set[AttributeId],
product_ids: set[ProductId] | None = None,
) -> dict[ProductId, dict[AttributeId, dict[IngestionDimensionKey, Any]]]:
return await self.repository.get_products_dict(
self.merchant, attribute_ids, product_ids
)
```

(Note: dimensions are not so important. They relate to information that varies by locale, warehouse, pricelist, etc.)


Remember to read the Instructions carefully.
