From 026ac206c15e33bb3d86e50bd562205fdf17bcc5 Mon Sep 17 00:00:00 2001
From: Anton Osika
Date: Sat, 29 Apr 2023 14:53:21 +0200
Subject: [PATCH] Seedling of our engineer is here

---
 chat_to_files.py | 33 +++++++++++++++++++++
 main.py          | 64 +++++++++++++++++++++++++++++++++++++++++
 system           | 15 ++++++++++
 user             | 75 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 187 insertions(+)
 create mode 100644 chat_to_files.py
 create mode 100644 main.py
 create mode 100644 system
 create mode 100644 user

diff --git a/chat_to_files.py b/chat_to_files.py
new file mode 100644
index 0000000000..b1f0fecaea
--- /dev/null
+++ b/chat_to_files.py
@@ -0,0 +1,33 @@
+from typing import List, Tuple  # typing generics (ast.List/ast.Tuple are AST nodes, not types)
+import os
+import re
+
+
+def parse_chat(chat) -> List[Tuple[str, str]]:
+    # Get all ``` blocks
+    regex = r"```(.*?)```"
+
+    matches = re.finditer(regex, chat, re.DOTALL)
+
+    files = []
+    for match in matches:
+        path = match.group(1).split("\n")[0]
+        # Get the code
+        code = match.group(1).split("\n")[1:]
+        code = "\n".join(code)
+        # Add the file to the list
+        files.append((path, code))
+
+    return files
+
+
+def to_files(chat, path):
+    os.makedirs(path, exist_ok=True)
+
+    with open(os.path.join(path, 'all_output.txt'), "w") as f:
+        f.write(chat)
+
+    files = parse_chat(chat)
+    for file_name, file_content in files:
+        with open(os.path.join(path, file_name), "w") as f:
+            f.write(file_content)
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000000..d48bb28073
--- /dev/null
+++ b/main.py
@@ -0,0 +1,64 @@
+import os
+import pathlib
+from typing import Optional
+import openai
+from chat_to_files import to_files
+import typer
+
+
+app = typer.Typer()
+
+
+@app.command()
+def chat(
+    engine: str = "gpt-4",
+    temperature: float = 0.0,
+    max_tokens: int = 4096,
+    n: int = 1,
+    stream: bool = True,
+    system_prompt: str = typer.Argument("system", help="System prompt file"),
+    user_prompt: str = typer.Argument("user", help="User prompt file"),
+    code_to_file_path: Optional[str] = typer.Option(
+        None, "--out", "-c", help="Code to file path"
+    ),
+):
+
+    # ensure file path corresponds to file in the same file as this script, using __file__
+    if system_prompt == "system":
+        # get folder of script
+        system_prompt = pathlib.Path(__file__).parent / system_prompt
+
+    if user_prompt == "user":
+        user_prompt = pathlib.Path(__file__).parent / user_prompt
+
+
+    with open(system_prompt, "r") as f:
+        system_prompt = f.read()
+    with open(user_prompt, "r") as f:
+        user_prompt = f.read()
+    response = openai.ChatCompletion.create(
+        model=engine,
+        messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ],
+        temperature=temperature,
+        max_tokens=max_tokens,
+        n=n,
+        stream=stream,
+        stop=None,
+    )
+
+    chat = []
+    for chunk in response:
+        delta = chunk['choices'][0]['delta']
+        msg = delta.get('content', '')
+        print(msg, end="")
+        chat.append(msg)
+
+    if code_to_file_path is not None:
+        to_files("".join(chat), code_to_file_path)
+
+
+if __name__ == "__main__":
+    app()
diff --git a/system b/system
new file mode 100644
index 0000000000..cb8bac6b53
--- /dev/null
+++ b/system
@@ -0,0 +1,15 @@
+You will get instructions for code to write.
+You will write a very long answer. Make sure that every detail of the architecture is, in the end, implemented as code.
+
+You will first lay out the names of the core classes, functions, methods that will be necessary, as well as a quick comment on their purpose.
+Then you will output the content of each file, with syntax below.
+(You will start with the "entrypoint" file, then go to the ones that are imported by that file, and so on.)
+Make sure that files contain all imports, types etc. Make sure that code in different files is compatible with each other.
+Ensure to implement all code, if you are unsure, write a plausible implementation.
+Before you finish, double check that all parts of the architecture are present in the files.
+
+File syntax:
+
+```main_file.py
+[ADD YOUR CODE HERE]
+```
\ No newline at end of file
diff --git a/user b/user
new file mode 100644
index 0000000000..c9b1ebd2b8
--- /dev/null
+++ b/user
@@ -0,0 +1,75 @@
+Instructions:
+We are writing a feature computation framework.
+
+It will mainly consist of FeatureBuilder classes.
+
+Each Feature Builder will have the methods:
+- get(key, context, cache): To first check cache, and then go on to call dependencies to compute the feature. Returns value and hash of value.
+- dry_run(key, context): To check that "type" of key will match input requirements of features
+- input_type(context): That explains what dimensions key is applying to
+- output_type(context): That explains what type the output is
+
+It will have the class attr:
+- deps: list of FeatureBuilder classes
+
+Where it is unclear, please make assumptions and add a comment in the code about it
+
+Here is an example of Builders we want:
+
+ProductEmbeddingString: takes product_id, queries the product_db and gets the title as a string
+ProductEmbedding: takes string and returns an embedding
+ProductEmbeddingDB: takes just `merchant` name, uses all product_ids and returns the blob that is a database of embeddings
+ProductEmbeddingSearcher: takes a string, constructs embeddingDB feature (note: all features are cached), embeds the string and searches the db
+LLMProductPrompt: queries the ProductEmbeddingString, and formats a template that says "get recommendations for {title}"
+LLMSuggestions: Takes product_id, looks up prompts and gets list of suggestions of product descriptions
+LLMLogic: Takes the product_id, gets the LLM suggestions, embeds the suggestions, does a search, and returns a list of product_ids
+
+
+The LLMLogic is the logic_builder in a file such as this one:
+```
+def main(merchant, market):
+    cache = get_cache()
+    interaction_data_db = get_interaction_data_db()
+    product_db = get_product_db()
+    merchant_config = get_merchant_config(merchant)[merchant]
+
+    context = Context(
+        interaction_data_db=interaction_data_db,
+        product_db=product_db,
+        merchant_config=merchant_config,
+    )
+
+    product_ids = cache(ProductIds.get)(
+        key=(merchant, market),
+        context=context,
+        cache=cache,
+    )
+
+    for logic_builder in merchant_config['logic_builders']:
+        for product_id in product_ids:
+            key = (merchant, market, product_id)
+            p2p_recs = cache(logic_builder.get)(key, cache, context)
+            redis.set(key, p2p_recs)
+```
+
+API to product_db:
+```
+    async def get_product_attribute_dimensions(
+        self,
+    ) -> dict[AttributeId, Dimension]:
+        return await self.repository.get_product_attribute_dimensions(self.merchant)
+
+    async def get_products(
+        self,
+        attribute_ids: set[AttributeId],
+        product_ids: set[ProductId] | None = None,
+    ) -> dict[ProductId, dict[AttributeId, dict[IngestionDimensionKey, Any]]]:
+        return await self.repository.get_products_dict(
+            self.merchant, attribute_ids, product_ids
+        )
+```
+
+(note, dimensions are not so important. They relate to information that varies by: locale, warehouse, pricelist etc)
+
+
+Remember to read the Instructions carefully.
\ No newline at end of file