Skip to content

Commit

Permalink
docs: add docstrings and module explanations
Browse files Browse the repository at this point in the history
Bharat Ramanathan committed May 31, 2023
1 parent 98c1bff commit ea1bc36
Showing 6 changed files with 66 additions and 8 deletions.
15 changes: 15 additions & 0 deletions llm-apps-course/app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""A Simple chatbot that uses the LangChain and Gradio UI to answer questions about wandb documentation."""
import os
from types import SimpleNamespace

@@ -9,10 +10,16 @@


class Chat:
"""A chatbot interface that persists the vectorstore and chain between calls."""

def __init__(
self,
config: SimpleNamespace,
):
"""Initialize the chatbot.
Args:
config (SimpleNamespace): The configuration.
"""
self.config = config
self.wandb_run = wandb.init(
project=self.config.project,
@@ -30,6 +37,14 @@ def __call__(
history: list[tuple[str, str]] | None = None,
openai_api_key: str = None,
):
"""Answer a question about wandb documentation using the LangChain QA chain and vector store retriever.
Args:
question (str): The question to answer.
history (list[tuple[str, str]] | None, optional): The chat history. Defaults to None.
openai_api_key (str, optional): The OpenAI API key. Defaults to None.
Returns:
tuple[list[tuple[str, str]], list[tuple[str, str]]]: The chat history before and after the question is answered.
"""
if openai_api_key is not None:
openai_key = openai_api_key
elif os.environ["OPENAI_API_KEY"]:
21 changes: 17 additions & 4 deletions llm-apps-course/chain.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""This module contains functions for loading a ConversationalRetrievalChain"""

import logging
from types import SimpleNamespace

import wandb
from langchain.chains import ConversationalRetrievalChain
@@ -16,6 +17,7 @@ def load_vector_store(wandb_run: wandb.run, openai_api_key: str) -> Chroma:
"""Load a vector store from a Weights & Biases artifact
Args:
wandb_run (wandb.run): An active Weights & Biases run
openai_api_key (str): The OpenAI API key to use for embedding
Returns:
Chroma: A chroma vector store object
"""
@@ -34,10 +36,12 @@ def load_vector_store(wandb_run: wandb.run, openai_api_key: str) -> Chroma:

def load_chain(wandb_run: wandb.run, vector_store: Chroma, openai_api_key: str):
"""Load a ConversationalQA chain from a config and a vector store
Args:
wandb_run (wandb.run): An active Weights & Biases run
vector_store (Chroma): A Chroma vector store object
openai_api_key (str): The OpenAI API key to use for embedding
Returns:
ConversationalRetrievalChain: A ConversationalRetrievalChain object
"""
retriever = vector_store.as_retriever()
llm = ChatOpenAI(
@@ -66,6 +70,15 @@ def get_answer(
question: str,
chat_history: list[tuple[str, str]],
):
"""Get an answer from a ConversationalRetrievalChain
Args:
chain (ConversationalRetrievalChain): A ConversationalRetrievalChain object
callback (WandbTracer): A WandbTracer callback object
question (str): The question to ask
chat_history (list[tuple[str, str]]): A list of tuples of (question, answer)
Returns:
str: The answer to the question
"""
result = chain(
inputs={"question": question, "chat_history": chat_history},
callbacks=[callback],
1 change: 1 addition & 0 deletions llm-apps-course/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Configuration for the LLM Apps Course"""
from types import SimpleNamespace

TEAM = None
35 changes: 31 additions & 4 deletions llm-apps-course/eval.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Evaluate a ConversationalRetrievalChain on a dataset of questions and answers."""
import os
from pathlib import Path
from types import SimpleNamespace
@@ -15,7 +16,13 @@
from wandb.integration.langchain import WandbTracer


def load_eval_dataset(config: SimpleNamespace):
def load_eval_dataset(config: SimpleNamespace) -> pd.DataFrame:
"""Load a dataset of questions and answers from a Weights & Biases artifact
Args:
config (SimpleNamespace): A config object
Returns:
pd.DataFrame: A dataframe of questions and answers
"""
# we will load data from a wandb Table artifact
artifact = wandb.use_artifact(config.eval_artifact)
# download artifact
@@ -27,7 +34,14 @@ def load_eval_dataset(config: SimpleNamespace):

def generate_answers(
eval_dataset: pd.DataFrame, qa_chain: ConversationalRetrievalChain
):
) -> pd.DataFrame:
"""Generate answers for a dataset of questions and answers
Args:
eval_dataset (pd.DataFrame): A dataframe of questions and answers
qa_chain (ConversationalRetrievalChain): A ConversationalRetrievalChain object
Returns:
pd.DataFrame: A dataframe of questions, answers, and model answers
"""
answers = []
for query in tqdm(eval_dataset["question"], total=len(eval_dataset)):
answer = delayed(
@@ -40,7 +54,16 @@ def generate_answers(
return eval_dataset


def evaluate_answers(eval_dataset: pd.DataFrame, config: SimpleNamespace):
def evaluate_answers(
eval_dataset: pd.DataFrame, config: SimpleNamespace
) -> pd.DataFrame:
"""Evaluate a dataset of questions, answers, and model answers
Args:
eval_dataset (pd.DataFrame): A dataframe of questions, answers, and model answers
config (SimpleNamespace): A config object
Returns:
pd.DataFrame: A dataframe of questions, answers, model answers, and model scores
"""
eval_prompt = load_eval_prompt()
llm = ChatOpenAI(
model_name=config.eval_model,
@@ -69,7 +92,11 @@ def evaluate_answers(eval_dataset: pd.DataFrame, config: SimpleNamespace):
return eval_dataset


def log_results(eval_dataset: pd.DataFrame):
def log_results(eval_dataset: pd.DataFrame) -> None:
"""Log evaluation results to a Weights & Biases Artifact
Args:
eval_dataset (pd.DataFrame): A dataframe of questions, answers, model answers, and model scores
"""
model_accuracy = len(eval_dataset[eval_dataset["model_score"] == "CORRECT"]) / len(
eval_dataset
)
1 change: 1 addition & 0 deletions llm-apps-course/ingest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Ingest a directory of documentation files into a vector store and store the relevant artifacts in Weights & Biases"""
import argparse
import json
import logging
1 change: 1 addition & 0 deletions llm-apps-course/prompts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Prompts for the chatbot and evaluation."""
import json
import logging
import pathlib

0 comments on commit ea1bc36

Please sign in to comment.