Skip to content

Commit

Permalink
docs: add docstrings and module explanations
Browse files Browse the repository at this point in the history
Bharat Ramanathan committed May 31, 2023
1 parent 98c1bff commit ea1bc36
Showing 6 changed files with 66 additions and 8 deletions.
15 changes: 15 additions & 0 deletions llm-apps-course/app.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""A Simple chatbot that uses the LangChain and Gradio UI to answer questions about wandb documentation."""
import os
from types import SimpleNamespace

@@ -9,10 +10,16 @@


class Chat:
"""A chatbot interface that persists the vectorstore and chain between calls."""

def __init__(
self,
config: SimpleNamespace,
):
"""Initialize the chatbot.
Args:
config (SimpleNamespace): The configuration.
"""
self.config = config
self.wandb_run = wandb.init(
project=self.config.project,
@@ -30,6 +37,14 @@ def __call__(
history: list[tuple[str, str]] | None = None,
openai_api_key: str = None,
):
"""Answer a question about wandb documentation using the LangChain QA chain and vector store retriever.
Args:
question (str): The question to answer.
history (list[tuple[str, str]] | None, optional): The chat history. Defaults to None.
openai_api_key (str, optional): The OpenAI API key. Defaults to None.
Returns:
tuple[list[tuple[str, str]], list[tuple[str, str]]]: The chat history before and after the question is answered.
"""
if openai_api_key is not None:
openai_key = openai_api_key
elif os.environ["OPENAI_API_KEY"]:
21 changes: 17 additions & 4 deletions llm-apps-course/chain.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""This module contains functions for loading a ConversationalRetrievalChain"""

import logging
from types import SimpleNamespace

import wandb
from langchain.chains import ConversationalRetrievalChain
@@ -16,6 +17,7 @@ def load_vector_store(wandb_run: wandb.run, openai_api_key: str) -> Chroma:
"""Load a vector store from a Weights & Biases artifact
Args:
wandb_run (wandb.run): An active Weights & Biases run
openai_api_key (str): The OpenAI API key to use for embedding
Returns:
Chroma: A chroma vector store object
"""
@@ -34,10 +36,12 @@ def load_vector_store(wandb_run: wandb.run, openai_api_key: str) -> Chroma:

def load_chain(wandb_run: wandb.run, vector_store: Chroma, openai_api_key: str):
"""Load a ConversationalQA chain from a config and a vector store
Args:
wandb_run (wandb.run): An active Weights & Biases run
vector_store (Chroma): A Chroma vector store object
openai_api_key (str): The OpenAI API key to use for embedding
Returns:
ConversationalRetrievalChain: A ConversationalRetrievalChain object
"""
retriever = vector_store.as_retriever()
llm = ChatOpenAI(
@@ -66,6 +70,15 @@ def get_answer(
question: str,
chat_history: list[tuple[str, str]],
):
"""Get an answer from a ConversationalRetrievalChain
Args:
chain (ConversationalRetrievalChain): A ConversationalRetrievalChain object
callback (WandbTracer): A WandbTracer callback object
question (str): The question to ask
chat_history (list[tuple[str, str]]): A list of tuples of (question, answer)
Returns:
str: The answer to the question
"""
result = chain(
inputs={"question": question, "chat_history": chat_history},
callbacks=[callback],
1 change: 1 addition & 0 deletions llm-apps-course/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Configuration for the LLM Apps Course"""
from types import SimpleNamespace

TEAM = None
35 changes: 31 additions & 4 deletions llm-apps-course/eval.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Evaluate a ConversationalRetrievalChain on a dataset of questions and answers."""
import os
from pathlib import Path
from types import SimpleNamespace
@@ -15,7 +16,13 @@
from wandb.integration.langchain import WandbTracer


def load_eval_dataset(config: SimpleNamespace):
def load_eval_dataset(config: SimpleNamespace) -> pd.DataFrame:
"""Load a dataset of questions and answers from a Weights & Biases artifact
Args:
config (SimpleNamespace): A config object
Returns:
pd.DataFrame: A dataframe of questions and answers
"""
# we will load data from a wandb Table artifact
artifact = wandb.use_artifact(config.eval_artifact)
# download artifact
@@ -27,7 +34,14 @@ def load_eval_dataset(config: SimpleNamespace):

def generate_answers(
eval_dataset: pd.DataFrame, qa_chain: ConversationalRetrievalChain
):
) -> pd.DataFrame:
"""Generate answers for a dataset of questions and answers
Args:
eval_dataset (pd.DataFrame): A dataframe of questions and answers
qa_chain (ConversationalRetrievalChain): A ConversationalRetrievalChain object
Returns:
pd.DataFrame: A dataframe of questions, answers, and model answers
"""
answers = []
for query in tqdm(eval_dataset["question"], total=len(eval_dataset)):
answer = delayed(
@@ -40,7 +54,16 @@ def generate_answers(
return eval_dataset


def evaluate_answers(eval_dataset: pd.DataFrame, config: SimpleNamespace):
def evaluate_answers(
eval_dataset: pd.DataFrame, config: SimpleNamespace
) -> pd.DataFrame:
"""Evaluate a dataset of questions, answers, and model answers
Args:
eval_dataset (pd.DataFrame): A dataframe of questions, answers, and model answers
config (SimpleNamespace): A config object
Returns:
pd.DataFrame: A dataframe of questions, answers, model answers, and model scores
"""
eval_prompt = load_eval_prompt()
llm = ChatOpenAI(
model_name=config.eval_model,
@@ -69,7 +92,11 @@ def evaluate_answers(eval_dataset: pd.DataFrame, config: SimpleNamespace):
return eval_dataset


def log_results(eval_dataset: pd.DataFrame):
def log_results(eval_dataset: pd.DataFrame) -> None:
"""Log evaluation results to a Weights & Biases Artifact
Args:
eval_dataset (pd.DataFrame): A dataframe of questions, answers, model answers, and model scores
"""
model_accuracy = len(eval_dataset[eval_dataset["model_score"] == "CORRECT"]) / len(
eval_dataset
)
1 change: 1 addition & 0 deletions llm-apps-course/ingest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Ingest a directory of documentation files into a vector store and store the relevant artifacts in Weights & Biases"""
import argparse
import json
import logging
1 change: 1 addition & 0 deletions llm-apps-course/prompts.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""Prompts for the chatbot and evaluation."""
import json
import logging
import pathlib

0 comments on commit ea1bc36

Please sign in to comment.