Add user interface to ch06 and ch07 (rasbt#366)

* Add user interface to ch06 and ch07 * pep8 * fix url
ahnjj · Sep 22, 2024 · 76e9a9e · 76e9a9e
1 parent 6f6dfb6
commit 76e9a9e
Show file tree

Hide file tree

Showing 16 changed files with 1,022 additions and 58 deletions.
diff --git a/.gitignore b/.gitignore
@@ -90,9 +90,9 @@ ch07/04_preference-tuning-with-dpo/gpt2-medium355M-sft.pth
 ch07/04_preference-tuning-with-dpo/loss-plot.pdf
 
 # Other
-ch05/06_user_interface/chainlit.md
-ch05/06_user_interface/.chainlit
-ch05/06_user_interface/.files
+ch0?/0?_user_interface/.chainlit/
+ch0?/0?_user_interface/chainlit.md
+ch0?/0?_user_interface/.files
 
 # Temporary OS-related files
 .DS_Store

diff --git a/README.md b/README.md
@@ -78,16 +78,15 @@ You can alternatively view this and other files on GitHub at [https://github.com
 | Appendix D: Adding Bells and Whistles to the Training Loop | - [appendix-D.ipynb](appendix-D/01_main-chapter-code/appendix-D.ipynb)                                                          | [./appendix-D](./appendix-D)  |
 | Appendix E: Parameter-efficient Finetuning with LoRA       | - [appendix-E.ipynb](appendix-E/01_main-chapter-code/appendix-E.ipynb)                                                          | [./appendix-E](./appendix-E) |
 
-
 <br>
-&nbsp
+&nbsp;
 
 The mental model below summarizes the contents covered in this book.
 
 <img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/mental-model.jpg" width="650px">
 
 <br>
-&nbsp
+&nbsp;
 
 ## Hardware Requirements
 
@@ -120,16 +119,18 @@ Several folders contain optional materials as a bonus for interested readers:
 - **Chapter 6:**
   - [Additional experiments finetuning different layers and using larger models](ch06/02_bonus_additional-experiments)
   - [Finetuning different models on 50k IMDB movie review dataset](ch06/03_bonus_imdb-classification)
+  - [Building a User Interface to Interact With the GPT-based Spam Classifier](ch06/04_user_interface)
 - **Chapter 7:**
   - [Dataset Utilities for Finding Near Duplicates and Creating Passive Voice Entries](ch07/02_dataset-utilities)
   - [Evaluating Instruction Responses Using the OpenAI API and Ollama](ch07/03_model-evaluation)
   - [Generating a Dataset for Instruction Finetuning](ch07/05_dataset-generation/llama3-ollama.ipynb)
   - [Improving a Dataset for Instruction Finetuning](ch07/05_dataset-generation/reflection-gpt4.ipynb)
   - [Generating a Preference Dataset with Llama 3.1 70B and Ollama](ch07/04_preference-tuning-with-dpo/create-preference-data-ollama.ipynb)
   - [Direct Preference Optimization (DPO) for LLM Alignment](ch07/04_preference-tuning-with-dpo/dpo-from-scratch.ipynb)
+  - [Building a User Interface to Interact With the Instruction Finetuned GPT Model](ch07/06_user_interface)
 
 <br>
-&nbsp
+&nbsp;
 
 ## Questions, Feedback, and Contributing to This Repository
 

diff --git a/ch05/06_user_interface/README.md b/ch05/06_user_interface/README.md
@@ -29,7 +29,7 @@ pip install chainlit
 This folder contains 2 files:
 
 1. [`app_orig.py`](app_orig.py): This file loads and uses the original GPT-2 weights from OpenAI. 
-2. [`app_own.py`](app_own.py): This file loads and uses the GPT-2 weights we generated in chapter 5. This requires that you execute the [`../01_main-chapter-code/ch05.ipynb`] file first.
+2. [`app_own.py`](app_own.py): This file loads and uses the GPT-2 weights we generated in chapter 5. This requires that you execute the [`../01_main-chapter-code/ch05.ipynb`](../01_main-chapter-code/ch05.ipynb) file first.
 
 (Open and inspect these files to learn more.)
 

diff --git a/ch05/06_user_interface/app_orig.py b/ch05/06_user_interface/app_orig.py
@@ -21,7 +21,7 @@
 
 def get_model_and_tokenizer():
     """
-    Code to loads a GPT-2 model with pretrained weights from OpenAI.
+    Code to load a GPT-2 model with pretrained weights from OpenAI.
     The code is similar to chapter 5.
     The model will be downloaded automatically if it doesn't exist in the current folder, yet.
     """

diff --git a/ch05/06_user_interface/app_own.py b/ch05/06_user_interface/app_own.py
@@ -22,7 +22,7 @@
 
 def get_model_and_tokenizer():
     """
-    Code to loads a GPT-2 model with pretrained weights generated in chapter 5.
+    Code to load a GPT-2 model with pretrained weights generated in chapter 5.
     This requires that you run the code in chapter 5 first, which generates the necessary model.pth file.
     """
 

diff --git a/ch05/06_user_interface/previous_chapters.py b/ch05/06_user_interface/previous_chapters.py
@@ -12,55 +12,11 @@
 
 import numpy as np
 import tensorflow as tf
-import tiktoken
 import torch
 import torch.nn as nn
-from torch.utils.data import Dataset, DataLoader
 from tqdm import tqdm
 
 
-#####################################
-# Chapter 2
-#####################################
-
-
-class GPTDatasetV1(Dataset):
-    def __init__(self, txt, tokenizer, max_length, stride):
-        self.input_ids = []
-        self.target_ids = []
-
-        # Tokenize the entire text
-        token_ids = tokenizer.encode(txt, allowed_special={"<|endoftext|>"})
-
-        # Use a sliding window to chunk the book into overlapping sequences of max_length
-        for i in range(0, len(token_ids) - max_length, stride):
-            input_chunk = token_ids[i:i + max_length]
-            target_chunk = token_ids[i + 1: i + max_length + 1]
-            self.input_ids.append(torch.tensor(input_chunk))
-            self.target_ids.append(torch.tensor(target_chunk))
-
-    def __len__(self):
-        return len(self.input_ids)
-
-    def __getitem__(self, idx):
-        return self.input_ids[idx], self.target_ids[idx]
-
-
-def create_dataloader_v1(txt, batch_size=4, max_length=256,
-                         stride=128, shuffle=True, drop_last=True, num_workers=0):
-    # Initialize the tokenizer
-    tokenizer = tiktoken.get_encoding("gpt2")
-
-    # Create dataset
-    dataset = GPTDatasetV1(txt, tokenizer, max_length, stride)
-
-    # Create dataloader
-    dataloader = DataLoader(
-        dataset, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, num_workers=num_workers)
-
-    return dataloader
-
-
 #####################################
 # Chapter 3
 #####################################

diff --git a/ch06/04_user_interface/README.md b/ch06/04_user_interface/README.md
@@ -0,0 +1,39 @@
+# Building a User Interface to Interact With the GPT-based Spam Classifier
+
+
+
+This bonus folder contains code for running a ChatGPT-like user interface to interact with the finetuned GPT-based spam classifier from chapter 6, as shown below.
+
+
+
+![Chainlit UI example](https://sebastianraschka.com/images/LLMs-from-scratch-images/bonus/chainlit/chainlit-spam.webp)
+
+
+
+To implement this user interface, we use the open-source [Chainlit Python package](https://github.com/Chainlit/chainlit).
+
+&nbsp;
+## Step 1: Install dependencies
+
+First, we install the `chainlit` package via
+
+```bash
+pip install chainlit
+```
+
+(Alternatively, execute `pip install -r requirements-extra.txt`.)
+
+&nbsp;
+## Step 2: Run `app` code
+
+The [`app.py`](app.py) file contains the UI code. Open and inspect this file to learn more.
+
+This file loads and uses the GPT-2 classifier weights we generated in chapter 6. This requires that you execute the [`../01_main-chapter-code/ch06.ipynb`](../01_main-chapter-code/ch06.ipynb) file first.
+
+Execute the following command from the terminal to start the UI server:
+
+```bash
+chainlit run app.py
+```
+
+Running the command above should open a new browser tab where you can interact with the model. If the browser tab does not open automatically, inspect the terminal output and copy the local address into your browser address bar (usually, the address is `http://localhost:8000`).
diff --git a/ch06/04_user_interface/app.py b/ch06/04_user_interface/app.py
@@ -0,0 +1,80 @@
+# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
+# Source for "Build a Large Language Model From Scratch"
+#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
+# Code: https://github.com/rasbt/LLMs-from-scratch
+
+from pathlib import Path
+import sys
+
+import tiktoken
+import torch
+import chainlit
+
+from previous_chapters import (
+    classify_review,
+    GPTModel
+)
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+def get_model_and_tokenizer():
+    """
+    Code to load finetuned GPT-2 model generated in chapter 6.
+    This requires that you run the code in chapter 6 first, which generates the necessary review_classifier.pth file.
+    """
+
+    GPT_CONFIG_124M = {
+        "vocab_size": 50257,     # Vocabulary size
+        "context_length": 1024,  # Context length
+        "emb_dim": 768,          # Embedding dimension
+        "n_heads": 12,           # Number of attention heads
+        "n_layers": 12,          # Number of layers
+        "drop_rate": 0.1,        # Dropout rate
+        "qkv_bias": True         # Query-key-value bias
+    }
+
+    tokenizer = tiktoken.get_encoding("gpt2")
+
+    model_path = Path("..") / "01_main-chapter-code" / "review_classifier.pth"
+    if not model_path.exists():
+        print(
+            f"Could not find the {model_path} file. Please run the chapter 6 code"
+            " (ch06.ipynb) to generate the review_classifier.pth file."
+        )
+        sys.exit()
+
+    # Instantiate model
+    model = GPTModel(GPT_CONFIG_124M)
+
+    # Convert model to classifier as in section 6.5 in ch06.ipynb
+    num_classes = 2
+    model.out_head = torch.nn.Linear(in_features=GPT_CONFIG_124M["emb_dim"], out_features=num_classes)
+
+    # Then load model weights
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
+    model.load_state_dict(checkpoint)
+    model.to(device)
+    model.eval()
+
+    return tokenizer, model
+
+
+# Obtain the necessary tokenizer and model files for the chainlit function below
+tokenizer, model = get_model_and_tokenizer()
+
+
+@chainlit.on_message
+async def main(message: chainlit.Message):
+    """
+    The main Chainlit function.
+    """
+    user_input = message.content
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    label = classify_review(user_input, model, tokenizer, device, max_length=120)
+
+    await chainlit.Message(
+        content=f"{label}",  # This returns the model response to the interface
+    ).send()