GPT-J model implementation
danforbes committed May 2, 2023
1 parent d33ed84 commit 0b87983
Showing 10 changed files with 623 additions and 5 deletions.
20 changes: 19 additions & 1 deletion .vscode/launch.json
@@ -32,6 +32,24 @@
"args": ["${env:HOME}/.ggml-models/cerebras-gpt-13b.bin"],
"cwd": "${workspaceFolder}"
},
+{
+"type": "lldb",
+"request": "launch",
+"name": "Debug GPT-J Inference",
+"cargo": {
+"args": [
+"build",
+"--example=gptj-inference",
+"--package=llm-gptj"
+],
+"filter": {
+"name": "gptj-inference",
+"kind": "example"
+}
+},
+"args": ["${env:HOME}/.ggml-models/gpt-j-6b.bin"],
+"cwd": "${workspaceFolder}"
+},
{
"type": "lldb",
"request": "launch",
@@ -57,7 +75,7 @@
"kind": "example"
}
},
"args": ["${env:HOME}/.ggml-models/stablelm-base-alpha-3b-f16.bin"],
"args": ["${env:HOME}/.ggml-models/stablelm-base-alpha-3b.bin"],
"cwd": "${workspaceFolder}"
}
]
11 changes: 11 additions & 0 deletions Cargo.lock


6 changes: 6 additions & 0 deletions binaries/llm-cli/src/cli_args.rs
@@ -26,6 +26,12 @@ pub enum Args {
#[command(subcommand)]
args: BaseArgs,
},
+/// Use a GPT-J model
+#[clap(id = "gptj")]
+GptJ {
+#[command(subcommand)]
+args: BaseArgs,
+},
/// Use a GPT-NeoX model
#[clap(id = "neox")]
NeoX {
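
For context, the new variant slots straight into the CLI's clap-derive setup. Below is a minimal, runnable sketch of the pattern, with a hypothetical one-variant BaseArgs standing in for the repository's real definition, which lies outside this diff:

use clap::{Parser, Subcommand};

/// Hypothetical stand-in for the repository's `BaseArgs` (defined elsewhere in llm-cli).
#[derive(Subcommand, Debug)]
enum BaseArgs {
    /// Run inference.
    Infer,
}

#[derive(Parser, Debug)]
enum Args {
    /// Use a GPT-J model
    #[clap(id = "gptj")]
    GptJ {
        #[command(subcommand)]
        args: BaseArgs,
    },
}

fn main() {
    // `<binary> gptj infer` parses into Args::GptJ { args: BaseArgs::Infer }.
    println!("{:?}", Args::parse());
}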
1 change: 1 addition & 0 deletions binaries/llm-cli/src/main.rs
@@ -25,6 +25,7 @@ fn main() -> Result<()> {
Args::Llama { args } => handle_args::<llm::models::Llama>(args),
Args::Bloom { args } => handle_args::<llm::models::Bloom>(args),
Args::Gpt2 { args } => handle_args::<llm::models::Gpt2>(args),
+Args::GptJ { args } => handle_args::<llm::models::GptJ>(args),
Args::NeoX { args } => handle_args::<llm::models::NeoX>(args),
}
}
4 changes: 3 additions & 1 deletion crates/llm/Cargo.toml
@@ -7,12 +7,14 @@ edition = "2021"
llm-base = { path = "../llm-base" }
llm-llama = { path = "../models/llama", features = ["convert"], optional = true }
llm-gpt2 = { path = "../models/gpt2", optional = true }
+llm-gptj = { path = "../models/gptj", optional = true }
llm-bloom = { path = "../models/bloom", optional = true }
llm-neox = { path = "../models/neox", optional = true }

[features]
default = ["llama", "gpt2", "bloom", "neox"]
default = ["llama", "gpt2", "gptj", "bloom", "neox"]
llama = ["dep:llm-llama"]
gpt2 = ["dep:llm-gpt2"]
+gptj = ["dep:llm-gptj"]
bloom = ["dep:llm-bloom"]
neox = ["dep:llm-neox"]
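
Because each backend now sits behind its own Cargo feature, downstream users can trim the build to just this model, e.g. `llm = { path = "…", default-features = false, features = ["gptj"] }`; the `default` list above merely enables every backend at once.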
2 changes: 2 additions & 0 deletions crates/llm/src/lib.rs
@@ -20,6 +20,8 @@ pub mod models {
pub use llm_bloom::{self as bloom, Bloom};
#[cfg(feature = "gpt2")]
pub use llm_gpt2::{self as gpt2, Gpt2};
#[cfg(feature = "gptj")]
pub use llm_gptj::{self as gptj, GptJ};
#[cfg(feature = "llama")]
pub use llm_llama::{self as llama, Llama};
#[cfg(feature = "neox")]
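
With the re-export in place, downstream code can reach the model through the llm facade. A minimal sketch, mirroring the load call from the gptj-inference example further down — the path is a placeholder, and the interpretation of the `true` and `512` arguments (memory-mapping preference and context length) is an assumption based on the llm-base API of this era:

use std::path::Path;

use llm_base::{load_progress_callback_stdout, KnownModel};

fn main() {
    // Placeholder path: point this at a GGML-format GPT-J weights file.
    let model = llm::models::GptJ::load(
        Path::new("/path/to/gpt-j-6b.bin"),
        true, // assumed: prefer memory-mapped loading (same flag the example passes)
        512,  // assumed: context token budget (same value the example passes)
        load_progress_callback_stdout,
    )
    .unwrap_or_else(|e| panic!("Error loading model: {e}"));

    // Sessions are then started exactly as with the other backends.
    let _session = model.start_session(Default::default());
}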
15 changes: 15 additions & 0 deletions crates/models/gptj/Cargo.toml
@@ -0,0 +1,15 @@
[package]
name = "llm-gptj"
version = { workspace = true }
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
llm-base = { path = "../../llm-base" }
ggml = { path = "../../ggml" }

bytemuck = { workspace = true }

[dev-dependencies]
rand = { workspace = true }
40 changes: 40 additions & 0 deletions crates/models/gptj/examples/gptj-inference.rs
@@ -0,0 +1,40 @@
use std::{convert::Infallible, env::args, io::Write, path::Path};

use llm_base::{load_progress_callback_stdout, KnownModel};

fn main() {
    let args: Vec<String> = args().collect();

    // First CLI argument: path to a GGML-format GPT-J model file.
    let loc = args
        .get(1)
        .expect("usage: gptj-inference <model-path> [prompt]");

    // Second, optional CLI argument: the prompt; otherwise use a default.
    let prompt = match args.len() {
        3 => args[2].as_str(),
        _ => "Rust is a cool programming language because ",
    };

    println!(" >>> Loading model from {loc}...");
    let now = std::time::Instant::now();

    let gptj = llm_gptj::GptJ::load(Path::new(loc), true, 512, load_progress_callback_stdout)
        .unwrap_or_else(|e| panic!("Error loading model from {loc}: {e}"));

    println!(" >>> Model loaded in {} ms.", now.elapsed().as_millis());

    // Run inference with default session and inference parameters, streaming
    // each generated token to stdout as it is produced.
    let mut session = gptj.start_session(Default::default());
    let res = session.inference_with_prompt::<Infallible>(
        &gptj,
        &Default::default(),
        &Default::default(),
        prompt,
        &mut rand::thread_rng(),
        |t| {
            print!("{t}");
            std::io::stdout().flush().unwrap();

            Ok(())
        },
    );

    match res {
        Ok(result) => println!("\n\nInference stats:\n{result}"),
        Err(err) => println!("\n{err}"),
    }
}
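
Assuming the workspace layout above, the example runs with `cargo run --example gptj-inference --package llm-gptj -- <path-to-model> [prompt]`; the prompt argument is optional and falls back to the built-in default, matching the launch configuration added to .vscode/launch.json.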
