Add back BLOOM
Co-authored-by: @hhamud <53880692+hhamud@users.noreply.github.com>
danforbes and hhamud committed Apr 30, 2023
1 parent 608090b commit 78db42c
Showing 7 changed files with 614 additions and 4 deletions.
18 changes: 18 additions & 0 deletions .vscode/launch.json
@@ -4,6 +4,24 @@
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "type": "lldb",
            "request": "launch",
            "name": "Debug example 'bloom_inference'",
            "cargo": {
                "args": [
                    "build",
                    "--example=bloom_inference",
                    "--package=bloom"
                ],
                "filter": {
                    "name": "bloom_inference",
                    "kind": "example"
                }
            },
            "args": ["${env:HOME}/.ggml-models/bloom-7b.bin"],
            "cwd": "${workspaceFolder}"
        },
        {
            "type": "lldb",
            "request": "launch",
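For reference, the configuration above corresponds to roughly `cargo run --example bloom_inference --package bloom -- $HOME/.ggml-models/bloom-7b.bin` when run outside the debugger, substituting whichever path actually holds the GGML-format BLOOM weights.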
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion Cargo.toml
@@ -1,11 +1,12 @@
[workspace]
members = [
    # Crates
    "bloom",
    "ggml",
    "llm-base",
    "gpt2",
    "llama",
    "llm",
    "llm-base",
    "llm-cli",
]
resolver = "2"
14 changes: 11 additions & 3 deletions README.md
@@ -21,9 +21,9 @@ quantized versions of the model.

Make sure you have Rust 1.65.0 or above and a C toolchain[^1] set up.

-`llm-base`, `gpt2`, and `llama` are Rust libraries, while `llm-cli` is a CLI
-applications that wraps `gpt2` and `llama` and offer basic inference
-capabilities.
+`llm-base` and the model crates (e.g. `bloom`, `gpt2`, `llama`) are Rust
+libraries, while `llm-cli` is a CLI application that wraps the models and
+offers basic inference capabilities.

The following instructions explain how to build CLI applications.

@@ -103,6 +103,14 @@ cargo run -p llama-cli quantize /path/to/your/models/7B/ggml-model-f16.bin /path
> The [llama.cpp repository](https://github.com/ggerganov/llama.cpp) has
> additional information on how to obtain and run specific models.

### BLOOM

The open-source [BLOOM](https://bigscience.huggingface.co/blog/bloom) model is
also supported.
[More information](https://huggingface.co/docs/transformers/model_doc/bloom)
about BLOOM is available on HuggingFace, as are some
[quantized models](https://huggingface.co/models?search=bloom%20ggml).

### GPT2

OpenAI's [GPT-2](https://jalammar.github.io/illustrated-gpt2/) architecture is
15 changes: 15 additions & 0 deletions bloom/Cargo.toml
@@ -0,0 +1,15 @@
[package]
name = "bloom"
version = { workspace = true }
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ggml = { path = "../ggml" }
llm-base = { path = "../llm-base" }

bytemuck = { workspace = true }

[dev-dependencies]
rand = { workspace = true }
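If another crate in the workspace wants to use the new model, its manifest would presumably point at this package with a path dependency along the lines of `bloom = { path = "../bloom" }`; that line is an illustration, not part of the commit.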
42 changes: 42 additions & 0 deletions bloom/examples/bloom_inference.rs
@@ -0,0 +1,42 @@
use std::{convert::Infallible, env::args, io::Write};

use llm_base::{load_progress_callback, model::KnownModel};

extern crate bloom;

fn main() {
    // First CLI argument: path to the GGML model file; optional second argument: a custom prompt.
    let args: Vec<String> = args().collect();
    let loc = &args[1];
    let prompt = match &args.len() {
        3 => &args[2],
        _ => "Rust is a cool programming language because ",
    };

    println!(" >>> Loading model from {loc}...");
    let now = std::time::Instant::now();

    let bloom = bloom::Bloom::load(loc, true, 512, load_progress_callback)
        .unwrap_or_else(|e| panic!("Error loading model from {loc}: {e}"));

    println!(" >>> Model loaded in {} ms.", now.elapsed().as_millis());

    // Start an inference session and stream each generated token to stdout as it arrives.
    let mut session = bloom.start_session(Default::default());
    let res = session.inference_with_prompt::<Infallible>(
        &bloom,
        &Default::default(),
        prompt,
        None,
        &mut rand::thread_rng(),
        |t| {
            print!("{t}");
            std::io::stdout().flush().unwrap();

            Ok(())
        },
    );

    match res {
        Ok(result) => println!("\n\nInference stats:\n{result}"),
        Err(err) => println!("\n{err}"),
    }
}
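The example above streams tokens straight to stdout. A minimal sketch, not taken from the commit, of collecting the generated text into a `String` with the same calls; it assumes the token callback receives string slices (as the `print!("{t}")` above suggests) and reuses the `true` flag and `512` context-size argument verbatim, since the commit does not document them:

use std::convert::Infallible;

use llm_base::{load_progress_callback, model::KnownModel};

// Sketch only: returns the generated text instead of printing it token by token.
fn generate_to_string(model_path: &str, prompt: &str) -> String {
    // Load the model with the same arguments the example uses.
    let bloom = bloom::Bloom::load(model_path, true, 512, load_progress_callback)
        .unwrap_or_else(|e| panic!("Error loading model from {model_path}: {e}"));

    let mut session = bloom.start_session(Default::default());
    let mut output = String::new();

    // Append each generated piece to `output` rather than writing it to stdout.
    let _ = session.inference_with_prompt::<Infallible>(
        &bloom,
        &Default::default(),
        prompt,
        None,
        &mut rand::thread_rng(),
        |t| {
            output.push_str(t);
            Ok(())
        },
    );

    output
}

Nothing here goes beyond the API surface already exercised by `bloom_inference.rs`: `Bloom::load`, `start_session`, and `inference_with_prompt` with the same argument shapes.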