Refactor ggml stuff into a single crate
danforbes committed Apr 29, 2023
1 parent 288df7f commit 0aea8f7
Showing 39 changed files with 1,027 additions and 16,074 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
+[submodule "ggml-rs/ggml"]
+path = ggml-rs/ggml
+url = git@github.com:ggerganov/ggml.git
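
Since ggml is now vendored as a git submodule, a fresh checkout must fetch it before the crate can build, for example:

    git clone --recurse-submodules <repository-url>

or, in an existing checkout:

    git submodule update --init --recursive
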
36 changes: 6 additions & 30 deletions Cargo.lock

Some generated files are not rendered by default.

7 changes: 1 addition & 6 deletions Cargo.toml
@@ -1,17 +1,12 @@
[workspace]
members = [
# Crates
"ggml-sys",
"ggml-format",
"ggml",
"ggml-rs",
"llm-base",
"llama",
"bloom",
"llm",
"llm-cli",

-# Tools
-"generate-ggml-bindings"
]
resolver = "2"

2 changes: 1 addition & 1 deletion bloom/Cargo.toml
@@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
-ggml = { path = "../ggml" }
+ggml-rs = { path = "../ggml-rs" }
llm-base = { path = "../llm-base" }

bytemuck = { workspace = true }
52 changes: 26 additions & 26 deletions bloom/src/lib.rs
@@ -3,7 +3,7 @@ use std::path::Path;
// use ggml_loader::{LoadError, LoadProgress};
use llm_base::{
util, EvaluateOutputRequest, FileType, InferenceParameters, InferenceSession,
-InferenceSessionParameters, LoadError, LoadProgress, Mmap, KnownModel, TokenId, Vocabulary,
+InferenceSessionParameters, KnownModel, LoadError, LoadProgress, Mmap, TokenId, Vocabulary,
};

/// The weights for the BLOOM model. All the mutable state is split into a
@@ -13,16 +13,16 @@ pub struct Bloom {
n_context_tokens: usize,

vocabulary: Vocabulary,
-tok_embeddings: ggml::Tensor,
-norm: ggml::Tensor,
-norm_b: ggml::Tensor,
-output_norm: ggml::Tensor,
-output_norm_b: ggml::Tensor,
-output: ggml::Tensor,
+tok_embeddings: ggml_rs::Tensor,
+norm: ggml_rs::Tensor,
+norm_b: ggml_rs::Tensor,
+output_norm: ggml_rs::Tensor,
+output_norm_b: ggml_rs::Tensor,
+output: ggml_rs::Tensor,
layers: Vec<Layer>,

// Must be kept alive for the model
-_context: ggml::Context,
+_context: ggml_rs::context::Context,
_mmap: Option<Mmap>,
}

@@ -162,12 +162,12 @@ impl KnownModel for Bloom {
// add 10% to account for ggml object overhead
buf_size = (1.1f64 * session.mem_per_token as f64 * n as f64) as usize;
};
-let ctx0 = ggml::Context::init(buf_size, true);
+let ctx0 = ggml_rs::context::Context::init(buf_size, true);

// TODO: REMAKE THIS AFTER CHECKING GGML GRAPH
-let mut gf = ggml::ComputationGraph::new(n_threads);
+let mut gf = ggml_rs::ComputationGraph::new(n_threads);

-let mut embd = ctx0.new_tensor_1d(ggml::Type::I32, n);
+let mut embd = ctx0.new_tensor_1d(ggml_rs::Type::I32, n);
unsafe { embd.write_data(bytemuck::cast_slice(input_tokens)) };

let mut input_layer = ctx0.op_get_rows(&self.tok_embeddings, &embd);
@@ -181,7 +181,7 @@ impl KnownModel for Bloom {

for il in 0..n_layer {
let input_self_attention = input_layer.share();
-let mut current: ggml::Tensor;
+let mut current: ggml_rs::Tensor;

// norm
{
@@ -252,7 +252,7 @@ impl KnownModel for Bloom {
let q = ctx0.op_permute(
&ctx0.op_cpy(
&q_current,
-&ctx0.new_tensor_3d(ggml::Type::F32, n_embd / n_head, n_head, n),
+&ctx0.new_tensor_3d(ggml_rs::Type::F32, n_embd / n_head, n_head, n),
),
0,
2,
@@ -336,7 +336,7 @@ impl KnownModel for Bloom {
// cur = KQV_merged.contiguous().view(n_embd, N)
current = ctx0.op_cpy(
&k_q_v_merged,
-&ctx0.new_tensor_2d(ggml::Type::F32, n_embd, n),
+&ctx0.new_tensor_2d(ggml_rs::Type::F32, n_embd, n),
);

// projection
@@ -499,18 +499,18 @@ impl llm_base::Hyperparameters for Hyperparameters {
}

struct Layer {
-pub attention_norm: ggml::Tensor,
-pub attention_norm_b: ggml::Tensor,
-pub wo: ggml::Tensor,
-pub wo_b: ggml::Tensor,
-pub query_key_value: ggml::Tensor,
-pub query_key_value_b: ggml::Tensor,
+pub attention_norm: ggml_rs::Tensor,
+pub attention_norm_b: ggml_rs::Tensor,
+pub wo: ggml_rs::Tensor,
+pub wo_b: ggml_rs::Tensor,
+pub query_key_value: ggml_rs::Tensor,
+pub query_key_value_b: ggml_rs::Tensor,
// normalization
-pub ffn_norm: ggml::Tensor,
-pub ffn_norm_b: ggml::Tensor,
+pub ffn_norm: ggml_rs::Tensor,
+pub ffn_norm_b: ggml_rs::Tensor,
// ff
-pub w1: ggml::Tensor,
-pub w1_b: ggml::Tensor,
-pub w2: ggml::Tensor,
-pub w2_b: ggml::Tensor,
+pub w1: ggml_rs::Tensor,
+pub w1_b: ggml_rs::Tensor,
+pub w2: ggml_rs::Tensor,
+pub w2_b: ggml_rs::Tensor,
}
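
The _context field is the load-bearing detail in this struct: ggml tensors are views into memory owned by the context, so the model must keep the ggml_rs::context::Context alive for as long as its tensors are used (hence the "Must be kept alive for the model" comment). A minimal sketch of the ownership pattern, with the field and type names taken from this diff and the rest illustrative:

    // Sketch only: each Tensor points into memory owned by the Context,
    // so the Context must outlive every Tensor derived from it.
    struct Weights {
        embeddings: ggml_rs::Tensor,          // view into context-owned memory
        _context: ggml_rs::context::Context,  // held only to keep the tensors valid
    }
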
9 changes: 0 additions & 9 deletions generate-ggml-bindings/Cargo.toml

This file was deleted.

34 changes: 0 additions & 34 deletions generate-ggml-bindings/src/main.rs

This file was deleted.

13 changes: 0 additions & 13 deletions ggml-format/Cargo.toml

This file was deleted.

45 changes: 0 additions & 45 deletions ggml-format/src/lib.rs

This file was deleted.

14 changes: 14 additions & 0 deletions ggml-rs/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "ggml-rs"
+version = { workspace = true }
+edition = "2021"
+
+[build-dependencies]
+bindgen = "0.64.0"
+cc = "^1.0"
+
+[dependencies]
+thiserror = "1.0"
+
+[dev-dependencies]
+rand = "0.8"
25 changes: 18 additions & 7 deletions ggml-sys/build.rs → ggml-rs/build.rs
@@ -1,17 +1,18 @@
-use std::env;
+use std::{env, path::PathBuf};

+// By default, this crate will attempt to compile ggml with the features of your host system if
+// the host and target are the same. If they are not, it will turn off auto-feature-detection,
+// and you will need to manually specify target features through target-features.
fn main() {
-// By default, this crate will attempt to compile ggml with the features of your host system if
-// the host and target are the same. If they are not, it will turn off auto-feature-detection,
-// and you will need to manually specify target features through target-features.

println!("cargo:rerun-if-changed=ggml");

-let ggml_src = ["ggml/ggml.c"];
+let ggml_src = ["ggml/src/ggml.c"];

let mut builder = cc::Build::new();

-let build = builder.files(ggml_src.iter()).include("include");
+let build = builder
+    .files(ggml_src.iter())
+    .include("./ggml/include/ggml");

// This is a very basic heuristic for applying compile flags.
// Feel free to update this to fit your operating system.
@@ -88,6 +89,16 @@ fn main() {
}
build.warnings(false);
build.compile("ggml");

+let header_path = "./ggml/include/ggml/ggml.h";
+bindgen::Builder::default()
+    .header(String::from(header_path))
+    .allowlist_file(header_path)
+    .parse_callbacks(Box::new(bindgen::CargoCallbacks))
+    .generate()
+    .expect("Unable to generate bindings.")
+    .write_to_file(PathBuf::from(env::var("OUT_DIR").unwrap()).join("bindings.rs"))
+    .expect("Unable to write generated bindings to file.");
}

fn get_supported_target_features() -> std::collections::HashSet<String> {
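
Two notes on the new build flow. First, the generated bindings.rs now lands in OUT_DIR rather than being checked in, which is what makes the generate-ggml-bindings tool and the pre-generated ggml-sys crate redundant. The conventional way to consume such bindings, and presumably what ggml-rs does, is an include! from the crate source; a sketch, with the module layout being an assumption:

    // ggml-rs/src/lib.rs (hypothetical layout)
    #[allow(non_upper_case_globals, non_camel_case_types, non_snake_case)]
    mod sys {
        include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
    }

Second, per the comment at the top of build.rs, cross-compilation disables feature auto-detection, so target features must be supplied by hand, for example:

    RUSTFLAGS="-C target-feature=+avx2,+fma" cargo build --release
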
1 change: 1 addition & 0 deletions ggml-rs/ggml
Submodule ggml added at 8cc067