Skip to content

Commit 3bada65

Browse files
committed
feat: add LlamaModel::load_from_file_async
1 parent ccb794d commit 3bada65

File tree

3 files changed

+19
-124
lines changed

3 files changed

+19
-124
lines changed

Cargo.lock

Lines changed: 8 additions & 121 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/llama_cpp/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,6 @@ derive_more = "0.99.17"
1515
flume = "0.11.0"
1616
llama_cpp_sys = { version = "^0.2.0", path = "../llama_cpp_sys" }
1717
num_cpus = "1.16.0"
18-
thiserror = "1.0.49"
19-
tokio = { version = "1.33.0", features = ["sync"] }
18+
thiserror = "1.0.50"
19+
tokio = { version = "1.33.0", features = ["sync", "rt"] }
2020
tracing = "0.1.39"

crates/llama_cpp/src/lib.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,15 @@ impl LlamaModel {
291291
}
292292
}
293293

294+
/// Loads a LLaMA model from a compatible GGUF (`.gguf`) file asynchronously.
295+
///
296+
/// This is a thin `tokio` wrapper over [`LlamaModel::load_from_file`].
297+
pub async fn load_from_file_async(file_path: impl AsRef<Path>) -> Result<Self, LlamaLoadError> {
298+
let path = file_path.as_ref().to_owned();
299+
300+
tokio::task::spawn_blocking(move || Self::load_from_file(path)).await.unwrap()
301+
}
302+
294303
/// Converts `content` into a vector of tokens that are valid input for this model.
295304
///
296305
/// This temporarily allocates at the amount of memory consumed by `content`, but shrinks that
@@ -690,7 +699,6 @@ mod detail {
690699

691700
use std::ffi::{c_char, c_void, CStr};
692701
use std::ptr::slice_from_raw_parts;
693-
use tokio::sync::OwnedSemaphorePermit;
694702

695703
use tracing::{error, info, trace, warn};
696704

0 commit comments

Comments
 (0)