Commit

add tracing

snowmead committed Aug 12, 2024
1 parent 1cf04ab commit 53aa9fc
Showing 3 changed files with 33 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -1,7 +1,7 @@
[package]
publish = true
name = "llm-weaver"
version = "0.1.96"
version = "0.1.97"
edition = "2021"
description = "Manage long conversations with any LLM"
readme = "README.md"
39 changes: 31 additions & 8 deletions src/lib.rs
@@ -62,7 +62,7 @@ use num_traits::{
pub use redis::{RedisWrite, ToRedisArgs};
use serde::{Deserialize, Serialize};
use storage::TapestryChest;
-use tracing::{debug, error, instrument};
+use tracing::{debug, error, instrument, trace};

pub mod architecture;
pub mod storage;
@@ -313,6 +313,8 @@ impl<T: Config> TapestryFragment<T> {
))
})?;

+trace!("Pushing message: {:?}, new token count: {}", msg, new_token_count);
+
self.context_tokens = new_token_count;
self.context_messages.push(msg);
Ok(())
@@ -331,7 +333,8 @@ impl<T: Config> TapestryFragment<T> {
.iter()
.fold(PromptModelTokens::<T>::default(), |acc, x| acc.saturating_add(x));

-// Check the total token count before proceeding
+trace!("Extending messages with token sum: {}", sum);
+
let new_token_count = self.context_tokens.checked_add(&sum).ok_or_else(|| {
LoomError::from(WeaveError::BadConfig(
"Number of tokens exceeds max tokens for model".to_string(),
@@ -341,7 +344,7 @@
// Update the token count and messages only if all checks pass
self.context_tokens = new_token_count;
for m in msgs {
-self.context_messages.push(m); // Push the message directly without cloning
+self.context_messages.push(m);
}

Ok(())
@@ -383,7 +386,8 @@ pub trait Loom<T: Config> {
Self::build_context_message(SYSTEM_ROLE.into(), instructions, None);
let instructions_req_msg: PromptModelRequest<T> = instructions_ctx_msg.clone().into();

-// Get current tapestry fragment to work with
+trace!("Fetching current tapestry fragment for ID: {:?}", tapestry_id);
+
let current_tapestry_fragment = T::Chest::get_tapestry_fragment(tapestry_id.clone(), None)
.await?
.unwrap_or_default();
@@ -393,8 +397,7 @@

// Request messages which will be sent as a whole to the LLM
let mut req_msgs = VecPromptMsgsDeque::<T, T::PromptModel>::with_capacity(
-current_tapestry_fragment.context_messages.len() + 1, /* +1 for the instruction
- * message to add */
+current_tapestry_fragment.context_messages.len() + 1,
);

// Add instructions as the first message
@@ -416,6 +419,12 @@
// or we are continuing the current tapestry fragment.
let msgs_tokens = Self::count_tokens_in_messages(msgs.iter());

+trace!(
+"Total tokens after adding new messages: {:?}, maximum allowed: {:?}",
+req_msgs.tokens.saturating_add(&msgs_tokens),
+max_prompt_tokens_limit
+);
+
// Check if the total number of tokens in the tapestry fragment exceeds the maximum number
// of tokens allowed after adding the new messages and the minimum response length.
let does_exceeding_max_token_limit = max_prompt_tokens_limit <=
@@ -425,12 +434,13 @@

let (mut tapestry_fragment_to_persist, was_summary_generated) =
if does_exceeding_max_token_limit {
+trace!("Generating summary as the token limit exceeded");
+
// Summary generation should not exceed the maximum token limit of the prompt model
// since it will be added to the tapestry fragment
let summary_max_tokens: PromptModelTokens<T> =
prompt_llm_config.model.max_context_length() - max_prompt_tokens_limit;

-// Generate summary
let summary = Self::generate_summary(
summary_llm_config,
&current_tapestry_fragment,
@@ -464,11 +474,14 @@
// Tokens available for LLM response which would not exceed maximum token limit
let max_completion_tokens = max_prompt_tokens_limit.saturating_sub(&req_msgs.tokens);

+trace!("Max completion tokens available: {:?}", max_completion_tokens);
+
if max_completion_tokens.is_zero() {
return Err(LoomError::from(WeaveError::MaxCompletionTokensIsZero).into());
}

-// Execute prompt to LLM
+trace!("Prompting LLM with request messages");
+
let response = prompt_llm_config
.model
.prompt(
@@ -521,6 +534,12 @@
tapestry_fragment: &TapestryFragment<T>,
summary_max_tokens: SummaryModelTokens<T>,
) -> Result<String> {
+trace!(
+"Generating summary with max tokens: {:?}, for tapestry fragment: {:?}",
+summary_max_tokens,
+tapestry_fragment
+);
+
let mut summary_generation_prompt = VecPromptMsgsDeque::<T, T::SummaryModel>::new();

summary_generation_prompt.extend(
@@ -546,6 +565,8 @@

let summary_response_content = res.into();

+trace!("Generated summary: {:?}", summary_response_content);
+
Ok(summary_response_content.unwrap_or_default())
}

@@ -555,6 +576,8 @@
content: String,
account_id: Option<String>,
) -> ContextMessage<T> {
+trace!("Building context message for role: {:?}, content: {}", role, content);
+
ContextMessage {
role,
content,
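The trace! calls introduced above only produce output when the consuming application installs a tracing subscriber at TRACE level. The following is a minimal sketch of how a user of llm-weaver might surface these events with the tracing-subscriber crate (with its "env-filter" feature); the crate choice, filter string, and setup are illustrative assumptions and are not part of this commit.

// Minimal sketch (not part of this commit): install a global subscriber so the
// trace! events added here are emitted. Assumes the consuming application depends
// on `tracing` and `tracing-subscriber` with the "env-filter" feature enabled.
use tracing_subscriber::EnvFilter;

fn main() {
    tracing_subscriber::fmt()
        // Illustrative filter: TRACE-level events from llm_weaver, INFO elsewhere.
        .with_env_filter(EnvFilter::new("info,llm_weaver=trace"))
        .init();

    // ... drive the Loom as usual; message pushes, token counts, summary
    // generation, and prompt construction will now be logged at TRACE level.
}

Alternatively, EnvFilter::from_default_env() reads the RUST_LOG environment variable, so the level can be tuned at runtime without code changes.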
