feat: implement a templated endpoint for visibility into chat requests #2333

Merged
Narsil merged 4 commits into main from add-templated-route on Aug 6, 2024

Conversation

drbh (Collaborator) commented Jul 30, 2024

This PR adds a new POST endpoint (exposed as /chat_tokenize in the example below) that lets users send a ChatRequest and get back the templated (and tokenized) request. This can help with prompt engineering and with debugging/transparency.

curl localhost:3000/chat_tokenize -s \
    -H 'Content-Type: application/json'  \
    -X POST \
    -d '{
  "model": "meta-llama/Meta-Llama-3.1-405B-Instruct",
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": "What is deep learning?"
    }
  ],
  "stream": false,
  "max_tokens": 20, "seed": 42, "tools": null
}' | jq
{
  "tokenize_response": [
    {
      "id": 128000,
      "text": "",
      "start": 0,
      "stop": 0
    },
    {
      "id": 128000,
      "text": "<|begin_of_text|>",
      "start": 0,
      "stop": 17
    },
    {
      "id": 128006,
      "text": "<|start_header_id|>",
      "start": 17,
      "stop": 36
    },
    {
      "id": 9125,
      "text": "",
      "start": 36,
      "stop": 36
    },
    {
      "id": 128007,
      "text": "<|end_header_id|>",
      "start": 42,
      "stop": 59
    },
    {
      "id": 271,
      "text": "",
      "start": 59,
      "stop": 59
    },
    {
      "id": 38766,
      "text": "Cut",
      "start": 61,
      "stop": 64
    },
    {
      "id": 1303,
      "text": "ting",
      "start": 64,
      "stop": 68
    },
    {
      "id": 33025,
      "text": "",
      "start": 68,
      "stop": 68
    },
    {
      "id": 2696,
      "text": "",
      "start": 78,
      "stop": 78
    },
    {
      "id": 25,
      "text": "",
      "start": 83,
      "stop": 83
    },
    {
      "id": 6790,
      "text": "",
      "start": 84,
      "stop": 84
    },
    {
      "id": 220,
      "text": "",
      "start": 93,
      "stop": 93
    },
    {
      "id": 2366,
      "text": "",
      "start": 94,
      "stop": 94
    },
    {
      "id": 18,
      "text": "",
      "start": 97,
      "stop": 97
    },
    {
      "id": 198,
      "text": "",
      "start": 98,
      "stop": 98
    },
    {
      "id": 15724,
      "text": "",
      "start": 99,
      "stop": 99
    },
    {
      "id": 2696,
      "text": "",
      "start": 104,
      "stop": 104
    },
    {
      "id": 25,
      "text": "",
      "start": 109,
      "stop": 109
    },
    {
      "id": 220,
      "text": "",
      "start": 110,
      "stop": 110
    },
    {
      "id": 1627,
      "text": "",
      "start": 111,
      "stop": 111
    },
    {
      "id": 10263,
      "text": "",
      "start": 113,
      "stop": 113
    },
    {
      "id": 220,
      "text": "",
      "start": 117,
      "stop": 117
    },
    {
      "id": 2366,
      "text": "",
      "start": 118,
      "stop": 118
    },
    {
      "id": 19,
      "text": "",
      "start": 121,
      "stop": 121
    },
    {
      "id": 271,
      "text": "",
      "start": 122,
      "stop": 122
    },
    {
      "id": 2675,
      "text": "",
      "start": 124,
      "stop": 124
    },
    {
      "id": 527,
      "text": "",
      "start": 127,
      "stop": 127
    },
    {
      "id": 264,
      "text": "",
      "start": 131,
      "stop": 131
    },
    {
      "id": 11190,
      "text": "",
      "start": 133,
      "stop": 133
    },
    {
      "id": 18328,
      "text": "",
      "start": 141,
      "stop": 141
    },
    {
      "id": 13,
      "text": "",
      "start": 151,
      "stop": 151
    },
    {
      "id": 128009,
      "text": "<|eot_id|>",
      "start": 152,
      "stop": 162
    },
    {
      "id": 128006,
      "text": "<|start_header_id|>",
      "start": 162,
      "stop": 181
    },
    {
      "id": 882,
      "text": "",
      "start": 181,
      "stop": 181
    },
    {
      "id": 128007,
      "text": "<|end_header_id|>",
      "start": 185,
      "stop": 202
    },
    {
      "id": 271,
      "text": "",
      "start": 202,
      "stop": 202
    },
    {
      "id": 3923,
      "text": "",
      "start": 204,
      "stop": 204
    },
    {
      "id": 374,
      "text": "",
      "start": 208,
      "stop": 208
    },
    {
      "id": 5655,
      "text": "",
      "start": 211,
      "stop": 211
    },
    {
      "id": 6975,
      "text": "",
      "start": 216,
      "stop": 216
    },
    {
      "id": 30,
      "text": "",
      "start": 225,
      "stop": 225
    },
    {
      "id": 128009,
      "text": "<|eot_id|>",
      "start": 226,
      "stop": 236
    },
    {
      "id": 128006,
      "text": "<|start_header_id|>",
      "start": 236,
      "stop": 255
    },
    {
      "id": 78191,
      "text": "",
      "start": 255,
      "stop": 255
    },
    {
      "id": 128007,
      "text": "<|end_header_id|>",
      "start": 264,
      "stop": 281
    },
    {
      "id": 271,
      "text": "",
      "start": 281,
      "stop": 281
    }
  ],
  "templated_text": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is deep learning?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
}
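
The response pairs each token id with its text and start/stop offsets, alongside the fully templated prompt. Roughly, the JSON above corresponds to a shape like the following sketch (struct names are inferred from the output and may not match the router's actual types):

use serde::Serialize;

// Field names mirror the JSON above; the struct names are an assumption and
// may not match the types used in the router.
#[derive(Serialize)]
struct SimpleToken {
    id: u32,
    text: String,
    start: usize,
    stop: usize,
}

#[derive(Serialize)]
struct ChatTokenizeResponse {
    tokenize_response: Vec<SimpleToken>,
    templated_text: String,
}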

Comment on lines 146 to 201
if response_format.is_some() && tools.is_some() {
    return Err((
        StatusCode::UNPROCESSABLE_ENTITY,
        Json(ErrorResponse {
            error: "Grammar and tools are mutually exclusive".to_string(),
            error_type: "validation".to_string(),
        }),
    ));
}

let tool_grammar = match ToolGrammar::apply(tools, tool_choice) {
    Ok(grammar) => grammar,
    Err(err) => {
        metrics::counter!("tgi_request_failure", "err" => "validation").increment(1);
        tracing::error!("{}", err);
        return Err((
            StatusCode::UNPROCESSABLE_ENTITY,
            Json(ErrorResponse {
                error: err.to_string(),
                error_type: err.error_type().to_string(),
            }),
        ));
    }
};

let tools_grammar_prompt = tool_grammar.as_ref().map(|t| {
    (
        GrammarType::Json(serde_json::json!(t)),
        tool_prompt.unwrap_or_default(),
    )
});

let (tools_grammar_prompt, _grammar) = response_format
    .map(|rf| (None, Some(rf)))
    .unwrap_or_else(|| {
        (
            tools_grammar_prompt.clone(),
            tools_grammar_prompt.map(|(g, _)| g),
        )
    });

let inputs = match infer.apply_chat_template(messages, tools_grammar_prompt) {
    Ok(inputs) => inputs,
    Err(err) => {
        metrics::counter!("tgi_request_failure", "err" => "validation").increment(1);
        tracing::error!("{}", err);
        return Err((
            StatusCode::UNPROCESSABLE_ENTITY,
            Json(ErrorResponse {
                error: err.to_string(),
                error_type: err.error_type().to_string(),
            }),
        ));
    }
};
Collaborator

This code is already used in some validation; we should probably extract it into a function (so this doesn't drift from the original implementation later).

Also, this code is very hard for me to read (lots of Options everywhere). I'm sure we can simplify it a lot.

Collaborator Author

Totally agree! In the latest commit I've moved this code into a function called prepare_chat_input, which is reusable and much easier to read.
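
For reference, such a helper might look roughly like the sketch below. The parameter list, the return type, and the assumption that both fallible calls return the router's InferError are illustrative guesses; the committed prepare_chat_input may be shaped differently.

// Sketch only: consolidates the snippet above into one reusable function.
// Types (Infer, Tool, ToolChoice, Message, GrammarType, ErrorResponse, InferError)
// come from the surrounding router code; the exact signature is an assumption.
fn prepare_chat_input(
    infer: &Infer,
    response_format: Option<GrammarType>,
    tools: Option<Vec<Tool>>,
    tool_choice: ToolChoice,
    tool_prompt: Option<String>,
    messages: Vec<Message>,
) -> Result<(String, Option<GrammarType>), (StatusCode, Json<ErrorResponse>)> {
    // Shared error mapping, identical to the error arms in the snippet above.
    let validation_error = |err: InferError| {
        metrics::counter!("tgi_request_failure", "err" => "validation").increment(1);
        tracing::error!("{}", err);
        (
            StatusCode::UNPROCESSABLE_ENTITY,
            Json(ErrorResponse {
                error: err.to_string(),
                error_type: err.error_type().to_string(),
            }),
        )
    };

    // Grammar (response_format) and tools are mutually exclusive.
    if response_format.is_some() && tools.is_some() {
        return Err((
            StatusCode::UNPROCESSABLE_ENTITY,
            Json(ErrorResponse {
                error: "Grammar and tools are mutually exclusive".to_string(),
                error_type: "validation".to_string(),
            }),
        ));
    }

    // A response_format takes priority: template without any tool prompt.
    if let Some(format) = response_format {
        let inputs = infer
            .apply_chat_template(messages, None)
            .map_err(validation_error)?;
        return Ok((inputs, Some(format)));
    }

    // Otherwise derive a JSON grammar (plus the optional tool prompt) from the tools.
    let tool_grammar = ToolGrammar::apply(tools, tool_choice).map_err(validation_error)?;
    let grammar = tool_grammar
        .as_ref()
        .map(|t| GrammarType::Json(serde_json::json!(t)));
    let tools_grammar_prompt = tool_grammar.map(|t| {
        (
            GrammarType::Json(serde_json::json!(t)),
            tool_prompt.unwrap_or_default(),
        )
    });

    let inputs = infer
        .apply_chat_template(messages, tools_grammar_prompt)
        .map_err(validation_error)?;
    Ok((inputs, grammar))
}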

.zip(encoding.get_offsets())
.map(|(&id, &(start, stop))| {
    let text: String =
        String::from_utf8_lossy(&input.as_bytes()[start..stop]).to_string();
Collaborator

No, start and stop are defined in UTF-8 char ranges.

Collaborator Author

Oh, good catch. I've updated this code, and the location it was copied from, to operate on chars rather than bytes.
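
For illustration, a char-based version of the quoted slice could look like this (a sketch only; the actual change in the commit may differ):

// The offsets from the tokenizer are character indices, not byte indices,
// so collect the chars in [start, stop) instead of slicing bytes.
let text: String = input.chars().skip(start).take(stop - start).collect();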

Narsil merged commit e11f5f1 into main on Aug 6, 2024
9 checks passed
Narsil deleted the add-templated-route branch on August 6, 2024 at 11:51
yuanwu2017 pushed a commit to yuanwu2017/tgi-gaudi that referenced this pull request on Sep 26, 2024 (huggingface#2333):

* feat: implement a templated endpoint for visibility into chat requests

* feat: improve to tokenize too

* fix: adjust return type

* feat: simplify prepare_chat_input logic and adjust start stop chars