Skip to content

Commit f83d4b5

Browse files
xusenlin committed
Support for Text Generation Inference (TGI)
1 parent 4a8af77 commit f83d4b5

File tree

4 files changed: +6 -8 lines changed

api/llama_cpp_routes/chat.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ async def create_chat_completion(
 
 prompt = engine.apply_chat_template(request.messages, request.functions, request.tools)
 include = {
-"temperature", "top_p", "stream", "stop",
-"max_tokens", "presence_penalty", "frequency_penalty", "model"
+"temperature", "top_p", "stream", "stop", "model",
+"max_tokens", "presence_penalty", "frequency_penalty",
 }
 kwargs = model_dump(request, include=include)
 logger.debug(f"==== request ====\n{kwargs}")

api/llama_cpp_routes/completion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ async def create_completion(
 request = await handle_request(request, engine.stop)
 
 include = {
-"temperature", "top_p", "stream", "stop",
-"max_tokens", "presence_penalty", "frequency_penalty", "model"
+"temperature", "top_p", "stream", "stop", "model",
+"max_tokens", "presence_penalty", "frequency_penalty",
 }
 kwargs = model_dump(request, include=include)
 logger.debug(f"==== request ====\n{kwargs}")

api/tgi_routes/chat.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,7 @@ async def create_chat_completion(
 
 prompt = engine.apply_chat_template(request.messages)
 include = {
-"temperature", "stop_token_ids", "stream", "model",
-"repetition_penalty", "typical_p", "watermark", "best_of"
+"temperature", "best_of", "repetition_penalty", "typical_p", "watermark",
 }
 params = model_dump(request, include=include)
 params |= dict(

api/tgi_routes/completion.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,7 @@ async def create_completion(
 
 request_id: str = f"cmpl-{str(uuid.uuid4())}"
 include = {
-"temperature", "stop_token_ids", "stream", "model",
-"repetition_penalty", "typical_p", "watermark", "best_of",
+"temperature", "best_of", "repetition_penalty", "typical_p", "watermark",
 }
 params = model_dump(request, include=include)
 params |= dict(

0 commit comments

Comments (0)