Allow TGI adaptor to have non-standard llama model names
Hardik Shah committed Sep 20, 2024
1 parent 59af1c8 commit 42d29f3
Showing 1 changed file with 0 additions and 14 deletions.
14 changes: 0 additions & 14 deletions llama_stack/providers/adapters/inference/tgi/tgi.py
@@ -50,16 +50,6 @@ async def initialize(self) -> None:
                 raise RuntimeError("Missing max_total_tokens in model info")
             self.max_tokens = info["max_total_tokens"]
 
-            model_id = info["model_id"]
-            model_name = next(
-                (name for name, id in HF_SUPPORTED_MODELS.items() if id == model_id),
-                None,
-            )
-            if model_name is None:
-                raise RuntimeError(
-                    f"TGI is serving model: {model_id}, use one of the supported models: {', '.join(HF_SUPPORTED_MODELS.values())}"
-                )
-            self.model_name = model_name
             self.inference_url = info["inference_url"]
         except Exception as e:
             import traceback
@@ -116,10 +106,6 @@ async def chat_completion(
 
         print(f"Calculated max_new_tokens: {max_new_tokens}")
 
-        assert (
-            request.model == self.model_name
-        ), f"Model mismatch, expected {self.model_name}, got {request.model}"
-
         options = self.get_chat_options(request)
         if not request.stream:
             response = self.client.text_generation(

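For context, here is a minimal sketch of what the initialize() path looks like once these lines are removed. Only the context lines visible in the diff above come from the actual file; the class name, the injected endpoint_info dict, and the error message in the except branch are hypothetical stand-ins added to keep the sketch self-contained.

from typing import Any, Dict


class TGIAdapterSketch:
    """Hypothetical stand-in for the TGI inference adapter after this commit."""

    def __init__(self, endpoint_info: Dict[str, Any]) -> None:
        # The real adapter queries the running TGI endpoint for this metadata;
        # it is injected here so the sketch stays self-contained.
        self._endpoint_info = endpoint_info

    async def initialize(self) -> None:
        try:
            info = self._endpoint_info
            if "max_total_tokens" not in info:
                raise RuntimeError("Missing max_total_tokens in model info")
            self.max_tokens = info["max_total_tokens"]

            # The HF_SUPPORTED_MODELS lookup and the self.model_name assignment
            # that used to live here are gone, so an endpoint serving a
            # fine-tuned or renamed Llama model is no longer rejected at startup.
            self.inference_url = info["inference_url"]
        except Exception as e:
            raise RuntimeError("Could not initialize TGI adapter") from e

The matching change in chat_completion is the removal of the request.model == self.model_name assertion, so the adapter forwards requests regardless of which model name the caller passes in.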