Commit 4c82dd9

Ollama Chat - parse tool calls on streaming (#11171)
* fix(user_api_key_auth.py): fix else block (Fixes #11170)
* refactor(ollama/chat): refactor to the base config pattern, which is easier to maintain and fix
* fix(ollama/chat): support tool call parsing on streaming (Closes #11104)
* test: update import location
* fix: clean up unused import
* fix: fix ruff check error
* test: update import
* test: update test on CI
* ci: cleanup
* fix: fix check
* fix: fix API key check order
* test: fix import
* ci: fix script
* test: fix imports
* fix: fix tests
1 parent 64096ae
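The headline change is that ollama_chat/ streaming now parses tool calls instead of dropping them. A hedged usage sketch of the behavior #11104 asked for (assumes a locally running Ollama server with a tool-capable model pulled; the get_weather tool is purely illustrative, not from this commit):

import litellm

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # illustrative tool, not from this commit
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

# Before this commit, tool calls were silently dropped from ollama_chat
# streaming chunks; now they surface as OpenAI-style delta.tool_calls.
response = litellm.completion(
    model="ollama_chat/llama3.1",  # assumes this model is pulled locally
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    stream=True,
)
for chunk in response:
    delta = chunk.choices[0].delta
    if delta.tool_calls:
        print(delta.tool_calls)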

17 files changed: +537 -277 lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
@@ -931,7 +931,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest -vv tests/litellm --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit-litellm.xml --durations=10 -n 4
+            python -m pytest -vv tests/test_litellm --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit-litellm.xml --durations=10 -n 4
           no_output_timeout: 120m
       - run:
           name: Run enterprise tests

.github/workflows/test-litellm.yml

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-name: LiteLLM Mock Tests (folder - tests/litellm)
+name: LiteLLM Mock Tests (folder - tests/test_litellm)

 on:
   pull_request:
Makefile

Lines changed: 2 additions & 2 deletions
@@ -26,10 +26,10 @@ test:
 	poetry run pytest tests/

 test-unit:
-	poetry run pytest tests/litellm/
+	poetry run pytest tests/test_litellm/

 test-integration:
-	poetry run pytest tests/ -k "not litellm"
+	poetry run pytest tests/ -k "not test_litellm"

 test-unit-helm:
 	helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
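All three rename edits so far (CircleCI job, workflow name, Makefile targets) point at the same relocated unit-test folder, tests/litellm -> tests/test_litellm. One way to exercise the new layout locally, sketched under the assumption that pytest is installed and this is run from the repo root:

# Roughly equivalent to `make test-unit` after this commit;
# pytest.main returns an exit code suitable for sys.exit.
import sys

import pytest

sys.exit(pytest.main(["tests/test_litellm/", "-x", "-q"]))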

litellm/__init__.py

Lines changed: 1 addition & 2 deletions
@@ -929,11 +929,10 @@ def add_known_models():
 from .llms.vertex_ai.vertex_ai_partner_models.ai21.transformation import (
     VertexAIAi21Config,
 )
-
+from .llms.ollama.chat.transformation import OllamaChatConfig
 from .llms.ollama.completion.transformation import OllamaConfig
 from .llms.sagemaker.completion.transformation import SagemakerConfig
 from .llms.sagemaker.chat.transformation import SagemakerChatConfig
-from .llms.ollama_chat import OllamaChatConfig
 from .llms.bedrock.chat.invoke_handler import (
     AmazonCohereChatConfig,
     bedrock_tool_name_mappings,
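For downstream code, the practical effect of this hunk is the new import location. A minimal sketch, grounded in the diff above:

# New canonical location of the Ollama chat config after this commit:
from litellm.llms.ollama.chat.transformation import OllamaChatConfig

# The package root re-exports it (see the __init__.py diff above), so
# top-level references still resolve to the same class:
import litellm

assert litellm.OllamaChatConfig is OllamaChatConfig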

litellm/litellm_core_utils/streaming_handler.py

Lines changed: 2 additions & 42 deletions
@@ -549,41 +549,6 @@ def handle_baseten_chunk(self, chunk):
             )
         return ""

-    def handle_ollama_chat_stream(self, chunk):
-        # for ollama_chat/ provider
-        try:
-            if isinstance(chunk, dict):
-                json_chunk = chunk
-            else:
-                json_chunk = json.loads(chunk)
-            if "error" in json_chunk:
-                raise Exception(f"Ollama Error - {json_chunk}")
-
-            text = ""
-            is_finished = False
-            finish_reason = None
-            if json_chunk["done"] is True:
-                text = ""
-                is_finished = True
-                finish_reason = "stop"
-                return {
-                    "text": text,
-                    "is_finished": is_finished,
-                    "finish_reason": finish_reason,
-                }
-            elif "message" in json_chunk:
-                print_verbose(f"delta content: {json_chunk}")
-                text = json_chunk["message"]["content"]
-                return {
-                    "text": text,
-                    "is_finished": is_finished,
-                    "finish_reason": finish_reason,
-                }
-            else:
-                raise Exception(f"Ollama Error - {json_chunk}")
-        except Exception as e:
-            raise e
-
     def handle_triton_stream(self, chunk):
         try:
             if isinstance(chunk, dict):

@@ -1142,12 +1107,6 @@ def chunk_creator(self, chunk: Any):  # type: ignore # noqa: PLR0915
                 new_chunk = self.completion_stream[:chunk_size]
                 completion_obj["content"] = new_chunk
                 self.completion_stream = self.completion_stream[chunk_size:]
-            elif self.custom_llm_provider == "ollama_chat":
-                response_obj = self.handle_ollama_chat_stream(chunk)
-                completion_obj["content"] = response_obj["text"]
-                print_verbose(f"completion obj content: {completion_obj['content']}")
-                if response_obj["is_finished"]:
-                    self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "triton":
                 response_obj = self.handle_triton_stream(chunk)
                 completion_obj["content"] = response_obj["text"]

@@ -1198,6 +1157,7 @@ def chunk_creator(self, chunk: Any):  # type: ignore # noqa: PLR0915
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "cached_response":
+                chunk = cast(ModelResponseStream, chunk)
                 response_obj = {
                     "text": chunk.choices[0].delta.content,
                     "is_finished": True,

@@ -1225,7 +1185,7 @@ def chunk_creator(self, chunk: Any):  # type: ignore # noqa: PLR0915
             if self.custom_llm_provider == "azure":
                 if isinstance(chunk, BaseModel) and hasattr(chunk, "model"):
                     # for azure, we need to pass the model from the original chunk
-                    self.model = chunk.model
+                    self.model = getattr(chunk, "model", self.model)
                 response_obj = self.handle_openai_chat_completion_chunk(chunk)
                 if response_obj is None:
                     return
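With the provider-specific branch deleted, ollama_chat streaming flows through the base config pattern, and the tool-call parsing itself lives in litellm/llms/ollama/chat/transformation.py, which is not shown in this excerpt. The sketch below is not that code; it is a minimal illustration of the job the new parser has to do, assuming Ollama's documented NDJSON chunk shape for /api/chat (the helper name and the synthesized call id are hypothetical):

import json
import uuid
from typing import Optional


def parse_ollama_chat_chunk(raw: str) -> dict:
    # Parse one newline-delimited JSON chunk from Ollama's /api/chat
    # stream, keeping the tool calls that the removed
    # handle_ollama_chat_stream() ignored.
    chunk = json.loads(raw)
    if "error" in chunk:
        raise Exception(f"Ollama Error - {chunk}")

    message = chunk.get("message") or {}
    tool_calls = []
    for call in message.get("tool_calls") or []:
        tool_calls.append(
            {
                # Ollama sends no call id, so one is synthesized here.
                "id": f"call_{uuid.uuid4().hex[:8]}",
                "type": "function",
                "function": {
                    "name": call["function"]["name"],
                    # OpenAI-style deltas carry arguments as a JSON string,
                    # while Ollama sends an already-parsed object.
                    "arguments": json.dumps(call["function"]["arguments"]),
                },
            }
        )

    finish_reason: Optional[str] = None
    if chunk.get("done") is True:
        finish_reason = "tool_calls" if tool_calls else "stop"

    return {
        "text": message.get("content", ""),
        "tool_calls": tool_calls,
        "finish_reason": finish_reason,
    }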
