Commit 4c82dd9

Ollama Chat - parse tool calls on streaming (#11171)
* fix(user_api_key_auth.py): fix else block (Fixes #11170)
* refactor(ollama/chat): refactor to the base config pattern, which is easier to maintain and fix
* fix(ollama/chat): support tool call parsing on streaming (Closes #11104)
* test: update import location
* fix: clean up unused import
* fix: fix ruff check error
* test: update import
* test: update test on CI
* ci: cleanup
* fix: fix check
* fix: fix API key check order
* test: fix import
* ci: fix script
* test: fix imports
* fix: fix tests
1 parent 64096ae
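The headline change is that ollama_chat/ streaming now parses tool calls instead of dropping them. A hedged usage sketch of the behavior #11104 asked for (assumes a locally running Ollama server with a tool-capable model pulled; the get_weather tool is purely illustrative, not from this commit):

import litellm

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",  # illustrative tool, not from this commit
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

# Before this commit, tool calls were silently dropped from ollama_chat
# streaming chunks; now they surface as OpenAI-style delta.tool_calls.
response = litellm.completion(
    model="ollama_chat/llama3.1",  # assumes this model is pulled locally
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    stream=True,
)
for chunk in response:
    delta = chunk.choices[0].delta
    if delta.tool_calls:
        print(delta.tool_calls)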

17 files changed: +537 -277 lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
@@ -931,7 +931,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest -vv tests/litellm --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit-litellm.xml --durations=10 -n 4
+            python -m pytest -vv tests/test_litellm --cov=litellm --cov-report=xml -x -s -v --junitxml=test-results/junit-litellm.xml --durations=10 -n 4
           no_output_timeout: 120m
       - run:
           name: Run enterprise tests

.github/workflows/test-litellm.yml

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-name: LiteLLM Mock Tests (folder - tests/litellm)
+name: LiteLLM Mock Tests (folder - tests/test_litellm)

 on:
   pull_request:
Makefile

Lines changed: 2 additions & 2 deletions
@@ -26,10 +26,10 @@ test:
 	poetry run pytest tests/

 test-unit:
-	poetry run pytest tests/litellm/
+	poetry run pytest tests/test_litellm/

 test-integration:
-	poetry run pytest tests/ -k "not litellm"
+	poetry run pytest tests/ -k "not test_litellm"

 test-unit-helm:
 	helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
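All three rename edits so far (CircleCI job, workflow name, Makefile targets) point at the same relocated unit-test folder, tests/litellm -> tests/test_litellm. One way to exercise the new layout locally, sketched under the assumption that pytest is installed and this is run from the repo root:

# Roughly equivalent to `make test-unit` after this commit;
# pytest.main returns an exit code suitable for sys.exit.
import sys

import pytest

sys.exit(pytest.main(["tests/test_litellm/", "-x", "-q"]))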

litellm/__init__.py

Lines changed: 1 addition & 2 deletions
@@ -929,11 +929,10 @@ def add_known_models():
 from .llms.vertex_ai.vertex_ai_partner_models.ai21.transformation import (
     VertexAIAi21Config,
 )
-
+from .llms.ollama.chat.transformation import OllamaChatConfig
 from .llms.ollama.completion.transformation import OllamaConfig
 from .llms.sagemaker.completion.transformation import SagemakerConfig
 from .llms.sagemaker.chat.transformation import SagemakerChatConfig
-from .llms.ollama_chat import OllamaChatConfig
 from .llms.bedrock.chat.invoke_handler import (
     AmazonCohereChatConfig,
     bedrock_tool_name_mappings,
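For downstream code, the practical effect of this hunk is the new import location. A minimal sketch, grounded in the diff above:

# New canonical location of the Ollama chat config after this commit:
from litellm.llms.ollama.chat.transformation import OllamaChatConfig

# The package root re-exports it (see the __init__.py diff above), so
# top-level references still resolve to the same class:
import litellm

assert litellm.OllamaChatConfig is OllamaChatConfig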

litellm/litellm_core_utils/streaming_handler.py

Lines changed: 2 additions & 42 deletions
@@ -549,41 +549,6 @@ def handle_baseten_chunk(self, chunk):
             )
         return ""

-    def handle_ollama_chat_stream(self, chunk):
-        # for ollama_chat/ provider
-        try:
-            if isinstance(chunk, dict):
-                json_chunk = chunk
-            else:
-                json_chunk = json.loads(chunk)
-            if "error" in json_chunk:
-                raise Exception(f"Ollama Error - {json_chunk}")
-
-            text = ""
-            is_finished = False
-            finish_reason = None
-            if json_chunk["done"] is True:
-                text = ""
-                is_finished = True
-                finish_reason = "stop"
-                return {
-                    "text": text,
-                    "is_finished": is_finished,
-                    "finish_reason": finish_reason,
-                }
-            elif "message" in json_chunk:
-                print_verbose(f"delta content: {json_chunk}")
-                text = json_chunk["message"]["content"]
-                return {
-                    "text": text,
-                    "is_finished": is_finished,
-                    "finish_reason": finish_reason,
-                }
-            else:
-                raise Exception(f"Ollama Error - {json_chunk}")
-        except Exception as e:
-            raise e
-
     def handle_triton_stream(self, chunk):
         try:
             if isinstance(chunk, dict):

@@ -1142,12 +1107,6 @@ def chunk_creator(self, chunk: Any):  # type: ignore # noqa: PLR0915
                 new_chunk = self.completion_stream[:chunk_size]
                 completion_obj["content"] = new_chunk
                 self.completion_stream = self.completion_stream[chunk_size:]
-            elif self.custom_llm_provider == "ollama_chat":
-                response_obj = self.handle_ollama_chat_stream(chunk)
-                completion_obj["content"] = response_obj["text"]
-                print_verbose(f"completion obj content: {completion_obj['content']}")
-                if response_obj["is_finished"]:
-                    self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "triton":
                 response_obj = self.handle_triton_stream(chunk)
                 completion_obj["content"] = response_obj["text"]

@@ -1198,6 +1157,7 @@ def chunk_creator(self, chunk: Any):  # type: ignore # noqa: PLR0915
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             elif self.custom_llm_provider == "cached_response":
+                chunk = cast(ModelResponseStream, chunk)
                 response_obj = {
                     "text": chunk.choices[0].delta.content,
                     "is_finished": True,

@@ -1225,7 +1185,7 @@ def chunk_creator(self, chunk: Any):  # type: ignore # noqa: PLR0915
             if self.custom_llm_provider == "azure":
                 if isinstance(chunk, BaseModel) and hasattr(chunk, "model"):
                     # for azure, we need to pass the model from the original chunk
-                    self.model = chunk.model
+                    self.model = getattr(chunk, "model", self.model)
                 response_obj = self.handle_openai_chat_completion_chunk(chunk)
                 if response_obj is None:
                     return
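With the provider-specific branch deleted, ollama_chat streaming flows through the base config pattern, and the tool-call parsing itself lives in litellm/llms/ollama/chat/transformation.py, which is not shown in this excerpt. The sketch below is not that code; it is a minimal illustration of the job the new parser has to do, assuming Ollama's documented NDJSON chunk shape for /api/chat (the helper name and the synthesized call id are hypothetical):

import json
import uuid
from typing import Optional


def parse_ollama_chat_chunk(raw: str) -> dict:
    # Parse one newline-delimited JSON chunk from Ollama's /api/chat
    # stream, keeping the tool calls that the removed
    # handle_ollama_chat_stream() ignored.
    chunk = json.loads(raw)
    if "error" in chunk:
        raise Exception(f"Ollama Error - {chunk}")

    message = chunk.get("message") or {}
    tool_calls = []
    for call in message.get("tool_calls") or []:
        tool_calls.append(
            {
                # Ollama sends no call id, so one is synthesized here.
                "id": f"call_{uuid.uuid4().hex[:8]}",
                "type": "function",
                "function": {
                    "name": call["function"]["name"],
                    # OpenAI-style deltas carry arguments as a JSON string,
                    # while Ollama sends an already-parsed object.
                    "arguments": json.dumps(call["function"]["arguments"]),
                },
            }
        )

    finish_reason: Optional[str] = None
    if chunk.get("done") is True:
        finish_reason = "tool_calls" if tool_calls else "stop"

    return {
        "text": message.get("content", ""),
        "tool_calls": tool_calls,
        "finish_reason": finish_reason,
    }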
