Skip to content

Commit

Permalink
(fix) health check - allow setting health_check_model (#7752)
Browse files Browse the repository at this point in the history
* use _update_litellm_params_for_health_check

* fix Wildcard Routes

* test_update_litellm_params_for_health_check

* test_perform_health_check_with_health_check_model

* fix doc string

* huggingface/mistralai/Mistral-7B-Instruct-v0.3
  • Loading branch information
ishaan-jaff authored Jan 14, 2025
1 parent 9daa6fb commit d510f1d
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 14 deletions.
22 changes: 22 additions & 0 deletions docs/my-website/docs/proxy/health.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,28 @@ model_list:
mode: realtime
```

### Wildcard Routes

For wildcard routes, you can specify a `health_check_model` in your config.yaml. This model will be used for health checks for that wildcard route.

In this example, when running a health check for `openai/*`, the health check will make a `/chat/completions` request to `openai/gpt-4o-mini`.

```yaml
model_list:
- model_name: openai/*
litellm_params:
model: openai/*
api_key: os.environ/OPENAI_API_KEY
model_info:
health_check_model: openai/gpt-4o-mini
- model_name: anthropic/*
litellm_params:
model: anthropic/*
api_key: os.environ/ANTHROPIC_API_KEY
model_info:
health_check_model: anthropic/claude-3-5-sonnet-20240620
```

## Background Health Checks

You can enable model health checks being run in the background, to prevent each model from being queried too frequently via `/health`.
Expand Down
20 changes: 19 additions & 1 deletion litellm/proxy/health_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,10 @@ async def _perform_health_check(model_list: list, details: Optional[bool] = True
for model in model_list:
litellm_params = model["litellm_params"]
model_info = model.get("model_info", {})
litellm_params["messages"] = _get_random_llm_message()
mode = model_info.get("mode", None)
litellm_params = _update_litellm_params_for_health_check(
model_info, litellm_params
)
tasks.append(
litellm.ahealth_check(
litellm_params,
Expand Down Expand Up @@ -103,6 +105,22 @@ async def _perform_health_check(model_list: list, details: Optional[bool] = True
return healthy_endpoints, unhealthy_endpoints


def _update_litellm_params_for_health_check(
    model_info: dict, litellm_params: dict
) -> dict:
    """
    Prepare `litellm_params` for a health check request.

    - Attaches a short random `messages` payload used for the check.
    - If `model_info` declares a `health_check_model`, that model overrides
      the configured one.
      Doc: https://docs.litellm.ai/docs/proxy/health#wildcard-routes
    """
    litellm_params["messages"] = _get_random_llm_message()
    override_model = model_info.get("health_check_model")
    if override_model is not None:
        litellm_params["model"] = override_model
    return litellm_params


async def perform_health_check(
model_list: list,
model: Optional[str] = None,
Expand Down
24 changes: 11 additions & 13 deletions litellm/proxy/proxy_config.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
model_list:
- model_name: "gpt-4"
- model_name: openai/*
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
alerting: ["slack"]
alert_to_webhook_url: {
"budget_alerts": ["https://hooks.slack.com/services/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"]
}
alerting_args:
budget_alert_ttl: 10
log_to_console: true
model: openai/*
api_key: os.environ/OPENAI_API_KEY
model_info:
health_check_model: openai/gpt-4o-mini
- model_name: anthropic/*
litellm_params:
model: anthropic/*
api_key: os.environ/ANTHROPIC_API_KEY
model_info:
health_check_model: anthropic/claude-3-5-sonnet-20240620
75 changes: 75 additions & 0 deletions tests/local_testing/test_health_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,78 @@ async def test_async_realtime_health_check(model, mocker):
)
print(response)
assert response == {}


def test_update_litellm_params_for_health_check():
    """
    Test if _update_litellm_params_for_health_check correctly:
    1. Updates messages with a random message
    2. Updates model name when health_check_model is provided
    """
    from litellm.proxy.health_check import _update_litellm_params_for_health_check

    # (model_info, model expected after the update)
    cases = [
        # health_check_model present -> it overrides the configured model
        ({"health_check_model": "gpt-3.5-turbo"}, "gpt-3.5-turbo"),
        # no health_check_model -> original model is kept
        ({}, "gpt-4"),
    ]

    for model_info, expected_model in cases:
        litellm_params = {
            "model": "gpt-4",
            "api_key": "fake_key",
        }

        updated = _update_litellm_params_for_health_check(model_info, litellm_params)

        assert "messages" in updated
        assert isinstance(updated["messages"], list)
        assert updated["model"] == expected_model


@pytest.mark.asyncio
async def test_perform_health_check_with_health_check_model():
    """
    Test if _perform_health_check correctly uses `health_check_model` when model=`openai/*`:
    1. Verifies that health_check_model overrides the original model when model=`openai/*`
    2. Ensures the health check is performed with the override model
    """
    from litellm.proxy.health_check import _perform_health_check

    # Single wildcard deployment whose health checks should use the override model.
    model_list = [
        {
            "litellm_params": {"model": "openai/*", "api_key": "fake-key"},
            "model_info": {
                "mode": "chat",
                # Override model used only for the health check request
                "health_check_model": "openai/gpt-4o-mini",
            },
        }
    ]

    # Record the model each mocked health-check call receives.
    checked_models = []

    async def fake_ahealth_check(litellm_params, **kwargs):
        checked_models.append(litellm_params["model"])
        return {"status": "healthy"}

    with patch("litellm.ahealth_check", side_effect=fake_ahealth_check):
        healthy_endpoints, unhealthy_endpoints = await _perform_health_check(model_list)
        print("health check calls: ", checked_models)

    # The actual request must have been made with the override model.
    assert checked_models[0] == "openai/gpt-4o-mini"
    # The reported healthy endpoint carries the override model as well.
    print("healthy endpoints: ", healthy_endpoints)
    assert healthy_endpoints[0]["model"] == "openai/gpt-4o-mini"
    assert len(healthy_endpoints) == 1
    assert len(unhealthy_endpoints) == 0

0 comments on commit d510f1d

Please sign in to comment.