diff --git a/docs/my-website/docs/proxy/health.md b/docs/my-website/docs/proxy/health.md
index 0da4716dcb41..cd95a7dffcff 100644
--- a/docs/my-website/docs/proxy/health.md
+++ b/docs/my-website/docs/proxy/health.md
@@ -182,6 +182,28 @@ model_list:
       mode: realtime
 ```
 
+### Wildcard Routes
+
+For wildcard routes, you can specify a `health_check_model` in your config.yaml. This model will be used for health checks for that wildcard route.
+
+In this example, when running a health check for `openai/*`, the health check will make a `/chat/completions` request to `openai/gpt-4o-mini`.
+
+```yaml
+model_list:
+  - model_name: openai/*
+    litellm_params:
+      model: openai/*
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      health_check_model: openai/gpt-4o-mini
+  - model_name: anthropic/*
+    litellm_params:
+      model: anthropic/*
+      api_key: os.environ/ANTHROPIC_API_KEY
+    model_info:
+      health_check_model: anthropic/claude-3-5-sonnet-20240620
+```
+
 ## Background Health Checks
 
 You can enable model health checks being run in the background, to prevent each model from being queried too frequently via `/health`.
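
To make the documented behavior concrete, here is a minimal sketch (not part of the diff) of querying a proxy started with the config above. The base URL, master key, and exact response fields are assumptions based on litellm's `/health` docs rather than anything this PR changes:

```python
# Sketch only: assumes a proxy running the wildcard config above at
# http://localhost:4000 and a placeholder master key. The healthy_endpoints /
# unhealthy_count fields follow litellm's documented /health response shape.
import httpx

PROXY_BASE_URL = "http://localhost:4000"  # assumed local proxy address
MASTER_KEY = "sk-1234"  # placeholder; use your real master key

resp = httpx.get(
    f"{PROXY_BASE_URL}/health",
    headers={"Authorization": f"Bearer {MASTER_KEY}"},
    timeout=60.0,
)
resp.raise_for_status()
report = resp.json()

# With health_check_model set, healthy endpoints should report the concrete
# model (e.g. openai/gpt-4o-mini) rather than the wildcard "openai/*".
for endpoint in report.get("healthy_endpoints", []):
    print(endpoint.get("model"))
print(report.get("unhealthy_count"), "unhealthy endpoints")
```
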
diff --git a/litellm/proxy/health_check.py b/litellm/proxy/health_check.py
index 78973434c676..babc93714d6c 100644
--- a/litellm/proxy/health_check.py
+++ b/litellm/proxy/health_check.py
@@ -70,8 +70,10 @@ async def _perform_health_check(model_list: list, details: Optional[bool] = True
     for model in model_list:
         litellm_params = model["litellm_params"]
         model_info = model.get("model_info", {})
-        litellm_params["messages"] = _get_random_llm_message()
         mode = model_info.get("mode", None)
+        litellm_params = _update_litellm_params_for_health_check(
+            model_info, litellm_params
+        )
         tasks.append(
             litellm.ahealth_check(
                 litellm_params,
@@ -103,6 +105,22 @@ async def _perform_health_check(model_list: list, details: Optional[bool] = True
     return healthy_endpoints, unhealthy_endpoints
 
 
+def _update_litellm_params_for_health_check(
+    model_info: dict, litellm_params: dict
+) -> dict:
+    """
+    Update the litellm params for health check.
+
+    - gets a short `messages` param for health check
+    - updates the `model` param with the `health_check_model` if it exists. Doc: https://docs.litellm.ai/docs/proxy/health#wildcard-routes
+    """
+    litellm_params["messages"] = _get_random_llm_message()
+    _health_check_model = model_info.get("health_check_model", None)
+    if _health_check_model is not None:
+        litellm_params["model"] = _health_check_model
+    return litellm_params
+
+
 async def perform_health_check(
     model_list: list,
     model: Optional[str] = None,
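
As a quick illustration of the helper's contract (mirroring the unit test added further down), a minimal usage sketch, assuming litellm is importable:

```python
# Minimal sketch of the new helper's two behaviors; values are illustrative.
from litellm.proxy.health_check import _update_litellm_params_for_health_check

params = _update_litellm_params_for_health_check(
    model_info={"health_check_model": "openai/gpt-4o-mini"},
    litellm_params={"model": "openai/*", "api_key": "fake-key"},
)
assert isinstance(params["messages"], list)  # short health-check prompt attached
assert params["model"] == "openai/gpt-4o-mini"  # wildcard swapped for override

# Without health_check_model in model_info, the original model is kept.
kept = _update_litellm_params_for_health_check({}, {"model": "gpt-4"})
assert kept["model"] == "gpt-4"
```
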
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 6febe3ffeb23..f1927b613cd4 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,15 +1,13 @@
 model_list:
-  - model_name: "gpt-4"
+  - model_name: openai/*
     litellm_params:
-      model: openai/fake
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-
-general_settings:
-  alerting: ["slack"]
-  alert_to_webhook_url: {
-    "budget_alerts": ["https://hooks.slack.com/services/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"]
-  }
-  alerting_args:
-    budget_alert_ttl: 10
-    log_to_console: true
\ No newline at end of file
+      model: openai/*
+      api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      health_check_model: openai/gpt-4o-mini
+  - model_name: anthropic/*
+    litellm_params:
+      model: anthropic/*
+      api_key: os.environ/ANTHROPIC_API_KEY
+    model_info:
+      health_check_model: anthropic/claude-3-5-sonnet-20240620
diff --git a/tests/local_testing/test_health_check.py b/tests/local_testing/test_health_check.py
index 9a64f1556617..5d5a2cd56968 100644
--- a/tests/local_testing/test_health_check.py
+++ b/tests/local_testing/test_health_check.py
@@ -222,3 +222,78 @@ async def test_async_realtime_health_check(model, mocker):
     )
     print(response)
     assert response == {}
+
+
+def test_update_litellm_params_for_health_check():
+    """
+    Test if _update_litellm_params_for_health_check correctly:
+    1. Updates messages with a random message
+    2. Updates model name when health_check_model is provided
+    """
+    from litellm.proxy.health_check import _update_litellm_params_for_health_check
+
+    # Test with health_check_model
+    model_info = {"health_check_model": "gpt-3.5-turbo"}
+    litellm_params = {
+        "model": "gpt-4",
+        "api_key": "fake_key",
+    }
+
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+
+    assert "messages" in updated_params
+    assert isinstance(updated_params["messages"], list)
+    assert updated_params["model"] == "gpt-3.5-turbo"
+
+    # Test without health_check_model
+    model_info = {}
+    litellm_params = {
+        "model": "gpt-4",
+        "api_key": "fake_key",
+    }
+
+    updated_params = _update_litellm_params_for_health_check(model_info, litellm_params)
+
+    assert "messages" in updated_params
+    assert isinstance(updated_params["messages"], list)
+    assert updated_params["model"] == "gpt-4"
+
+
+@pytest.mark.asyncio
+async def test_perform_health_check_with_health_check_model():
+    """
+    Test if _perform_health_check correctly uses `health_check_model` when model=`openai/*`:
+    1. Verifies that health_check_model overrides the original model when model=`openai/*`
+    2. Ensures the health check is performed with the override model
+    """
+    from litellm.proxy.health_check import _perform_health_check
+
+    # Mock model list with health_check_model specified
+    model_list = [
+        {
+            "litellm_params": {"model": "openai/*", "api_key": "fake-key"},
+            "model_info": {
+                "mode": "chat",
+                "health_check_model": "openai/gpt-4o-mini",  # Override model for health check
+            },
+        }
+    ]
+
+    # Track which model is actually used in the health check
+    health_check_calls = []
+
+    async def mock_health_check(litellm_params, **kwargs):
+        health_check_calls.append(litellm_params["model"])
+        return {"status": "healthy"}
+
+    with patch("litellm.ahealth_check", side_effect=mock_health_check):
+        healthy_endpoints, unhealthy_endpoints = await _perform_health_check(model_list)
+        print("health check calls: ", health_check_calls)
+
+    # Verify the health check used the override model
+    assert health_check_calls[0] == "openai/gpt-4o-mini"
+    # Verify the reported endpoint shows the health_check_model override
+    print("healthy endpoints: ", healthy_endpoints)
+    assert healthy_endpoints[0]["model"] == "openai/gpt-4o-mini"
+    assert len(healthy_endpoints) == 1
+    assert len(unhealthy_endpoints) == 0
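
For an end-to-end spot check without mocks, something along these lines should work; it assumes a real `OPENAI_API_KEY` in the environment and that `perform_health_check` (the public wrapper whose signature appears in the hunk above) returns the same `(healthy_endpoints, unhealthy_endpoints)` pair as the private helper:

```python
# Sketch (not part of this PR): drives the wildcard override end to end.
# Assumes OPENAI_API_KEY is set and that perform_health_check returns the
# same (healthy_endpoints, unhealthy_endpoints) tuple as _perform_health_check.
import asyncio
import os

from litellm.proxy.health_check import perform_health_check

model_list = [
    {
        "model_name": "openai/*",
        "litellm_params": {
            "model": "openai/*",
            "api_key": os.environ["OPENAI_API_KEY"],
        },
        "model_info": {
            "mode": "chat",
            "health_check_model": "openai/gpt-4o-mini",
        },
    }
]

healthy, unhealthy = asyncio.run(perform_health_check(model_list))
print(f"{len(healthy)} healthy, {len(unhealthy)} unhealthy endpoints")
```
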