Skip to content

Commit

Permalink
(fix) health check - allow setting health_check_model (#7752)
Browse files Browse the repository at this point in the history
* use _update_litellm_params_for_health_check

* fix Wildcard Routes

* test_update_litellm_params_for_health_check

* test_perform_health_check_with_health_check_model

* fix doc string

* huggingface/mistralai/Mistral-7B-Instruct-v0.3
  • Loading branch information
ishaan-jaff authored Jan 14, 2025
1 parent 9daa6fb commit d510f1d
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 14 deletions.
22 changes: 22 additions & 0 deletions docs/my-website/docs/proxy/health.md
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,28 @@ model_list:
mode: realtime
```

### Wildcard Routes

For wildcard routes, you can specify a `health_check_model` in your config.yaml. This model will be used for health checks for that wildcard route.

In this example, when running a health check for `openai/*`, the health check will make a `/chat/completions` request to `openai/gpt-4o-mini`.

```yaml
model_list:
- model_name: openai/*
litellm_params:
model: openai/*
api_key: os.environ/OPENAI_API_KEY
model_info:
health_check_model: openai/gpt-4o-mini
- model_name: anthropic/*
litellm_params:
model: anthropic/*
api_key: os.environ/ANTHROPIC_API_KEY
model_info:
health_check_model: anthropic/claude-3-5-sonnet-20240620
```

## Background Health Checks

You can enable model health checks being run in the background, to prevent each model from being queried too frequently via `/health`.
Expand Down
20 changes: 19 additions & 1 deletion litellm/proxy/health_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,10 @@ async def _perform_health_check(model_list: list, details: Optional[bool] = True
for model in model_list:
litellm_params = model["litellm_params"]
model_info = model.get("model_info", {})
litellm_params["messages"] = _get_random_llm_message()
mode = model_info.get("mode", None)
litellm_params = _update_litellm_params_for_health_check(
model_info, litellm_params
)
tasks.append(
litellm.ahealth_check(
litellm_params,
Expand Down Expand Up @@ -103,6 +105,22 @@ async def _perform_health_check(model_list: list, details: Optional[bool] = True
return healthy_endpoints, unhealthy_endpoints


def _update_litellm_params_for_health_check(
    model_info: dict, litellm_params: dict
) -> dict:
    """
    Prepare `litellm_params` for a health check request.

    - Attaches a short random `messages` payload used for the check.
    - If `model_info` declares a `health_check_model`, that model overrides
      the configured one.
      Doc: https://docs.litellm.ai/docs/proxy/health#wildcard-routes
    """
    litellm_params["messages"] = _get_random_llm_message()
    override_model = model_info.get("health_check_model")
    if override_model is not None:
        litellm_params["model"] = override_model
    return litellm_params


async def perform_health_check(
model_list: list,
model: Optional[str] = None,
Expand Down
24 changes: 11 additions & 13 deletions litellm/proxy/proxy_config.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
model_list:
- model_name: "gpt-4"
- model_name: openai/*
litellm_params:
model: openai/fake
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
alerting: ["slack"]
alert_to_webhook_url: {
"budget_alerts": ["https://hooks.slack.com/services/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"]
}
alerting_args:
budget_alert_ttl: 10
log_to_console: true
model: openai/*
api_key: os.environ/OPENAI_API_KEY
model_info:
health_check_model: openai/gpt-4o-mini
- model_name: anthropic/*
litellm_params:
model: anthropic/*
api_key: os.environ/ANTHROPIC_API_KEY
model_info:
health_check_model: anthropic/claude-3-5-sonnet-20240620
75 changes: 75 additions & 0 deletions tests/local_testing/test_health_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,78 @@ async def test_async_realtime_health_check(model, mocker):
)
print(response)
assert response == {}


def test_update_litellm_params_for_health_check():
    """
    Test if _update_litellm_params_for_health_check correctly:
    1. Updates messages with a random message
    2. Updates model name when health_check_model is provided
    """
    from litellm.proxy.health_check import _update_litellm_params_for_health_check

    # (model_info, model expected after the update)
    cases = [
        # health_check_model present -> it overrides the configured model
        ({"health_check_model": "gpt-3.5-turbo"}, "gpt-3.5-turbo"),
        # no health_check_model -> original model is kept
        ({}, "gpt-4"),
    ]

    for model_info, expected_model in cases:
        litellm_params = {
            "model": "gpt-4",
            "api_key": "fake_key",
        }

        updated = _update_litellm_params_for_health_check(model_info, litellm_params)

        assert "messages" in updated
        assert isinstance(updated["messages"], list)
        assert updated["model"] == expected_model


@pytest.mark.asyncio
async def test_perform_health_check_with_health_check_model():
    """
    Test if _perform_health_check correctly uses `health_check_model` when model=`openai/*`:
    1. Verifies that health_check_model overrides the original model when model=`openai/*`
    2. Ensures the health check is performed with the override model
    """
    from litellm.proxy.health_check import _perform_health_check

    # Single wildcard deployment whose health checks should use the override model.
    model_list = [
        {
            "litellm_params": {"model": "openai/*", "api_key": "fake-key"},
            "model_info": {
                "mode": "chat",
                # Override model used only for the health check request
                "health_check_model": "openai/gpt-4o-mini",
            },
        }
    ]

    # Record the model each mocked health-check call receives.
    checked_models = []

    async def fake_ahealth_check(litellm_params, **kwargs):
        checked_models.append(litellm_params["model"])
        return {"status": "healthy"}

    with patch("litellm.ahealth_check", side_effect=fake_ahealth_check):
        healthy_endpoints, unhealthy_endpoints = await _perform_health_check(model_list)
        print("health check calls: ", checked_models)

    # The actual request must have been made with the override model.
    assert checked_models[0] == "openai/gpt-4o-mini"
    # The reported healthy endpoint carries the override model as well.
    print("healthy endpoints: ", healthy_endpoints)
    assert healthy_endpoints[0]["model"] == "openai/gpt-4o-mini"
    assert len(healthy_endpoints) == 1
    assert len(unhealthy_endpoints) == 0

0 comments on commit d510f1d

Please sign in to comment.