8 changes: 4 additions & 4 deletions docs/my-website/docs/proxy/prometheus.md
@@ -121,8 +121,8 @@ Use this to track overall LiteLLM Proxy usage.

| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_proxy_failed_requests_metric` | Total number of failed responses from proxy - the client did not get a success response from litellm proxy. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "exception_status", "exception_class", "route"` |
| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - track number of client side requests. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "status_code", "user_email", "route"` |
| `litellm_proxy_failed_requests_metric` | Total number of failed responses from proxy - the client did not get a success response from litellm proxy. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "user_email", "exception_status", "exception_class", "route", "model_id"` |
| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - track number of client side requests. Labels: `"end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "status_code", "user_email", "route", "model_id"` |
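
Both request counters now also carry `model_id`, so proxy traffic can be broken down per deployment. Below is a minimal sketch of consuming that label by scraping the proxy's metrics endpoint; the URL/port (`http://localhost:4000/metrics`) is an assumption for illustration, not part of this change.

```python
# Sketch: sum litellm_proxy_total_requests_metric by the new model_id label.
# Assumes the LiteLLM Proxy metrics endpoint is reachable at localhost:4000.
from collections import defaultdict

import requests
from prometheus_client.parser import text_string_to_metric_families

metrics_text = requests.get("http://localhost:4000/metrics", timeout=10).text

requests_by_model_id = defaultdict(float)
for family in text_string_to_metric_families(metrics_text):
    if family.name != "litellm_proxy_total_requests_metric":
        continue
    for sample in family.samples:
        # Counters expose *_total (and *_created) samples; only sum the totals.
        if sample.name.endswith("_total"):
            requests_by_model_id[sample.labels.get("model_id", "")] += sample.value

for model_id, count in sorted(requests_by_model_id.items()):
    print(f"model_id={model_id!r}: {count:.0f} requests")
```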

### Callback Logging Metrics

@@ -191,10 +191,10 @@ Use this for LLM API Error monitoring and tracking remaining rate limits and tok

| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to LiteLLM Proxy Server - tracked for labels "end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "model" |
| `litellm_request_total_latency_metric` | Total latency (seconds) for a request to LiteLLM Proxy Server - tracked for labels "end_user", "hashed_api_key", "api_key_alias", "requested_model", "team", "team_alias", "user", "model", "model_id" |
| `litellm_overhead_latency_metric` | Latency overhead (seconds) added by LiteLLM processing - tracked for labels "model_group", "api_provider", "api_base", "litellm_model_name", "hashed_api_key", "api_key_alias" |
| `litellm_llm_api_latency_metric` | Latency (seconds) for just the LLM API call - tracked for labels "model", "hashed_api_key", "api_key_alias", "team", "team_alias", "requested_model", "end_user", "user" |
| `litellm_llm_api_time_to_first_token_metric` | Time to first token for LLM API call - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias` [Note: only emitted for streaming requests] |
| `litellm_llm_api_time_to_first_token_metric` | Time to first token for LLM API call - tracked for labels `model`, `hashed_api_key`, `api_key_alias`, `team`, `team_alias`, `requested_model`, `end_user`, `user`, `model_id` [Note: only emitted for streaming requests] |
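
The time-to-first-token histogram now carries the same request context as the other latency metrics (`requested_model`, `end_user`, `user`, `model_id`). The sketch below is illustrative only — not LiteLLM's own registration code — and shows a standalone `prometheus_client` histogram with that label set; the example values mirror the ones used in the tests further down.

```python
# Illustrative sketch (not LiteLLM's internal registration code): a histogram
# carrying the expanded time-to-first-token label set from the table above.
from prometheus_client import Histogram

ttft_histogram = Histogram(
    "litellm_llm_api_time_to_first_token_metric_example",  # suffixed to avoid clashing with the real metric
    "Time to first token for LLM API call (example)",
    labelnames=[
        "model", "hashed_api_key", "api_key_alias", "team", "team_alias",
        "requested_model", "end_user", "user", "model_id",
    ],
)

# Keyword labels keep the call order-independent, which is what the proxy's
# streaming path switches to in the code change below.
ttft_histogram.labels(
    model="gpt-3.5-turbo",
    hashed_api_key="test_hash",
    api_key_alias="test_alias",
    team="test_team",
    team_alias="test_team_alias",
    requested_model="openai-gpt",
    end_user="end-user-1",
    user="test_user",
    model_id="model-123",
).observe(0.5)
```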

## Tracking `end_user` on Prometheus

12 changes: 7 additions & 5 deletions litellm/integrations/prometheus.py
@@ -1126,12 +1126,14 @@ def _set_latency_metrics(
time_to_first_token_seconds is not None
and kwargs.get("stream", False) is True # only emit for streaming requests
):
_ttft_labels = prometheus_label_factory(
supported_enum_labels=self.get_labels_for_metric(
metric_name="litellm_llm_api_time_to_first_token_metric"
),
enum_values=enum_values,
)
self.litellm_llm_api_time_to_first_token_metric.labels(
model,
user_api_key,
user_api_key_alias,
user_api_team,
user_api_team_alias,
**_ttft_labels
).observe(time_to_first_token_seconds)
else:
verbose_logger.debug(
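
The change above swaps a positional `.labels(model, user_api_key, ...)` call for labels built by `prometheus_label_factory(...)`, driven by `get_labels_for_metric(...)`, so the TTFT metric picks up whatever labels are declared for it centrally. The helper's real implementation is not shown in this diff; the sketch below is an assumed approximation of its filtering behavior (hypothetical function and types) to illustrate why adding `model_id` to the metric's label list is enough.

```python
# Assumed approximation of prometheus_label_factory (sketch, not the real
# implementation): keep only the labels a metric declares, then splat the
# resulting dict into .labels(**...).
from typing import Dict, List, Optional


def prometheus_label_factory_sketch(
    supported_enum_labels: List[str],
    enum_values: Dict[str, Optional[str]],
) -> Dict[str, Optional[str]]:
    """Return only the key/value pairs whose keys the metric supports."""
    return {label: enum_values.get(label) for label in supported_enum_labels}


supported = [
    "model", "hashed_api_key", "api_key_alias", "team", "team_alias",
    "requested_model", "end_user", "user", "model_id",
]
enum_values = {
    "model": "gpt-3.5-turbo",
    "hashed_api_key": "test_hash",
    "api_key_alias": "test_alias",
    "team": "test_team",
    "team_alias": "test_team_alias",
    "requested_model": "openai-gpt",
    "end_user": None,
    "user": "test_user",
    "model_id": "model-123",
    "status_code": "200",  # known to enum_values but not declared on this metric, so dropped
}

_ttft_labels = prometheus_label_factory_sketch(supported, enum_values)
assert "status_code" not in _ttft_labels
assert _ttft_labels["model_id"] == "model-123"
```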
7 changes: 7 additions & 0 deletions litellm/types/integrations/prometheus.py
@@ -206,6 +206,10 @@ class PrometheusMetricLabels:
UserAPIKeyLabelNames.API_KEY_ALIAS.value,
UserAPIKeyLabelNames.TEAM.value,
UserAPIKeyLabelNames.TEAM_ALIAS.value,
UserAPIKeyLabelNames.REQUESTED_MODEL.value,
UserAPIKeyLabelNames.END_USER.value,
UserAPIKeyLabelNames.USER.value,
UserAPIKeyLabelNames.MODEL_ID.value,
]

litellm_request_total_latency_metric = [
@@ -217,6 +221,7 @@ class PrometheusMetricLabels:
UserAPIKeyLabelNames.TEAM_ALIAS.value,
UserAPIKeyLabelNames.USER.value,
UserAPIKeyLabelNames.v1_LITELLM_MODEL_NAME.value,
UserAPIKeyLabelNames.MODEL_ID.value,
]

litellm_proxy_total_requests_metric = [
@@ -230,6 +235,7 @@ class PrometheusMetricLabels:
UserAPIKeyLabelNames.STATUS_CODE.value,
UserAPIKeyLabelNames.USER_EMAIL.value,
UserAPIKeyLabelNames.ROUTE.value,
UserAPIKeyLabelNames.MODEL_ID.value,
]

litellm_proxy_failed_requests_metric = [
@@ -244,6 +250,7 @@ class PrometheusMetricLabels:
UserAPIKeyLabelNames.EXCEPTION_STATUS.value,
UserAPIKeyLabelNames.EXCEPTION_CLASS.value,
UserAPIKeyLabelNames.ROUTE.value,
UserAPIKeyLabelNames.MODEL_ID.value,
]

litellm_deployment_latency_per_output_token = [
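
Because the metric label sets are read from these lists (via `PrometheusMetricLabels.get_labels`, the accessor also exercised by the new test file further down), appending `MODEL_ID` here is the single point of change. A minimal sketch of that wiring, under the assumption that `get_labels(metric_name)` returns the label list shown above:

```python
# Sketch: declaring a metric from the centrally maintained label list, so the
# MODEL_ID addition above flows into the metric definition without further edits.
from prometheus_client import Counter

from litellm.types.integrations.prometheus import PrometheusMetricLabels

label_names = PrometheusMetricLabels.get_labels("litellm_proxy_total_requests_metric")
assert "model_id" in label_names  # the label added in this change

example_counter = Counter(
    "litellm_proxy_total_requests_metric_example",  # suffixed so it cannot clash with the real metric
    "Example counter declared from the shared label list",
    labelnames=label_names,
)
```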
Changes to the prometheus unit test file (path not shown)
@@ -411,7 +411,15 @@ def test_set_latency_metrics(prometheus_logger):

# completion_start_time - api_call_start_time
prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels.assert_called_once_with(
"gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
end_user=None,
user="test_user",
hashed_api_key="test_hash",
api_key_alias="test_alias",
team="test_team",
team_alias="test_team_alias",
requested_model="openai-gpt",
model="gpt-3.5-turbo",
model_id="model-123",
)
prometheus_logger.litellm_llm_api_time_to_first_token_metric.labels().observe.assert_called_once_with(
0.5
@@ -442,6 +450,7 @@ def test_set_latency_metrics(prometheus_logger):
team_alias="test_team_alias",
requested_model="openai-gpt",
model="gpt-3.5-turbo",
model_id="model-123",
)
prometheus_logger.litellm_request_total_latency_metric.labels().observe.assert_called_once_with(
2.0
@@ -737,6 +746,7 @@ async def test_async_post_call_failure_hook(prometheus_logger):
exception_status="429",
exception_class="Openai.RateLimitError",
route=user_api_key_dict.request_route,
model_id=None,
)
prometheus_logger.litellm_proxy_failed_requests_metric.labels().inc.assert_called_once()

@@ -752,6 +762,7 @@ async def test_async_post_call_failure_hook(prometheus_logger):
status_code="429",
user_email=None,
route=user_api_key_dict.request_route,
model_id=None,
)
prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()

@@ -798,6 +809,7 @@ async def test_async_post_call_success_hook(prometheus_logger):
status_code="200",
user_email=None,
route=user_api_key_dict.request_route,
model_id=None,
)
prometheus_logger.litellm_proxy_total_requests_metric.labels().inc.assert_called_once()

31 changes: 31 additions & 0 deletions tests/test_litellm/integrations/test_prometheus_labels.py
@@ -69,8 +69,39 @@ def test_prometheus_metric_labels_structure():
print(f"✅ {metric_name} has proper label structure with user_email")


def test_model_id_in_required_metrics():
"""
Test that model_id label is present in all the metrics that should have it:
- litellm_proxy_total_requests_metric
- litellm_proxy_failed_requests_metric
- litellm_request_total_latency_metric
- litellm_llm_api_time_to_first_token_metric
"""
model_id_label = UserAPIKeyLabelNames.MODEL_ID.value

# Metrics that should have model_id
metrics_with_model_id = [
"litellm_proxy_total_requests_metric",
"litellm_proxy_failed_requests_metric",
"litellm_request_total_latency_metric",
"litellm_llm_api_time_to_first_token_metric"
]

for metric_name in metrics_with_model_id:
labels = PrometheusMetricLabels.get_labels(metric_name)
assert model_id_label in labels, f"Metric {metric_name} should contain model_id label"
print(f"✅ {metric_name} contains model_id label")


def test_model_id_label_exists():
"""Test that the MODEL_ID label is properly defined"""
assert UserAPIKeyLabelNames.MODEL_ID.value == "model_id"


if __name__ == "__main__":
test_user_email_in_required_metrics()
test_user_email_label_exists()
test_prometheus_metric_labels_structure()
test_model_id_in_required_metrics()
test_model_id_label_exists()
print("All prometheus label tests passed!")