diff --git a/examples/production_monitoring/grafana.json b/examples/production_monitoring/grafana.json index 5e9bd5bd03869..273f7f5ac42cf 100644 --- a/examples/production_monitoring/grafana.json +++ b/examples/production_monitoring/grafana.json @@ -1,4 +1,41 @@ { + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.4.2" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], "annotations": { "list": [ { @@ -25,14 +62,14 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 29, + "id": null, "links": [], "liveNow": false, "panels": [ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "description": "End to end request latency measured in seconds.", "fieldConfig": { @@ -41,6 +78,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -54,6 +92,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -111,7 +150,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -127,7 +166,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -144,7 +183,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -161,7 +200,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -178,7 +217,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "rate(vllm:e2e_request_latency_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:e2e_request_latency_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", @@ -195,7 +234,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "description": "Number of tokens processed per second", "fieldConfig": { @@ -204,6 +243,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -217,6 +257,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -273,7 +314,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -289,7 +330,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -310,7 +351,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "description": "Inter token latency in seconds.", "fieldConfig": { @@ -319,6 +360,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -332,6 +374,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -389,7 +432,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -405,7 +448,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -422,7 +465,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -439,7 +482,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -456,7 +499,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "rate(vllm:time_per_output_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:time_per_output_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", @@ -473,7 +516,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "description": "Number of requests in RUNNING, WAITING, and SWAPPED state", "fieldConfig": { @@ -482,6 +525,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -495,6 +539,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -552,7 +597,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -568,7 +613,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -585,7 +630,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -606,7 +651,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "description": "P50, P90, P95, and P99 TTFT latency in seconds.", "fieldConfig": { @@ -615,6 +660,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -628,6 +674,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -685,7 +732,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -702,7 +749,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -718,7 +765,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -735,7 +782,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -752,7 +799,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "rate(vllm:time_to_first_token_seconds_sum{model_name=\"$model_name\"}[$__rate_interval])\n/\nrate(vllm:time_to_first_token_seconds_count{model_name=\"$model_name\"}[$__rate_interval])", @@ -769,7 +816,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "description": "Percentage of used cache blocks by vLLM.", "fieldConfig": { @@ -778,6 +825,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -791,6 +839,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -848,7 +897,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "vllm:gpu_cache_usage_perc{model_name=\"$model_name\"}", @@ -860,7 +909,7 @@ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", "expr": "vllm:cpu_cache_usage_perc{model_name=\"$model_name\"}", @@ -875,229 +924,232 @@ "type": "timeseries" }, { - "type": "heatmap", - "title": "Request Prompt Length", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "Heatmap of request prompt length", + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, "gridPos": { - "x": 0, - "y": 24, + "h": 8, "w": 12, - "h": 8 - }, - "datasource": { - "uid": "prometheus", - "type": "prometheus" + "x": 0, + "y": 24 }, "id": 12, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "refId": "A", - "expr": "sum by(le) (increase(vllm:request_prompt_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))", - "range": true, - "instant": false, - "editorMode": "builder", - "legendFormat": "{{le}}", - "useBackend": false, - "disableTextWrap": false, - "fullMetaSearch": false, - "includeNullMetadata": true, - "format": "heatmap" - } - ], "options": { "calculate": false, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "none", - "axisLabel": "Prompt Length" - }, - "rowsFrame": { - "layout": "auto", - "value": "Request count" + "cellGap": 1, + "cellValues": { + "unit": "none" }, "color": { - "mode": "scheme", + "exponent": 0.5, "fill": "dark-orange", + "min": 0, + "mode": "scheme", + "reverse": false, "scale": "exponential", - "exponent": 0.5, "scheme": "Spectral", - "steps": 64, - "reverse": false, - "min": 0 + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" }, - "cellGap": 1, "filterValues": { "le": 1e-9 }, - "tooltip": { - "show": true, - "yHistogram": true - }, "legend": { "show": true }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" + "rowsFrame": { + "layout": "auto", + "value": "Request count" }, - "cellValues": { + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisLabel": "Prompt Length", + "axisPlacement": "left", + "reverse": false, "unit": "none" } }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(le) (increase(vllm:request_prompt_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Request Prompt Length", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Heatmap of request generation length", "fieldConfig": { "defaults": { "custom": { - "scaleDistribution": { - "type": "linear" - }, "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false + }, + "scaleDistribution": { + "type": "linear" } } }, "overrides": [] }, - "pluginVersion": "10.2.0" - }, - { - "datasource": { - "uid": "prometheus", - "type": "prometheus" - }, - "type": "heatmap", - "title": "Request Generation Length", - "description": "Heatmap of request generation length", "gridPos": { - "x": 12, - "y": 24, + "h": 8, "w": 12, - "h": 8 + "x": 12, + "y": 24 }, "id": 13, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "refId": "A", - "expr": "sum by(le) (increase(vllm:request_generation_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))", - "range": true, - "instant": false, - "editorMode": "builder", - "legendFormat": "{{le}}", - "useBackend": false, - "disableTextWrap": false, - "fullMetaSearch": false, - "includeNullMetadata": true, - "format": "heatmap" - } - ], "options": { "calculate": false, - "yAxis": { - "axisPlacement": "left", - "reverse": false, - "unit": "none", - "axisLabel": "Generation Length" - }, - "rowsFrame": { - "layout": "auto", - "value": "Request count" + "cellGap": 1, + "cellValues": { + "unit": "none" }, "color": { - "mode": "scheme", + "exponent": 0.5, "fill": "dark-orange", + "min": 0, + "mode": "scheme", + "reverse": false, "scale": "exponential", - "exponent": 0.5, "scheme": "Spectral", - "steps": 64, - "reverse": false, - "min": 0 + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" }, - "cellGap": 1, "filterValues": { "le": 1e-9 }, - "tooltip": { - "show": true, - "yHistogram": true - }, "legend": { "show": true }, - "exemplars": { - "color": "rgba(255,0,255,0.7)" + "rowsFrame": { + "layout": "auto", + "value": "Request count" }, - "cellValues": { + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": true + }, + "yAxis": { + "axisLabel": "Generation Length", + "axisPlacement": "left", + "reverse": false, "unit": "none" } }, - "fieldConfig": { - "defaults": { - "custom": { - "scaleDistribution": { - "type": "linear" - }, - "hideFrom": { - "tooltip": false, - "viz": false, - "legend": false - } - } - }, - "overrides": [] - }, - "pluginVersion": "10.2.0" + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(le) (increase(vllm:request_generation_tokens_bucket{model_name=\"$model_name\"}[$__rate_interval]))", + "format": "heatmap", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{le}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Request Generation Length", + "type": "heatmap" }, { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, + "description": "Number of finished requests by their finish reason: either an EOS token was generated or the max sequence length was reached.", "fieldConfig": { "defaults": { + "color": { + "mode": "palette-classic" + }, "custom": { - "drawStyle": "line", - "lineInterpolation": "linear", + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", "barAlignment": 0, - "lineWidth": 1, + "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", - "spanNulls": false, + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, "insertNulls": false, - "showPoints": "auto", + "lineInterpolation": "linear", + "lineWidth": 1, "pointSize": 5, - "stacking": { - "mode": "none", - "group": "A" - }, - "axisPlacement": "auto", - "axisLabel": "", - "axisColorMode": "text", - "axisBorderShow": false, "scaleDistribution": { "type": "linear" }, - "axisCenteredZero": false, - "hideFrom": { - "tooltip": false, - "viz": false, - "legend": false + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, - "color": { - "mode": "palette-classic" - }, "mappings": [], "thresholds": { "mode": "absolute", @@ -1123,22 +1175,22 @@ }, "id": 11, "options": { - "tooltip": { - "mode": "single", - "sort": "none" - }, "legend": { - "showLegend": true, + "calcs": [], "displayMode": "list", "placement": "bottom", - "calcs": [] + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "disableTextWrap": false, "editorMode": "builder", @@ -1154,25 +1206,19 @@ } ], "title": "Finish Reason", - "description": "Number of finished requests by their finish reason: either an EOS token was generated or the max sequence length was reached.", "type": "timeseries" } ], "refresh": "", - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 39, "tags": [], "templating": { "list": [ { - "current": { - "selected": false, - "text": "vllm", - "value": "vllm" - }, + "current": {}, "datasource": { "type": "prometheus", - "uid": "prometheus" + "uid": "${DS_PROMETHEUS}" }, "definition": "label_values(model_name)", "hide": 0, @@ -1201,6 +1247,6 @@ "timezone": "", "title": "vLLM", "uid": "b281712d-8bff-41ef-9f3f-71ad43c05e9b", - "version": 2, + "version": 1, "weekStart": "" }