feat: Instrument latency without streaming duration (#290)

dosuken123 · trallnag · web-flow · commit 4530ba4eade2 · 2024-03-11T23:59:14.000+01:00
* Track response start duration

This commit adds a feature to track the latency excluding
streaming duration.

* ci(pre-commit): Apply hook auto fixes

* fix: Add default to Info constructor and adjust test

* fix: Make mypy happy

* docs: Add parameter to docstring

* fix: Add start time stuff to body handler

* test: Add test

* feat: Add duration stuff to default and add tests

* docs: Add entry to changelog

---------

Co-authored-by: Tim Schwenke &lt;tim@trallnag.com&gt;
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -19,6 +19,24 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0).
   and implementing it in
   [#288](https://github.com/trallnag/prometheus-fastapi-instrumentator/pull/288).
 
+- **Middleware also records duration without streaming** in addition to the
+  already existing total latency (i.e. the time consumed for streaming is not
+  included in the duration value). The differentiation can be valuable as it
+  shows the time to first byte.
+
+  This mode is opt-in and can be enabled / used in several ways: The
+  `Instrumentator()` constructor, the `metrics.default()` closure, and the
+  `metrics.latency()` closure now come with the flag
+  `should_exclude_streaming_duration`. The attribute
+  `modified_duration_without_streaming` has been added to the `metrics.Info`
+  class. Instances of `metrics.Info` are passed to instrumentation functions,
+  where the added value can be used to set metrics.
+
+  Thanks to [@dosuken123](https://github.com/dosuken123) for proposing this in
+  [#291](https://github.com/trallnag/prometheus-fastapi-instrumentator/issues/291)
+  and implementing it in
+  [#290](https://github.com/trallnag/prometheus-fastapi-instrumentator/pull/290).
+
 - Relaxed type of `get_route_name` argument to `HTTPConnection`. This allows
   developers to use the `get_route_name` function for getting the name of
   websocket routes as well. Thanks to [@pajowu](https://github.com/pajowu) for
diff --git a/src/prometheus_fastapi_instrumentator/instrumentation.py b/src/prometheus_fastapi_instrumentator/instrumentation.py
@@ -33,6 +33,7 @@ def __init__(
         should_round_latency_decimals: bool = False,
         should_respect_env_var: bool = False,
         should_instrument_requests_inprogress: bool = False,
+        should_exclude_streaming_duration: bool = False,
         excluded_handlers: List[str] = [],
         body_handlers: List[str] = [],
         round_latency_decimals: int = 4,
@@ -69,6 +70,10 @@ def __init__(
                 the inprogress requests. See also the related args starting
                 with `inprogress`. Defaults to `False`.
 
+            should_exclude_streaming_duration: Should the streaming duration be
+                excluded? Only relevant if default metrics are used. Defaults
+                to `False`.
+
             excluded_handlers (List[str]): List of strings that will be compiled
                 to regex patterns. All matches will be skipped and not
                 instrumented. Defaults to `[]`.
@@ -112,6 +117,7 @@ def __init__(
         self.should_round_latency_decimals = should_round_latency_decimals
         self.should_respect_env_var = should_respect_env_var
         self.should_instrument_requests_inprogress = should_instrument_requests_inprogress
+        self.should_exclude_streaming_duration = should_exclude_streaming_duration
 
         self.round_latency_decimals = round_latency_decimals
         self.env_var_name = env_var_name
@@ -205,6 +211,7 @@ def instrument(
             should_round_latency_decimals=self.should_round_latency_decimals,
             should_respect_env_var=self.should_respect_env_var,
             should_instrument_requests_inprogress=self.should_instrument_requests_inprogress,
+            should_exclude_streaming_duration=self.should_exclude_streaming_duration,
             round_latency_decimals=self.round_latency_decimals,
             env_var_name=self.env_var_name,
             inprogress_name=self.inprogress_name,
diff --git a/src/prometheus_fastapi_instrumentator/metrics.py b/src/prometheus_fastapi_instrumentator/metrics.py
@@ -27,6 +27,7 @@ def __init__(
         modified_handler: str,
         modified_status: str,
         modified_duration: float,
+        modified_duration_without_streaming: float = 0.0,
     ):
         """Creates Info object that is used for instrumentation functions.
 
@@ -42,6 +43,8 @@ def __init__(
                 by instrumentator. For example grouping into `2xx`, `3xx` and so on.
             modified_duration (float): Latency representation after processing
                 by instrumentator. For example rounding of decimals. Seconds.
+            modified_duration_without_streaming (float): Latency between request arrival and response starts (i.e. first chunk duration).
+                Excluding the streaming duration. Defaults to 0.
         """
 
         self.request = request
@@ -50,6 +53,7 @@ def __init__(
         self.modified_handler = modified_handler
         self.modified_status = modified_status
         self.modified_duration = modified_duration
+        self.modified_duration_without_streaming = modified_duration_without_streaming
 
 
 def _build_label_attribute_names(
@@ -114,6 +118,7 @@ def latency(
     should_include_handler: bool = True,
     should_include_method: bool = True,
     should_include_status: bool = True,
+    should_exclude_streaming_duration: bool = False,
     buckets: Sequence[Union[float, str]] = Histogram.DEFAULT_BUCKETS,
     registry: CollectorRegistry = REGISTRY,
 ) -> Optional[Callable[[Info], None]]:
@@ -141,6 +146,9 @@ def latency(
         should_include_status: Should the `status` label be part of the
             metric? Defaults to `True`.
 
+        should_exclude_streaming_duration: Should the streaming duration be
+            excluded? Defaults to `False`.
+
         buckets: Buckets for the histogram. Defaults to Prometheus default.
             Defaults to default buckets from Prometheus client library.
 
@@ -184,15 +192,21 @@ def latency(
             )
 
         def instrumentation(info: Info) -> None:
+            duration = info.modified_duration
+            if should_exclude_streaming_duration:
+                duration = info.modified_duration_without_streaming
+            else:
+                duration = info.modified_duration
+
             if label_names:
                 label_values = [
                     getattr(info, attribute_name)
                     for attribute_name in info_attribute_names
                 ]
 
-                METRIC.labels(*label_values).observe(info.modified_duration)
+                METRIC.labels(*label_values).observe(duration)
             else:
-                METRIC.observe(info.modified_duration)
+                METRIC.observe(duration)
 
         return instrumentation
     except ValueError as e:
@@ -569,6 +583,7 @@ def default(
     metric_namespace: str = "",
     metric_subsystem: str = "",
     should_only_respect_2xx_for_highr: bool = False,
+    should_exclude_streaming_duration: bool = False,
     latency_highr_buckets: Sequence[Union[float, str]] = (
         0.01,
         0.025,
@@ -610,7 +625,7 @@ def default(
         content length bytes by handler.
     * `http_request_duration_highr_seconds` (no labels): High number of buckets
         leading to more accurate calculation of percentiles.
-    * `http_request_duration_seconds` (`handler`):
+    * `http_request_duration_seconds` (`handler`, `method`):
         Kepp the bucket count very low. Only put in SLIs.
 
     Args:
@@ -625,6 +640,9 @@ def default(
             requests / responses that have a status code starting with `2`?
             Defaults to `False`.
 
+        should_exclude_streaming_duration: Should the streaming duration be
+            excluded? Defaults to `False`.
+
         latency_highr_buckets (tuple[float], optional): Buckets tuple for high
             res histogram. Can be large because no labels are used. Defaults to
             (0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1, 1.5, 2, 2.5,
@@ -719,6 +737,12 @@ def default(
         )
 
         def instrumentation(info: Info) -> None:
+            duration = info.modified_duration
+            if should_exclude_streaming_duration:
+                duration = info.modified_duration_without_streaming
+            else:
+                duration = info.modified_duration
+
             TOTAL.labels(info.method, info.modified_status, info.modified_handler).inc()
 
             IN_SIZE.labels(info.modified_handler).observe(
@@ -735,11 +759,11 @@ def instrumentation(info: Info) -> None:
             if not should_only_respect_2xx_for_highr or info.modified_status.startswith(
                 "2"
             ):
-                LATENCY_HIGHR.observe(info.modified_duration)
+                LATENCY_HIGHR.observe(duration)
 
             LATENCY_LOWR.labels(
                 handler=info.modified_handler, method=info.method
-            ).observe(info.modified_duration)
+            ).observe(duration)
 
         return instrumentation
 
diff --git a/src/prometheus_fastapi_instrumentator/middleware.py b/src/prometheus_fastapi_instrumentator/middleware.py
@@ -27,6 +27,7 @@ def __init__(
         should_round_latency_decimals: bool = False,
         should_respect_env_var: bool = False,
         should_instrument_requests_inprogress: bool = False,
+        should_exclude_streaming_duration: bool = False,
         excluded_handlers: Sequence[str] = (),
         body_handlers: Sequence[str] = (),
         round_latency_decimals: int = 4,
@@ -89,6 +90,7 @@ def __init__(
                 metric_namespace=metric_namespace,
                 metric_subsystem=metric_subsystem,
                 should_only_respect_2xx_for_highr=should_only_respect_2xx_for_highr,
+                should_exclude_streaming_duration=should_exclude_streaming_duration,
                 latency_highr_buckets=latency_highr_buckets,
                 latency_lowr_buckets=latency_lowr_buckets,
                 registry=self.registry,
@@ -140,15 +142,17 @@ async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
         status_code = 500
         headers = []
         body = b""
+        response_start_time = None
 
         # Message body collected for handlers matching body_handlers patterns.
         if any(pattern.search(handler) for pattern in self.body_handlers):
 
             async def send_wrapper(message: Message) -> None:
                 if message["type"] == "http.response.start":
-                    nonlocal status_code, headers
+                    nonlocal status_code, headers, response_start_time
                     headers = message["headers"]
                     status_code = message["status"]
+                    response_start_time = default_timer()
                 elif message["type"] == "http.response.body" and message["body"]:
                     nonlocal body
                     body += message["body"]
@@ -158,9 +162,10 @@ async def send_wrapper(message: Message) -> None:
 
             async def send_wrapper(message: Message) -> None:
                 if message["type"] == "http.response.start":
-                    nonlocal status_code, headers
+                    nonlocal status_code, headers, response_start_time
                     headers = message["headers"]
                     status_code = message["status"]
+                    response_start_time = default_timer()
                 await send(message)
 
         try:
@@ -175,13 +180,22 @@ async def send_wrapper(message: Message) -> None:
             )
 
             if not is_excluded:
-                duration = max(default_timer() - start_time, 0)
+                duration = max(default_timer() - start_time, 0.0)
+                duration_without_streaming = 0.0
+
+                if response_start_time:
+                    duration_without_streaming = max(
+                        response_start_time - start_time, 0.0
+                    )
 
                 if self.should_instrument_requests_inprogress:
                     inprogress.dec()
 
                 if self.should_round_latency_decimals:
                     duration = round(duration, self.round_latency_decimals)
+                    duration_without_streaming = round(
+                        duration_without_streaming, self.round_latency_decimals
+                    )
 
                 if self.should_group_status_codes:
                     status = status[0] + "xx"
@@ -197,6 +211,7 @@ async def send_wrapper(message: Message) -> None:
                     modified_handler=handler,
                     modified_status=status,
                     modified_duration=duration,
+                    modified_duration_without_streaming=duration_without_streaming,
                 )
 
                 for instrumentation in self.instrumentations:
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
@@ -1,8 +1,8 @@
 from typing import Any, Dict, Optional
 
 import pytest
-from fastapi import FastAPI, HTTPException
-from prometheus_client import REGISTRY
+from fastapi import FastAPI, HTTPException, responses
+from prometheus_client import REGISTRY, Histogram
 from requests import Response as TestClientResponse
 from starlette.testclient import TestClient
 
@@ -106,6 +106,7 @@ def test_existence_of_attributes():
     assert info.modified_duration is None
     assert info.modified_status is None
     assert info.modified_handler is None
+    assert info.modified_duration_without_streaming == 0.0
 
 
 def test_build_label_attribute_names_all_false():
@@ -422,6 +423,47 @@ def test_latency_with_bucket_no_inf():
     )
 
 
+def test_latency_duration_without_streaming():
+    _ = create_app()
+    app = FastAPI()
+    client = TestClient(app)
+
+    @app.get("/")
+    def root():
+        return responses.StreamingResponse(("x" * 1_000 for _ in range(5)))
+
+    METRIC = Histogram(
+        "http_request_duration_with_streaming_seconds",
+        "x",
+    )
+
+    def instrumentation(info: metrics.Info) -> None:
+        METRIC.observe(info.modified_duration)
+
+    Instrumentator().add(
+        metrics.latency(
+            should_include_handler=False,
+            should_include_method=False,
+            should_include_status=False,
+            should_exclude_streaming_duration=True,
+        ),
+        instrumentation,
+    ).instrument(app).expose(app)
+    client = TestClient(app)
+
+    client.get("/")
+
+    _ = get_response(client, "/metrics")
+
+    assert REGISTRY.get_sample_value(
+        "http_request_duration_seconds_sum",
+        {},
+    ) < REGISTRY.get_sample_value(
+        "http_request_duration_with_streaming_seconds_sum",
+        {},
+    )
+
+
 # ------------------------------------------------------------------------------
 # default
 
@@ -521,6 +563,39 @@ def test_default_with_runtime_error():
     )
 
 
+def test_default_duration_without_streaming():
+    _ = create_app()
+    app = FastAPI()
+
+    @app.get("/")
+    def root():
+        return responses.StreamingResponse(("x" * 1_000 for _ in range(5)))
+
+    METRIC = Histogram(
+        "http_request_duration_with_streaming_seconds", "x", labelnames=["handler"]
+    )
+
+    def instrumentation(info: metrics.Info) -> None:
+        METRIC.labels(info.modified_handler).observe(info.modified_duration)
+
+    Instrumentator().add(
+        metrics.default(should_exclude_streaming_duration=True), instrumentation
+    ).instrument(app).expose(app)
+    client = TestClient(app)
+
+    client.get("/")
+
+    _ = get_response(client, "/metrics")
+
+    assert REGISTRY.get_sample_value(
+        "http_request_duration_with_streaming_seconds_sum",
+        {"handler": "/"},
+    ) > REGISTRY.get_sample_value(
+        "http_request_duration_seconds_sum",
+        {"handler": "/", "method": "GET"},
+    )
+
+
 # ------------------------------------------------------------------------------
 # requests
 
diff --git a/tests/test_middleware.py b/tests/test_middleware.py
@@ -163,3 +163,24 @@ def instrumentation(info: metrics.Info) -> None:
     response = client.get("/")
     assert instrumentation_executed
     assert len(response.content) == 5_000_000
+
+
+def test_info_body_duration_without_streaming():
+    app = FastAPI()
+    client = TestClient(app)
+
+    @app.get("/")
+    def root():
+        return responses.StreamingResponse(("x" * 1_000 for _ in range(5)))
+
+    instrumentation_executed = False
+
+    def instrumentation(info: metrics.Info) -> None:
+        nonlocal instrumentation_executed
+        instrumentation_executed = True
+        assert info.modified_duration_without_streaming < info.modified_duration
+
+    Instrumentator(body_handlers=[r".*"]).instrument(app).add(instrumentation)
+
+    client.get("/")
+    assert instrumentation_executed