Add Metric API documentation and update L0_backend_identity CI test #4211

Merged · 8 commits · Apr 15, 2022
14 changes: 14 additions & 0 deletions docs/metrics.md
@@ -127,3 +127,17 @@ Count*. The count metrics are illustrated by the following examples:
the server. *Request Count* = 2, *Inference Count* = 9, *Execution
Count* = 1.

## Custom Metrics

Triton exposes a C API that allows users and backends to register custom
metrics and report them through the existing Triton metrics endpoint. Users
take ownership of any custom metrics created through these APIs and must
manage their lifetime as described in the API documentation.

The
[identity_backend](https://github.com/triton-inference-server/identity_backend/blob/main/README.md#custom-metric-example)
provides a practical example of adding a custom metric to a backend.

Further documentation can be found in the `TRITONSERVER_MetricFamily*` and
`TRITONSERVER_Metric*` API annotations in
[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).
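
The following is a minimal sketch of how these calls compose, assuming the
signatures annotated in tritonserver.h. The metric name and labels mirror the
L0_backend_identity test below; the function name, description string, and
value are illustrative, and error handling is omitted.

```c
// A minimal sketch of creating, updating, and releasing a custom counter.
// Error handling is omitted; each call returns a TRITONSERVER_Error*.
#include "triton/core/tritonserver.h"

void
CustomMetricExample(void)
{
  // A family defines the name, kind, and description shared by its metrics.
  TRITONSERVER_MetricFamily* family = NULL;
  TRITONSERVER_MetricFamilyNew(
      &family, TRITONSERVER_METRIC_KIND_COUNTER, "input_byte_size_counter",
      "cumulative input byte size of requests received by the model");

  // Each metric in the family is distinguished by its label values.
  TRITONSERVER_Parameter* model_label = TRITONSERVER_ParameterNew(
      "model", TRITONSERVER_PARAMETER_STRING, "identity_uint32");
  TRITONSERVER_Parameter* version_label = TRITONSERVER_ParameterNew(
      "version", TRITONSERVER_PARAMETER_STRING, "1");
  const TRITONSERVER_Parameter* labels[] = {model_label, version_label};
  TRITONSERVER_Metric* metric = NULL;
  TRITONSERVER_MetricNew(&metric, family, labels, 2);
  TRITONSERVER_ParameterDelete(model_label);
  TRITONSERVER_ParameterDelete(version_label);

  // Updates are reflected in the Prometheus text served at /metrics.
  TRITONSERVER_MetricIncrement(metric, 64.0 /* illustrative value */);

  // The creator owns these objects: delete each metric before its family.
  TRITONSERVER_MetricDelete(metric);
  TRITONSERVER_MetricFamilyDelete(family);
}
```

With those labels, the metrics endpoint would then report a line of the form
`input_byte_size_counter{model="identity_uint32",version="1"} 64`, which is
what the updated L0_backend_identity test below looks for.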
10 changes: 9 additions & 1 deletion qa/L0_backend_identity/identity_test.py
@@ -1,6 +1,6 @@
#!/usr/bin/python

-# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -129,17 +129,21 @@
success_str = 'nv_inference_request_success{model="identity_uint32",version="1"}'
infer_count_str = 'nv_inference_count{model="identity_uint32",version="1"}'
infer_exec_str = 'nv_inference_exec_count{model="identity_uint32",version="1"}'
custom_metric_str = 'input_byte_size_counter{model="identity_uint32",version="1"}'

success_val = None
infer_count_val = None
infer_exec_val = None
custom_metric_val = None
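# The endpoint replies in Prometheus text format, one metric per line
# ("<name>{<labels>} <value>"), so each value is parsed from whatever
# follows the matched name+label prefix.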
for line in metrics.text.splitlines():
if line.startswith(success_str):
success_val = float(line[len(success_str):])
if line.startswith(infer_count_str):
infer_count_val = float(line[len(infer_count_str):])
if line.startswith(infer_exec_str):
infer_exec_val = float(line[len(infer_exec_str):])
if line.startswith(custom_metric_str):
custom_metric_val = float(line[len(custom_metric_str):])

if success_val != 4:
print("error: expected metric {} == 4, got {}".format(
@@ -153,6 +157,10 @@
print("error: expected metric {} == 1, got {}".format(
infer_exec_str, infer_exec_val))
sys.exit(1)
if custom_metric_val != 64:
print("error: expected metric {} == 64, got {}".format(
custom_metric_str, custom_metric_val))
sys.exit(1)

# Reuse a single client for all sync tests
with client_util.InferenceServerClient(FLAGS.url,