From efc89cad71082138f5eac4453924e9aa0e44a195 Mon Sep 17 00:00:00 2001
From: Ryan McCormick
Date: Thu, 14 Apr 2022 19:11:33 -0700
Subject: [PATCH] Add Metric API documentation and update L0_backend_identity
 CI test (#4211)

* Add custom metric example to L0_backend_identity test

* Add metrics API documentation

* [FIXME] Try building identity backend with explicit metrics enabled for
  L0_backend_identity test

* Typo

* Undo Dockerfile change

* Review feedback - tweak lifecycle wording, point to identity_backend
  section on custom metric
---
 docs/metrics.md                         | 14 ++++++++++++++
 qa/L0_backend_identity/identity_test.py | 10 +++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/docs/metrics.md b/docs/metrics.md
index 6afd8a6856..6f9a15f918 100644
--- a/docs/metrics.md
+++ b/docs/metrics.md
@@ -127,3 +127,17 @@ Count*. The count metrics are illustrated by the following examples:
   the server. *Request Count* = 2, *Inference Count* = 9, *Execution
   Count* = 1.
 
+## Custom Metrics
+
+Triton exposes a C API that allows users and backends to register custom
+metrics and collect them through the existing Triton metrics endpoint. The
+user takes ownership of the custom metrics created through these APIs and
+must manage their lifetime as described in the API documentation.
+
+The
+[identity_backend](https://github.com/triton-inference-server/identity_backend/blob/main/README.md#custom-metric-example)
+provides a practical example of adding a custom metric to a backend.
+
+Further documentation can be found in the `TRITONSERVER_MetricFamily*` and
+`TRITONSERVER_Metric*` API annotations in
+[tritonserver.h](https://github.com/triton-inference-server/core/blob/main/include/triton/core/tritonserver.h).
diff --git a/qa/L0_backend_identity/identity_test.py b/qa/L0_backend_identity/identity_test.py
index 4980f54cd8..5d5f8e7cf3 100644
--- a/qa/L0_backend_identity/identity_test.py
+++ b/qa/L0_backend_identity/identity_test.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -129,10 +129,12 @@
     success_str = 'nv_inference_request_success{model="identity_uint32",version="1"}'
     infer_count_str = 'nv_inference_count{model="identity_uint32",version="1"}'
     infer_exec_str = 'nv_inference_exec_count{model="identity_uint32",version="1"}'
+    custom_metric_str = 'input_byte_size_counter{model="identity_uint32",version="1"}'
 
     success_val = None
     infer_count_val = None
     infer_exec_val = None
+    custom_metric_val = None
     for line in metrics.text.splitlines():
         if line.startswith(success_str):
             success_val = float(line[len(success_str):])
@@ -140,6 +142,8 @@
             infer_count_val = float(line[len(infer_count_str):])
         if line.startswith(infer_exec_str):
             infer_exec_val = float(line[len(infer_exec_str):])
+        if line.startswith(custom_metric_str):
+            custom_metric_val = float(line[len(custom_metric_str):])
 
     if success_val != 4:
         print("error: expected metric {} == 4, got {}".format(
             success_str, success_val))
         sys.exit(1)
@@ -153,6 +157,10 @@
         print("error: expected metric {} == 1, got {}".format(
             infer_exec_str, infer_exec_val))
         sys.exit(1)
+    if custom_metric_val != 64:
+        print("error: expected metric {} == 64, got {}".format(
+            custom_metric_str, custom_metric_val))
+        sys.exit(1)
 
     # Reuse a single client for all sync tests
     with client_util.InferenceServerClient(FLAGS.url,
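
For readers without the linked identity_backend example at hand, here is a minimal sketch of how the custom-metric C API described in the new docs/metrics.md section fits together: create a metric family, attach a labeled metric, update it, and clean up. It is illustrative only, not the identity backend's actual implementation; the metric and label names mirror the `input_byte_size_counter` metric asserted by identity_test.py, the `CHECK_ERR` helper is invented for brevity, and the exact signatures should be confirmed against the `TRITONSERVER_MetricFamily*` / `TRITONSERVER_Metric*` annotations in tritonserver.h. In practice this code would run inside a backend, so the metric is reported on the server's existing metrics endpoint.

```c
/* Illustrative sketch only -- not the identity backend's actual code.
 * Signatures are assumed from the TRITONSERVER_MetricFamily* /
 * TRITONSERVER_Metric* annotations in triton/core/tritonserver.h and
 * should be verified there. */
#include <stdio.h>
#include <stdlib.h>

#include "triton/core/tritonserver.h"

/* Invented helper: abort on any TRITONSERVER_Error. A real backend
 * would propagate the error instead of exiting. */
#define CHECK_ERR(X)                                                    \
  do {                                                                  \
    TRITONSERVER_Error* err__ = (X);                                    \
    if (err__ != NULL) {                                                \
      fprintf(stderr, "error: %s\n", TRITONSERVER_ErrorMessage(err__)); \
      TRITONSERVER_ErrorDelete(err__);                                  \
      exit(1);                                                          \
    }                                                                   \
  } while (0)

int
main()
{
  /* Register a counter family; the name matches the
   * input_byte_size_counter metric that identity_test.py checks. */
  TRITONSERVER_MetricFamily* family = NULL;
  CHECK_ERR(TRITONSERVER_MetricFamilyNew(
      &family, TRITONSERVER_METRIC_KIND_COUNTER, "input_byte_size_counter",
      "Cumulative byte size of all input tensors received by the model"));

  /* Label the metric per model/version so it is exposed as
   * input_byte_size_counter{model="identity_uint32",version="1"}. */
  const TRITONSERVER_Parameter* labels[2];
  labels[0] = TRITONSERVER_ParameterNew(
      "model", TRITONSERVER_PARAMETER_STRING, "identity_uint32");
  labels[1] = TRITONSERVER_ParameterNew(
      "version", TRITONSERVER_PARAMETER_STRING, "1");

  TRITONSERVER_Metric* metric = NULL;
  CHECK_ERR(TRITONSERVER_MetricNew(&metric, family, labels, 2));

  /* Assumption: labels are captured at metric creation, so the
   * parameter objects can be released here. */
  TRITONSERVER_ParameterDelete((TRITONSERVER_Parameter*)labels[0]);
  TRITONSERVER_ParameterDelete((TRITONSERVER_Parameter*)labels[1]);

  /* A backend would call this once per request with the request's
   * input byte size; 64 matches the value the updated test expects. */
  CHECK_ERR(TRITONSERVER_MetricIncrement(metric, 64.0));

  /* Per the lifetime note in the new docs section: the creator owns
   * these objects and must delete metrics before their family. */
  CHECK_ERR(TRITONSERVER_MetricDelete(metric));
  CHECK_ERR(TRITONSERVER_MetricFamilyDelete(family));
  return 0;
}
```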
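
As context for the parsing logic the test update relies on: Triton's metrics endpoint (by default on port 8002) serves Prometheus-style text with one `name{labels} value` pair per line, which is why the test can locate each metric with `line.startswith(...)` and read the value with `float(line[len(prefix):])`. An illustrative scrape consistent with the assertions visible in this patch (success count 4, execution count 1, custom counter 64; other lines, including the `nv_inference_count` value, are elided here) might contain:

```
nv_inference_request_success{model="identity_uint32",version="1"} 4
nv_inference_exec_count{model="identity_uint32",version="1"} 1
input_byte_size_counter{model="identity_uint32",version="1"} 64
```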