Custom metric API example (#19)
* Example metric may be to track counts of each input type for identity backend

* Initial metrics example counting instances of each input type

* clang

* Cleanup metrics on model state destruction, add ifdef guards for TRITON_ENABLE_METRICS

* input_byte_size counter instead of dtype counter

* Use BackendState to share metric family across all models in identity backend

* Remove temp define

* Tweak example metric family description

* [FIXME] Add else showing TRITON_ENABLE_METRICS is not set in backend

* Tweak CMake build var descriptions

* Set build arg in compile definition, remove debug message

* Update copyright

* Add custom metric example description to README

* Review feedback - init metricfamily in constructor, remove testing comments

* Check nullptr on delete and clang-format
rmccorm4 committed Apr 15, 2022
1 parent 53ef31b commit f881381
Showing 3 changed files with 129 additions and 10 deletions.
CMakeLists.txt: 10 changes (9 additions, 1 deletion)
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -39,6 +39,7 @@ project(tritonidentitybackend LANGUAGES C CXX)
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend" OFF)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
+option(TRITON_ENABLE_METRICS "Include metrics support in backend" ON)

set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
@@ -104,6 +105,13 @@ target_compile_options(
$<$<CXX_COMPILER_ID:MSVC>:/Wall /D_WIN32_WINNT=0x0A00 /EHsc>
)

+if(${TRITON_ENABLE_METRICS})
+  target_compile_definitions(
+    triton-identity-backend
+    PRIVATE TRITON_ENABLE_METRICS=1
+  )
+endif() # TRITON_ENABLE_METRICS

target_link_libraries(
triton-identity-backend
PRIVATE
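The option above only sets a plain compile definition; everything else in the backend is gated behind `#ifdef TRITON_ENABLE_METRICS` guards, visible in the identity.cc diff below. A minimal standalone sketch of that guard pattern, not part of this commit (the file name guard.cc is hypothetical):

```cpp
// guard.cc (hypothetical): demonstrates the compile-time gate that
// TRITON_ENABLE_METRICS=1, set by the CMake block above, switches on.
#include <iostream>

int main()
{
#ifdef TRITON_ENABLE_METRICS
  std::cout << "metrics support compiled in\n";
#else
  std::cout << "metrics support not compiled in\n";
#endif  // TRITON_ENABLE_METRICS
  return 0;
}
```

Building with `g++ -DTRITON_ENABLE_METRICS=1 guard.cc` takes the first branch; omitting the flag takes the second, which is how the backend drops all metrics code when the option is OFF.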
README.md: 19 changes (19 additions, 0 deletions)
@@ -70,3 +70,22 @@ the following additional cmake flags:
-DTRITON_CORE_REPO_TAG=r21.10
-DTRITON_COMMON_REPO_TAG=r21.10
```

+## Custom Metric Example

+When `TRITON_ENABLE_METRICS` is enabled, this backend implements an example
+of registering a custom metric to Triton's existing metrics endpoint via the
+[Metrics API](https://github.com/triton-inference-server/server/blob/main/docs/metrics.md#custom-metrics).

+This metric will track the cumulative `input_byte_size` of all requests
+to this backend per-model. Here's an example output of the custom metric
+from Triton's metrics endpoint after a few requests to each model:

+```
+# HELP input_byte_size_counter Cumulative input byte size of all requests received by the model
+# TYPE input_byte_size_counter counter
+input_byte_size_counter{model="identity_uint32",version="1"} 64.000000
+input_byte_size_counter{model="identity_fp32",version="1"} 32.000000
+```

+This example can be referenced to implement custom metrics for various use cases.
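The diff to src/identity.cc below spreads the implementation across backend state, model state, and the execute path. For orientation, here is the full Metrics API lifecycle it uses, collapsed into one hypothetical function; `MetricLifecycleSketch` is not from the commit, `RETURN_IF_ERROR` is assumed to come from the Triton backend common utilities, and the code would run inside a backend linked against the tritonserver library rather than as a standalone program:

```cpp
#include <cstdint>
#include <string>
#include <vector>

#include "triton/backend/backend_common.h"  // RETURN_IF_ERROR (assumed)
#include "triton/core/tritonserver.h"

// Create a counter family, attach one labeled metric, bump it, and tear
// everything down again. In the real backend the family lives in the
// backend state and each metric lives in a per-model state.
TRITONSERVER_Error*
MetricLifecycleSketch(const std::string& model_name, uint64_t input_byte_size)
{
  TRITONSERVER_MetricFamily* family = nullptr;
  RETURN_IF_ERROR(TRITONSERVER_MetricFamilyNew(
      &family, TRITONSERVER_METRIC_KIND_COUNTER, "input_byte_size_counter",
      "Cumulative input byte size of all requests received by the model"));

  // Labels distinguish the per-model metrics within the shared family.
  std::vector<const TRITONSERVER_Parameter*> labels;
  labels.emplace_back(TRITONSERVER_ParameterNew(
      "model", TRITONSERVER_PARAMETER_STRING, model_name.c_str()));
  TRITONSERVER_Metric* metric = nullptr;
  RETURN_IF_ERROR(
      TRITONSERVER_MetricNew(&metric, family, labels.data(), labels.size()));
  // NOTE: cleanup of the label parameters is omitted here; see the note
  // after InitMetrics in the diff below.

  // Per-request update; Triton exposes the running total on scrape.
  RETURN_IF_ERROR(TRITONSERVER_MetricIncrement(metric, input_byte_size));

  // Metrics must be deleted before the family they belong to.
  RETURN_IF_ERROR(TRITONSERVER_MetricDelete(metric));
  RETURN_IF_ERROR(TRITONSERVER_MetricFamilyDelete(family));
  return nullptr;  // success
}
```

The values in the sample output above are consistent with, for example, two 32-byte requests to `identity_uint32` and one to `identity_fp32`; the exact request mix is not recorded in the commit. The counters are scraped from Triton's metrics endpoint, by default served on port 8002 at `/metrics`.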
src/identity.cc: 110 changes (101 additions, 9 deletions)
Expand Up @@ -67,6 +67,32 @@ namespace triton { namespace backend { namespace identity {
} \
} while (false)

+// Custom object to store global state for this backend
+struct IdentityBackendState {
+  TRITONSERVER_MetricFamily* metric_family_ = nullptr;
+  std::string message_ = "backend state";

+  explicit IdentityBackendState()
+  {
+#ifdef TRITON_ENABLE_METRICS
+    // Create metric family
+    THROW_IF_BACKEND_MODEL_ERROR(TRITONSERVER_MetricFamilyNew(
+        &metric_family_, TRITONSERVER_METRIC_KIND_COUNTER,
+        "input_byte_size_counter",
+        "Cumulative input byte size of all requests received by the model"));
+#endif // TRITON_ENABLE_METRICS
+  }

+  ~IdentityBackendState()
+  {
+#ifdef TRITON_ENABLE_METRICS
+    if (metric_family_ != nullptr) {
+      TRITONSERVER_MetricFamilyDelete(metric_family_);
+    }
+#endif // TRITON_ENABLE_METRICS
+  }
+};

//
// ModelState
//
@@ -77,7 +103,7 @@ class ModelState : public BackendModel {
public:
static TRITONSERVER_Error* Create(
TRITONBACKEND_Model* triton_model, ModelState** state);
-  virtual ~ModelState() = default;
+  ~ModelState();

// Get execution delay and delay multiplier
uint64_t ExecDelay() const { return execute_delay_ms_; }
@@ -99,6 +125,15 @@
// This function is used for testing.
TRITONSERVER_Error* CreationDelay();

+#ifdef TRITON_ENABLE_METRICS
+  // Setup metrics for this backend.
+  TRITONSERVER_Error* InitMetrics(
+      TRITONSERVER_MetricFamily* family, std::string model_name,
+      uint64_t model_version);
+  // Update metrics for this backend.
+  TRITONSERVER_Error* UpdateMetrics(uint64_t input_byte_size);
+#endif // TRITON_ENABLE_METRICS

private:
ModelState(TRITONBACKEND_Model* triton_model);

@@ -111,6 +146,11 @@ class ModelState : public BackendModel {
// in inference while the output is requested
std::map<int, std::tuple<TRITONSERVER_DataType, std::vector<int64_t>>>
optional_inputs_;

+#ifdef TRITON_ENABLE_METRICS
+  // Custom metrics associated with this model
+  TRITONSERVER_Metric* input_byte_size_counter_ = nullptr;
+#endif // TRITON_ENABLE_METRICS
};

TRITONSERVER_Error*
@@ -137,6 +177,42 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
{
}

+ModelState::~ModelState()
+{
+#ifdef TRITON_ENABLE_METRICS
+  if (input_byte_size_counter_ != nullptr) {
+    TRITONSERVER_MetricDelete(input_byte_size_counter_);
+  }
+#endif // TRITON_ENABLE_METRICS
+}

+#ifdef TRITON_ENABLE_METRICS
+TRITONSERVER_Error*
+ModelState::InitMetrics(
+    TRITONSERVER_MetricFamily* family, std::string model_name,
+    uint64_t model_version)
+{
+  // Create labels for model/version pair to break down backend metrics
+  // per-model
+  std::vector<const TRITONSERVER_Parameter*> labels;
+  labels.emplace_back(TRITONSERVER_ParameterNew(
+      "model", TRITONSERVER_PARAMETER_STRING, model_name.c_str()));
+  labels.emplace_back(TRITONSERVER_ParameterNew(
+      "version", TRITONSERVER_PARAMETER_STRING,
+      std::to_string(model_version).c_str()));
+  RETURN_IF_ERROR(TRITONSERVER_MetricNew(
+      &input_byte_size_counter_, family, labels.data(), labels.size()));
+  return nullptr; // success
+}

+TRITONSERVER_Error*
+ModelState::UpdateMetrics(uint64_t input_byte_size)
+{
+  RETURN_IF_ERROR(
+      TRITONSERVER_MetricIncrement(input_byte_size_counter_, input_byte_size));
+  return nullptr; // success
+}
+#endif // TRITON_ENABLE_METRICS
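One loose end in `InitMetrics` above: the label `TRITONSERVER_Parameter` handles are only needed while the metric is being created. Assuming `TRITONSERVER_ParameterDelete` (declared alongside `TRITONSERVER_ParameterNew` in tritonserver.h) and assuming the metric retains its own copy of the label data, a sketch of the cleanup that could follow a successful `TRITONSERVER_MetricNew`, not part of this commit:

```cpp
// Hypothetical cleanup once TRITONSERVER_MetricNew has succeeded: the label
// parameter handles are no longer needed by the backend itself.
for (const auto* label : labels) {
  TRITONSERVER_ParameterDelete(const_cast<TRITONSERVER_Parameter*>(label));
}
```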

TRITONSERVER_Error*
ModelState::CreationDelay()
{
@@ -422,9 +498,9 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
(std::string("backend configuration:\n") + buffer).c_str());

// If we have any global backend state we create and set it here. We
-  // don't need anything for this backend but for demonstration
-  // purposes we just create something...
-  std::string* state = new std::string("backend state");
+  // make use of the global backend state here to track a custom metric across
+  // all models using this backend if metrics are enabled.
+  IdentityBackendState* state = new IdentityBackendState();
RETURN_IF_ERROR(
TRITONBACKEND_BackendSetState(backend, reinterpret_cast<void*>(state)));

Expand All @@ -439,11 +515,12 @@ TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vstate));
-  std::string* state = reinterpret_cast<std::string*>(vstate);
+  IdentityBackendState* state = reinterpret_cast<IdentityBackendState*>(vstate);

LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
-      (std::string("TRITONBACKEND_Finalize: state is '") + *state + "'")
+      (std::string("TRITONBACKEND_Finalize: state is '") + state->message_ +
+       "'")
.c_str());

delete state;
@@ -484,17 +561,21 @@ TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
(std::string("Repository location: ") + clocation).c_str());

// The model can access the backend as well... here we can access
-  // the backend global state.
+  // the backend global state. We will use it to add per-model metrics
+  // to the global metric family object stored in the state, if metrics
+  // are enabled.
TRITONBACKEND_Backend* backend;
RETURN_IF_ERROR(TRITONBACKEND_ModelBackend(model, &backend));

void* vbackendstate;
RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vbackendstate));
-  std::string* backend_state = reinterpret_cast<std::string*>(vbackendstate);
+  IdentityBackendState* backend_state =
+      reinterpret_cast<IdentityBackendState*>(vbackendstate);

LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
-      (std::string("backend state is '") + *backend_state + "'").c_str());
+      (std::string("backend state is '") + backend_state->message_ + "'")
+          .c_str());

// Create a ModelState object and associate it with the TRITONBACKEND_Model.
ModelState* model_state;
@@ -511,6 +592,12 @@ TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
// For testing.. Block the thread for certain time period before returning.
RETURN_IF_ERROR(model_state->CreationDelay());

+#ifdef TRITON_ENABLE_METRICS
+  // Create custom metric per model with metric family shared across backend
+  RETURN_IF_ERROR(
+      model_state->InitMetrics(backend_state->metric_family_, name, version));
+#endif // TRITON_ENABLE_METRICS

return nullptr; // success
}

@@ -947,6 +1034,11 @@ TRITONBACKEND_ModelInstanceExecute(
TRITONSERVER_LOG_VERBOSE,
(std::string("\trequested_output ") + output_name).c_str());

+#ifdef TRITON_ENABLE_METRICS
+    GUARDED_RESPOND_IF_ERROR(
+        responses, r, model_state->UpdateMetrics(input_byte_size));
+#endif // TRITON_ENABLE_METRICS

// This backend simply copies the output tensors from the corresponding
// input tensors. The input tensors contents are available in one or more
// contiguous buffers. To do the copy we:
