Custom metric API example #19

Merged · 16 commits · Apr 15, 2022
Changes from all commits
10 changes: 9 additions & 1 deletion CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -39,6 +39,7 @@ project(tritonidentitybackend LANGUAGES C CXX)
 #
 option(TRITON_ENABLE_GPU "Enable GPU support in backend" OFF)
 option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
+option(TRITON_ENABLE_METRICS "Include metrics support in backend" ON)
 
 set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
 set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
@@ -104,6 +105,13 @@ target_compile_options(
   $<$<CXX_COMPILER_ID:MSVC>:/Wall /D_WIN32_WINNT=0x0A00 /EHsc>
 )
 
+if(${TRITON_ENABLE_METRICS})
+  target_compile_definitions(
+    triton-identity-backend
+    PRIVATE TRITON_ENABLE_METRICS=1
+  )
+endif() # TRITON_ENABLE_METRICS
+
 target_link_libraries(
   triton-identity-backend
   PRIVATE
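Since the new `TRITON_ENABLE_METRICS` option defaults to `ON`, metrics support is compiled in unless explicitly disabled. As a sketch of a typical configure step exercising the option (the out-of-source build directory is an assumption, not part of this change):

```
mkdir build && cd build
cmake -DTRITON_ENABLE_METRICS=ON \
      -DTRITON_COMMON_REPO_TAG=main \
      -DTRITON_CORE_REPO_TAG=main ..
```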
19 changes: 19 additions & 0 deletions README.md
@@ -70,3 +70,22 @@ the following additional cmake flags:
 -DTRITON_CORE_REPO_TAG=r21.10
 -DTRITON_COMMON_REPO_TAG=r21.10
 ```
+
+## Custom Metric Example
+
+When `TRITON_ENABLE_METRICS` is enabled, this backend demonstrates
+registering a custom metric with Triton's existing metrics endpoint via the
+[Metrics API](https://github.com/triton-inference-server/server/blob/main/docs/metrics.md#custom-metrics).
+
+The metric tracks the cumulative `input_byte_size` of all requests to this
+backend, broken down per model. Here is example output of the custom metric
+from Triton's metrics endpoint after a few requests to each model:
+
+```
+# HELP input_byte_size_counter Cumulative input byte size of all requests received by the model
+# TYPE input_byte_size_counter counter
+input_byte_size_counter{model="identity_uint32",version="1"} 64.000000
+input_byte_size_counter{model="identity_fp32",version="1"} 32.000000
+```
+
+This example can serve as a reference for implementing custom metrics for other use cases.
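To make the sample numbers concrete: the counter accumulates raw request bytes, so the `64.000000` for `identity_uint32` is consistent with, for example, sixteen requests each carrying a single 4-byte UINT32 element (16 × 4 = 64). Assuming Triton's default metrics port of 8002, the metric can be scraped directly:

```
curl -s localhost:8002/metrics | grep input_byte_size_counter
```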
110 changes: 101 additions & 9 deletions src/identity.cc
@@ -67,6 +67,32 @@ namespace triton { namespace backend { namespace identity {
 } \
 } while (false)
 
+// Custom object to store global state for this backend
+struct IdentityBackendState {
+  TRITONSERVER_MetricFamily* metric_family_ = nullptr;
+  std::string message_ = "backend state";
+
+  explicit IdentityBackendState()
+  {
+#ifdef TRITON_ENABLE_METRICS
+    // Create metric family
+    THROW_IF_BACKEND_MODEL_ERROR(TRITONSERVER_MetricFamilyNew(
+        &metric_family_, TRITONSERVER_METRIC_KIND_COUNTER,
+        "input_byte_size_counter",
+        "Cumulative input byte size of all requests received by the model"));
+#endif // TRITON_ENABLE_METRICS
+  }
+
+  ~IdentityBackendState()
+  {
+#ifdef TRITON_ENABLE_METRICS
+    if (metric_family_ != nullptr) {
+      TRITONSERVER_MetricFamilyDelete(metric_family_);
+    }
+#endif // TRITON_ENABLE_METRICS
+  }
+};
+
 //
 // ModelState
 //
@@ -77,7 +103,7 @@ class ModelState : public BackendModel {
  public:
   static TRITONSERVER_Error* Create(
       TRITONBACKEND_Model* triton_model, ModelState** state);
-  virtual ~ModelState() = default;
+  ~ModelState();
 
   // Get execution delay and delay multiplier
   uint64_t ExecDelay() const { return execute_delay_ms_; }
@@ -99,6 +125,15 @@ class ModelState : public BackendModel {
   // This function is used for testing.
   TRITONSERVER_Error* CreationDelay();
 
+#ifdef TRITON_ENABLE_METRICS
+  // Setup metrics for this backend.
+  TRITONSERVER_Error* InitMetrics(
+      TRITONSERVER_MetricFamily* family, std::string model_name,
+      uint64_t model_version);
+  // Update metrics for this backend.
+  TRITONSERVER_Error* UpdateMetrics(uint64_t input_byte_size);
+#endif // TRITON_ENABLE_METRICS
+
  private:
   ModelState(TRITONBACKEND_Model* triton_model);
 
@@ -111,6 +146,11 @@ class ModelState : public BackendModel {
   // in inference while the output is requested
   std::map<int, std::tuple<TRITONSERVER_DataType, std::vector<int64_t>>>
       optional_inputs_;
+
+#ifdef TRITON_ENABLE_METRICS
+  // Custom metrics associated with this model
+  TRITONSERVER_Metric* input_byte_size_counter_ = nullptr;
+#endif // TRITON_ENABLE_METRICS
 };
 
 TRITONSERVER_Error*
@@ -137,6 +177,42 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
 {
 }
 
+ModelState::~ModelState()
+{
+#ifdef TRITON_ENABLE_METRICS
+  if (input_byte_size_counter_ != nullptr) {
+    TRITONSERVER_MetricDelete(input_byte_size_counter_);
+  }
+#endif // TRITON_ENABLE_METRICS
+}
+
+#ifdef TRITON_ENABLE_METRICS
+TRITONSERVER_Error*
+ModelState::InitMetrics(
+    TRITONSERVER_MetricFamily* family, std::string model_name,
+    uint64_t model_version)
+{
+  // Create labels for model/version pair to break down backend metrics per model
+  std::vector<const TRITONSERVER_Parameter*> labels;
+  labels.emplace_back(TRITONSERVER_ParameterNew(
+      "model", TRITONSERVER_PARAMETER_STRING, model_name.c_str()));
+  labels.emplace_back(TRITONSERVER_ParameterNew(
+      "version", TRITONSERVER_PARAMETER_STRING,
+      std::to_string(model_version).c_str()));
+  RETURN_IF_ERROR(TRITONSERVER_MetricNew(
+      &input_byte_size_counter_, family, labels.data(), labels.size()));
+  return nullptr; // success
+}
+
+TRITONSERVER_Error*
+ModelState::UpdateMetrics(uint64_t input_byte_size)
+{
+  RETURN_IF_ERROR(
+      TRITONSERVER_MetricIncrement(input_byte_size_counter_, input_byte_size));
+  return nullptr; // success
+}
+#endif // TRITON_ENABLE_METRICS
+
 TRITONSERVER_Error*
 ModelState::CreationDelay()
 {
@@ -422,9 +498,9 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
(std::string("backend configuration:\n") + buffer).c_str());

// If we have any global backend state we create and set it here. We
// don't need anything for this backend but for demonstration
// purposes we just create something...
std::string* state = new std::string("backend state");
// make use of the global backend state here to track a custom metric across
// all models using this backend if metrics are enabled.
IdentityBackendState* state = new IdentityBackendState();
RETURN_IF_ERROR(
TRITONBACKEND_BackendSetState(backend, reinterpret_cast<void*>(state)));

@@ -439,11 +515,12 @@ TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend)
 {
   void* vstate;
   RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vstate));
-  std::string* state = reinterpret_cast<std::string*>(vstate);
+  IdentityBackendState* state = reinterpret_cast<IdentityBackendState*>(vstate);
 
   LOG_MESSAGE(
       TRITONSERVER_LOG_INFO,
-      (std::string("TRITONBACKEND_Finalize: state is '") + *state + "'")
+      (std::string("TRITONBACKEND_Finalize: state is '") + state->message_ +
+       "'")
           .c_str());
 
   delete state;
@@ -484,17 +561,21 @@ TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
(std::string("Repository location: ") + clocation).c_str());

// The model can access the backend as well... here we can access
// the backend global state.
// the backend global state. We will use it to add per-model metrics
// to the global metric family object stored in the state, if metrics
// are enabled,
TRITONBACKEND_Backend* backend;
RETURN_IF_ERROR(TRITONBACKEND_ModelBackend(model, &backend));

void* vbackendstate;
RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vbackendstate));
std::string* backend_state = reinterpret_cast<std::string*>(vbackendstate);
IdentityBackendState* backend_state =
reinterpret_cast<IdentityBackendState*>(vbackendstate);

LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("backend state is '") + *backend_state + "'").c_str());
(std::string("backend state is '") + backend_state->message_ + "'")
.c_str());

// Create a ModelState object and associate it with the TRITONBACKEND_Model.
ModelState* model_state;
@@ -511,6 +592,12 @@ TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
   // For testing: block the thread for a certain time period before returning.
   RETURN_IF_ERROR(model_state->CreationDelay());
 
+#ifdef TRITON_ENABLE_METRICS
+  // Create custom metric per model with metric family shared across backend
+  RETURN_IF_ERROR(
+      model_state->InitMetrics(backend_state->metric_family_, name, version));
+#endif // TRITON_ENABLE_METRICS
+
   return nullptr; // success
 }
 
@@ -947,6 +1034,11 @@ TRITONBACKEND_ModelInstanceExecute(
         TRITONSERVER_LOG_VERBOSE,
         (std::string("\trequested_output ") + output_name).c_str());
 
+#ifdef TRITON_ENABLE_METRICS
+    GUARDED_RESPOND_IF_ERROR(
+        responses, r, model_state->UpdateMetrics(input_byte_size));
+#endif // TRITON_ENABLE_METRICS
+
     // This backend simply copies the output tensors from the corresponding
     // input tensors. The input tensors' contents are available in one or more
     // contiguous buffers. To do the copy we:
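Taken together, the identity.cc changes reduce to a short Metric API call sequence. The condensed sketch below (not part of the diff) gathers those calls in one place; error handling is elided, the include path is assumed, and releasing the labels with `TRITONSERVER_ParameterDelete` is an assumption about the API's ownership rules rather than something shown above:

```
#include <vector>

#include "triton/core/tritonserver.h"  // assumed include path for the Metric API

// Condensed lifecycle of the custom counter used by this backend. Error
// checking is omitted for brevity; the real code wraps each call in
// THROW_IF_BACKEND_MODEL_ERROR / RETURN_IF_ERROR.
void
MetricLifecycleSketch()
{
  // Backend init: one family, shared by every model using the backend.
  TRITONSERVER_MetricFamily* family = nullptr;
  TRITONSERVER_MetricFamilyNew(
      &family, TRITONSERVER_METRIC_KIND_COUNTER, "input_byte_size_counter",
      "Cumulative input byte size of all requests received by the model");

  // Model init: one metric per model/version label pair in that family.
  std::vector<const TRITONSERVER_Parameter*> labels;
  labels.emplace_back(TRITONSERVER_ParameterNew(
      "model", TRITONSERVER_PARAMETER_STRING, "identity_uint32"));
  labels.emplace_back(TRITONSERVER_ParameterNew(
      "version", TRITONSERVER_PARAMETER_STRING, "1"));
  TRITONSERVER_Metric* metric = nullptr;
  TRITONSERVER_MetricNew(&metric, family, labels.data(), labels.size());
  for (const auto* label : labels) {
    // Assumption: labels can be released once the metric has been created.
    TRITONSERVER_ParameterDelete(const_cast<TRITONSERVER_Parameter*>(label));
  }

  // Per request: counters only ever increase.
  TRITONSERVER_MetricIncrement(metric, 64 /* input_byte_size */);

  // Teardown: delete metrics before the family that owns them.
  TRITONSERVER_MetricDelete(metric);
  TRITONSERVER_MetricFamilyDelete(family);
}
```

The ordering mirrors the backend's object lifetimes: the family is owned by the backend-wide `IdentityBackendState`, while each `ModelState` owns its per-model metric and deletes it first.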