Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom metric API example #19

Merged
merged 16 commits into from
Apr 15, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -39,6 +39,7 @@ project(tritonidentitybackend LANGUAGES C CXX)
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend" OFF)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
option(TRITON_ENABLE_METRICS "Include metrics support in backend" ON)

set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
Expand Down Expand Up @@ -104,6 +105,13 @@ target_compile_options(
$<$<CXX_COMPILER_ID:MSVC>:/Wall /D_WIN32_WINNT=0x0A00 /EHsc>
)

# Expose the metrics build option to the C++ sources. Note: if() takes the
# variable NAME directly; dereferencing with ${} evaluates the variable's
# value a second time, which misbehaves for edge-case values.
if(TRITON_ENABLE_METRICS)
  target_compile_definitions(
    triton-identity-backend
    PRIVATE TRITON_ENABLE_METRICS=1
  )
endif() # TRITON_ENABLE_METRICS

target_link_libraries(
triton-identity-backend
PRIVATE
Expand Down
106 changes: 97 additions & 9 deletions src/identity.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,18 @@ namespace triton { namespace backend { namespace identity {
} \
} while (false)

// Custom object to store global state for this backend
struct IdentityBackendState {
TRITONSERVER_MetricFamily* metric_family_;
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
std::string message_ = "backend state";
~IdentityBackendState()
{
#ifdef TRITON_ENABLE_METRICS
TRITONSERVER_MetricFamilyDelete(metric_family_);
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
#endif // TRITON_ENABLE_METRICS
}
};

//
// ModelState
//
Expand All @@ -77,7 +89,7 @@ class ModelState : public BackendModel {
public:
static TRITONSERVER_Error* Create(
TRITONBACKEND_Model* triton_model, ModelState** state);
virtual ~ModelState() = default;
~ModelState();

// Get execution delay and delay multiplier
uint64_t ExecDelay() const { return execute_delay_ms_; }
Expand All @@ -99,6 +111,15 @@ class ModelState : public BackendModel {
// This function is used for testing.
TRITONSERVER_Error* CreationDelay();

#ifdef TRITON_ENABLE_METRICS
// Set up metrics for this backend. This function is used for testing.
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
TRITONSERVER_Error* InitMetrics(
TRITONSERVER_MetricFamily* family, std::string model_name,
uint64_t model_version);
// Update metrics for this backend. This function is used for testing.
TRITONSERVER_Error* UpdateMetrics(uint64_t input_byte_size);
#endif // TRITON_ENABLE_METRICS

private:
ModelState(TRITONBACKEND_Model* triton_model);

Expand All @@ -111,6 +132,11 @@ class ModelState : public BackendModel {
// in inference while the output is requested
std::map<int, std::tuple<TRITONSERVER_DataType, std::vector<int64_t>>>
optional_inputs_;

#ifdef TRITON_ENABLE_METRICS
// Custom metrics associated with this model
TRITONSERVER_Metric* input_byte_size_counter_ = nullptr;
#endif // TRITON_ENABLE_METRICS
};

TRITONSERVER_Error*
Expand All @@ -137,6 +163,40 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
{
}

ModelState::~ModelState()
{
#ifdef TRITON_ENABLE_METRICS
  // Release the per-model metric. Guard against a metric that was never
  // created (InitMetrics not called, or TRITONSERVER_MetricNew failed).
  if (input_byte_size_counter_ != nullptr) {
    TRITONSERVER_MetricDelete(input_byte_size_counter_);
  }
#endif  // TRITON_ENABLE_METRICS
}

#ifdef TRITON_ENABLE_METRICS
// Create this model's counter metric within the backend-wide metric family,
// labeled by model name and version so backend metrics break down per-model.
// This function is used for testing.
TRITONSERVER_Error*
ModelState::InitMetrics(
    TRITONSERVER_MetricFamily* family, std::string model_name,
    uint64_t model_version)
{
  // Create labels for model/version pair to breakdown backend metrics per-model
  std::vector<const TRITONSERVER_Parameter*> labels;
  labels.emplace_back(TRITONSERVER_ParameterNew(
      "model", TRITONSERVER_PARAMETER_STRING, model_name.c_str()));
  const std::string version_str = std::to_string(model_version);
  labels.emplace_back(TRITONSERVER_ParameterNew(
      "version", TRITONSERVER_PARAMETER_STRING, version_str.c_str()));
  TRITONSERVER_Error* err = TRITONSERVER_MetricNew(
      &input_byte_size_counter_, family, labels.data(), labels.size());
  // The label values are copied into the metric, so the parameter objects
  // must be released here to avoid leaking them — on failure as well.
  for (const auto* label : labels) {
    TRITONSERVER_ParameterDelete(const_cast<TRITONSERVER_Parameter*>(label));
  }
  RETURN_IF_ERROR(err);
  return nullptr;  // success
}

// Accumulate a request's input size into this model's counter metric.
// This function is used for testing.
TRITONSERVER_Error*
ModelState::UpdateMetrics(uint64_t input_byte_size)
{
  TRITONSERVER_Error* err =
      TRITONSERVER_MetricIncrement(input_byte_size_counter_, input_byte_size);
  if (err != nullptr) {
    return err;
  }
  return nullptr;  // success
}
#endif  // TRITON_ENABLE_METRICS

TRITONSERVER_Error*
ModelState::CreationDelay()
{
Expand Down Expand Up @@ -422,9 +482,20 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
(std::string("backend configuration:\n") + buffer).c_str());

// If we have any global backend state we create and set it here. We
// don't need anything for this backend but for demonstration
// purposes we just create something...
std::string* state = new std::string("backend state");
// make use of the global backend state here to track a custom metric across
// all models using this backend if metrics are enabled.
IdentityBackendState* state = new IdentityBackendState();

#ifdef TRITON_ENABLE_METRICS
// Create metric family
const char* family_name = "input_byte_size_counter";
const char* desc =
"Cumulative input_byte_size across all identity model requests";
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_COUNTER;
RETURN_IF_ERROR(TRITONSERVER_MetricFamilyNew(
&state->metric_family_, kind, family_name, desc));
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
#endif // TRITON_ENABLE_METRICS

RETURN_IF_ERROR(
TRITONBACKEND_BackendSetState(backend, reinterpret_cast<void*>(state)));

Expand All @@ -439,11 +510,12 @@ TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vstate));
std::string* state = reinterpret_cast<std::string*>(vstate);
IdentityBackendState* state = reinterpret_cast<IdentityBackendState*>(vstate);

LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("TRITONBACKEND_Finalize: state is '") + *state + "'")
(std::string("TRITONBACKEND_Finalize: state is '") + state->message_ +
"'")
.c_str());

delete state;
Expand Down Expand Up @@ -484,17 +556,21 @@ TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
(std::string("Repository location: ") + clocation).c_str());

// The model can access the backend as well... here we can access
// the backend global state.
// the backend global state. We will use it to add per-model metrics
// to the global metric family object stored in the state, if metrics
// are enabled.
TRITONBACKEND_Backend* backend;
RETURN_IF_ERROR(TRITONBACKEND_ModelBackend(model, &backend));

void* vbackendstate;
RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vbackendstate));
std::string* backend_state = reinterpret_cast<std::string*>(vbackendstate);
IdentityBackendState* backend_state =
reinterpret_cast<IdentityBackendState*>(vbackendstate);

LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("backend state is '") + *backend_state + "'").c_str());
(std::string("backend state is '") + backend_state->message_ + "'")
.c_str());

// Create a ModelState object and associate it with the TRITONBACKEND_Model.
ModelState* model_state;
Expand All @@ -511,6 +587,13 @@ TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
// For testing.. Block the thread for certain time period before returning.
RETURN_IF_ERROR(model_state->CreationDelay());

#ifdef TRITON_ENABLE_METRICS
// For testing.. Create custom metric per model with metric family shared
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
// across backend
RETURN_IF_ERROR(
model_state->InitMetrics(backend_state->metric_family_, name, version));
#endif // TRITON_ENABLE_METRICS

return nullptr; // success
}

Expand Down Expand Up @@ -947,6 +1030,11 @@ TRITONBACKEND_ModelInstanceExecute(
TRITONSERVER_LOG_VERBOSE,
(std::string("\trequested_output ") + output_name).c_str());

#ifdef TRITON_ENABLE_METRICS
GUARDED_RESPOND_IF_ERROR(
responses, r, model_state->UpdateMetrics(input_byte_size));
Tabrizian marked this conversation as resolved.
Show resolved Hide resolved
#endif // TRITON_ENABLE_METRICS

// This backend simply copies the output tensors from the corresponding
// input tensors. The input tensors contents are available in one or more
// contiguous buffers. To do the copy we:
Expand Down