Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom metric API example #19

Merged
merged 16 commits into from
Apr 15, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -39,6 +39,7 @@ project(tritonidentitybackend LANGUAGES C CXX)
#
option(TRITON_ENABLE_GPU "Enable GPU support in backend" OFF)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
option(TRITON_ENABLE_METRICS "Include metrics support in backend" ON)

set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
Expand Down Expand Up @@ -104,6 +105,13 @@ target_compile_options(
$<$<CXX_COMPILER_ID:MSVC>:/Wall /D_WIN32_WINNT=0x0A00 /EHsc>
)

# Expose the metrics build option to the C++ sources. Note: if() takes the
# variable NAME directly; dereferencing with ${} evaluates the variable's
# value a second time, which misbehaves for edge-case values.
if(TRITON_ENABLE_METRICS)
  target_compile_definitions(
    triton-identity-backend
    PRIVATE TRITON_ENABLE_METRICS=1
  )
endif() # TRITON_ENABLE_METRICS

target_link_libraries(
triton-identity-backend
PRIVATE
Expand Down
106 changes: 97 additions & 9 deletions src/identity.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,18 @@ namespace triton { namespace backend { namespace identity {
} \
} while (false)

// Custom object to store global state for this backend
struct IdentityBackendState {
TRITONSERVER_MetricFamily* metric_family_;
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
std::string message_ = "backend state";
~IdentityBackendState()
{
#ifdef TRITON_ENABLE_METRICS
TRITONSERVER_MetricFamilyDelete(metric_family_);
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
#endif // TRITON_ENABLE_METRICS
}
};

//
// ModelState
//
Expand All @@ -77,7 +89,7 @@ class ModelState : public BackendModel {
public:
static TRITONSERVER_Error* Create(
TRITONBACKEND_Model* triton_model, ModelState** state);
virtual ~ModelState() = default;
~ModelState();

// Get execution delay and delay multiplier
uint64_t ExecDelay() const { return execute_delay_ms_; }
Expand All @@ -99,6 +111,15 @@ class ModelState : public BackendModel {
// This function is used for testing.
TRITONSERVER_Error* CreationDelay();

#ifdef TRITON_ENABLE_METRICS
// Set up metrics for this backend. This function is used for testing.
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
TRITONSERVER_Error* InitMetrics(
TRITONSERVER_MetricFamily* family, std::string model_name,
uint64_t model_version);
// Update metrics for this backend. This function is used for testing.
TRITONSERVER_Error* UpdateMetrics(uint64_t input_byte_size);
#endif // TRITON_ENABLE_METRICS

private:
ModelState(TRITONBACKEND_Model* triton_model);

Expand All @@ -111,6 +132,11 @@ class ModelState : public BackendModel {
// in inference while the output is requested
std::map<int, std::tuple<TRITONSERVER_DataType, std::vector<int64_t>>>
optional_inputs_;

#ifdef TRITON_ENABLE_METRICS
// Custom metrics associated with this model
TRITONSERVER_Metric* input_byte_size_counter_ = nullptr;
#endif // TRITON_ENABLE_METRICS
};

TRITONSERVER_Error*
Expand All @@ -137,6 +163,40 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
{
}

ModelState::~ModelState()
{
#ifdef TRITON_ENABLE_METRICS
  // Release the per-model metric. Guard against a metric that was never
  // created (InitMetrics not called, or TRITONSERVER_MetricNew failed).
  if (input_byte_size_counter_ != nullptr) {
    TRITONSERVER_MetricDelete(input_byte_size_counter_);
  }
#endif  // TRITON_ENABLE_METRICS
}

#ifdef TRITON_ENABLE_METRICS
// Create this model's counter metric within the backend-wide metric family,
// labeled by model name and version so backend metrics break down per-model.
// This function is used for testing.
TRITONSERVER_Error*
ModelState::InitMetrics(
    TRITONSERVER_MetricFamily* family, std::string model_name,
    uint64_t model_version)
{
  // Create labels for model/version pair to breakdown backend metrics per-model
  std::vector<const TRITONSERVER_Parameter*> labels;
  labels.emplace_back(TRITONSERVER_ParameterNew(
      "model", TRITONSERVER_PARAMETER_STRING, model_name.c_str()));
  const std::string version_str = std::to_string(model_version);
  labels.emplace_back(TRITONSERVER_ParameterNew(
      "version", TRITONSERVER_PARAMETER_STRING, version_str.c_str()));
  TRITONSERVER_Error* err = TRITONSERVER_MetricNew(
      &input_byte_size_counter_, family, labels.data(), labels.size());
  // The label values are copied into the metric, so the parameter objects
  // must be released here to avoid leaking them — on failure as well.
  for (const auto* label : labels) {
    TRITONSERVER_ParameterDelete(const_cast<TRITONSERVER_Parameter*>(label));
  }
  RETURN_IF_ERROR(err);
  return nullptr;  // success
}

// Accumulate a request's input size into this model's counter metric.
// This function is used for testing.
TRITONSERVER_Error*
ModelState::UpdateMetrics(uint64_t input_byte_size)
{
  TRITONSERVER_Error* err =
      TRITONSERVER_MetricIncrement(input_byte_size_counter_, input_byte_size);
  if (err != nullptr) {
    return err;
  }
  return nullptr;  // success
}
#endif  // TRITON_ENABLE_METRICS

TRITONSERVER_Error*
ModelState::CreationDelay()
{
Expand Down Expand Up @@ -422,9 +482,20 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
(std::string("backend configuration:\n") + buffer).c_str());

// If we have any global backend state we create and set it here. We
// don't need anything for this backend but for demonstration
// purposes we just create something...
std::string* state = new std::string("backend state");
// make use of the global backend state here to track a custom metric across
// all models using this backend if metrics are enabled.
IdentityBackendState* state = new IdentityBackendState();

#ifdef TRITON_ENABLE_METRICS
// Create metric family
const char* family_name = "input_byte_size_counter";
const char* desc =
"Cumulative input_byte_size across all identity model requests";
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
TRITONSERVER_MetricKind kind = TRITONSERVER_METRIC_KIND_COUNTER;
RETURN_IF_ERROR(TRITONSERVER_MetricFamilyNew(
&state->metric_family_, kind, family_name, desc));
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
#endif // TRITON_ENABLE_METRICS

RETURN_IF_ERROR(
TRITONBACKEND_BackendSetState(backend, reinterpret_cast<void*>(state)));

Expand All @@ -439,11 +510,12 @@ TRITONBACKEND_Finalize(TRITONBACKEND_Backend* backend)
{
void* vstate;
RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vstate));
std::string* state = reinterpret_cast<std::string*>(vstate);
IdentityBackendState* state = reinterpret_cast<IdentityBackendState*>(vstate);

LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("TRITONBACKEND_Finalize: state is '") + *state + "'")
(std::string("TRITONBACKEND_Finalize: state is '") + state->message_ +
"'")
.c_str());

delete state;
Expand Down Expand Up @@ -484,17 +556,21 @@ TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
(std::string("Repository location: ") + clocation).c_str());

// The model can access the backend as well... here we can access
// the backend global state.
// the backend global state. We will use it to add per-model metrics
// to the global metric family object stored in the state, if metrics
// are enabled.
TRITONBACKEND_Backend* backend;
RETURN_IF_ERROR(TRITONBACKEND_ModelBackend(model, &backend));

void* vbackendstate;
RETURN_IF_ERROR(TRITONBACKEND_BackendState(backend, &vbackendstate));
std::string* backend_state = reinterpret_cast<std::string*>(vbackendstate);
IdentityBackendState* backend_state =
reinterpret_cast<IdentityBackendState*>(vbackendstate);

LOG_MESSAGE(
TRITONSERVER_LOG_INFO,
(std::string("backend state is '") + *backend_state + "'").c_str());
(std::string("backend state is '") + backend_state->message_ + "'")
.c_str());

// Create a ModelState object and associate it with the TRITONBACKEND_Model.
ModelState* model_state;
Expand All @@ -511,6 +587,13 @@ TRITONBACKEND_ModelInitialize(TRITONBACKEND_Model* model)
// For testing.. Block the thread for certain time period before returning.
RETURN_IF_ERROR(model_state->CreationDelay());

#ifdef TRITON_ENABLE_METRICS
// For testing.. Create custom metric per model with metric family shared
rmccorm4 marked this conversation as resolved.
Show resolved Hide resolved
// across backend
RETURN_IF_ERROR(
model_state->InitMetrics(backend_state->metric_family_, name, version));
#endif // TRITON_ENABLE_METRICS

return nullptr; // success
}

Expand Down Expand Up @@ -947,6 +1030,11 @@ TRITONBACKEND_ModelInstanceExecute(
TRITONSERVER_LOG_VERBOSE,
(std::string("\trequested_output ") + output_name).c_str());

#ifdef TRITON_ENABLE_METRICS
GUARDED_RESPOND_IF_ERROR(
responses, r, model_state->UpdateMetrics(input_byte_size));
Tabrizian marked this conversation as resolved.
Show resolved Hide resolved
#endif // TRITON_ENABLE_METRICS

// This backend simply copies the output tensors from the corresponding
// input tensors. The input tensors contents are available in one or more
// contiguous buffers. To do the copy we:
Expand Down