From dd1083098466c9a0356c6dd54238bf26e8fb948b Mon Sep 17 00:00:00 2001
From: GuanLuo <41310872+GuanLuo@users.noreply.github.com>
Date: Mon, 28 Jun 2021 18:35:36 -0700
Subject: [PATCH] Add test for host policy. Fix edge case (#3049)

* Add test for host policy. Fix edge case

* Format

* Address comment

* Fix up

* Fix bug. Pass string by reference

* Update copyright
---
 qa/L0_io/test.sh                              | 23 +++++-
 .../backend/triton_backend_manager.cc         |  2 +-
 src/backends/backend/triton_model.cc          | 32 +++++---
 src/backends/backend/triton_model_instance.cc | 19 ++---
 src/core/infer_request.cc                     |  8 +-
 src/core/infer_request.h                      |  6 +-
 src/servers/memory_alloc.cc                   | 78 ++++++++++++++++---
 7 files changed, 127 insertions(+), 41 deletions(-)

diff --git a/qa/L0_io/test.sh b/qa/L0_io/test.sh
index e7a513f213..8d7028d622 100755
--- a/qa/L0_io/test.sh
+++ b/qa/L0_io/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -132,10 +132,12 @@ for input_device in -1 0 1; do
             full=${trial}_float32_float32_float32
             full_log=$CLIENT_LOG.$full.$input_device.$output_device.$model_device

+            host_policy=cpu
             if [ "$model_device" == "-1" ]; then
                 (cd $MODELSDIR/${full} && \
                     sed -i "s/instance_group.*/instance_group [{ kind: KIND_CPU }]/" config.pbtxt)
             else
+                host_policy=gpu_${model_device}
                 (cd $MODELSDIR/${full} && \
                     sed -i "s/instance_group.*/instance_group [{ kind: KIND_GPU, gpus: [${model_device}] }]/" config.pbtxt)
             fi
@@ -149,6 +151,25 @@ for input_device in -1 0 1; do
             fi
             set -e

+            # Test with host policy
+            set +e
+            $IO_TEST_UTIL -i $input_device -o $output_device -h $host_policy -r $MODELSDIR -m $full >>$full_log 2>&1
+            # FIXME currently the new changes only apply to the ORT backend; they should apply to others
+            if [[ "$trial" == "onnx" ]]; then
+                if [ $? -ne 0 ]; then
+                    cat $full_log
+                    echo -e "\n***\n*** Test Failed. Expect passing \n***"
+                    RET=1
+                fi
+            else
+                if [ $? -eq 0 ]; then
+                    cat $full_log
+                    echo -e "\n***\n*** Test Failed. Expect failure \n***"
+                    RET=1
+                fi
+            fi
+            set -e
+
             # ensemble
             set +e
             $IO_TEST_UTIL -i $input_device -o $output_device -r $MODELSDIR -m fan_$full >>$full_log.ensemble 2>&1
diff --git a/src/backends/backend/triton_backend_manager.cc b/src/backends/backend/triton_backend_manager.cc
index 2d25d4f77b..e8cfed69c5 100644
--- a/src/backends/backend/triton_backend_manager.cc
+++ b/src/backends/backend/triton_backend_manager.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
diff --git a/src/backends/backend/triton_model.cc b/src/backends/backend/triton_model.cc
index 96c2e1416c..2155b21ec6 100644
--- a/src/backends/backend/triton_model.cc
+++ b/src/backends/backend/triton_model.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -848,11 +848,20 @@ TRITONBACKEND_InputPropertiesForHostPolicy(
   if (dims_count != nullptr) {
     *dims_count = ti->ShapeWithBatchDim().size();
   }
-  if (byte_size != nullptr) {
-    *byte_size = ti->Data(host_policy_name)->TotalByteSize();
-  }
-  if (buffer_count != nullptr) {
-    *buffer_count = ti->DataBufferCountForHostPolicy(host_policy_name);
+  if (host_policy_name != nullptr) {
+    if (byte_size != nullptr) {
+      *byte_size = ti->Data(host_policy_name)->TotalByteSize();
+    }
+    if (buffer_count != nullptr) {
+      *buffer_count = ti->DataBufferCountForHostPolicy(host_policy_name);
+    }
+  } else {
+    if (byte_size != nullptr) {
+      *byte_size = ti->Data()->TotalByteSize();
+    }
+    if (buffer_count != nullptr) {
+      *buffer_count = ti->DataBufferCount();
+    }
   }
   return nullptr;  // success
 }
@@ -885,9 +894,14 @@ TRITONBACKEND_InputBufferForHostPolicy(
 {
   InferenceRequest::Input* ti =
       reinterpret_cast<InferenceRequest::Input*>(input);
-  Status status = ti->DataBufferForHostPolicy(
-      index, buffer, buffer_byte_size, memory_type, memory_type_id,
-      host_policy_name);
+
+  Status status =
+      (host_policy_name == nullptr)
+          ? ti->DataBuffer(
+                index, buffer, buffer_byte_size, memory_type, memory_type_id)
+          : ti->DataBufferForHostPolicy(
+                index, buffer, buffer_byte_size, memory_type, memory_type_id,
+                host_policy_name);
   if (!status.IsOk()) {
     *buffer = nullptr;
     *buffer_byte_size = 0;
diff --git a/src/backends/backend/triton_model_instance.cc b/src/backends/backend/triton_model_instance.cc
index 73e0f81bd9..80a700132b 100644
--- a/src/backends/backend/triton_model_instance.cc
+++ b/src/backends/backend/triton_model_instance.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -143,17 +143,14 @@ TritonModelInstance::CreateInstance(
   // Create the JSON representation of the backend configuration.
   triton::common::TritonJson::Value host_policy_json(
       triton::common::TritonJson::ValueType::OBJECT);
-  if (!host_policy.empty()) {
-    triton::common::TritonJson::Value policy_setting_json(
-        host_policy_json, triton::common::TritonJson::ValueType::OBJECT);
-    for (const auto& pr : host_policy) {
-      RETURN_IF_ERROR(
-          policy_setting_json.AddString(pr.first.c_str(), pr.second));
-    }
-
-    RETURN_IF_ERROR(host_policy_json.Add(
-        host_policy_name.c_str(), std::move(policy_setting_json)));
+  triton::common::TritonJson::Value policy_setting_json(
+      host_policy_json, triton::common::TritonJson::ValueType::OBJECT);
+  for (const auto& pr : host_policy) {
+    RETURN_IF_ERROR(policy_setting_json.AddString(pr.first.c_str(), pr.second));
   }
+
+  RETURN_IF_ERROR(host_policy_json.Add(
+      host_policy_name.c_str(), std::move(policy_setting_json)));
   TritonServerMessage host_policy_message(host_policy_json);

   std::unique_ptr<TritonModelInstance> local_instance(new TritonModelInstance(
diff --git a/src/core/infer_request.cc b/src/core/infer_request.cc
index 7fa2ce3bf5..6e6097777f 100644
--- a/src/core/infer_request.cc
+++ b/src/core/infer_request.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -808,7 +808,7 @@ InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
 }

 const std::shared_ptr<Memory>&
-InferenceRequest::Input::Data(const std::string host_policy_name) const
+InferenceRequest::Input::Data(const std::string& host_policy_name) const
 {
   auto device_data = host_policy_data_map_.find(host_policy_name);
   if (device_data == host_policy_data_map_.end()) {
@@ -891,7 +891,7 @@ Status
 InferenceRequest::Input::DataBufferForHostPolicy(
     const size_t idx, const void** base, size_t* byte_size,
     TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id,
-    const std::string host_policy_name) const
+    const std::string& host_policy_name) const
 {
   auto device_data = host_policy_data_map_.find(host_policy_name);
   if (device_data == host_policy_data_map_.end()) {
@@ -907,7 +907,7 @@ InferenceRequest::Input::DataBufferForHostPolicy(

 size_t
 InferenceRequest::Input::DataBufferCountForHostPolicy(
-    const std::string host_policy_name) const
+    const std::string& host_policy_name) const
 {
   auto policy_data = host_policy_data_map_.find(host_policy_name);
   if (policy_data != host_policy_data_map_.end()) {
diff --git a/src/core/infer_request.h b/src/core/infer_request.h
index 74302cb0da..7a5bb9093c 100644
--- a/src/core/infer_request.h
+++ b/src/core/infer_request.h
@@ -114,7 +114,7 @@ class InferenceRequest {

     // The data for this input for a specific device
     const std::shared_ptr<Memory>& Data(
-        const std::string host_policy_name) const;
+        const std::string& host_policy_name) const;

     // Set the data for this input. Error if input already has some
     // data.
@@ -140,7 +140,7 @@ class InferenceRequest {
     // host policy, the number of buffers in the fallback input data is
     // returned.
     size_t DataBufferCountForHostPolicy(
-        const std::string host_policy_name) const;
+        const std::string& host_policy_name) const;

     // Get the 'idx' buffer containing a contiguous chunk of bytes for
     // the input. Return error if 'idx' refers to a buffer that does
@@ -174,7 +174,7 @@ class InferenceRequest {
     Status DataBufferForHostPolicy(
         const size_t idx, const void** base, size_t* byte_size,
         TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id,
-        const std::string host_policy_name) const;
+        const std::string& host_policy_name) const;

    private:
     DISALLOW_COPY_AND_ASSIGN(Input);
diff --git a/src/servers/memory_alloc.cc b/src/servers/memory_alloc.cc
index 509624dd60..e425bde845 100644
--- a/src/servers/memory_alloc.cc
+++ b/src/servers/memory_alloc.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -79,6 +79,11 @@ Usage(char** argv, const std::string& msg = std::string())
   std::cerr << "\t-v Enable verbose logging" << std::endl;
   std::cerr << "\t-r [model repository absolute path]" << std::endl;
   std::cerr << "\t-m [model name to be tested]" << std::endl;
+  std::cerr << "\t-h [host policy name]" << std::endl;
+  std::cerr << "\tFor '-h', if specified, the input will be set with different "
+            << "host policy names, given that the specified value is the "
+            << "host policy that the model under test is associated with."
+            << std::endl;
   std::cerr << "\tFor device ID, -1 is used to stand for CPU device, "
             << "non-negative value is for GPU device." << std::endl;

@@ -428,9 +433,12 @@ main(int argc, char** argv)
   io_spec.output_type_ = TRITONSERVER_MEMORY_CPU;
   io_spec.output_type_id_ = 0;

+  const char* host_policy_cstr = nullptr;
+  std::string host_policy;
+
   // Parse commandline...
   int opt;
-  while ((opt = getopt(argc, argv, "vi:o:r:m:")) != -1) {
+  while ((opt = getopt(argc, argv, "vi:o:r:m:h:")) != -1) {
     switch (opt) {
       case 'i': {
         int64_t raw_id = std::stoll(optarg);
@@ -454,6 +462,11 @@ main(int argc, char** argv)
         }
         break;
       }
+      case 'h': {
+        host_policy = optarg;
+        host_policy_cstr = host_policy.c_str();
+        break;
+      }
       case 'r':
         model_repository_path = optarg;
         break;
@@ -655,6 +668,10 @@ main(int argc, char** argv)
           irequest, InferRequestComplete, nullptr /* request_release_userp */),
       "setting request release callback");

+  // Create zero data that shouldn't be selected; it is used to test host
+  // policy functionality
+  std::vector<uint32_t> zero_data(16);
+
   // Create the data for the two input tensors. Initialize the first
   // to unique integers and the second to all ones.
   std::vector<char> input0_data;
@@ -726,16 +743,53 @@ main(int argc, char** argv)
   input0_base = gpu_input ? input0_gpu.get() : &input0_data[0];
   input1_base = gpu_input ? input1_gpu.get() : &input1_data[0];

-  FAIL_IF_ERR(
-      TRITONSERVER_InferenceRequestAppendInputData(
-          irequest, input0, input0_base, input0_size, io_spec.input_type_,
-          io_spec.input_type_id_),
-      "assigning INPUT0 data");
-  FAIL_IF_ERR(
-      TRITONSERVER_InferenceRequestAppendInputData(
-          irequest, input1, input1_base, input1_size, io_spec.input_type_,
-          io_spec.input_type_id_),
-      "assigning INPUT1 data");
+
+  if (host_policy_cstr == nullptr) {
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputData(
+            irequest, input0, input0_base, input0_size, io_spec.input_type_,
+            io_spec.input_type_id_),
+        "assigning INPUT0 data");
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputData(
+            irequest, input1, input1_base, input1_size, io_spec.input_type_,
+            io_spec.input_type_id_),
+        "assigning INPUT1 data");
+
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+            irequest, input0, zero_data.data(),
+            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0,
+            "fake_host_policy_name"),
+        "assigning zero INPUT0 data with host policy 'fake_host_policy_name'");
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+            irequest, input1, zero_data.data(),
+            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0,
+            "fake_host_policy_name"),
+        "assigning zero INPUT1 data with host policy 'fake_host_policy_name'");
+  } else {
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputData(
+            irequest, input0, zero_data.data(),
+            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0),
+        "assigning zero INPUT0 data");
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputData(
+            irequest, input1, zero_data.data(),
+            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0),
+        "assigning zero INPUT1 data");
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+            irequest, input0, input0_base, input0_size, io_spec.input_type_,
+            io_spec.input_type_id_, host_policy_cstr),
+        "assigning INPUT0 data to provided host policy");
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+            irequest, input1, input1_base, input1_size, io_spec.input_type_,
+            io_spec.input_type_id_, host_policy_cstr),
+        "assigning INPUT1 data to provided host policy");
+  }

   // Perform inference...
   auto p = new std::promise<TRITONSERVER_InferenceResponse*>();