From dd1083098466c9a0356c6dd54238bf26e8fb948b Mon Sep 17 00:00:00 2001
From: GuanLuo <41310872+GuanLuo@users.noreply.github.com>
Date: Mon, 28 Jun 2021 18:35:36 -0700
Subject: [PATCH] Add test for host policy. Fix edge case (#3049)

* Add test for host policy. Fix edge case

* Format

* Address comment

* Fix up

* Fix bug. Pass string by reference

* Update copyright
---
 qa/L0_io/test.sh                              | 23 +++++-
 .../backend/triton_backend_manager.cc         |  2 +-
 src/backends/backend/triton_model.cc          | 32 +++++---
 src/backends/backend/triton_model_instance.cc | 19 ++---
 src/core/infer_request.cc                     |  8 +-
 src/core/infer_request.h                      |  6 +-
 src/servers/memory_alloc.cc                   | 78 ++++++++++++++++---
 7 files changed, 127 insertions(+), 41 deletions(-)

diff --git a/qa/L0_io/test.sh b/qa/L0_io/test.sh
index e7a513f213..8d7028d622 100755
--- a/qa/L0_io/test.sh
+++ b/qa/L0_io/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -132,10 +132,12 @@ for input_device in -1 0 1; do
             full=${trial}_float32_float32_float32
             full_log=$CLIENT_LOG.$full.$input_device.$output_device.$model_device

+            host_policy=cpu
             if [ "$model_device" == "-1" ]; then
                 (cd $MODELSDIR/${full} && \
                     sed -i "s/instance_group.*/instance_group [{ kind: KIND_CPU }]/" config.pbtxt)
             else
+                host_policy=gpu_${model_device}
                 (cd $MODELSDIR/${full} && \
                     sed -i "s/instance_group.*/instance_group [{ kind: KIND_GPU, gpus: [${model_device}] }]/" config.pbtxt)
             fi
@@ -149,6 +151,25 @@ for input_device in -1 0 1; do
             fi
             set -e

+            # Test with host policy
+            set +e
+            $IO_TEST_UTIL -i $input_device -o $output_device -h $host_policy -r $MODELSDIR -m $full >>$full_log 2>&1
+            # FIXME currently the new changes only apply to the ORT backend; they should apply to others
+            if [[ "$trial" == "onnx" ]]; then
+                if [ $? -ne 0 ]; then
+                    cat $full_log
+                    echo -e "\n***\n*** Test Failed. Expect passing \n***"
+                    RET=1
+                fi
+            else
+                if [ $? -eq 0 ]; then
+                    cat $full_log
+                    echo -e "\n***\n*** Test Failed. Expect failure \n***"
+                    RET=1
+                fi
+            fi
+            set -e
+
             # ensemble
             set +e
             $IO_TEST_UTIL -i $input_device -o $output_device -r $MODELSDIR -m fan_$full >>$full_log.ensemble 2>&1
diff --git a/src/backends/backend/triton_backend_manager.cc b/src/backends/backend/triton_backend_manager.cc
index 2d25d4f77b..e8cfed69c5 100644
--- a/src/backends/backend/triton_backend_manager.cc
+++ b/src/backends/backend/triton_backend_manager.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
diff --git a/src/backends/backend/triton_model.cc b/src/backends/backend/triton_model.cc
index 96c2e1416c..2155b21ec6 100644
--- a/src/backends/backend/triton_model.cc
+++ b/src/backends/backend/triton_model.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -848,11 +848,20 @@ TRITONBACKEND_InputPropertiesForHostPolicy(
   if (dims_count != nullptr) {
     *dims_count = ti->ShapeWithBatchDim().size();
   }
-  if (byte_size != nullptr) {
-    *byte_size = ti->Data(host_policy_name)->TotalByteSize();
-  }
-  if (buffer_count != nullptr) {
-    *buffer_count = ti->DataBufferCountForHostPolicy(host_policy_name);
+  if (host_policy_name != nullptr) {
+    if (byte_size != nullptr) {
+      *byte_size = ti->Data(host_policy_name)->TotalByteSize();
+    }
+    if (buffer_count != nullptr) {
+      *buffer_count = ti->DataBufferCountForHostPolicy(host_policy_name);
+    }
+  } else {
+    if (byte_size != nullptr) {
+      *byte_size = ti->Data()->TotalByteSize();
+    }
+    if (buffer_count != nullptr) {
+      *buffer_count = ti->DataBufferCount();
+    }
   }
   return nullptr;  // success
 }
@@ -885,9 +894,14 @@ TRITONBACKEND_InputBufferForHostPolicy(
 {
   InferenceRequest::Input* ti =
       reinterpret_cast<InferenceRequest::Input*>(input);
-  Status status = ti->DataBufferForHostPolicy(
-      index, buffer, buffer_byte_size, memory_type, memory_type_id,
-      host_policy_name);
+
+  Status status =
+      (host_policy_name == nullptr)
+          ? ti->DataBuffer(
+                index, buffer, buffer_byte_size, memory_type, memory_type_id)
+          : ti->DataBufferForHostPolicy(
+                index, buffer, buffer_byte_size, memory_type, memory_type_id,
+                host_policy_name);
   if (!status.IsOk()) {
     *buffer = nullptr;
     *buffer_byte_size = 0;
diff --git a/src/backends/backend/triton_model_instance.cc b/src/backends/backend/triton_model_instance.cc
index 73e0f81bd9..80a700132b 100644
--- a/src/backends/backend/triton_model_instance.cc
+++ b/src/backends/backend/triton_model_instance.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -143,17 +143,14 @@ TritonModelInstance::CreateInstance(
   // Create the JSON representation of the backend configuration.
   triton::common::TritonJson::Value host_policy_json(
       triton::common::TritonJson::ValueType::OBJECT);
-  if (!host_policy.empty()) {
-    triton::common::TritonJson::Value policy_setting_json(
-        host_policy_json, triton::common::TritonJson::ValueType::OBJECT);
-    for (const auto& pr : host_policy) {
-      RETURN_IF_ERROR(
-          policy_setting_json.AddString(pr.first.c_str(), pr.second));
-    }
-
-    RETURN_IF_ERROR(host_policy_json.Add(
-        host_policy_name.c_str(), std::move(policy_setting_json)));
+  triton::common::TritonJson::Value policy_setting_json(
+      host_policy_json, triton::common::TritonJson::ValueType::OBJECT);
+  for (const auto& pr : host_policy) {
+    RETURN_IF_ERROR(policy_setting_json.AddString(pr.first.c_str(), pr.second));
   }
+
+  RETURN_IF_ERROR(host_policy_json.Add(
+      host_policy_name.c_str(), std::move(policy_setting_json)));
   TritonServerMessage host_policy_message(host_policy_json);

   std::unique_ptr<TritonModelInstance> local_instance(new TritonModelInstance(
diff --git a/src/core/infer_request.cc b/src/core/infer_request.cc
index 7fa2ce3bf5..6e6097777f 100644
--- a/src/core/infer_request.cc
+++ b/src/core/infer_request.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -808,7 +808,7 @@ InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
 }

 const std::shared_ptr<Memory>&
-InferenceRequest::Input::Data(const std::string host_policy_name) const
+InferenceRequest::Input::Data(const std::string& host_policy_name) const
 {
   auto device_data = host_policy_data_map_.find(host_policy_name);
   if (device_data == host_policy_data_map_.end()) {
@@ -891,7 +891,7 @@ Status
 InferenceRequest::Input::DataBufferForHostPolicy(
     const size_t idx, const void** base, size_t* byte_size,
     TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id,
-    const std::string host_policy_name) const
+    const std::string& host_policy_name) const
 {
   auto device_data = host_policy_data_map_.find(host_policy_name);
   if (device_data == host_policy_data_map_.end()) {
@@ -907,7 +907,7 @@ InferenceRequest::Input::DataBufferForHostPolicy(

 size_t
 InferenceRequest::Input::DataBufferCountForHostPolicy(
-    const std::string host_policy_name) const
+    const std::string& host_policy_name) const
 {
   auto policy_data = host_policy_data_map_.find(host_policy_name);
   if (policy_data != host_policy_data_map_.end()) {
diff --git a/src/core/infer_request.h b/src/core/infer_request.h
index 74302cb0da..7a5bb9093c 100644
--- a/src/core/infer_request.h
+++ b/src/core/infer_request.h
@@ -114,7 +114,7 @@ class InferenceRequest {

     // The data for this input for a specific device
     const std::shared_ptr<Memory>& Data(
-        const std::string host_policy_name) const;
+        const std::string& host_policy_name) const;

     // Set the data for this input. Error if input already has some
     // data.
@@ -140,7 +140,7 @@ class InferenceRequest {
     // host policy, the number of buffers in the fallback input data is
     // returned.
     size_t DataBufferCountForHostPolicy(
-        const std::string host_policy_name) const;
+        const std::string& host_policy_name) const;

     // Get the 'idx' buffer containing a contiguous chunk of bytes for
     // the input. Return error if 'idx' refers to a buffer that does
@@ -174,7 +174,7 @@ class InferenceRequest {
     Status DataBufferForHostPolicy(
         const size_t idx, const void** base, size_t* byte_size,
         TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id,
-        const std::string host_policy_name) const;
+        const std::string& host_policy_name) const;

    private:
     DISALLOW_COPY_AND_ASSIGN(Input);
diff --git a/src/servers/memory_alloc.cc b/src/servers/memory_alloc.cc
index 509624dd60..e425bde845 100644
--- a/src/servers/memory_alloc.cc
+++ b/src/servers/memory_alloc.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
+// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -79,6 +79,11 @@ Usage(char** argv, const std::string& msg = std::string())
   std::cerr << "\t-v Enable verbose logging" << std::endl;
   std::cerr << "\t-r [model repository absolute path]" << std::endl;
   std::cerr << "\t-m [model name to be tested]" << std::endl;
+  std::cerr << "\t-h [host policy name]" << std::endl;
+  std::cerr << "\tFor '-h', if specified, the input will be set with different "
+            << "host policy names, given that the specified value is the "
+            << "host policy that the model under test is associated with."
+            << std::endl;
   std::cerr << "\tFor device ID, -1 is used to stand for CPU device, "
             << "non-negative value is for GPU device." << std::endl;

@@ -428,9 +433,12 @@ main(int argc, char** argv)
   io_spec.output_type_ = TRITONSERVER_MEMORY_CPU;
   io_spec.output_type_id_ = 0;

+  const char* host_policy_cstr = nullptr;
+  std::string host_policy;
+
   // Parse commandline...
   int opt;
-  while ((opt = getopt(argc, argv, "vi:o:r:m:")) != -1) {
+  while ((opt = getopt(argc, argv, "vi:o:r:m:h:")) != -1) {
     switch (opt) {
       case 'i': {
         int64_t raw_id = std::stoll(optarg);
@@ -454,6 +462,11 @@ main(int argc, char** argv)
         }
         break;
       }
+      case 'h': {
+        host_policy = optarg;
+        host_policy_cstr = host_policy.c_str();
+        break;
+      }
       case 'r':
         model_repository_path = optarg;
         break;
@@ -655,6 +668,10 @@ main(int argc, char** argv)
           irequest, InferRequestComplete, nullptr /* request_release_userp */),
       "setting request release callback");

+  // Create zero data that shouldn't be selected; it is used to test host
+  // policy functionality
+  std::vector<uint32_t> zero_data(16);
+
   // Create the data for the two input tensors. Initialize the first
   // to unique integers and the second to all ones.
   std::vector<char> input0_data;
@@ -726,16 +743,53 @@ main(int argc, char** argv)
   input0_base = gpu_input ? input0_gpu.get() : &input0_data[0];
   input1_base = gpu_input ? input1_gpu.get() : &input1_data[0];

-  FAIL_IF_ERR(
-      TRITONSERVER_InferenceRequestAppendInputData(
-          irequest, input0, input0_base, input0_size, io_spec.input_type_,
-          io_spec.input_type_id_),
-      "assigning INPUT0 data");
-  FAIL_IF_ERR(
-      TRITONSERVER_InferenceRequestAppendInputData(
-          irequest, input1, input1_base, input1_size, io_spec.input_type_,
-          io_spec.input_type_id_),
-      "assigning INPUT1 data");
+
+  if (host_policy_cstr == nullptr) {
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputData(
+            irequest, input0, input0_base, input0_size, io_spec.input_type_,
+            io_spec.input_type_id_),
+        "assigning INPUT0 data");
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputData(
+            irequest, input1, input1_base, input1_size, io_spec.input_type_,
+            io_spec.input_type_id_),
+        "assigning INPUT1 data");
+
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+            irequest, input0, zero_data.data(),
+            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0,
+            "fake_host_policy_name"),
+        "assigning zero INPUT0 data with host policy 'fake_host_policy_name'");
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+            irequest, input1, zero_data.data(),
+            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0,
+            "fake_host_policy_name"),
+        "assigning zero INPUT1 data with host policy 'fake_host_policy_name'");
+  } else {
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputData(
+            irequest, input0, zero_data.data(),
+            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0),
+        "assigning zero INPUT0 data");
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputData(
+            irequest, input1, zero_data.data(),
+            zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0),
+        "assigning zero INPUT1 data");
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+            irequest, input0, input0_base, input0_size, io_spec.input_type_,
+            io_spec.input_type_id_, host_policy_cstr),
+        "assigning INPUT0 data to provided host policy");
+    FAIL_IF_ERR(
+        TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+            irequest, input1, input1_base, input1_size, io_spec.input_type_,
+            io_spec.input_type_id_, host_policy_cstr),
+        "assigning INPUT1 data to provided host policy");
+  }

   // Perform inference...
   auto p = new std::promise<TRITONSERVER_InferenceResponse*>();