Skip to content

Commit

Permalink
Add test for host policy. Fix edge case (#3049)
Browse files — Browse the repository at this point in the history
* Add test for host policy. Fix edge case

* Format

* Address comment

* Fix up

* Fix bug. Pass string by reference

* Update copyright
  • Loading branch information
GuanLuo authored Jun 29, 2021
1 parent e08ea2f commit dd10830
Show file tree
Hide file tree
Showing 7 changed files with 127 additions and 41 deletions.
23 changes: 22 additions & 1 deletion qa/L0_io/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
# Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -132,10 +132,12 @@ for input_device in -1 0 1; do
full=${trial}_float32_float32_float32
full_log=$CLIENT_LOG.$full.$input_device.$output_device.$model_device

host_policy=cpu
if [ "$model_device" == "-1" ]; then
(cd $MODELSDIR/${full} && \
sed -i "s/instance_group.*/instance_group [{ kind: KIND_CPU }]/" config.pbtxt)
else
host_policy=gpu_${model_device}
(cd $MODELSDIR/${full} && \
sed -i "s/instance_group.*/instance_group [{ kind: KIND_GPU, gpus: [${model_device}] }]/" config.pbtxt)
fi
Expand All @@ -149,6 +151,25 @@ for input_device in -1 0 1; do
fi
set -e

# Test with host policy
set +e
$IO_TEST_UTIL -i $input_device -o $output_device -h $host_policy -r $MODELSDIR -m $full >>$full_log 2>&1
# FIXME: currently the new changes are only applied to the ORT backend; they should be applied to the other backends as well
if [[ "$trial" == "onnx" ]]; then
if [ $? -ne 0 ]; then
cat $full_log
echo -e "\n***\n*** Test Failed. Expect passing \n***"
RET=1
fi
else
if [ $? -eq 0 ]; then
cat $full_log
echo -e "\n***\n*** Test Failed. Expect failure \n***"
RET=1
fi
fi
set -e

# ensemble
set +e
$IO_TEST_UTIL -i $input_device -o $output_device -r $MODELSDIR -m fan_$full >>$full_log.ensemble 2>&1
Expand Down
2 changes: 1 addition & 1 deletion src/backends/backend/triton_backend_manager.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down
32 changes: 23 additions & 9 deletions src/backends/backend/triton_model.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -848,11 +848,20 @@ TRITONBACKEND_InputPropertiesForHostPolicy(
if (dims_count != nullptr) {
*dims_count = ti->ShapeWithBatchDim().size();
}
if (byte_size != nullptr) {
*byte_size = ti->Data(host_policy_name)->TotalByteSize();
}
if (buffer_count != nullptr) {
*buffer_count = ti->DataBufferCountForHostPolicy(host_policy_name);
if (host_policy_name != nullptr) {
if (byte_size != nullptr) {
*byte_size = ti->Data(host_policy_name)->TotalByteSize();
}
if (buffer_count != nullptr) {
*buffer_count = ti->DataBufferCountForHostPolicy(host_policy_name);
}
} else {
if (byte_size != nullptr) {
*byte_size = ti->Data()->TotalByteSize();
}
if (buffer_count != nullptr) {
*buffer_count = ti->DataBufferCount();
}
}
return nullptr; // success
}
Expand Down Expand Up @@ -885,9 +894,14 @@ TRITONBACKEND_InputBufferForHostPolicy(
{
InferenceRequest::Input* ti =
reinterpret_cast<InferenceRequest::Input*>(input);
Status status = ti->DataBufferForHostPolicy(
index, buffer, buffer_byte_size, memory_type, memory_type_id,
host_policy_name);

Status status =
(host_policy_name == nullptr)
? ti->DataBuffer(
index, buffer, buffer_byte_size, memory_type, memory_type_id)
: ti->DataBufferForHostPolicy(
index, buffer, buffer_byte_size, memory_type, memory_type_id,
host_policy_name);
if (!status.IsOk()) {
*buffer = nullptr;
*buffer_byte_size = 0;
Expand Down
19 changes: 8 additions & 11 deletions src/backends/backend/triton_model_instance.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -143,17 +143,14 @@ TritonModelInstance::CreateInstance(
// Create the JSON representation of the backend configuration.
triton::common::TritonJson::Value host_policy_json(
triton::common::TritonJson::ValueType::OBJECT);
if (!host_policy.empty()) {
triton::common::TritonJson::Value policy_setting_json(
host_policy_json, triton::common::TritonJson::ValueType::OBJECT);
for (const auto& pr : host_policy) {
RETURN_IF_ERROR(
policy_setting_json.AddString(pr.first.c_str(), pr.second));
}

RETURN_IF_ERROR(host_policy_json.Add(
host_policy_name.c_str(), std::move(policy_setting_json)));
triton::common::TritonJson::Value policy_setting_json(
host_policy_json, triton::common::TritonJson::ValueType::OBJECT);
for (const auto& pr : host_policy) {
RETURN_IF_ERROR(policy_setting_json.AddString(pr.first.c_str(), pr.second));
}

RETURN_IF_ERROR(host_policy_json.Add(
host_policy_name.c_str(), std::move(policy_setting_json)));
TritonServerMessage host_policy_message(host_policy_json);

std::unique_ptr<TritonModelInstance> local_instance(new TritonModelInstance(
Expand Down
8 changes: 4 additions & 4 deletions src/core/infer_request.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -808,7 +808,7 @@ InferenceRequest::Input::SetIsShapeTensor(const bool is_shape_tensor)
}

const std::shared_ptr<Memory>&
InferenceRequest::Input::Data(const std::string host_policy_name) const
InferenceRequest::Input::Data(const std::string& host_policy_name) const
{
auto device_data = host_policy_data_map_.find(host_policy_name);
if (device_data == host_policy_data_map_.end()) {
Expand Down Expand Up @@ -891,7 +891,7 @@ Status
InferenceRequest::Input::DataBufferForHostPolicy(
const size_t idx, const void** base, size_t* byte_size,
TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id,
const std::string host_policy_name) const
const std::string& host_policy_name) const
{
auto device_data = host_policy_data_map_.find(host_policy_name);
if (device_data == host_policy_data_map_.end()) {
Expand All @@ -907,7 +907,7 @@ InferenceRequest::Input::DataBufferForHostPolicy(

size_t
InferenceRequest::Input::DataBufferCountForHostPolicy(
const std::string host_policy_name) const
const std::string& host_policy_name) const
{
auto policy_data = host_policy_data_map_.find(host_policy_name);
if (policy_data != host_policy_data_map_.end()) {
Expand Down
6 changes: 3 additions & 3 deletions src/core/infer_request.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ class InferenceRequest {

// The data for this input for a specific device
const std::shared_ptr<Memory>& Data(
const std::string host_policy_name) const;
const std::string& host_policy_name) const;

// Set the data for this input. Error if input already has some
// data.
Expand All @@ -140,7 +140,7 @@ class InferenceRequest {
// host policy, the number of buffers in the fallback input data is
// returned.
size_t DataBufferCountForHostPolicy(
const std::string host_policy_name) const;
const std::string& host_policy_name) const;

// Get the 'idx' buffer containing a contiguous chunk of bytes for
// the input. Return error is 'idx' refers to a buffer that does
Expand Down Expand Up @@ -174,7 +174,7 @@ class InferenceRequest {
Status DataBufferForHostPolicy(
const size_t idx, const void** base, size_t* byte_size,
TRITONSERVER_MemoryType* memory_type, int64_t* memory_type_id,
const std::string host_policy_name) const;
const std::string& host_policy_name) const;

private:
DISALLOW_COPY_AND_ASSIGN(Input);
Expand Down
78 changes: 66 additions & 12 deletions src/servers/memory_alloc.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -79,6 +79,11 @@ Usage(char** argv, const std::string& msg = std::string())
std::cerr << "\t-v Enable verbose logging" << std::endl;
std::cerr << "\t-r [model repository absolute path]" << std::endl;
std::cerr << "\t-m [model name to be tested]" << std::endl;
std::cerr << "\t-h [host policy name]" << std::endl;
std::cerr << "\tFor '-h', if specify, the input will be set with different "
<< "host policy names, given that the specified value is the "
<< "host policy that the model under test is associated with."
<< std::endl;
std::cerr << "\tFor device ID, -1 is used to stand for CPU device, "
<< "non-negative value is for GPU device." << std::endl;

Expand Down Expand Up @@ -428,9 +433,12 @@ main(int argc, char** argv)
io_spec.output_type_ = TRITONSERVER_MEMORY_CPU;
io_spec.output_type_id_ = 0;

const char* host_policy_cstr = nullptr;
std::string host_policy;

// Parse commandline...
int opt;
while ((opt = getopt(argc, argv, "vi:o:r:m:")) != -1) {
while ((opt = getopt(argc, argv, "vi:o:r:m:h:")) != -1) {
switch (opt) {
case 'i': {
int64_t raw_id = std::stoll(optarg);
Expand All @@ -454,6 +462,11 @@ main(int argc, char** argv)
}
break;
}
case 'h': {
host_policy = optarg;
host_policy_cstr = host_policy.c_str();
break;
}
case 'r':
model_repository_path = optarg;
break;
Expand Down Expand Up @@ -655,6 +668,10 @@ main(int argc, char** argv)
irequest, InferRequestComplete, nullptr /* request_release_userp */),
"setting request release callback");

// Create zero-valued data that should never be selected at inference time;
// it is used to verify that host policy selection picks the correct buffer
std::vector<uint32_t> zero_data(16);

// Create the data for the two input tensors. Initialize the first
// to unique integers and the second to all ones.
std::vector<char> input0_data;
Expand Down Expand Up @@ -726,16 +743,53 @@ main(int argc, char** argv)
input0_base = gpu_input ? input0_gpu.get() : &input0_data[0];
input1_base = gpu_input ? input1_gpu.get() : &input1_data[0];

FAIL_IF_ERR(
TRITONSERVER_InferenceRequestAppendInputData(
irequest, input0, input0_base, input0_size, io_spec.input_type_,
io_spec.input_type_id_),
"assigning INPUT0 data");
FAIL_IF_ERR(
TRITONSERVER_InferenceRequestAppendInputData(
irequest, input1, input1_base, input1_size, io_spec.input_type_,
io_spec.input_type_id_),
"assigning INPUT1 data");

if (host_policy_cstr == nullptr) {
FAIL_IF_ERR(
TRITONSERVER_InferenceRequestAppendInputData(
irequest, input0, input0_base, input0_size, io_spec.input_type_,
io_spec.input_type_id_),
"assigning INPUT0 data");
FAIL_IF_ERR(
TRITONSERVER_InferenceRequestAppendInputData(
irequest, input1, input1_base, input1_size, io_spec.input_type_,
io_spec.input_type_id_),
"assigning INPUT1 data");

FAIL_IF_ERR(
TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
irequest, input0, zero_data.data(),
zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0,
"fake_host_policy_name"),
"assigning zero INPUT0 data with host policy 'fake_host_policy_name'");
FAIL_IF_ERR(
TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
irequest, input1, zero_data.data(),
zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0,
"fake_host_policy_name"),
"assigning zero INPUT1 data with host policy 'fake_host_policy_name'");
} else {
FAIL_IF_ERR(
TRITONSERVER_InferenceRequestAppendInputData(
irequest, input0, zero_data.data(),
zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0),
"assigning zero INPUT0 data");
FAIL_IF_ERR(
TRITONSERVER_InferenceRequestAppendInputData(
irequest, input1, zero_data.data(),
zero_data.size() * sizeof(uint32_t), TRITONSERVER_MEMORY_CPU, 0),
"assigning zero INPUT1 data");
FAIL_IF_ERR(
TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
irequest, input0, input0_base, input0_size, io_spec.input_type_,
io_spec.input_type_id_, host_policy_cstr),
"assigning INPUT0 data to provided host policy");
FAIL_IF_ERR(
TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
irequest, input1, input1_base, input1_size, io_spec.input_type_,
io_spec.input_type_id_, host_policy_cstr),
"assigning INPUT1 data to provided host policy");
}

// Perform inference...
auto p = new std::promise<TRITONSERVER_InferenceResponse*>();
Expand Down

0 comments on commit dd10830

Please sign in to comment.