Auto complete now uses default-max-batch-size (#60)
* auto-completing max_batch_size now uses the default-max-batch-size from the backend config; set the dynamic batcher when it is greater than 0

* only set dynamic batching when max-batch-size is greater than 1

* moved setting the dynamic batching scheduler until after max_batch_size is set

* added documentation about how the tensorflow backend uses default-max-batch-size

* when default-max-batch-size=0, then max_batch_size=0

* added a const for defining start_index

* consolidated the new max_batch_size logic
nv-kmcgill53 authored Jun 2, 2022
1 parent 9e4e59b commit 40f9d94
Showing 2 changed files with 59 additions and 15 deletions.
15 changes: 15 additions & 0 deletions README.md
@@ -128,6 +128,21 @@ GPU memory.
Select the version of the TensorFlow library to be used; available
versions are 1 and 2. The default version is 1.

##### --backend-config=tensorflow,default-max-batch-size=\<int\>

The default value to use for max_batch_size during [auto-completing model configuration](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#auto-generated-model-configuration)
when batching support is detected in the model. If the `--strict-model-config=false`
command-line option is used, the tensorflow backend will set the
max_batch_size of the model to this default value under the following
conditions:

1. Autocomplete has determined the model is capable of batching requests.
2. max_batch_size is 0 in the model configuration or max_batch_size
is omitted from the model configuration.

If max_batch_size > 1 and no [scheduler](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#scheduling-and-batching)
is provided, the dynamic batch scheduler will be enabled.
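
For example, a server launched as below (the model repository path is
illustrative) would autofill max_batch_size to 8 for a TensorFlow model
that supports batching but does not specify max_batch_size in its
configuration:

```
tritonserver --model-repository=/path/to/model_repository \
    --strict-model-config=false \
    --backend-config=tensorflow,default-max-batch-size=8
```

Because the resulting max_batch_size is greater than 1 and no scheduler
is given in the model configuration, the auto-completed configuration
for such a model would also contain, roughly:

```
max_batch_size: 8
dynamic_batching { }
```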

## Build the TensorFlow Backend

Use a recent cmake to build. First install the required dependencies.
59 changes: 44 additions & 15 deletions src/tensorflow.cc
@@ -60,13 +60,15 @@ using TRITONTFModelHandle = std::shared_ptr<TRITONTF_Model>;
struct BackendConfiguration {
BackendConfiguration()
: allow_gpu_memory_growth_(true), per_process_gpu_memory_fraction_(0.0),
allow_soft_placement_(true), memory_limit_mb_()
allow_soft_placement_(true), memory_limit_mb_(),
default_max_batch_size_(0)
{
}
bool allow_gpu_memory_growth_;
float per_process_gpu_memory_fraction_;
bool allow_soft_placement_;
std::map<int, std::vector<float>> memory_limit_mb_;
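// Default max_batch_size applied during model auto-complete when the
// model supports batching; 0 leaves batching disabled.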
int default_max_batch_size_;
};

namespace graphdef {
Expand Down Expand Up @@ -320,8 +322,7 @@ ValidateTRITONTFModel(
TRITONSERVER_ERROR_INVALID_ARG,
std::string(
"unable to load model '" + model_name +
"', configuration expects " +
std::to_string(expected_input_cnt) +
"', configuration expects " + std::to_string(expected_input_cnt) +
" inputs, model provides " + std::to_string(expected_inputs.size()))
.c_str());
}
Expand Down Expand Up @@ -1206,7 +1207,7 @@ AutoCompleteHelper::FixBatchingSupport()
}
}

const int max_batch_size = model_state_->MaxBatchSize();
int max_batch_size = model_state_->MaxBatchSize();

// If max-batch-size is explicitly set to non-zero but the model
// signature doesn't support batching then can't autofill.
Expand Down Expand Up @@ -1286,21 +1287,24 @@ AutoCompleteHelper::FixBatchingSupport()
}
}

// Set max-batch-size to the backend's default-max-batch-size if the
// model signature and config hint agree. We need to update the
// configuration itself as well as the cached value we have already
// initialized in the model state.
if (max_batch_size == 0) {
const int new_max_batch_size = model_support_batching_ ? 1 : 0;
const int new_max_batch_size =
model_support_batching_
? std::max(
model_state_->BackendConfig()->default_max_batch_size_, 0)
: 0;

triton::common::TritonJson::Value mbs_value;
model_state_->ModelConfig().Find("max_batch_size", &mbs_value);
mbs_value.SetInt(new_max_batch_size);

model_state_->SetMaxBatchSize(new_max_batch_size);
max_batch_size = new_max_batch_size;
if (model_support_batching_ == 1) {
LOG_MESSAGE(
TRITONSERVER_LOG_WARN,
(std::string("autofilled max_batch_size to 1 for model '") +
(std::string(
"autofilled max_batch_size to " +
std::to_string(new_max_batch_size) + " for model '") +
model_state_->Name() +
"' since batching is supported but no max_batch_size is specified "
"in model configuration. Must specify max_batch_size to utilize "
@@ -1309,6 +1313,24 @@ AutoCompleteHelper::FixBatchingSupport()
}
}

// Turn on dynamic batch scheduler if batch size is greater
// than 1 and there is no scheduler defined in the configuration.
if (max_batch_size > 1) {
triton::common::TritonJson::Value value;
bool found_sequence_batching =
model_state_->ModelConfig().Find("sequence_batching", &value);
bool found_dynamic_batching =
model_state_->ModelConfig().Find("dynamic_batching", &value);
if (!found_sequence_batching && !found_dynamic_batching) {
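// Adding an empty dynamic_batching object enables the dynamic
// batcher with its default settings.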
triton::common::TritonJson::Value dynamic_batching(
model_state_->ModelConfig(),
triton::common::TritonJson::ValueType::OBJECT);
model_state_->ModelConfig().Add(
"dynamic_batching", std::move(dynamic_batching));
}
}


return nullptr; // success
}

@@ -1342,11 +1364,12 @@ AutoCompleteHelper::FixIOConfig(
"unable to autofill for '" + model_state_->Name() +
"': the rank of model tensor '" + io->name_ +
"' is 0 which is not supported"));
// The model signature supports batching then the first
// dimension is -1 and should not appear in the model
// configuration 'dims' that we are creating.
for (size_t i = (model_support_batching_ ? 1 : 0); i < io->shape_->rank_;
++i) {
// If the model supports batching and the max_batch_size
// is 0, then batching is turned off and the IO dimensions
// must be explicit.
const size_t start_index =
(model_support_batching_ && model_state_->MaxBatchSize() > 0) ? 1 : 0;
for (size_t i = start_index; i < io->shape_->rank_; ++i) {
RETURN_IF_ERROR(dims.AppendInt(io->shape_->dims_[i]));
}

@@ -2244,6 +2267,12 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
lconfig->per_process_gpu_memory_fraction_ = lvalue;
lconfig->allow_gpu_memory_growth_ = (lvalue == 0.0);
}
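// Parse the optional default-max-batch-size setting from the backend
// config command line.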
if (cmdline.Find("default-max-batch-size", &value)) {
RETURN_IF_ERROR(value.AsString(&value_str));
int lvalue;
RETURN_IF_ERROR(ParseIntValue(value_str, &lvalue));
lconfig->default_max_batch_size_ = lvalue;
}
}
RETURN_IF_ERROR(TRITONBACKEND_BackendSetState(
backend, reinterpret_cast<void*>(lconfig.get())));
