diff --git a/README.md b/README.md
index e4b502c..681fab9 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,21 @@ GPU memory.
 Select the version of the TensorFlow library to be used, available
 versions are 1 and 2. Default version is 1.
 
+##### --backend-config=tensorflow,default-max-batch-size=\<int\>
+
+The default value to use for max_batch_size during [auto-completing model configuration](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#auto-generated-model-configuration)
+when batching support is detected in the model. If the `--strict-model-config=false`
+command-line option is used, the tensorflow backend will set the
+max_batch_size of the model to this default value under the following
+conditions:
+
+1. Autocomplete has determined the model is capable of batching requests.
+2. max_batch_size is 0 in the model configuration or max_batch_size
+   is omitted from the model configuration.
+
+If max_batch_size > 1 and no [scheduler](https://github.com/triton-inference-server/server/blob/main/docs/model_configuration.md#scheduling-and-batching)
+is provided, the dynamic batch scheduler will be enabled.
+
 ## Build the TensorFlow Backend
 
 Use a recent cmake to build. First install the required dependencies.
diff --git a/src/tensorflow.cc b/src/tensorflow.cc
index a06074d..6701a16 100644
--- a/src/tensorflow.cc
+++ b/src/tensorflow.cc
@@ -60,13 +60,15 @@ using TRITONTFModelHandle = std::shared_ptr<TRITONTF_Model>;
 struct BackendConfiguration {
   BackendConfiguration()
       : allow_gpu_memory_growth_(true), per_process_gpu_memory_fraction_(0.0),
-        allow_soft_placement_(true), memory_limit_mb_()
+        allow_soft_placement_(true), memory_limit_mb_(),
+        default_max_batch_size_(0)
   {
   }
   bool allow_gpu_memory_growth_;
   float per_process_gpu_memory_fraction_;
   bool allow_soft_placement_;
   std::map<int, std::vector<float>> memory_limit_mb_;
+  int default_max_batch_size_;
 };
 
 namespace graphdef {
@@ -320,8 +322,7 @@ ValidateTRITONTFModel(
         TRITONSERVER_ERROR_INVALID_ARG,
         std::string(
             "unable to load model '" + model_name +
-            "', configuration expects " +
-            std::to_string(expected_input_cnt) +
+            "', configuration expects " + std::to_string(expected_input_cnt) +
             " inputs, model provides " + std::to_string(expected_inputs.size()))
             .c_str());
   }
@@ -1206,7 +1207,7 @@ AutoCompleteHelper::FixBatchingSupport()
     }
   }
 
-  const int max_batch_size = model_state_->MaxBatchSize();
+  int max_batch_size = model_state_->MaxBatchSize();
 
   // If max-batch-size is explicitly set to non-zero but the model
   // signature doesn't support batching then can't autofill.
@@ -1286,21 +1287,24 @@ AutoCompleteHelper::FixBatchingSupport()
     }
   }
 
-  // Set max-batch-size to 1 if the model signature and config hint
-  // agree. We need to update the configuration itself as well as the
-  // cached value we have already initialized in the model state.
   if (max_batch_size == 0) {
-    const int new_max_batch_size = model_support_batching_ ? 1 : 0;
+    const int new_max_batch_size =
+        model_support_batching_
+            ? std::max(
+                  model_state_->BackendConfig()->default_max_batch_size_, 0)
+            : 0;
 
     triton::common::TritonJson::Value mbs_value;
     model_state_->ModelConfig().Find("max_batch_size", &mbs_value);
     mbs_value.SetInt(new_max_batch_size);
-    model_state_->SetMaxBatchSize(new_max_batch_size);
+    max_batch_size = new_max_batch_size;
 
     if (model_support_batching_ == 1) {
       LOG_MESSAGE(
           TRITONSERVER_LOG_WARN,
-          (std::string("autofilled max_batch_size to 1 for model '") +
+          (std::string(
+               "autofilled max_batch_size to " +
+               std::to_string(new_max_batch_size) + " for model '") +
           model_state_->Name() +
           "' since batching is supporrted but no max_batch_size is specified "
          "in model configuration. Must specify max_batch_size to utilize "
@@ -1309,6 +1313,24 @@ AutoCompleteHelper::FixBatchingSupport()
     }
   }
 
+  // Turn on dynamic batch scheduler if batch size is greater
+  // than 1 and there is no scheduler defined in the configuration.
+  if (max_batch_size > 1) {
+    triton::common::TritonJson::Value value;
+    bool found_sequence_batching =
+        model_state_->ModelConfig().Find("sequence_batching", &value);
+    bool found_dynamic_batching =
+        model_state_->ModelConfig().Find("dynamic_batching", &value);
+    if (!found_sequence_batching && !found_dynamic_batching) {
+      triton::common::TritonJson::Value dynamic_batching(
+          model_state_->ModelConfig(),
+          triton::common::TritonJson::ValueType::OBJECT);
+      model_state_->ModelConfig().Add(
+          "dynamic_batching", std::move(dynamic_batching));
+    }
+  }
+
+
   return nullptr;  // success
 }
 
@@ -1342,11 +1364,12 @@ AutoCompleteHelper::FixIOConfig(
             "unable to autofill for '" + model_state_->Name() +
             "': the rank of model tensor '" + io->name_ +
             "' is 0 which is not supported"));
-    // The model signature supports batching then the first
-    // dimension is -1 and should not appear in the model
-    // configuration 'dims' that we are creating.
-    for (size_t i = (model_support_batching_ ? 1 : 0); i < io->shape_->rank_;
-         ++i) {
+    // If the model supports batching and the max_batch_size
+    // is 0, then batching is turned off and the IO dimensions
+    // must be explicit.
+    const size_t start_index =
+        (model_support_batching_ && model_state_->MaxBatchSize() > 0) ? 1 : 0;
+    for (size_t i = start_index; i < io->shape_->rank_; ++i) {
       RETURN_IF_ERROR(dims.AppendInt(io->shape_->dims_[i]));
     }
 
@@ -2244,6 +2267,12 @@ TRITONBACKEND_Initialize(TRITONBACKEND_Backend* backend)
       lconfig->per_process_gpu_memory_fraction_ = lvalue;
      lconfig->allow_gpu_memory_growth_ = (lvalue == 0.0);
     }
+    if (cmdline.Find("default-max-batch-size", &value)) {
+      RETURN_IF_ERROR(value.AsString(&value_str));
+      int lvalue;
+      RETURN_IF_ERROR(ParseIntValue(value_str, &lvalue));
+      lconfig->default_max_batch_size_ = lvalue;
+    }
   }
   RETURN_IF_ERROR(TRITONBACKEND_BackendSetState(
       backend, reinterpret_cast<void*>(lconfig.get())));
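
As a usage sketch of the option documented in the README hunk above (the model repository path and the value `8` are illustrative, not part of this change), the setting is passed through `--backend-config` on the `tritonserver` command line, together with `--strict-model-config=false` so that auto-complete runs:

```
tritonserver --model-repository=/models \
    --strict-model-config=false \
    --backend-config=tensorflow,default-max-batch-size=8
```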
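
With those settings, and assuming a hypothetical batching-capable TensorFlow model whose configuration omits `max_batch_size` and any scheduler, the fields touched by `FixBatchingSupport` above would come out of auto-complete roughly as:

```
max_batch_size: 8
dynamic_batching { }
```

Note that the patch clamps the configured default with `std::max(..., 0)`, so a negative `default-max-batch-size` behaves like 0 and leaves batching disabled, and the scheduler check looks for both `sequence_batching` and `dynamic_batching`, so an explicitly configured scheduler is never overridden.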