Add autofill enable option to server and pass it through to servables
David Goodwin committed Nov 14, 2018
1 parent 72c93b2 commit 74656bd
Showing 17 changed files with 246 additions and 57 deletions.
2 changes: 0 additions & 2 deletions src/core/infer.h
@@ -132,7 +132,6 @@ class HTTPInferRequestProvider : public InferRequestProvider {
std::vector<std::vector<char>> contiguous_buffers_;
};


// Provide inference request outputs
class InferResponseProvider {
public:
@@ -236,7 +235,6 @@ class HTTPInferResponseProvider : public InferResponseProvider {
size_t total_raw_byte_size_;
};


// Interface for servables that handle generic inference requests.
class InferenceServable {
public:
1 change: 0 additions & 1 deletion src/core/request_status.cc
@@ -26,7 +26,6 @@

#include "src/core/request_status.h"


namespace nvidia { namespace inferenceserver {

namespace {
60 changes: 39 additions & 21 deletions src/core/server.cc
@@ -65,6 +65,7 @@
#include "src/servables/tensorflow/graphdef_bundle.h"
#include "src/servables/tensorflow/graphdef_bundle.pb.h"
#include "src/servables/tensorflow/savedmodel_bundle.h"
#include "src/servables/tensorflow/savedmodel_bundle.pb.h"
#include "src/servables/tensorrt/plan_bundle.h"
#include "src/servables/tensorrt/plan_bundle.pb.h"
#include "tensorflow/core/lib/core/status.h"
@@ -82,7 +83,6 @@
#include "tensorflow_serving/core/availability_preserving_policy.h"
#include "tensorflow_serving/core/servable_handle.h"
#include "tensorflow_serving/model_servers/server_core.h"
#include "tensorflow_serving/servables/tensorflow/saved_model_bundle_source_adapter.pb.h"
#include "tensorflow_serving/util/net_http/server/public/httpserver.h"
#include "tensorflow_serving/util/net_http/server/public/response_code_enum.h"
#include "tensorflow_serving/util/net_http/server/public/server_request_interface.h"
@@ -535,6 +535,7 @@ InferenceServer::InferenceServer()
grpc_port_ = 8001;
metrics_port_ = 8002;
http_thread_cnt_ = 8;
strict_model_config_ = false;
strict_readiness_ = true;
model_load_unload_enabled_ = true;
profiling_enabled_ = false;
@@ -567,6 +568,7 @@ InferenceServer::Init(int argc, char** argv)
// On error, the init process will stop.
// The difference is whether the server will be terminated.
bool exit_on_error = true;
bool strict_model_config = strict_model_config_;
bool strict_readiness = strict_readiness_;
bool allow_model_load_unload = model_load_unload_enabled_;
bool allow_profiling = profiling_enabled_;
@@ -604,13 +606,20 @@ InferenceServer::Init(int argc, char** argv)
"config instead of the default platform."),
tensorflow::Flag(
"exit-on-error", &exit_on_error,
"Exit the inference server if an error occurs during initialization."),
"Exit the inference server if an error occurs during "
"initialization."),
tensorflow::Flag(
"strict-model-config", &strict_model_config,
"If true model configuration files must be provided and all required "
"configuration settings must be specified. If false the model "
"configuration may be absent or only partially specified and the "
"server will attempt to derive the missing required configuration."),
tensorflow::Flag(
"strict-readiness", &strict_readiness,
"If true /api/health/ready endpoint indicates ready if the server "
"is responsive and all models are available. If false /api/health/ready "
"endpoint indicates ready if server is responsive even if some/all "
"models are unavailable."),
"is responsive and all models are available. If false "
"/api/health/ready endpoint indicates ready if server is responsive even "
"if some/all models are unavailable."),
tensorflow::Flag(
"allow-model-load-unload", &allow_model_load_unload,
"Allow models to be loaded and unloaded dynamically based on changes "
@@ -639,13 +648,13 @@ InferenceServer::Init(int argc, char** argv)
"Number of threads handling HTTP requests."),
tensorflow::Flag(
"file-system-poll-secs", &file_system_poll_secs,
"Interval in seconds between each poll of the file "
"system for changes to the model store."),
"Interval in seconds between each poll of the file system for changes to "
"the model store."),
tensorflow::Flag(
"exit-timeout-secs", &exit_timeout_secs,
"Timeout (in seconds) when exiting to wait for in-flight inferences "
"to finish. After the timeout expires the server exits even if "
"inferences are still in flight."),
"Timeout (in seconds) when exiting to wait for in-flight inferences to "
"finish. After the timeout expires the server exits even if inferences "
"are still in flight."),
tensorflow::Flag(
"tf-allow-soft-placement", &tf_allow_soft_placement,
"Instruct TensorFlow to use CPU implementation of an operation when a "
Expand All @@ -654,8 +663,8 @@ InferenceServer::Init(int argc, char** argv)
"tf-gpu-memory-fraction", &tf_gpu_memory_fraction,
"Reserve a portion of GPU memory for TensorFlow models. Default value "
"0.0 indicates that TensorFlow should dynamically allocate memory as "
"needed. Value of 1.0 indicates that TensorFlow should allocate all "
"of GPU memory."),
"needed. Value of 1.0 indicates that TensorFlow should allocate all of "
"GPU memory."),
};

std::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
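For context, the flag definitions above rely on TensorFlow's command-line flag helpers. Below is a minimal, self-contained sketch of that mechanism for the new boolean option (illustrative only, not code from this commit; the standalone main() and the exact help text are made up):

#include <iostream>
#include <string>
#include <vector>

#include "tensorflow/core/util/command_line_flags.h"

int main(int argc, char** argv)
{
  // Mirrors the server default: strict model config off, so autofill is on.
  bool strict_model_config = false;

  std::vector<tensorflow::Flag> flag_list = {
      tensorflow::Flag(
          "strict-model-config", &strict_model_config,
          "Require a complete model configuration file when true."),
  };

  // Flags::Parse() consumes recognized --name=value arguments; on failure
  // print the usage text generated from the flag list.
  const std::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
  if (!tensorflow::Flags::Parse(&argc, argv, flag_list)) {
    std::cerr << usage;
    return 1;
  }

  // The server stores the parsed value and later derives autofill from it.
  std::cout << "autofill enabled: " << !strict_model_config << std::endl;
  return 0;
}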
@@ -681,6 +690,7 @@ InferenceServer::Init(int argc, char** argv)
metrics_port_ = allow_metrics ? metrics_port : -1;
model_store_path_ = model_store_path;
http_thread_cnt_ = http_thread_cnt;
strict_model_config_ = strict_model_config;
strict_readiness_ = strict_readiness;
model_load_unload_enabled_ = allow_model_load_unload;
profiling_enabled_ = allow_profiling;
@@ -1315,6 +1325,8 @@ InferenceServer::BuildPlatformConfigMap(
{
GraphDefBundleSourceAdapterConfig graphdef_config;

graphdef_config.set_autofill(!strict_model_config_);

// Tensorflow session config
if (tf_gpu_memory_fraction == 0.0) {
graphdef_config.mutable_session_config()
@@ -1333,35 +1345,36 @@

//// Tensorflow SavedModel
{
tfs::SavedModelBundleSourceAdapterConfig saved_model_config;
SavedModelBundleSourceAdapterConfig saved_model_config;

saved_model_config.set_autofill(!strict_model_config_);

if (tf_gpu_memory_fraction == 0.0) {
saved_model_config.mutable_legacy_config()
->mutable_session_config()
saved_model_config.mutable_session_config()
->mutable_gpu_options()
->set_allow_growth(true);
} else {
saved_model_config.mutable_legacy_config()
->mutable_session_config()
saved_model_config.mutable_session_config()
->mutable_gpu_options()
->set_per_process_gpu_memory_fraction(tf_gpu_memory_fraction);
}

saved_model_config.mutable_legacy_config()
->mutable_session_config()
->set_allow_soft_placement(tf_allow_soft_placement);
saved_model_config.mutable_session_config()->set_allow_soft_placement(
tf_allow_soft_placement);
saved_model_source_adapter_config.PackFrom(saved_model_config);
}

//// Caffe NetDef
{
NetDefBundleSourceAdapterConfig netdef_config;
netdef_config.set_autofill(!strict_model_config_);
netdef_source_adapter_config.PackFrom(netdef_config);
}

//// TensorRT
{
PlanBundleSourceAdapterConfig plan_config;
plan_config.set_autofill(!strict_model_config_);
plan_source_adapter_config.PackFrom(plan_config);
}

@@ -1404,7 +1417,12 @@ InferenceServer::BuildModelConfig(
for (const auto& child : real_children) {
const auto full_path = tensorflow::io::JoinPath(model_store_path_, child);
ModelConfig* model_config = model_configs->add_config();
TF_RETURN_IF_ERROR(GetNormalizedModelConfig(full_path, model_config));

// If enabled, try to automatically generate missing parts of the
// model configuration from the model definition. In all cases
// normalize and validate the config.
TF_RETURN_IF_ERROR(
GetNormalizedModelConfig(full_path, !strict_model_config_, model_config));
TF_RETURN_IF_ERROR(ValidateModelConfig(*model_config, std::string()));

// Make sure the name of the model matches the name of the
1 change: 1 addition & 0 deletions src/core/server.h
@@ -157,6 +157,7 @@ class InferenceServer {

std::string model_store_path_;
int http_thread_cnt_;
bool strict_model_config_;
bool strict_readiness_;
bool model_load_unload_enabled_;
bool profiling_enabled_;
99 changes: 96 additions & 3 deletions src/core/utils.cc
@@ -34,6 +34,81 @@

namespace nvidia { namespace inferenceserver {

namespace {

tensorflow::Status
GetAutoFillPlatform(
const tensorflow::StringPiece& model_name,
const tensorflow::StringPiece& path, std::string* platform)
{
const std::string model_path(path);

// Find version subdirectories...
std::vector<std::string> versions;
TF_RETURN_IF_ERROR(
tensorflow::Env::Default()->GetChildren(model_path, &versions));

// For cloud storage like GCS, GetChildren() returns all descendants
// rather than just the direct children, so filter out everything that
// is not a direct descendant.
std::set<std::string> real_versions;
for (size_t i = 0; i < versions.size(); ++i) {
const std::string& version = versions[i];
real_versions.insert(version.substr(0, version.find_first_of('/')));
}

if (real_versions.empty()) {
return tensorflow::errors::NotFound(
"no version sub-directories for model '", model_name, "'");
}

// If a default named file/directory exists in a version
// sub-directory then assume the corresponding platform.
for (const auto& version : real_versions) {
const auto vp = tensorflow::io::JoinPath(model_path, version);

// TensorRT
if (tensorflow::Env::Default()
->FileExists(tensorflow::io::JoinPath(vp, kTensorRTPlanFilename))
.ok()) {
*platform = kTensorRTPlanPlatform;
return tensorflow::Status::OK();
}

// TensorFlow
if (tensorflow::Env::Default()
->FileExists(
tensorflow::io::JoinPath(vp, kTensorFlowSavedModelFilename))
.ok()) {
*platform = kTensorFlowSavedModelPlatform;
return tensorflow::Status::OK();
}
if (tensorflow::Env::Default()
->FileExists(
tensorflow::io::JoinPath(vp, kTensorFlowGraphDefFilename))
.ok()) {
*platform = kTensorFlowGraphDefPlatform;
return tensorflow::Status::OK();
}

// Caffe2
if (tensorflow::Env::Default()
->FileExists(tensorflow::io::JoinPath(vp, kCaffe2NetDefFilename))
.ok()) {
*platform = kCaffe2NetDefPlatform;
return tensorflow::Status::OK();
}
}

return tensorflow::errors::NotFound(
"unable to derive platform for model '", model_name, "', the model ",
"definition file must be named '", kTensorRTPlanFilename, "', '",
kTensorFlowGraphDefFilename, "', '", kTensorFlowSavedModelFilename,
"', or '", kCaffe2NetDefFilename, "'");
}

} // namespace

tensorflow::Status
GetModelVersionFromPath(const tensorflow::StringPiece& path, uint32_t* version)
{
@@ -50,11 +125,29 @@ GetModelVersionFromPath(const tensorflow::StringPiece& path, uint32_t* version)

tensorflow::Status
GetNormalizedModelConfig(
const tensorflow::StringPiece& path, ModelConfig* config)
const tensorflow::StringPiece& path, const bool autofill, ModelConfig* config)
{
// If 'autofill' then the configuration file can be empty.
const auto config_path = tensorflow::io::JoinPath(path, kModelConfigPbTxt);
TF_RETURN_IF_ERROR(
ReadTextProto(tensorflow::Env::Default(), config_path, config));
if (autofill && !tensorflow::Env::Default()->FileExists(config_path).ok()) {
config->Clear();
} else {
TF_RETURN_IF_ERROR(
ReadTextProto(tensorflow::Env::Default(), config_path, config));
}

// Autofill the platform and name...
if (autofill) {
const std::string model_name(tensorflow::io::Basename(path));
if (config->name().empty()) {
config->set_name(model_name);
}

if (config->platform().empty()) {
TF_RETURN_IF_ERROR(
GetAutoFillPlatform(model_name, path, config->mutable_platform()));
}
}

// If 'default_model_filename' is not specified set it appropriately
// based upon 'platform'.
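As a usage sketch of the new signature, assume a hypothetical model directory /models/mymodel that contains 1/model.plan but no config.pbtxt (the path and the PrintDerivedConfig helper are made up for illustration; GetNormalizedModelConfig is the function changed above):

#include <iostream>

#include "src/core/utils.h"
#include "tensorflow/core/lib/core/errors.h"

tensorflow::Status PrintDerivedConfig()
{
  nvidia::inferenceserver::ModelConfig config;

  // With autofill enabled a missing config.pbtxt is tolerated: the model
  // name is taken from the directory name and the platform is derived from
  // the default-named model file found in a version sub-directory.
  TF_RETURN_IF_ERROR(nvidia::inferenceserver::GetNormalizedModelConfig(
      "/models/mymodel", true /* autofill */, &config));

  std::cout << "name: " << config.name()
            << " platform: " << config.platform() << std::endl;
  return tensorflow::Status::OK();
}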
6 changes: 4 additions & 2 deletions src/core/utils.h
@@ -36,9 +36,11 @@ tensorflow::Status GetModelVersionFromPath(

// Read a ModelConfig and normalize it as expected by model servables.
// 'path' should be the full-path to the directory containing the
// model configuration.
// model configuration. If 'autofill' then attempt to determine any
// missing required configuration from the model definition.
tensorflow::Status GetNormalizedModelConfig(
const tensorflow::StringPiece& path, ModelConfig* config);
const tensorflow::StringPiece& path, const bool autofill,
ModelConfig* config);

// Validate that a model is specified correctly (excluding inputs and
// outputs which are validated via ValidateModelInput() and
7 changes: 6 additions & 1 deletion src/servables/caffe2/netdef_bundle.proto
@@ -29,4 +29,9 @@ syntax = "proto3";
package nvidia.inferenceserver;

// Config proto for NetDefBundleSourceAdapter.
message NetDefBundleSourceAdapterConfig {}
message NetDefBundleSourceAdapterConfig
{
// Autofill missing required model configuration settings based on
// model definition file.
bool autofill = 1;
}
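For reference, proto3 generates a plain accessor pair for the new field, and the server packs the message into a google::protobuf::Any entry of the platform config map before handing it to TensorFlow Serving. A small sketch of that round trip (the PackNetDefAdapterConfig helper is made up; the generated header path is assumed to follow the repository's usual *.pb.h pattern):

#include <google/protobuf/any.pb.h>

#include "src/servables/caffe2/netdef_bundle.pb.h"

void PackNetDefAdapterConfig(
    bool strict_model_config, google::protobuf::Any* any_config)
{
  nvidia::inferenceserver::NetDefBundleSourceAdapterConfig config;

  // Autofill is simply the inverse of --strict-model-config, matching the
  // wiring shown in server.cc above.
  config.set_autofill(!strict_model_config);

  // The platform config map stores each adapter config as an Any message;
  // the source adapter later unpacks it and reads config.autofill().
  any_config->PackFrom(config);
}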
15 changes: 10 additions & 5 deletions src/servables/caffe2/netdef_bundle_source_adapter.cc
@@ -43,12 +43,15 @@ namespace {

tensorflow::Status
CreateNetDefBundle(
const NetDefBundleSourceAdapterConfig& adapter_config,
const std::string& path, std::unique_ptr<NetDefBundle>* bundle)
{
const auto model_path = tensorflow::io::Dirname(path);

ModelConfig config;
TF_RETURN_IF_ERROR(GetNormalizedModelConfig(model_path, &config));
ModelConfig model_config;
model_config.set_platform(kCaffe2NetDefPlatform);
TF_RETURN_IF_ERROR(GetNormalizedModelConfig(
model_path, adapter_config.autofill(), &model_config));

// Read all the netdef files in 'path'. GetChildren() returns all
// descendants instead for cloud storage like GCS, so filter out all
@@ -74,7 +77,7 @@ CreateNetDefBundle(
// Create the bundle for the model and all the execution contexts
// requested for this model.
bundle->reset(new NetDefBundle);
tensorflow::Status status = (*bundle)->Init(path, config);
tensorflow::Status status = (*bundle)->Init(path, model_config);
if (status.ok()) {
status = (*bundle)->CreateExecutionContexts(models);
}
@@ -97,8 +100,11 @@ NetDefBundleSourceAdapter::Create(
LOG_VERBOSE(1) << "Create NetDefBundleSourceAdaptor for config \""
<< config.DebugString() << "\"";

Creator creator = std::bind(
&CreateNetDefBundle, config, std::placeholders::_1, std::placeholders::_2);

adapter->reset(new NetDefBundleSourceAdapter(
config, CreateNetDefBundle, SimpleSourceAdapter::EstimateNoResources()));
config, creator, SimpleSourceAdapter::EstimateNoResources()));
return tensorflow::Status::OK();
}

@@ -114,5 +120,4 @@ namespace tensorflow { namespace serving {
REGISTER_STORAGE_PATH_SOURCE_ADAPTER(
nvidia::inferenceserver::NetDefBundleSourceAdapter,
nvidia::inferenceserver::NetDefBundleSourceAdapterConfig);

}} // namespace tensorflow::serving
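The Create() change above uses std::bind to fold the parsed adapter config into the two-argument creator callback that the source adapter stores, so CreateNetDefBundle can read autofill() without changing the callback signature. A generic, self-contained sketch of that binding pattern (simplified types, not the server's actual classes):

#include <functional>
#include <iostream>
#include <memory>
#include <string>

struct Config { bool autofill = true; };
struct Bundle { std::string path; };

// Three-argument factory: the config is supplied up front via std::bind,
// while the path and output bundle are supplied later by the caller.
void CreateBundle(
    const Config& config, const std::string& path,
    std::unique_ptr<Bundle>* bundle)
{
  bundle->reset(new Bundle{path});
  std::cout << "autofill=" << config.autofill << " path=" << path << std::endl;
}

int main()
{
  Config config;
  config.autofill = false;

  // The bound callable has the two-argument shape the adapter expects.
  using Creator =
      std::function<void(const std::string&, std::unique_ptr<Bundle>*)>;
  Creator creator = std::bind(
      &CreateBundle, config, std::placeholders::_1, std::placeholders::_2);

  std::unique_ptr<Bundle> bundle;
  creator("/models/example", &bundle);
  return 0;
}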