(Re)load a model from the repository in response to any change.
Rework the ModelRepositoryManager to be more general and ready for
future non-eager loading.
David Goodwin committed Nov 19, 2018
1 parent 4a0f636 commit 0c1c202
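
What this commit enables, and what the new tests below exercise: with repository polling active, the server rescans the model store every --repository-poll-secs seconds and loads, reloads, or unloads a model whenever anything under its directory (including its config.pbtxt) changes. A minimal sketch of that poll-and-reload idea, with hypothetical load_model/unload_model callbacks standing in for the reworked ModelRepositoryManager (whose actual implementation is not among the files shown here):

import os
import time

def scan(model_store):
    # Map each model name to the newest mtime found under its directory.
    state = {}
    for name in os.listdir(model_store):
        model_dir = os.path.join(model_store, name)
        if not os.path.isdir(model_dir):
            continue
        newest = os.path.getmtime(model_dir)
        for root, _, files in os.walk(model_dir):
            for f in files:
                newest = max(newest, os.path.getmtime(os.path.join(root, f)))
        state[name] = newest
    return state

def poll_loop(model_store, load_model, unload_model, poll_secs=1):
    seen = {}
    while True:
        current = scan(model_store)
        for name, mtime in current.items():
            if seen.get(name) != mtime:
                load_model(name)    # new model, or any change -> (re)load
        for name in seen:
            if name not in current:
                unload_model(name)  # model directory removed -> unload
        seen = current
        time.sleep(poll_secs)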
Showing 8 changed files with 535 additions and 244 deletions.
12 changes: 6 additions & 6 deletions qa/L0_infer/infer_test.py
@@ -125,7 +125,7 @@ def test_raw_version_latest_1(self):
 self.assertEqual("inference:0", ex.server_id())
 self.assertGreater(ex.request_id(), 0)
 self.assertTrue(
-    ex.message().startswith("Servable not found for request"))
+    ex.message().startswith("Inference request for unknown model"))

 try:
 iu.infer_exact(self, platform, tensor_shape, 1, True,
@@ -135,7 +135,7 @@ def test_raw_version_latest_1(self):
 self.assertEqual("inference:0", ex.server_id())
 self.assertGreater(ex.request_id(), 0)
 self.assertTrue(
-    ex.message().startswith("Servable not found for request"))
+    ex.message().startswith("Inference request for unknown model"))

 iu.infer_exact(self, platform, tensor_shape, 1, True,
     np.int8, np.int8, np.int8,
@@ -156,7 +156,7 @@ def test_raw_version_latest_2(self):
 self.assertEqual("inference:0", ex.server_id())
 self.assertGreater(ex.request_id(), 0)
 self.assertTrue(
-    ex.message().startswith("Servable not found for request"))
+    ex.message().startswith("Inference request for unknown model"))

 iu.infer_exact(self, platform, tensor_shape, 1, True,
     np.int16, np.int16, np.int16,
@@ -201,7 +201,7 @@ def test_raw_version_specific_1(self):
 self.assertEqual("inference:0", ex.server_id())
 self.assertGreater(ex.request_id(), 0)
 self.assertTrue(
-    ex.message().startswith("Servable not found for request"))
+    ex.message().startswith("Inference request for unknown model"))

 try:
 iu.infer_exact(self, platform, tensor_shape, 1, True,
@@ -211,7 +211,7 @@ def test_raw_version_specific_1(self):
 self.assertEqual("inference:0", ex.server_id())
 self.assertGreater(ex.request_id(), 0)
 self.assertTrue(
-    ex.message().startswith("Servable not found for request"))
+    ex.message().startswith("Inference request for unknown model"))

 def test_raw_version_specific_1_3(self):
 input_size = 16
@@ -232,7 +232,7 @@ def test_raw_version_specific_1_3(self):
 self.assertEqual("inference:0", ex.server_id())
 self.assertGreater(ex.request_id(), 0)
 self.assertTrue(
-    ex.message().startswith("Servable not found for request"))
+    ex.message().startswith("Inference request for unknown model"))

 iu.infer_exact(self, platform, tensor_shape, 1, True,
     np.float32, np.float32, np.float32,
95 changes: 91 additions & 4 deletions qa/L0_lifecycle/lifecycle_test.py
@@ -125,7 +125,8 @@ def test_parse_error_modelfail(self):
 self.assertEqual("inference:0", ex.server_id())
 self.assertGreater(ex.request_id(), 0)
 self.assertTrue(
-    ex.message().startswith("Servable not found for request"))
+    ex.message().startswith(
+        "Inference request for unknown model 'graphdef_float32_float32_float32'"))

 def test_dynamic_model_load_unload(self):
 input_size = 16
@@ -231,7 +232,7 @@ def test_dynamic_model_load_unload(self):
 self.assertGreater(ex.request_id(), 0)
 self.assertTrue(
 ex.message().startswith(
-    "no configuration for model 'savedmodel_float32_float32_float32'"))
+    "Inference request for unknown model 'savedmodel_float32_float32_float32'"))

 # Add back the same model. The status/stats should be reset.
 try:
@@ -287,7 +288,7 @@ def test_dynamic_model_load_unload(self):
 self.assertGreater(ex.request_id(), 0)
 self.assertTrue(
 ex.message().startswith(
-    "no configuration for model 'netdef_float32_float32_float32'"))
+    "Inference request for unknown model 'netdef_float32_float32_float32'"))

 def test_dynamic_model_load_unload_disabled(self):
 input_size = 16
@@ -457,7 +458,7 @@ def test_dynamic_version_load_unload(self):
 self.assertGreater(ex.request_id(), 0)
 self.assertTrue(
 ex.message().startswith(
-    "Servable not found for request: Specific(graphdef_int32_int32_int32, 1)"))
+    "Inference request for unknown model 'graphdef_int32_int32_int32'"))

 # Add back the same version. The status/stats should be
 # retained for versions (note that this is different behavior
@@ -572,6 +573,92 @@ def test_dynamic_version_load_unload_disabled(self):
 except InferenceServerException as ex:
 self.assertTrue(False, "unexpected error {}".format(ex))

+    def test_dynamic_model_modify(self):
+        input_size = 16
+        models_base = ('savedmodel', 'plan')
+        models_shape = ((input_size,), (input_size, 1, 1))
+        models = list()
+        for m in models_base:
+            models.append(tu.get_model_name(m, np.float32, np.float32, np.float32))
+
+        # Make sure savedmodel and plan are in the status
+        for model_name in models:
+            try:
+                for pair in [("localhost:8000", ProtocolType.HTTP), ("localhost:8001", ProtocolType.GRPC)]:
+                    ctx = ServerStatusContext(pair[0], pair[1], model_name, True)
+                    ss = ctx.get_server_status()
+                    self.assertEqual(os.environ["TENSORRT_SERVER_VERSION"], ss.version)
+                    self.assertEqual("inference:0", ss.id)
+                    self.assertEqual(server_status.SERVER_READY, ss.ready_state)
+
+                    self.assertEqual(len(ss.model_status), 1)
+                    self.assertTrue(model_name in ss.model_status,
+                                    "expected status for model " + model_name)
+                    for (k, v) in iteritems(ss.model_status[model_name].version_status):
+                        self.assertEqual(v.ready_state, server_status.MODEL_READY)
+            except InferenceServerException as ex:
+                self.assertTrue(False, "unexpected error {}".format(ex))
+
+        # Run inference on the model, both versions 1 and 3
+        for version in (1, 3):
+            for model_name, model_shape in zip(models_base, models_shape):
+                try:
+                    iu.infer_exact(self, model_name, model_shape, 1, True,
+                                   np.float32, np.float32, np.float32, swap=(version == 3),
+                                   model_version=version)
+                except InferenceServerException as ex:
+                    self.assertTrue(False, "unexpected error {}".format(ex))
+
+        # Change the model configuration to use the default version
+        # policy (so that only version 3 is available).
+        for base_name, model_name in zip(models_base, models):
+            shutil.copyfile("config.pbtxt." + base_name, "models/" + model_name + "/config.pbtxt")
+
+        time.sleep(5)  # wait for models to reload
+        for model_name in models:
+            try:
+                for pair in [("localhost:8000", ProtocolType.HTTP), ("localhost:8001", ProtocolType.GRPC)]:
+                    ctx = ServerStatusContext(pair[0], pair[1], model_name, True)
+                    ss = ctx.get_server_status()
+                    self.assertEqual(os.environ["TENSORRT_SERVER_VERSION"], ss.version)
+                    self.assertEqual("inference:0", ss.id)
+                    self.assertEqual(server_status.SERVER_READY, ss.ready_state)
+                    self.assertEqual(len(ss.model_status), 1)
+                    self.assertTrue(model_name in ss.model_status,
+                                    "expected status for model " + model_name)
+                    self.assertTrue(1 in ss.model_status[model_name].version_status,
+                                    "expected status for version 1 of model " + model_name)
+                    self.assertTrue(3 in ss.model_status[model_name].version_status,
+                                    "expected status for version 3 of model " + model_name)
+                    self.assertEqual(ss.model_status[model_name].version_status[1].ready_state,
+                                     server_status.MODEL_UNAVAILABLE)
+                    self.assertEqual(ss.model_status[model_name].version_status[3].ready_state,
+                                     server_status.MODEL_READY)
+            except InferenceServerException as ex:
+                self.assertTrue(False, "unexpected error {}".format(ex))
+
+        # Attempt inferencing using version 1; this should fail since the
+        # change in model policy makes that version no longer available.
+        for model_name, model_shape in zip(models_base, models_shape):
+            try:
+                iu.infer_exact(self, model_name, model_shape, 1, True,
+                               np.float32, np.float32, np.float32, swap=False,
+                               model_version=1)
+                self.assertTrue(False, "expected error for unavailable model " + model_name)
+            except InferenceServerException as ex:
+                self.assertEqual("inference:0", ex.server_id())
+                self.assertGreater(ex.request_id(), 0)
+                self.assertTrue(
+                    ex.message().startswith("Inference request for unknown model"))
+
+        # Version 3 should continue to work...
+        for model_name, model_shape in zip(models_base, models_shape):
+            try:
+                iu.infer_exact(self, model_name, model_shape, 1, True,
+                               np.float32, np.float32, np.float32, swap=True,
+                               model_version=3)
+            except InferenceServerException as ex:
+                self.assertTrue(False, "unexpected error {}".format(ex))

 if __name__ == '__main__':
     unittest.main()
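
For context on test_dynamic_model_modify: the test swaps in a config.pbtxt whose version_policy line has been stripped (see the test.sh change below), so the server falls back to its default policy of serving only the latest version. That is why version 1 flips to MODEL_UNAVAILABLE while version 3 stays MODEL_READY. A sketch of the two configurations in model-config protobuf text format; the exact stanza used by the QA models is an assumption:

# Before the modify -- the QA config pins specific versions (assumed):
version_policy: { specific { versions: [1, 3] } }

# After `sed '/^version_policy/d'` strips the line, the field is absent and
# the default applies, which is equivalent to:
version_policy: { latest { num_versions: 1 } }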
31 changes: 30 additions & 1 deletion qa/L0_lifecycle/test.sh
@@ -206,7 +206,7 @@ done

 SERVER_ARGS="--model-store=`pwd`/models --repository-poll-secs=1 \
              --allow-poll-model-repository=false --exit-timeout-secs=5"
-SERVER_LOG="./inference_server_5.log"
+SERVER_LOG="./inference_server_6.log"
 run_server
 if [ "$SERVER_PID" == "0" ]; then
     echo -e "\n***\n*** Failed to start $SERVER\n***"
@@ -225,6 +225,35 @@ set -e

 kill $SERVER_PID
 wait $SERVER_PID

+# LifeCycleTest.test_dynamic_model_modify
+rm -fr models config.pbtxt.*
+mkdir models
+for i in savedmodel plan ; do
+    cp -r $DATADIR/qa_model_repository/${i}_float32_float32_float32 models/.
+    sed '/^version_policy/d' \
+        $DATADIR/qa_model_repository/${i}_float32_float32_float32/config.pbtxt > config.pbtxt.${i}
+done
+
+SERVER_ARGS="--model-store=`pwd`/models --repository-poll-secs=1 --exit-timeout-secs=5"
+SERVER_LOG="./inference_server_7.log"
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    cat $SERVER_LOG
+    exit 1
+fi
+
+set +e
+python $LC_TEST LifeCycleTest.test_dynamic_model_modify >>$CLIENT_LOG 2>&1
+if [ $? -ne 0 ]; then
+    echo -e "\n***\n*** Test Failed\n***"
+    RET=1
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+

 # python unittest seems to swallow ImportError and still return 0 exit
 # code. So need to explicitly check CLIENT_LOG to make sure we see
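
A timing note on the new test: lifecycle_test.py uses a fixed time.sleep(5) to give the 1-second poller time to apply the config change. A more robust alternative would poll the status API the test already uses; a sketch, assuming the same client imports as lifecycle_test.py (the 30-second timeout is an arbitrary choice):

import time

def wait_for_version_state(host, protocol, model_name, version,
                           desired_state, timeout_secs=30):
    # Poll server status until the given model version reaches the
    # desired ready state, or give up after timeout_secs.
    deadline = time.time() + timeout_secs
    while time.time() < deadline:
        ctx = ServerStatusContext(host, protocol, model_name, True)
        ss = ctx.get_server_status()
        vs = ss.model_status[model_name].version_status
        if version in vs and vs[version].ready_state == desired_state:
            return True
        time.sleep(1)
    return False

# e.g., after overwriting models/<name>/config.pbtxt:
# wait_for_version_state("localhost:8000", ProtocolType.HTTP,
#                        "savedmodel_float32_float32_float32", 1,
#                        server_status.MODEL_UNAVAILABLE)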