Rename/add flags to control if repository is monitored for changes

David Goodwin committed Nov 19, 2018
1 parent 9ee5772 commit e44259b
Showing 13 changed files with 404 additions and 40 deletions.
6 changes: 6 additions & 0 deletions Dockerfile
@@ -160,6 +160,12 @@ RUN sha1sum -c tools/patch/tensorflow/checksums && \
RUN sha1sum -c tools/patch/tfs/checksums && \
patch -i tools/patch/tfs/model_servers/server_core.cc \
/workspace/serving/tensorflow_serving/model_servers/server_core.cc && \
patch -i tools/patch/tfs/sources/storage_path/file_system_storage_path_source.cc \
/workspace/serving/tensorflow_serving/sources/storage_path/file_system_storage_path_source.cc && \
patch -i tools/patch/tfs/sources/storage_path/file_system_storage_path_source.h \
/workspace/serving/tensorflow_serving/sources/storage_path/file_system_storage_path_source.h && \
patch -i tools/patch/tfs/sources/storage_path/file_system_storage_path_source.proto \
/workspace/serving/tensorflow_serving/sources/storage_path/file_system_storage_path_source.proto && \
patch -i tools/patch/tfs/util/retrier.cc \
/workspace/serving/tensorflow_serving/util/retrier.cc && \
patch -i tools/patch/tfs/util/BUILD \
13 changes: 7 additions & 6 deletions docs/model_repository.rst
@@ -115,12 +115,13 @@ inference server is running:
* New models can be added to the repository by adding a new model
directory.

Changes to the model repository may not be detected immediately by the
inference server because it only polls the file system
periodically. You can control the polling interval with the
-\\-file-system-poll-secs options. The console log or the :ref:`Status
API <section-api-status>` can be used to determine when model repository
changes have taken effect.
By default the inference server detects changes to the model
repository; this detection can be disabled with
-\\-allow-poll-model-repository=false. Changes to the model repository
may not be detected immediately because the inference server polls the
repository periodically. You can control the polling interval with the
-\\-repository-poll-secs option. The console log or the :ref:`Status
API <section-api-status>` can be used to determine when model
repository changes have taken effect.

Currently changes to the model configuration (config.pbtxt) will not
be recognized by the inference server. To change a model's
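For illustration, a minimal sketch of how the flags described in the
documentation change above might be passed when launching the server.
The binary and model-store paths follow the QA scripts later in this
commit and may differ in other installations; the 5-second interval is
an arbitrary example value.

# Sketch only: paths are taken from the QA scripts in this commit.
SERVER=/opt/tensorrtserver/bin/trtserver

# Poll the model repository every 5 seconds for added or removed
# models and versions (polling is enabled by default).
$SERVER --model-store=`pwd`/models --repository-poll-secs=5

# Disable polling; only the models and versions present at startup
# are served.
$SERVER --model-store=`pwd`/models --allow-poll-model-repository=false

The QA scripts below use a 1-second polling interval so the tests run
quickly.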
205 changes: 205 additions & 0 deletions qa/L0_lifecycle/lifecycle_test.py
@@ -367,6 +367,211 @@ def test_dynamic_model_load_unload_disabled(self):
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_dynamic_version_load_unload(self):
        input_size = 16
        tensor_shape = (input_size,)
        graphdef_name = tu.get_model_name('graphdef', np.int32, np.int32, np.int32)

        # There are 3 versions. Make sure that all have status and are
        # ready.
        try:
            for pair in [("localhost:8000", ProtocolType.HTTP), ("localhost:8001", ProtocolType.GRPC)]:
                ctx = ServerStatusContext(pair[0], pair[1], graphdef_name, True)
                ss = ctx.get_server_status()
                self.assertEqual(os.environ["TENSORRT_SERVER_VERSION"], ss.version)
                self.assertEqual("inference:0", ss.id)
                self.assertEqual(server_status.SERVER_READY, ss.ready_state)

                self.assertEqual(len(ss.model_status), 1)
                self.assertTrue(graphdef_name in ss.model_status,
                                "expected status for model " + graphdef_name)
                self.assertEqual(len(ss.model_status[graphdef_name].version_status), 3)
                for (k, v) in iteritems(ss.model_status[graphdef_name].version_status):
                    self.assertEqual(v.ready_state, server_status.MODEL_READY)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Run inference on version 1 to make sure it is available
        try:
            iu.infer_exact(self, 'graphdef', tensor_shape, 1, True,
                           np.int32, np.int32, np.int32, swap=False,
                           model_version=1)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Make sure version 1 has execution stats in the status.
        expected_exec_cnt = 0
        try:
            for pair in [("localhost:8000", ProtocolType.HTTP), ("localhost:8001", ProtocolType.GRPC)]:
                ctx = ServerStatusContext(pair[0], pair[1], graphdef_name, True)
                ss = ctx.get_server_status()
                self.assertEqual(os.environ["TENSORRT_SERVER_VERSION"], ss.version)
                self.assertEqual("inference:0", ss.id)
                self.assertEqual(server_status.SERVER_READY, ss.ready_state)

                self.assertEqual(len(ss.model_status), 1)
                self.assertTrue(graphdef_name in ss.model_status,
                                "expected status for model " + graphdef_name)
                self.assertTrue(1 in ss.model_status[graphdef_name].version_status,
                                "expected status for version 1 of model " + graphdef_name)

                version_status = ss.model_status[graphdef_name].version_status[1]
                self.assertEqual(version_status.ready_state, server_status.MODEL_READY)
                self.assertGreater(version_status.model_execution_count, 0)
                expected_exec_cnt = version_status.model_execution_count
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Remove version 1 from the model store and give it time to
        # unload. Make sure that it has a status but is unavailable.
        try:
            shutil.rmtree("models/" + graphdef_name + "/1")
            time.sleep(5) # wait for version to unload
            for pair in [("localhost:8000", ProtocolType.HTTP), ("localhost:8001", ProtocolType.GRPC)]:
                ctx = ServerStatusContext(pair[0], pair[1], graphdef_name, True)
                ss = ctx.get_server_status()
                self.assertEqual(os.environ["TENSORRT_SERVER_VERSION"], ss.version)
                self.assertEqual("inference:0", ss.id)
                self.assertEqual(server_status.SERVER_READY, ss.ready_state)

                self.assertEqual(len(ss.model_status), 1)
                self.assertTrue(graphdef_name in ss.model_status,
                                "expected status for model " + graphdef_name)
                self.assertTrue(1 in ss.model_status[graphdef_name].version_status,
                                "expected status for version 1 of model " + graphdef_name)

                version_status = ss.model_status[graphdef_name].version_status[1]
                self.assertEqual(version_status.ready_state, server_status.MODEL_UNAVAILABLE)
                self.assertEqual(version_status.model_execution_count, expected_exec_cnt)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Version is removed so inference should fail
        try:
            iu.infer_exact(self, 'graphdef', tensor_shape, 1, True,
                           np.int32, np.int32, np.int32, swap=False,
                           model_version=1)
            self.assertTrue(False, "expected error for unavailable model " + graphdef_name)
        except InferenceServerException as ex:
            self.assertEqual("inference:0", ex.server_id())
            self.assertGreater(ex.request_id(), 0)
            self.assertTrue(
                ex.message().startswith(
                    "Servable not found for request: Specific(graphdef_int32_int32_int32, 1)"))

        # Add back the same version. The status/stats should be
        # retained for versions (note that this is different behavior
        # than if a model is removed and then added back).
        try:
            shutil.copytree("models/" + graphdef_name + "/2",
                            "models/" + graphdef_name + "/1")
            time.sleep(5) # wait for model to load
            for pair in [("localhost:8000", ProtocolType.HTTP), ("localhost:8001", ProtocolType.GRPC)]:
                ctx = ServerStatusContext(pair[0], pair[1], graphdef_name, True)
                ss = ctx.get_server_status()
                self.assertEqual(os.environ["TENSORRT_SERVER_VERSION"], ss.version)
                self.assertEqual("inference:0", ss.id)
                self.assertEqual(server_status.SERVER_READY, ss.ready_state)

                self.assertEqual(len(ss.model_status), 1)
                self.assertTrue(graphdef_name in ss.model_status,
                                "expected status for model " + graphdef_name)
                self.assertEqual(len(ss.model_status[graphdef_name].version_status), 3)
                for (k, v) in iteritems(ss.model_status[graphdef_name].version_status):
                    self.assertEqual(v.ready_state, server_status.MODEL_READY)
                    if k == 1:
                        self.assertEqual(v.model_execution_count, expected_exec_cnt)
                    else:
                        self.assertEqual(v.model_execution_count, 0)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Add another version from the model store.
        try:
            shutil.copytree("models/" + graphdef_name + "/2",
                            "models/" + graphdef_name + "/7")
            time.sleep(5) # wait for version to load
            for pair in [("localhost:8000", ProtocolType.HTTP), ("localhost:8001", ProtocolType.GRPC)]:
                ctx = ServerStatusContext(pair[0], pair[1], graphdef_name, True)
                ss = ctx.get_server_status()
                self.assertEqual(os.environ["TENSORRT_SERVER_VERSION"], ss.version)
                self.assertEqual("inference:0", ss.id)
                self.assertEqual(server_status.SERVER_READY, ss.ready_state)

                self.assertEqual(len(ss.model_status), 1)
                self.assertTrue(graphdef_name in ss.model_status,
                                "expected status for model " + graphdef_name)
                self.assertTrue(7 in ss.model_status[graphdef_name].version_status,
                                "expected status for version 7 of model " + graphdef_name)

                self.assertEqual(len(ss.model_status[graphdef_name].version_status), 4)
                for (k, v) in iteritems(ss.model_status[graphdef_name].version_status):
                    self.assertEqual(v.ready_state, server_status.MODEL_READY)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

    def test_dynamic_version_load_unload_disabled(self):
        input_size = 16
        tensor_shape = (input_size,)
        graphdef_name = tu.get_model_name('graphdef', np.int32, np.int32, np.int32)

        # Add a new version to the model store and give it time to
        # load. But it shouldn't load because dynamic loading is
        # disabled.
        try:
            shutil.copytree("models/" + graphdef_name + "/2",
                            "models/" + graphdef_name + "/7")
            time.sleep(5) # wait for model to load
            for pair in [("localhost:8000", ProtocolType.HTTP), ("localhost:8001", ProtocolType.GRPC)]:
                ctx = ServerStatusContext(pair[0], pair[1], graphdef_name, True)
                ss = ctx.get_server_status()
                self.assertEqual(os.environ["TENSORRT_SERVER_VERSION"], ss.version)
                self.assertEqual("inference:0", ss.id)
                self.assertEqual(server_status.SERVER_READY, ss.ready_state)

                self.assertEqual(len(ss.model_status), 1)
                self.assertTrue(graphdef_name in ss.model_status,
                                "expected status for model " + graphdef_name)
                self.assertFalse(7 in ss.model_status[graphdef_name].version_status,
                                 "unexpected status for version 7 of model " + graphdef_name)
                self.assertEqual(len(ss.model_status[graphdef_name].version_status), 3)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Remove one of the original versions from the model
        # store. Unloading is disabled so it should remain available
        # in the status.
        try:
            shutil.rmtree("models/" + graphdef_name + "/1")
            time.sleep(5) # wait for version to unload (but it shouldn't)
            for pair in [("localhost:8000", ProtocolType.HTTP), ("localhost:8001", ProtocolType.GRPC)]:
                ctx = ServerStatusContext(pair[0], pair[1], graphdef_name, True)
                ss = ctx.get_server_status()
                self.assertEqual(os.environ["TENSORRT_SERVER_VERSION"], ss.version)
                self.assertEqual("inference:0", ss.id)
                self.assertEqual(server_status.SERVER_READY, ss.ready_state)

                self.assertEqual(len(ss.model_status), 1)
                self.assertTrue(graphdef_name in ss.model_status,
                                "expected status for model " + graphdef_name)
                self.assertTrue(1 in ss.model_status[graphdef_name].version_status,
                                "expected status for version 1 of model " + graphdef_name)

                self.assertEqual(len(ss.model_status[graphdef_name].version_status), 3)
                for (k, v) in iteritems(ss.model_status[graphdef_name].version_status):
                    self.assertEqual(v.ready_state, server_status.MODEL_READY)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))

        # Run inference to make sure model still being served even
        # though version deleted from model store
        try:
            iu.infer_exact(self, 'graphdef', tensor_shape, 1, True,
                           np.int32, np.int32, np.int32, swap=False,
                           model_version=1)
        except InferenceServerException as ex:
            self.assertTrue(False, "unexpected error {}".format(ex))


if __name__ == '__main__':
    unittest.main()
61 changes: 58 additions & 3 deletions qa/L0_lifecycle/test.sh
@@ -121,7 +121,7 @@ for i in graphdef netdef plan ; do
done
cp -r $DATADIR/qa_model_repository/savedmodel_float32_float32_float32 .

SERVER_ARGS="--model-store=`pwd`/models --file-system-poll-secs=1 --exit-timeout-secs=5"
SERVER_ARGS="--model-store=`pwd`/models --repository-poll-secs=1 --exit-timeout-secs=5"
SERVER_LOG="./inference_server_3.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
@@ -149,8 +149,8 @@ for i in graphdef netdef plan ; do
done
cp -r $DATADIR/qa_model_repository/savedmodel_float32_float32_float32 .

SERVER_ARGS="--model-store=`pwd`/models --allow-model-load-unload=false \
--file-system-poll-secs=1 --exit-timeout-secs=5"
SERVER_ARGS="--model-store=`pwd`/models --allow-poll-model-repository=false \
--repository-poll-secs=1 --exit-timeout-secs=5"
SERVER_LOG="./inference_server_4.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
@@ -170,6 +170,61 @@ set -e
kill $SERVER_PID
wait $SERVER_PID

# LifeCycleTest.test_dynamic_version_load_unload
rm -fr models
mkdir models
for i in graphdef ; do
cp -r $DATADIR/qa_model_repository/${i}_int32_int32_int32 models/.
done

SERVER_ARGS="--model-store=`pwd`/models --repository-poll-secs=1 --exit-timeout-secs=5"
SERVER_LOG="./inference_server_5.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_dynamic_version_load_unload >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Test Failed\n***"
RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID

# LifeCycleTest.test_dynamic_version_load_unload_disabled
rm -fr models
mkdir models
for i in graphdef ; do
cp -r $DATADIR/qa_model_repository/${i}_int32_int32_int32 models/.
done

SERVER_ARGS="--model-store=`pwd`/models --repository-poll-secs=1 \
--allow-poll-model-repository=false --exit-timeout-secs=5"
SERVER_LOG="./inference_server_6.log"
run_server
if [ "$SERVER_PID" == "0" ]; then
echo -e "\n***\n*** Failed to start $SERVER\n***"
cat $SERVER_LOG
exit 1
fi

set +e
python $LC_TEST LifeCycleTest.test_dynamic_version_load_unload_disabled >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
echo -e "\n***\n*** Test Failed\n***"
RET=1
fi
set -e

kill $SERVER_PID
wait $SERVER_PID


# python unittest seems to swallow ImportError and still return 0 exit
# code. So need to explicitly check CLIENT_LOG to make sure we see
2 changes: 1 addition & 1 deletion qa/L0_server_status/test.sh
@@ -31,7 +31,7 @@ SERVER_STATUS_TEST=server_status_test.py
DATADIR=/data/inferenceserver

SERVER=/opt/tensorrtserver/bin/trtserver
SERVER_ARGS="--file-system-poll-secs=1 --model-store=`pwd`/models"
SERVER_ARGS="--repository-poll-secs=1 --model-store=`pwd`/models"
SERVER_LOG="./inference_server.log"
source ../common/util.sh
