Register TensorFlow Plugins at Runtime (#4944)
* Add tf runtime op tests

* Fix test

* Fix relative paths for L0_custom_ops

* Add documentation for runtime TF plugins

* Wording, test loop

* Comment clarity

* Remove extra statement
dyastremsky authored Dec 6, 2022
1 parent 232d432 commit 80cc713
Showing 3 changed files with 79 additions and 7 deletions.
29 changes: 22 additions & 7 deletions docs/user_guide/custom_operations.md
@@ -69,21 +69,36 @@ container.

## TensorFlow

TensorFlow allows users to [add custom
operations](https://www.tensorflow.org/guide/create_op) which can then
be used in TensorFlow models. You can load custom TensorFlow operations
into Triton in two ways:
* At model load time, by listing them in the model configuration.
* At server launch time, by using LD_PRELOAD.

To register your custom operations library via the model configuration,
you can include it as an additional field in the model's config.pbtxt. See the
configuration below as an example.

```
model_operations { op_library_filename: "path/to/libtfcustom.so" }
```
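For example, you could append this field to an existing configuration from the shell,
much as the L0_custom_ops test does. The sketch below assumes a hypothetical model named
my_tf_model in a /tmp/models repository and a library copied to /tmp/models/libtfcustom.so;
adjust the paths for your setup.

```bash
# Illustrative layout (hypothetical names):
#   /tmp/models/my_tf_model/1/model.savedmodel/...
#   /tmp/models/my_tf_model/config.pbtxt
#   /tmp/models/libtfcustom.so

# Append the model_operations field to the model's existing configuration.
$ echo 'model_operations { op_library_filename: "/tmp/models/libtfcustom.so" }' >> /tmp/models/my_tf_model/config.pbtxt
```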
Note that even though the operations are loaded when the model is loaded, multiple models
can use the custom operations. There is currently no way to deallocate the operations, so
they will remain available until Triton is shut down.

You can also register your custom operations library via LD_PRELOAD. For example,
assuming your TensorFlow custom operations are compiled into libtfcustom.so,
starting Triton with the following command makes those operations
available to all TensorFlow models.
```bash
$ LD_PRELOAD=libtfcustom.so:${LD_PRELOAD} tritonserver --model-repository=/tmp/models ...
```
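If you need several custom operation libraries, they can be preloaded together by
separating them with colons; this mirrors how the L0_custom_ops test preloads its
libraries, and the library names below are illustrative.

```bash
$ LD_PRELOAD=libzeroout.so:libcudaop.so:libbusyop.so:${LD_PRELOAD} \
  tritonserver --model-repository=/tmp/models ...
```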
With this approach, all TensorFlow custom operations depend on a TensorFlow shared
library that must be available to the custom shared library when it is
loading. In practice, this means that you must make sure that
/opt/tritonserver/backends/tensorflow1 or
/opt/tritonserver/backends/tensorflow2 is on the library path before
issuing the above command. There are several ways to control the
57 changes: 57 additions & 0 deletions qa/L0_custom_ops/test.sh
@@ -60,6 +60,7 @@ RET=0
LD_LIBRARY_PATH=/opt/tritonserver/backends/tensorflow2:$LD_LIBRARY_PATH

# Tensorflow
## Load operations via LD_PRELOAD
SERVER_ARGS="--model-repository=/data/inferenceserver/${REPO_VERSION}/qa_custom_ops/tf_custom_ops"
SERVER_LD_PRELOAD="/data/inferenceserver/${REPO_VERSION}/qa_custom_ops/tf_custom_ops/libzeroout.so:/data/inferenceserver/${REPO_VERSION}/qa_custom_ops/tf_custom_ops/libcudaop.so:/data/inferenceserver/${REPO_VERSION}/qa_custom_ops/tf_custom_ops/libbusyop.so"

@@ -105,6 +106,62 @@ set -e
kill $SERVER_PID
wait $SERVER_PID

## Load operations via model config
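# LD_PRELOAD is left empty here so the custom ops are registered only through
# each model's config.pbtxt.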
SERVER_ARGS="--model-repository=tf_custom_ops"
SERVER_LD_PRELOAD=""

rm -rf tf_custom_ops && \
mkdir -p tf_custom_ops && \
cp -r /data/inferenceserver/${REPO_VERSION}/qa_custom_ops/tf_custom_ops .

for MODEL_TYPE in savedmodel graphdef; do
echo "model_operations { op_library_filename: \"tf_custom_ops/libbusyop.so\" }" >> tf_custom_ops/${MODEL_TYPE}_busyop/config.pbtxt
echo "model_operations { op_library_filename: \"tf_custom_ops/libcudaop.so\" }" >> tf_custom_ops/${MODEL_TYPE}_cudaop/config.pbtxt
echo "model_operations { op_library_filename: \"tf_custom_ops/libzeroout.so\" }" >> tf_custom_ops/${MODEL_TYPE}_zeroout/config.pbtxt
done

run_server
if [ "$SERVER_PID" == "0" ]; then
    echo -e "\n***\n*** Failed to start $SERVER\n***"
    cat $SERVER_LOG
    exit 1
fi

set +e

python $ZERO_OUT_TEST -v -m graphdef_zeroout >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

python $ZERO_OUT_TEST -v -m savedmodel_zeroout >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

python $CUDA_OP_TEST -v -m graphdef_cudaop >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

python $CUDA_OP_TEST -v -m savedmodel_cudaop >>$CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
    cat $CLIENT_LOG
    echo -e "\n***\n*** Test Failed\n***"
    RET=1
fi

set -e

kill $SERVER_PID
wait $SERVER_PID

# Must set LD_LIBRARY_PATH just for the server launch so that the
# custom operations can find libtorch.so and other pytorch dependencies.
LD_LIBRARY_PATH=/opt/tritonserver/backends/pytorch:$LD_LIBRARY_PATH
Binary file added qa/common/__pycache__/test_util.cpython-38.pyc
