Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify timeout test in L0_sequence_batcher to use portable backend #5696

Merged
merged 2 commits into from
Apr 27, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,22 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "identity_fp32_timeout"
backend: "python"
backend: "sequence"
default_model_filename: "libtriton_sequence.so"
max_batch_size: 1

input [
{
name: "INPUT0"
data_type: TYPE_FP32
name: "INPUT"
data_type: TYPE_INT32
dims: [ 1 ]
}
]

output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
name: "OUTPUT"
data_type: TYPE_INT32
dims: [ 1 ]
}
]
Expand All @@ -53,4 +53,31 @@ instance_group [

sequence_batching {
max_sequence_idle_microseconds: 50000000
control_input [
{
name: "START"
control [
{
kind: CONTROL_SEQUENCE_START
int32_false_true: [ 0, 1 ]
}
]
},
{
name: "READY"
control [
{
kind: CONTROL_SEQUENCE_READY
int32_false_true: [ 0, 1 ]
}
]
}
]
}

parameters [
{
key: "execute_delay_ms"
value: { string_value: "3000" }
}
]
31 changes: 20 additions & 11 deletions qa/L0_sequence_batcher/sequence_batcher_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2860,15 +2860,21 @@ def setUp(self):
self.server_address_ = os.environ.get('TRITONSERVER_IPADDR',
'localhost') + ":8001"

self.model_name_ = "identity_fp32_timeout"
self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.float32)
self.inputs_ = [grpcclient.InferInput('INPUT0', [1, 1], "FP32")]
# Prepare input and expected output based on the model and
# the infer sequence sent for testing. If the test is to be extended
# for different sequence and model, then proper grouping should be added
self.model_name_ = "custom_sequence_int32_timeout"
self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.int32)
self.inputs_ = [grpcclient.InferInput('INPUT', [1, 1], "INT32")]
self.inputs_[0].set_data_from_numpy(self.tensor_data_)
self.expected_out_seq_ = [("OUTPUT", self.tensor_data_),
("OUTPUT", self.tensor_data_ * 2),
("OUTPUT", self.tensor_data_ * 3)]

def send_sequence_with_timeout(self,
seq_id,
callback,
timeout_us=3000000,
timeout_us=2000000,
request_pause_sec=0):
with grpcclient.InferenceServerClient(
self.server_address_) as triton_client:
Expand Down Expand Up @@ -2897,8 +2903,8 @@ def test_request_timeout(self):
# expect the timeout will only be expired on backlog sequence and reject
# all requests of the sequence once expired.
# Sending two sequences while the model can only process one sequence
# at a time. Each model execution takes 5 second and all requests have
# 3 second timeout, so the second sequence will be rejected.
# at a time. Each model execution takes 3 second and all requests have
# 2 second timeout, so the second sequence will be rejected.

# correlation ID is 1-index
seq1_res = []
Expand All @@ -2920,16 +2926,19 @@ def test_request_timeout(self):
for t in threads:
t.join()

for result, error in seq1_res:
for idx in range(len(seq1_res)):
result, error = seq1_res[idx]
self.assertIsNone(
error,
"Expect successful inference for sequence 1 requests, got error: {}"
.format(error))
out = result.as_numpy(self.expected_out_seq_[idx][0])
expected_out = self.expected_out_seq_[idx][1]
np.testing.assert_allclose(
result.as_numpy("OUTPUT0"),
self.tensor_data_,
err_msg="Unexpected output tensor, got {}".format(
result.as_numpy("OUTPUT0")))
out,
expected_out,
err_msg="Unexpected output tensor: expect {}, got {}".format(
expected_out, out))

for _, error in seq2_res:
self.assertIsNotNone(error, "Expect error for sequence 2 requests")
Expand Down
3 changes: 1 addition & 2 deletions qa/L0_sequence_batcher/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -735,8 +735,7 @@ if [ "$TEST_SYSTEM_SHARED_MEMORY" -ne 1 ] && [ "$TEST_CUDA_SHARED_MEMORY" -ne 1

TEST_CASE=SequenceBatcherRequestTimeoutTest
MODEL_PATH=request_timeout_models
mkdir -p ${MODEL_PATH}/identity_fp32_timeout/1
cp ../python_models/identity_fp32_timeout/model.py ${MODEL_PATH}/identity_fp32_timeout/1/.
cp -r ../custom_models/custom_sequence_int32/1 ${MODEL_PATH}/custom_sequence_int32_timeout

SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"
SERVER_LOG="./$TEST_CASE.$MODEL_PATH.server.log"
Expand Down