Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify timeout test in L0_sequence_batcher to use portable backend #5696

Merged
merged 2 commits into from
Apr 27, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,22 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "identity_fp32_timeout"
backend: "python"
backend: "sequence"
default_model_filename: "libtriton_sequence.so"
max_batch_size: 1

input [
{
name: "INPUT0"
data_type: TYPE_FP32
name: "INPUT"
data_type: TYPE_INT32
dims: [ 1 ]
}
]

output [
{
name: "OUTPUT0"
data_type: TYPE_FP32
name: "OUTPUT"
data_type: TYPE_INT32
dims: [ 1 ]
}
]
Expand All @@ -53,4 +53,31 @@ instance_group [

sequence_batching {
max_sequence_idle_microseconds: 50000000
control_input [
{
name: "START"
control [
{
kind: CONTROL_SEQUENCE_START
int32_false_true: [ 0, 1 ]
}
]
},
{
name: "READY"
control [
{
kind: CONTROL_SEQUENCE_READY
int32_false_true: [ 0, 1 ]
}
]
}
]
}

parameters [
{
key: "execute_delay_ms"
value: { string_value: "3000" }
}
]
31 changes: 20 additions & 11 deletions qa/L0_sequence_batcher/sequence_batcher_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2860,15 +2860,21 @@ def setUp(self):
self.server_address_ = os.environ.get('TRITONSERVER_IPADDR',
'localhost') + ":8001"

self.model_name_ = "identity_fp32_timeout"
self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.float32)
self.inputs_ = [grpcclient.InferInput('INPUT0', [1, 1], "FP32")]
# Prepare input and expected output based on the model and
# the infer sequence sent for testing. If the test is to be extended
# for different sequence and model, then proper grouping should be added
self.model_name_ = "custom_sequence_int32_timeout"
self.tensor_data_ = np.ones(shape=[1, 1], dtype=np.int32)
self.inputs_ = [grpcclient.InferInput('INPUT', [1, 1], "INT32")]
self.inputs_[0].set_data_from_numpy(self.tensor_data_)
self.expected_out_seq_ = [("OUTPUT", self.tensor_data_),
("OUTPUT", self.tensor_data_ * 2),
("OUTPUT", self.tensor_data_ * 3)]

def send_sequence_with_timeout(self,
seq_id,
callback,
timeout_us=3000000,
timeout_us=2000000,
request_pause_sec=0):
with grpcclient.InferenceServerClient(
self.server_address_) as triton_client:
Expand Down Expand Up @@ -2897,8 +2903,8 @@ def test_request_timeout(self):
# expect the timeout will only be expired on backlog sequence and reject
# all requests of the sequence once expired.
# Sending two sequences while the model can only process one sequence
# at a time. Each model execution takes 5 second and all requests have
# 3 second timeout, so the second sequence will be rejected.
# at a time. Each model execution takes 3 second and all requests have
# 2 second timeout, so the second sequence will be rejected.

# correlation ID is 1-index
seq1_res = []
Expand All @@ -2920,16 +2926,19 @@ def test_request_timeout(self):
for t in threads:
t.join()

for result, error in seq1_res:
for idx in range(len(seq1_res)):
result, error = seq1_res[idx]
self.assertIsNone(
error,
"Expect successful inference for sequence 1 requests, got error: {}"
.format(error))
out = result.as_numpy(self.expected_out_seq_[idx][0])
expected_out = self.expected_out_seq_[idx][1]
np.testing.assert_allclose(
result.as_numpy("OUTPUT0"),
self.tensor_data_,
err_msg="Unexpected output tensor, got {}".format(
result.as_numpy("OUTPUT0")))
out,
expected_out,
err_msg="Unexpected output tensor: expect {}, got {}".format(
expected_out, out))

for _, error in seq2_res:
self.assertIsNotNone(error, "Expect error for sequence 2 requests")
Expand Down
3 changes: 1 addition & 2 deletions qa/L0_sequence_batcher/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -735,8 +735,7 @@ if [ "$TEST_SYSTEM_SHARED_MEMORY" -ne 1 ] && [ "$TEST_CUDA_SHARED_MEMORY" -ne 1

TEST_CASE=SequenceBatcherRequestTimeoutTest
MODEL_PATH=request_timeout_models
mkdir -p ${MODEL_PATH}/identity_fp32_timeout/1
cp ../python_models/identity_fp32_timeout/model.py ${MODEL_PATH}/identity_fp32_timeout/1/.
cp -r ../custom_models/custom_sequence_int32/1 ${MODEL_PATH}/custom_sequence_int32_timeout

SERVER_ARGS="--model-repository=$MODELDIR/$MODEL_PATH ${SERVER_ARGS_EXTRA}"
SERVER_LOG="./$TEST_CASE.$MODEL_PATH.server.log"
Expand Down