From 99d767f41d925ca623fff0c43e81eeaddd055b1c Mon Sep 17 00:00:00 2001
From: David Gardner <96306125+dagardner-nv@users.noreply.github.com>
Date: Mon, 8 Aug 2022 12:47:28 -0700
Subject: [PATCH] Fixes issues with NLP pipelines when data is not truncated
 (#316)

* Fixes typos in `TensorObject::read_element` as suggested in the description of #305
* In cases where the length of the output results does not match the length of the dataframe, the `seq_ids` array is used to perform a reduction of the rows, such that if rows 5, 6 & 7 of the output results map to row 5 in the dataframe, the max value of each column across those rows is stored in the response output.
* Add new method `MatxUtil::reduce_max` to perform reduction.

fixes #305

Authors:
  - David Gardner (https://github.com/dagardner-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: https://github.com/nv-morpheus/Morpheus/pull/316
---
 .../include/morpheus/objects/dev_mem_info.hpp |   4 +-
 .../morpheus/objects/tensor_object.hpp        |  10 +-
 .../include/morpheus/utilities/matx_util.hpp  |  20 ++
 .../_lib/src/messages/multi_inference.cpp     |  17 +-
 morpheus/_lib/src/messages/multi_response.cpp |   8 +-
 morpheus/_lib/src/stages/triton_inference.cpp |  97 +++++++--
 morpheus/_lib/src/utilities/matx_util.cu      |  92 +++++++++
 morpheus/_lib/tests/CMakeLists.txt            |   2 +
 morpheus/_lib/tests/test_matx_util.cpp        | 186 ++++++++++++++++++
 morpheus/_lib/tests/test_morpheus.cpp         |  27 +++
 morpheus/_lib/tests/test_morpheus.hpp         |   5 +
 morpheus/_lib/tests/test_multi_slices.cpp     |   7 -
 .../inference/auto_encoder_inference_stage.py |   9 +-
 morpheus/stages/inference/inference_stage.py  |  16 +-
 tests/benchmarks/test_bench_e2e_pipelines.py  |   2 +-
 .../sid-minibert-onnx-no-trunc/GET.mock       |   5 +
 .../config/GET.mock                           |   5 +
 .../infer/POST.mock                           |  24 +++
 .../sid-minibert-onnx-no-trunc/ready/GET.mock |   3 +
 .../sid-no-trunc/sid_infer_resp.1.body        |   3 +
 .../sid-no-trunc/sid_infer_resp.10.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.11.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.12.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.13.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.14.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.15.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.16.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.17.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.18.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.19.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.2.body        |   3 +
 .../sid-no-trunc/sid_infer_resp.20.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.21.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.22.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.23.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.24.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.25.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.26.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.27.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.28.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.29.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.3.body        |   3 +
 .../sid-no-trunc/sid_infer_resp.30.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.31.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.32.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.33.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.34.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.35.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.36.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.37.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.38.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.39.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.4.body        |   3 +
 .../sid-no-trunc/sid_infer_resp.40.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.41.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.42.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.43.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.44.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.45.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.46.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.47.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.48.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.49.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.5.body        |   3 +
 .../sid-no-trunc/sid_infer_resp.50.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.51.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.52.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.53.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.54.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.55.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.56.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.57.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.58.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.59.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.6.body        |   3 +
 .../sid-no-trunc/sid_infer_resp.60.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.61.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.62.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.63.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.64.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.65.body       |   3 +
 .../sid-no-trunc/sid_infer_resp.7.body        |   3 +
 .../sid-no-trunc/sid_infer_resp.8.body        |   3 +
 .../sid-no-trunc/sid_infer_resp.9.body        |   3 +
 tests/test_inference_stage.py                 |   4 +-
 tests/test_inference_worker.py                |  18 +-
 tests/test_sid.py                             |  29 ++-
 87 files changed, 714 insertions(+), 71 deletions(-)
 create mode 100644 morpheus/_lib/tests/test_matx_util.cpp
 create mode 100644 morpheus/_lib/tests/test_morpheus.cpp
 create mode 100644 tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/GET.mock
 create mode 100644 tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/config/GET.mock
 create mode 100644 tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/infer/POST.mock
 create mode 100644 tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/ready/GET.mock
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.1.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.10.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.11.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.12.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.13.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.14.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.15.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.16.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.17.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.18.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.19.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.2.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.20.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.21.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.22.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.23.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.24.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.25.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.26.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.27.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.28.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.29.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.3.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.30.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.31.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.32.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.33.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.34.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.35.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.36.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.37.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.38.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.39.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.4.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.40.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.41.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.42.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.43.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.44.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.45.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.46.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.47.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.48.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.49.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.5.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.50.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.51.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.52.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.53.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.54.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.55.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.56.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.57.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.58.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.59.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.6.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.60.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.61.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.62.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.63.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.64.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.65.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.7.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.8.body
 create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.9.body

diff --git a/morpheus/_lib/include/morpheus/objects/dev_mem_info.hpp b/morpheus/_lib/include/morpheus/objects/dev_mem_info.hpp
index e72d86680c..9c0f2e0f42 100644
--- a/morpheus/_lib/include/morpheus/objects/dev_mem_info.hpp
+++ b/morpheus/_lib/include/morpheus/objects/dev_mem_info.hpp
@@ -42,7 +42,9 @@ struct DevMemInfo
     size_t offset;
 
     /**
-     * TODO(Documentation)
+     * @brief Returns raw pointer to underlying buffer offset by the `offset`
+     *
+     * @return void*
      */
     void *data() const;
 };
diff --git a/morpheus/_lib/include/morpheus/objects/tensor_object.hpp b/morpheus/_lib/include/morpheus/objects/tensor_object.hpp
index 7986b01c69..766e4a1cc3 100644
--- a/morpheus/_lib/include/morpheus/objects/tensor_object.hpp
+++ b/morpheus/_lib/include/morpheus/objects/tensor_object.hpp
@@ -476,8 +476,10 @@ struct TensorObject final
         auto stride = this->get_stride();
         auto shape  = this->get_shape();
 
+        CHECK(shape.size() == N) << "Length of idx must match lengh of shape";
+
         CHECK(std::transform_reduce(
-            stride.begin(), stride.end(), std::begin(idx), 0, std::logical_and<>(), std::less<>()))
+            shape.begin(), shape.end(), std::begin(idx), 1, std::logical_and<>(), std::greater<>()))
             << "Index is outsize of the bounds of the tensor. Index="
             << detail::array_to_str(std::begin(idx), std::begin(idx) + N)
             << ", Size=" << detail::array_to_str(shape.begin(), shape.end()) << "";
@@ -504,8 +506,10 @@ struct TensorObject final
         auto stride = this->get_stride();
         auto shape  = this->get_shape();
 
-        CHECK(std::transform_reduce(
-            stride.begin(), stride.end(), std::begin(idx), 0, std::logical_and<>(), std::less<>()))
+        CHECK(shape.size() == N) << "Length of idx must match lengh of shape";
+
+        CHECK(
+            std::transform_reduce(shape.begin(), shape.end(), std::begin(idx), 1, std::logical_and<>(), std::less<>()))
             << "Index is outsize of the bounds of the tensor. Index="
             << detail::array_to_str(std::begin(idx), std::begin(idx) + N)
             << ", Size=" << detail::array_to_str(shape.begin(), shape.end()) << "";
diff --git a/morpheus/_lib/include/morpheus/utilities/matx_util.hpp b/morpheus/_lib/include/morpheus/utilities/matx_util.hpp
index 05e559ba27..25b5109704 100644
--- a/morpheus/_lib/include/morpheus/utilities/matx_util.hpp
+++ b/morpheus/_lib/include/morpheus/utilities/matx_util.hpp
@@ -24,6 +24,7 @@
 
 #include <cstddef>
 #include <memory>
+#include <vector>
 
 namespace morpheus {
 struct MatxUtil
@@ -63,5 +64,24 @@ struct MatxUtil
                                                          const std::vector<TensorIndex> &stride,
                                                          double thresh_val,
                                                          bool by_row);
+
+    /**
+     * @brief Returns a buffer with `output_shape` containing the max value from values in `input` mapped according to
+     * `seq_ids`.
+     * Ex given a hypothetical input of:
+     *
+     *     input =   [5, 2, 8, 9, 8, 2, 1]
+     *     seq_ids = [0, 0, 0, 1, 2, 3, 3]
+     *
+     * Will return:
+     *               [8, 9, 8, 2]
+     * @return std::shared_ptr<rmm::device_buffer>
+     */
+    static std::shared_ptr<rmm::device_buffer> reduce_max(const DevMemInfo &input,
+                                                          const std::vector<int32_t> &seq_ids,
+                                                          size_t seq_id_offset,
+                                                          const std::vector<int64_t> &input_shape,
+                                                          const std::vector<int64_t> &input_stride,
+                                                          const std::vector<int64_t> &output_shape);
 };
 }  // namespace morpheus
diff --git a/morpheus/_lib/src/messages/multi_inference.cpp b/morpheus/_lib/src/messages/multi_inference.cpp
index 936382d453..24c4d8a013 100644
--- a/morpheus/_lib/src/messages/multi_inference.cpp
+++ b/morpheus/_lib/src/messages/multi_inference.cpp
@@ -73,8 +73,6 @@ void MultiInferenceMessage::get_slice_impl(std::shared_ptr<MultiMessage> new_mes
                                            std::size_t start,
                                            std::size_t stop) const
 {
-    CHECK(this->mess_count == this->count) << "At this time, mess_count and count must be the same for slicing";
-
     auto sliced_message = DCHECK_NOTNULL(std::dynamic_pointer_cast<MultiInferenceMessage>(new_message));
 
     sliced_message->offset = start;
@@ -82,7 +80,7 @@ void MultiInferenceMessage::get_slice_impl(std::shared_ptr<MultiMessage> new_mes
 
     // If we have more inference rows than message rows, we need to use the seq_ids to figure out the slicing. This
     // will be slow and should be avoided at all costs
-    if (this->memory->has_input("seq_ids") && this->count != this->mess_count)
+    if (this->count != this->mess_count && this->memory->has_input("seq_ids"))
     {
         auto seq_ids = this->get_input("seq_ids");
 
@@ -146,12 +144,6 @@ std::size_t MultiInferenceMessageInterfaceProxy::count(MultiInferenceMessage &se
 pybind11::object MultiInferenceMessageInterfaceProxy::get_input(MultiInferenceMessage &self, const std::string &name)
 {
     const auto &py_tensor = CupyUtil::tensor_to_cupy(self.get_input(name));
-
-    //  //  Need to get just our portion. TODO(MDD): THis should be handled in get_input
-    //  py::object sliced = py_tensor[py::make_tuple(
-    //      py::slice(py::int_(self.offset), py::int_(self.offset + self.count), py::none()),
-    //      py::slice(py::none(), py::none(), py::none()))];
-
     return py_tensor;
 }
 
@@ -159,13 +151,6 @@ std::shared_ptr<MultiInferenceMessage> MultiInferenceMessageInterfaceProxy::get_
                                                                                       std::size_t start,
                                                                                       std::size_t stop)
 {
-    // py::object seq_ids = CupyUtil::tensor_to_cupy(self.get_input("seq_ids"), m);
-
-    // int mess_start = seq_ids[py::make_tuple(start, 0)].attr("item")().cast<int>();
-    // int mess_stop  = seq_ids[py::make_tuple(stop - 1, 0)].attr("item")().cast<int>() + 1;
-
-    // return std::make_shared<MultiInferenceMessage>(
-    //     self.meta, mess_start, mess_stop - mess_start, self.memory, start, stop - start);
     return self.get_slice(start, stop);
 }
 }  // namespace morpheus
diff --git a/morpheus/_lib/src/messages/multi_response.cpp b/morpheus/_lib/src/messages/multi_response.cpp
index 8ad7cd0109..7b12506bbf 100644
--- a/morpheus/_lib/src/messages/multi_response.cpp
+++ b/morpheus/_lib/src/messages/multi_response.cpp
@@ -91,13 +91,17 @@ void MultiResponseMessage::get_slice_impl(std::shared_ptr<MultiMessage> new_mess
                                           std::size_t start,
                                           std::size_t stop) const
 {
-    CHECK(this->mess_count == this->count) << "At this time, mess_count and count must be the same for slicing";
-
     auto sliced_message = DCHECK_NOTNULL(std::dynamic_pointer_cast<MultiResponseMessage>(new_message));
 
     sliced_message->offset = start;
     sliced_message->count  = stop - start;
 
+    // Currently our output lengths should always match mess_count, and even if they didn't we wouldn't have any way to
+    // associate rows in the output with rows in the dataframe. Note on the input side we have the seq_ids array to
+    // do this, but we don't have any equivalent for the output.
+    DCHECK(this->count == this->mess_count)
+        << "Number of rows in response output does not match number of messages in DF";
+
     // Pass onto the base
     DerivedMultiMessage::get_slice_impl(new_message, start, stop);
 }
diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp
index 838379df35..0e6c4eb7f0 100644
--- a/morpheus/_lib/src/stages/triton_inference.cpp
+++ b/morpheus/_lib/src/stages/triton_inference.cpp
@@ -54,6 +54,12 @@ void InferenceClientStage__check_triton_errors(triton::client::Error status,
         throw std::runtime_error(err_msg);
     }
 }
+
+template <typename IndexT>
+inline IndexT get_elem_count(const std::vector<IndexT> &shape)
+{  // product of all dims (1 for an empty shape); IndexT{1} seed keeps std::accumulate from folding in int
+    return std::accumulate(shape.begin(), shape.end(), IndexT{1}, std::multiplies<>());
+}
 }  // namespace
 
 namespace morpheus {
@@ -87,34 +93,51 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator()
 
         return input.subscribe(rxcpp::make_observer<sink_type_t>(
             [this, &output, &client](sink_type_t x) {
-                auto reponse_memory = std::make_shared<ResponseMemory>(x->count);
+                // When our tensor lengths are longer than our dataframe we will need to use the seq_ids
+                // array to lookup how the values should map back into the dataframe
+                const bool needs_seq_ids = x->mess_count != x->count;
+                auto reponse_memory      = std::make_shared<ResponseMemory>(x->mess_count);
 
                 // Create the output memory blocks
                 for (auto &model_output : m_model_outputs)
                 {
-                    auto total_shape = model_output.shape;
+                    std::vector<TensorIndex> total_shape{model_output.shape.begin(), model_output.shape.end()};
 
-                    // First dimension will always end up being the number of rows
-                    total_shape[0] = x->count;
-
-                    auto elem_count = std::accumulate(total_shape.begin(), total_shape.end(), 1, std::multiplies<>());
+                    // First dimension will always end up being the number of rows in the dataframe
+                    total_shape[0]  = static_cast<TensorIndex>(x->mess_count);
+                    auto elem_count = get_elem_count(total_shape);
 
                     // Create the output memory
                     auto output_buffer = std::make_shared<rmm::device_buffer>(
                         elem_count * model_output.datatype.item_size(), rmm::cuda_stream_per_thread);
 
                     reponse_memory->tensors[model_output.mapped_name] = Tensor::create(
-                        std::move(output_buffer),
-                        model_output.datatype,
-                        std::vector<TensorIndex>{static_cast<int>(total_shape[0]), static_cast<int>(total_shape[1])},
-                        std::vector<TensorIndex>{},
-                        0);
+                        std::move(output_buffer), model_output.datatype, total_shape, std::vector<TensorIndex>{}, 0);
                 }
 
                 // This will be the final output of all mini-batches
                 auto response = std::make_shared<MultiResponseProbsMessage>(
                     x->meta, x->mess_offset, x->mess_count, std::move(reponse_memory), 0, reponse_memory->count);
 
+                std::unique_ptr<std::vector<int32_t>> host_seq_ids{nullptr};
+                if (needs_seq_ids)
+                {
+                    // Take a copy of the sequence Ids allowing us to map rows in the response to rows in the dataframe
+                    // The output tensors we store in `reponse_memory` will all be of the same length as the
+                    // dataframe. seq_ids has three columns, but we are only interested in the first column.
+                    auto seq_ids         = x->get_input("seq_ids");
+                    const auto item_size = seq_ids.dtype().item_size();
+
+                    host_seq_ids = std::make_unique<std::vector<int32_t>>(x->count);
+                    SRF_CHECK_CUDA(cudaMemcpy2D(host_seq_ids->data(),
+                                                item_size,
+                                                seq_ids.data(),
+                                                seq_ids.stride(0) * item_size,
+                                                item_size,
+                                                host_seq_ids->size(),
+                                                cudaMemcpyDeviceToHost));
+                }
+
                 for (size_t i = 0; i < x->count; i += m_max_batch_size)
                 {
                     triton::client::InferInput *input1;
@@ -122,8 +145,24 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator()
                     size_t start = i;
                     size_t stop  = std::min(i + m_max_batch_size, x->count);
 
-                    sink_type_t mini_batch_input    = x->get_slice(start, stop);
-                    source_type_t mini_batch_output = response->get_slice(start, stop);
+                    sink_type_t mini_batch_input = x->get_slice(start, stop);
+
+                    size_t out_start = start;
+                    size_t out_stop  = stop;
+                    if (needs_seq_ids)
+                    {
+                        out_start = (*host_seq_ids)[out_start];
+                        if (out_stop < host_seq_ids->size())
+                        {
+                            out_stop = (*host_seq_ids)[out_stop];
+                        }
+                        else
+                        {
+                            out_stop = x->mess_count;
+                        }
+                    }
+
+                    source_type_t mini_batch_output = response->get_slice(out_start, out_stop);
 
                     // Iterate on the model inputs in case the model takes less than what tensors are available
                     std::vector<std::pair<std::shared_ptr<triton::client::InferInput>, std::vector<uint8_t>>>
@@ -199,12 +238,34 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator()
                         SRF_CHECK_CUDA(
                             cudaMemcpy(output_buffer->data(), output_ptr, output_ptr_size, cudaMemcpyHostToDevice));
 
+                        if (needs_seq_ids && output_shape[0] != mini_batch_output->count)
+                        {
+                            // Since we are working with slices of both the input and the output, the seq_ids have
+                            // already been applied to the output's start & stop, so we only need to reduce the
+                            // response tensor when the size doesn't match our output
+                            std::vector<int64_t> mapped_output_shape{output_shape};
+                            mapped_output_shape[0] = mini_batch_output->count;
+
+                            size_t element_count = get_elem_count(output_shape);
+
+                            // Triton results are always in row-major as required by the KServe protocol
+                            // https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#tensor-data
+                            std::vector<int64_t> stride{output_shape[1], 1};
+                            output_buffer = MatxUtil::reduce_max(
+                                DevMemInfo{element_count, model_output.datatype.type_id(), output_buffer, 0},
+                                *host_seq_ids,
+                                mini_batch_input->offset,
+                                output_shape,
+                                stride,
+                                mapped_output_shape);
+                            output_shape = std::move(mapped_output_shape);
+                        }
+
                         // If we need to do logits, do that here
                         if (m_needs_logits)
                         {
-                            size_t element_count =
-                                std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<>());
-                            output_buffer = MatxUtil::logits(
+                            size_t element_count = get_elem_count(output_shape);
+                            output_buffer        = MatxUtil::logits(
                                 DevMemInfo{element_count, model_output.datatype.type_id(), output_buffer, 0});
                         }
 
@@ -212,8 +273,8 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator()
                             model_output.mapped_name,
                             Tensor::create(std::move(output_buffer),
                                            model_output.datatype,
-                                           std::vector<TensorIndex>{static_cast<int>(output_shape[0]),
-                                                                    static_cast<int>(output_shape[1])},
+                                           std::vector<TensorIndex>{static_cast<TensorIndex>(output_shape[0]),
+                                                                    static_cast<TensorIndex>(output_shape[1])},
                                            std::vector<TensorIndex>{},
                                            0));
                     }
diff --git a/morpheus/_lib/src/utilities/matx_util.cu b/morpheus/_lib/src/utilities/matx_util.cu
index 5edabbfba8..dabc5d74b6 100644
--- a/morpheus/_lib/src/utilities/matx_util.cu
+++ b/morpheus/_lib/src/utilities/matx_util.cu
@@ -243,6 +243,45 @@ namespace morpheus {
         }
     };
 
+    struct MatxUtil__MatxReduceMax {  // type-dispatched functor: per-column max of input rows [start, stop)
+        matx::index_t num_input_rows;  // stored for reference; not read by either operator() overload
+        matx::index_t num_cols;  // column count used for both the input view and the output row
+        std::vector<matx::index_t> input_stride;  // element strides of the input; [0] steps rows, [1] steps columns
+        matx::index_t num_output_rows;  // becomes the output column stride when input_stride[0] == 1
+        void *input_data;  // untyped base pointer, cast to InputT* at dispatch time
+        void *output_data;  // untyped base pointer, cast to InputT* at dispatch time
+        rmm::cuda_stream_view stream;  // stream handed to matx::rmax
+
+        template<typename InputT, std::enable_if_t<!cudf::is_floating_point<InputT>()> * = nullptr>
+        void operator()(std::size_t start, std::size_t stop, int32_t output_idx) {  // non-floating-point InputT
+            throw std::invalid_argument("Unsupported conversion");
+        }
+
+        template<typename InputT, std::enable_if_t<cudf::is_floating_point<InputT>()> * = nullptr>
+        void operator()(std::size_t start, std::size_t stop, int32_t output_idx) {  // floating-point InputT
+            auto input_count = stop - start;  // number of input rows folded into one output row
+            matx::tensorShape_t<2> input_shape({static_cast<matx::index_t>(input_count), num_cols});
+            matx::tensorShape_t<1> output_shape({num_cols});
+
+            matx::index_t output_stride[2] = {input_stride[0], input_stride[1]};  // start from the input strides
+            if (output_stride[0] == 1)  // NOTE(review): row stride 1 presumably means column-major input; confirm
+            {
+                output_stride[1] = num_output_rows;
+            }
+
+            auto input_ptr = static_cast<InputT *>(input_data) + (start * input_stride[0]);  // first row of the range
+            auto output_ptr = static_cast<InputT *>(output_data) + (output_idx *  output_stride[0]);  // target row
+
+            matx::tensor_t<InputT, 2> input_tensor(input_ptr, input_shape, {input_stride[0], input_stride[1]});
+            matx::tensor_t<InputT, 1> output_tensor(output_ptr, output_shape, {output_stride[1]});
+
+            // We need to transpose the input such that rmax will reduce the rows
+            // Matx performs reductions over the innermost dimensions.
+            // see https://nvidia.github.io/MatX/api/reduce.html
+            matx::rmax(output_tensor, input_tensor.Permute({1, 0}), stream.value());
+        }
+    };
+
     // Component public implementations
     // ************ MatxUtil************************* //
     std::shared_ptr<rmm::device_buffer> MatxUtil::cast(const DevMemInfo &input, TypeId output_type) {
@@ -337,4 +376,57 @@ namespace morpheus {
 
         return output;
     }
+
+    std::shared_ptr<rmm::device_buffer>
+    MatxUtil::reduce_max(const DevMemInfo &input,                   // device memory holding the values to reduce
+                         const std::vector<int32_t> &seq_ids,       // group id of each input row, read from seq_id_offset onwards
+                         size_t seq_id_offset,                      // index in seq_ids of the id belonging to input row 0
+                         const std::vector<int64_t> &input_shape,   // {rows, cols} of the input
+                         const std::vector<int64_t> &input_stride,  // element strides {between rows, between cols}
+                         const std::vector<int64_t> &output_shape)  // {rows, cols} of the result; cols must match the input
+    {  // Reduces every run of consecutive input rows sharing a seq_id to one output row; returns a newly allocated buffer
+        auto dtype = DType(input.type_id);
+        auto elem_size = dtype.item_size();
+        auto cudf_type = cudf::data_type{dtype.cudf_type_id()};
+        auto num_input_rows = input_shape[0];
+        auto num_input_cols = input_shape[1];
+        // Only the first two entries of input_shape, input_stride and output_shape are read
+        std::vector<matx::index_t>matx_stride{input_stride[0], input_stride[1]};
+        std::size_t output_element_count = output_shape[0] * output_shape[1];
+        std::size_t output_buff_size = elem_size * output_element_count;
+        // Sanity checks on the caller-supplied shapes
+        DCHECK(output_element_count <= input.element_count) << "Output buffer size should be less than or equal to the input";
+        DCHECK(num_input_cols == output_shape[1]) << "Number of input and output columns must match";
+        // Allocate the result on the same stream and memory resource as the input buffer
+        auto output = std::make_shared<rmm::device_buffer>(output_buff_size,
+                                                           input.buffer->stream(),
+                                                           input.buffer->memory_resource());
+        // Functor invoked through cudf::type_dispatcher below; each call reduces one group of rows
+        MatxUtil__MatxReduceMax matx_reduce_max{num_input_rows, num_input_cols, matx_stride, output_shape[0], input.data(), output->data(), output->stream()};
+        // Walk the ids: a group is dispatched when the id changes (assumes equal ids are contiguous - TODO confirm with callers)
+        std::size_t start = 0;
+        auto output_offset = seq_ids[seq_id_offset];  // id of input row 0; output row indices are relative to it
+        for (std::size_t i=0; i < num_input_rows; ++i)
+        {
+            auto idx = seq_ids[i+seq_id_offset];
+            if (idx != seq_ids[start+seq_id_offset])
+            {
+                cudf::type_dispatcher(cudf_type,
+                                      matx_reduce_max,
+                                      start,
+                                      i,
+                                      seq_ids[start+seq_id_offset]-output_offset);
+                start = i;
+            }
+        }
+        // The loop only dispatches on an id change, so the final group [start, num_input_rows) is reduced here
+        cudf::type_dispatcher(cudf_type,
+                              matx_reduce_max,
+                              start,
+                              num_input_rows,
+                              seq_ids[start+seq_id_offset]-output_offset);
+        // Wait on a sync event enqueued on the output stream (presumably so the reductions have finished - confirm against srf)
+        srf::enqueue_stream_sync_event(output->stream()).get();
+        return output;
+    }
 }
diff --git a/morpheus/_lib/tests/CMakeLists.txt b/morpheus/_lib/tests/CMakeLists.txt
index 89d7e29fb5..4e5217574e 100644
--- a/morpheus/_lib/tests/CMakeLists.txt
+++ b/morpheus/_lib/tests/CMakeLists.txt
@@ -19,6 +19,8 @@ list(APPEND CMAKE_MESSAGE_CONTEXT "tests")
 add_executable(test_libmorpheus
   # test_cuda.cu
   test_main.cpp
+  test_matx_util.cpp
+  test_morpheus.cpp
   test_multi_slices.cpp
   test_tensor.cpp
   test_type_util_detail.cpp
diff --git a/morpheus/_lib/tests/test_matx_util.cpp b/morpheus/_lib/tests/test_matx_util.cpp
new file mode 100644
index 0000000000..50091f5aaa
--- /dev/null
+++ b/morpheus/_lib/tests/test_matx_util.cpp
@@ -0,0 +1,186 @@
+/**
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "./test_morpheus.hpp"  // IWYU pragma: associated
+
+#include "morpheus/utilities/matx_util.hpp"
+#include "morpheus/utilities/type_util.hpp"
+#include "morpheus/utilities/type_util_detail.hpp"
+
+#include <cudf/table/table.hpp>
+#include <gtest/gtest.h>
+#include <rmm/device_buffer.hpp>
+#include <srf/cuda/common.hpp>
+
+#include <cstdlib>  // for std::getenv
+#include <filesystem>
+#include <string>
+#include <vector>
+
+using namespace morpheus;
+
+TEST_CLASS(MatxUtil);
+
+TEST_F(TestMatxUtil, ReduceMax1d)
+{
+    // Mimics the example from the method's docstring: rows sharing a seq_id reduce to their max, e.g. max(5, 2, 8) = 8
+    std::vector<float> input{5, 2, 8, 9, 8, 2, 1};
+    std::vector<int32_t> seq_ids{0, 0, 0, 1, 2, 3, 3};
+    std::vector<float> expected_output{8, 9, 8, 2};
+
+    DataType dtype(TypeId::FLOAT32);
+
+    auto input_buffer =
+        std::make_shared<rmm::device_buffer>(input.size() * dtype.item_size(), rmm::cuda_stream_per_thread);
+    // Upload the host values into the device buffer
+    SRF_CHECK_CUDA(cudaMemcpy(input_buffer->data(), input.data(), input_buffer->size(), cudaMemcpyHostToDevice));
+    // A single column: shape is (rows, 1) and the stride {1, 0} places consecutive rows one element apart
+    DevMemInfo dm{input.size(), dtype.type_id(), input_buffer, 0};
+    std::vector<int64_t> input_shape{static_cast<int64_t>(input.size()), 1};
+    std::vector<int64_t> output_shape{static_cast<int64_t>(expected_output.size()), 1};
+    auto output_buffer = MatxUtil::reduce_max(dm, seq_ids, 0, input_shape, {1, 0}, output_shape);
+    // Copy the reduced values back to the host for comparison
+    std::vector<float> output(expected_output.size());
+    SRF_CHECK_CUDA(cudaMemcpy(output.data(), output_buffer->data(), output_buffer->size(), cudaMemcpyDeviceToHost));
+
+    EXPECT_EQ(output, expected_output);
+}
+
+TEST_F(TestMatxUtil, ReduceMax2dRowMajor)
+{
+    // clang-format off
+    // disabling clang-format to illustrate row-major layout
+    std::vector<double> input{
+        0.1, 0.7, 0.7, 0.7,
+        1.0, 0.9, 0.5, 0.9,
+        1.0, 0.6, 0.7, 0.9,
+        1.0, 0.2, 0.2, 0.9,
+        0.5, 0.8, 0.6, 0.0,
+        0.3, 0.4, 0.1, 0.4,
+        0.9, 0.3, 1.0, 0.6,
+        0.5, 0.5, 0.6, 0.8,
+        0.0, 0.3, 0.5, 0.6,
+        0.6, 1.0, 0.8, 0.7,
+        0.8, 0.8, 1.0, 0.6,
+        0.1, 0.9, 0.1, 0.3};
+
+    // reducing 12 rows down to 5: each run of equal ids becomes one output row
+    std::vector<int32_t> seq_ids{0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4};
+
+    std::vector<double> expected_output{
+        1.0, 0.9, 0.7, 0.9,
+        1.0, 0.6, 0.7, 0.9,
+        1.0, 0.8, 1.0, 0.9,
+        0.8, 1.0, 1.0, 0.8,
+        0.1, 0.9, 0.1, 0.3};
+    // clang-format on
+
+    // Matrix dimensions; expected_rows is derived from the expected output
+    std::size_t num_cols      = 4;
+    std::size_t num_rows      = 12;
+    std::size_t expected_rows = expected_output.size() / num_cols;
+    // Sanity-check the fixture itself before running the reduction
+    EXPECT_EQ(num_cols * num_rows, input.size());
+    EXPECT_EQ(expected_rows, 5);
+    DataType dtype(TypeId::FLOAT64);
+    EXPECT_EQ(dtype.item_size(), sizeof(double));
+
+    std::size_t buff_size = input.size() * dtype.item_size();
+    auto input_buffer     = std::make_shared<rmm::device_buffer>(buff_size, rmm::cuda_stream_per_thread);
+
+    SRF_CHECK_CUDA(cudaMemcpy(input_buffer->data(), input.data(), input_buffer->size(), cudaMemcpyHostToDevice));
+    // Row-major layout: stride of num_cols elements between rows and 1 between columns
+    DevMemInfo dm{input.size(), dtype.type_id(), input_buffer, 0};
+    std::vector<int64_t> input_shape{static_cast<int64_t>(num_rows), static_cast<int64_t>(num_cols)};
+    std::vector<int64_t> output_shape{static_cast<int64_t>(expected_rows), static_cast<int64_t>(num_cols)};
+    auto output_buffer =
+        MatxUtil::reduce_max(dm, seq_ids, 0, input_shape, {static_cast<int64_t>(num_cols), 1}, output_shape);
+
+    EXPECT_EQ(output_buffer->size(), expected_rows * num_cols * dtype.item_size());
+    // Copy the reduced rows back to the host for comparison
+    std::vector<double> output(expected_rows * num_cols);
+    SRF_CHECK_CUDA(cudaMemcpy(output.data(), output_buffer->data(), output_buffer->size(), cudaMemcpyDeviceToHost));
+
+    EXPECT_EQ(output.size(), expected_output.size());
+    for (std::size_t i = 0; i < output.size(); ++i)
+    {
+        EXPECT_DOUBLE_EQ(output[i], expected_output[i]);
+    }
+}
+
+TEST_F(TestMatxUtil, ReduceMax2dColMajor)
+{
+    const char* morpheus_root_env = std::getenv("MORPHEUS_ROOT");
+    ASSERT_TRUE(morpheus_root_env != nullptr) << "MORPHEUS_ROOT environment variable must be set";
+    auto input_file = std::filesystem::path{morpheus_root_env} / "tests/tests_data/filter_probs.csv";
+
+    auto table_m  = load_table_from_csv(input_file);
+    auto num_rows = table_m.tbl->num_rows();
+    auto num_cols = table_m.tbl->num_columns();
+    ASSERT_EQ(num_rows, 20);  // seq_ids below has exactly 20 entries, so stop here on any other shape
+    ASSERT_EQ(num_cols, 4);   // expected_output below holds 4 columns of values
+
+    // Copy the table's columns back to back into one buffer, giving a column-major matrix
+    auto dtype            = DType::from_cudf(table_m.tbl->get_column(0).type().id());
+    std::size_t buff_size = num_cols * num_rows * dtype.item_size();
+
+    ASSERT_EQ(dtype.item_size(), sizeof(double));  // the result is read back into a vector of double
+    auto input_buffer = std::make_shared<rmm::device_buffer>(buff_size, rmm::cuda_stream_per_thread);
+
+    std::size_t offset{0};
+    for (cudf::size_type i = 0; i < num_cols; ++i)
+    {
+        auto cv = table_m.tbl->get_column(i).view();
+        SRF_CHECK_CUDA(cudaMemcpy(static_cast<uint8_t*>(input_buffer->data()) + offset,
+                                  cv.data<uint8_t>(),
+                                  num_rows * dtype.item_size(),
+                                  cudaMemcpyDeviceToDevice));
+
+        offset += num_rows * dtype.item_size();
+    }
+
+    EXPECT_EQ(offset, buff_size);
+    // NOTE(review): expected_output equals the ReduceMax2dRowMajor *input*, column by column; verify against the CSV
+    // reducing 20 rows down to 12
+    std::vector<int32_t> seq_ids{0, 0, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 7, 7, 7, 8, 9, 9, 10, 11};
+    // disabling formatting so I can enter the literal values by column
+    // clang-format off
+    std::vector<double> expected_output{0.1, 1.0, 1.0, 1.0, 0.5, 0.3, 0.9, 0.5, 0.0, 0.6, 0.8, 0.1,
+                                        0.7, 0.9, 0.6, 0.2, 0.8, 0.4, 0.3, 0.5, 0.3, 1.0, 0.8, 0.9,
+                                        0.7, 0.5, 0.7, 0.2, 0.6, 0.1, 1.0, 0.6, 0.5, 0.8, 1.0, 0.1,
+                                        0.7, 0.9, 0.9, 0.9, 0.0, 0.4, 0.6, 0.8, 0.6, 0.7, 0.6, 0.3};
+    // clang-format on
+    const std::size_t expected_rows = 12;
+    EXPECT_EQ(expected_rows * num_cols, expected_output.size());
+    // Column-major layout: stride of 1 element between rows and num_rows between columns
+    DevMemInfo dm{static_cast<std::size_t>(num_rows * num_cols), dtype.type_id(), input_buffer, 0};
+    std::vector<int64_t> input_shape{static_cast<int64_t>(num_rows), static_cast<int64_t>(num_cols)};
+    std::vector<int64_t> output_shape{static_cast<int64_t>(expected_rows), static_cast<int64_t>(num_cols)};
+    auto output_buffer =
+        MatxUtil::reduce_max(dm, seq_ids, 0, input_shape, {1, static_cast<int64_t>(num_rows)}, output_shape);
+    // The device-to-host copy below uses output_buffer->size(), so it must match the host vector exactly
+    ASSERT_EQ(output_buffer->size(), expected_rows * num_cols * dtype.item_size());
+
+    std::vector<double> output(expected_rows * num_cols);
+    SRF_CHECK_CUDA(cudaMemcpy(output.data(), output_buffer->data(), output_buffer->size(), cudaMemcpyDeviceToHost));
+
+    ASSERT_EQ(output.size(), expected_output.size());
+    for (std::size_t i = 0; i < output.size(); ++i)
+    {
+        EXPECT_DOUBLE_EQ(output[i], expected_output[i]);
+    }
+}
diff --git a/morpheus/_lib/tests/test_morpheus.cpp b/morpheus/_lib/tests/test_morpheus.cpp
new file mode 100644
index 0000000000..3c41db3f1e
--- /dev/null
+++ b/morpheus/_lib/tests/test_morpheus.cpp
@@ -0,0 +1,27 @@
+/**
+ * SPDX-FileCopyrightText: Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "test_morpheus.hpp"
+
+#include <cudf/io/csv.hpp>
+#include <cudf/table/table.hpp>
+
+cudf::io::table_with_metadata load_table_from_csv(std::string filename)
+{
+    const cudf::io::source_info csv_source{filename};  // read from the file at `filename`, default reader options
+    return cudf::io::read_csv(cudf::io::csv_reader_options::builder(csv_source).build());
+}
diff --git a/morpheus/_lib/tests/test_morpheus.hpp b/morpheus/_lib/tests/test_morpheus.hpp
index 1b8c9d2667..50c97c8cba 100644
--- a/morpheus/_lib/tests/test_morpheus.hpp
+++ b/morpheus/_lib/tests/test_morpheus.hpp
@@ -17,9 +17,14 @@
 
 #pragma once
 
+#include <cudf/io/types.hpp>
 #include <glog/logging.h>  // IWYU pragma: keep
 #include <gtest/gtest.h>   // IWYU pragma: keep
 
+#include <string>
+
+cudf::io::table_with_metadata load_table_from_csv(std::string filename);
+
 #define TEST_CLASS(name)                      \
     class Test##name : public ::testing::Test \
     {}
diff --git a/morpheus/_lib/tests/test_multi_slices.cpp b/morpheus/_lib/tests/test_multi_slices.cpp
index 28d0a8e803..d92bdac887 100644
--- a/morpheus/_lib/tests/test_multi_slices.cpp
+++ b/morpheus/_lib/tests/test_multi_slices.cpp
@@ -19,7 +19,6 @@
 
 #include <cudf/concatenate.hpp>
 #include <cudf/copying.hpp>
-#include <cudf/io/csv.hpp>
 #include <cudf/io/types.hpp>
 #include <cudf/strings/replace.hpp>
 #include <cudf/table/table.hpp>
@@ -29,12 +28,6 @@
 #include <filesystem>
 #include <vector>
 
-cudf::io::table_with_metadata load_table_from_csv(std::string filename)
-{
-    auto options = cudf::io::csv_reader_options::builder(cudf::io::source_info{filename});
-    return cudf::io::read_csv(options.build());
-}
-
 TEST_CLASS(Masking);
 
 TEST_F(TestMasking, Ranges)
diff --git a/morpheus/stages/inference/auto_encoder_inference_stage.py b/morpheus/stages/inference/auto_encoder_inference_stage.py
index 3f7a5a6646..c65b427977 100644
--- a/morpheus/stages/inference/auto_encoder_inference_stage.py
+++ b/morpheus/stages/inference/auto_encoder_inference_stage.py
@@ -56,17 +56,18 @@ def build_output_message(self, x: MultiInferenceAEMessage) -> MultiResponseAEMes
             Response message with autoencoder results calculated from inference results.
         """
 
-        output_dims = self.calc_output_dims(x)
+        dims = self.calc_output_dims(x)
+        output_dims = (x.mess_count, *dims[1:])
 
-        memory = ResponseMemoryProbs(count=x.count, probs=cp.zeros(output_dims))
+        memory = ResponseMemoryProbs(count=output_dims[0], probs=cp.zeros(output_dims))
 
         # Override the default to return the response AE
         output_message = MultiResponseAEMessage(meta=x.meta,
                                                 mess_offset=x.mess_offset,
                                                 mess_count=x.mess_count,
                                                 memory=memory,
-                                                offset=x.offset,
-                                                count=x.count,
+                                                offset=0,
+                                                count=memory.count,
                                                 user_id=x.user_id)
         return output_message
 
diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py
index ca65ae1586..48f3471619 100644
--- a/morpheus/stages/inference/inference_stage.py
+++ b/morpheus/stages/inference/inference_stage.py
@@ -70,16 +70,17 @@ def build_output_message(self, x: MultiInferenceMessage) -> MultiResponseProbsMe
             Response message with probabilities calculated from inference results.
         """
 
-        output_dims = self.calc_output_dims(x)
+        dims = self.calc_output_dims(x)
+        output_dims = (x.mess_count, *dims[1:])
 
-        memory = ResponseMemoryProbs(count=x.count, probs=cp.zeros(output_dims))
+        memory = ResponseMemoryProbs(count=output_dims[0], probs=cp.zeros(output_dims))
 
         output_message = MultiResponseProbsMessage(meta=x.meta,
                                                    mess_offset=x.mess_offset,
                                                    mess_count=x.mess_count,
                                                    memory=memory,
-                                                   offset=x.offset,
-                                                   count=x.count)
+                                                   offset=0,
+                                                   count=memory.count)
         return output_message
 
     @abstractmethod
@@ -384,10 +385,15 @@ def _convert_one_response(memory: ResponseMemory, inf: MultiInferenceMessage, re
 
         probs = memory.get_output("probs")
 
+        seq_offset = inf.seq_ids[0, 0].item()
+        seq_count = inf.seq_ids[-1, 0].item() + 1 - seq_offset
+
         # Two scenarios:
         if (inf.mess_count == inf.count):
+            assert seq_count == res.count
+
             # In message and out message have same count. Just use probs as is
-            probs[inf.offset:inf.count + inf.offset, :] = res.probs
+            probs[seq_offset:seq_offset + seq_count, :] = res.probs
         else:
             assert inf.count == res.count
 
diff --git a/tests/benchmarks/test_bench_e2e_pipelines.py b/tests/benchmarks/test_bench_e2e_pipelines.py
index 0cd09b5905..87c14ba1c4 100644
--- a/tests/benchmarks/test_bench_e2e_pipelines.py
+++ b/tests/benchmarks/test_bench_e2e_pipelines.py
@@ -38,7 +38,7 @@
 from morpheus.stages.preprocess.preprocess_fil_stage import PreprocessFILStage
 from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage
 from morpheus.stages.preprocess.train_ae_stage import TrainAEStage
-from morpheus.utils.logging import configure_logging
+from morpheus.utils.logger import configure_logging
 from utils import TEST_DIRS
 
 e2e_config_file = os.path.join(TEST_DIRS.morpheus_root, "tests/benchmarks/e2e_test_configs.json")
diff --git a/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/GET.mock b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/GET.mock
new file mode 100644
index 0000000000..85e9132d7b
--- /dev/null
+++ b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/GET.mock
@@ -0,0 +1,5 @@
+HTTP/1.1 200 OK
+Content-Type: application/json
+Content-Length: 269
+
+{"name":"sid-minibert-onnx","versions":["1"],"platform":"onnxruntime_onnx","inputs":[{"name":"input_ids","datatype":"INT32","shape":[-1,256]},{"name":"attention_mask","datatype":"INT32","shape":[-1,256]}],"outputs":[{"name":"output","datatype":"FP32","shape":[-1,10]}]}
diff --git a/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/config/GET.mock b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/config/GET.mock
new file mode 100644
index 0000000000..b999dcaabd
--- /dev/null
+++ b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/config/GET.mock
@@ -0,0 +1,5 @@
+HTTP/1.1 200 OK
+Content-Type: application/json
+Content-Length: 1430
+
+{"name":"sid-minibert-onnx-no-trunc","platform":"onnxruntime_onnx","backend":"onnxruntime","version_policy":{"latest":{"num_versions":1}},"max_batch_size":32,"input":[{"name":"input_ids","data_type":"TYPE_INT32","format":"FORMAT_NONE","dims":[256],"is_shape_tensor":false,"allow_ragged_batch":false},{"name":"attention_mask","data_type":"TYPE_INT32","format":"FORMAT_NONE","dims":[256],"is_shape_tensor":false,"allow_ragged_batch":false}],"output":[{"name":"output","data_type":"TYPE_FP32","dims":[10],"label_filename":"","is_shape_tensor":false}],"batch_input":[],"batch_output":[],"optimization":{"priority":"PRIORITY_DEFAULT","execution_accelerators":{"gpu_execution_accelerator":[{"name":"tensorrt","parameters":{"max_workspace_size_bytes":"1073741824","precision_mode":"FP16"}}],"cpu_execution_accelerator":[]},"input_pinned_memory":{"enable":true},"output_pinned_memory":{"enable":true},"gather_kernel_buffer_threshold":0,"eager_batching":false},"dynamic_batching":{"preferred_batch_size":[1,4,8,16,32],"max_queue_delay_microseconds":50000,"preserve_ordering":false,"priority_levels":0,"default_priority_level":0,"priority_queue_policy":{}},"instance_group":[{"name":"sid-minibert-onnx","kind":"KIND_GPU","count":1,"gpus":[0,1,2,3,4,5,6,7],"secondary_devices":[],"profile":[],"passive":false,"host_policy":""}],"default_model_filename":"model.onnx","cc_model_filenames":{},"metric_tags":{},"parameters":{},"model_warmup":[]}
diff --git a/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/infer/POST.mock b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/infer/POST.mock
new file mode 100644
index 0000000000..ade4d64f25
--- /dev/null
+++ b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/infer/POST.mock
@@ -0,0 +1,24 @@
+HTTP/1.1 200 OK
+Content-Type: application/octet-stream
+{{#inject}}(()=>{
+    if(!this.counter) {
+        this.counter=0;
+    }
+
+    this.counter+=1;
+    this.filename = "payloads/sid-no-trunc/sid_infer_resp." + this.counter + ".body"
+
+    let inf_header_content_length = 157;
+    if (this.counter === 33) {
+        inf_header_content_length = 156;
+    } else if (this.counter === 65) {
+        inf_header_content_length = 155;
+    }
+
+    // This seems like the only way to pass a variable to the file helper
+    request._nv_morpheus_params = {counter: this.counter, filename: this.filename};
+
+    return "Inference-Header-Content-Length: " + inf_header_content_length;
+})();{{/inject}}
+
+{{file path=request._nv_morpheus_params.filename}}
diff --git a/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/ready/GET.mock b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/ready/GET.mock
new file mode 100644
index 0000000000..ac53519000
--- /dev/null
+++ b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/ready/GET.mock
@@ -0,0 +1,3 @@
+HTTP/1.1 200 OK
+Content-Length: 0
+Content-Type: text/plain
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.1.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.1.body
new file mode 100644
index 0000000000..73e526fd7b
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.1.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73051bc868b96c2df61f9b4b20579fe42dd47ff3f93d406f715b6e38e107b42b
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.10.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.10.body
new file mode 100644
index 0000000000..ec53f8c810
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.10.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cf163270a505b6047049eb417a2274c4f637eb7ceabab758143737f4accb881
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.11.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.11.body
new file mode 100644
index 0000000000..d7548df7f6
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.11.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3310e0959db4747cbbea8d9a435e0141e8a9af15b3caf2fa4c4ae1145c6cb1c
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.12.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.12.body
new file mode 100644
index 0000000000..56ffbf61a5
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.12.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca87e261d602921f0266a4ae30193dc8c845706c04412765d62d94249252d5b9
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.13.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.13.body
new file mode 100644
index 0000000000..409b74357c
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.13.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14df69085de8e854cbc30a04454b836e1b9c8c13a6081185df38a58b9d49b085
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.14.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.14.body
new file mode 100644
index 0000000000..19babce566
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.14.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cea40d505a28803a6178dc351a01dc497e17bf0ae481c986860d77afdcdd44c4
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.15.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.15.body
new file mode 100644
index 0000000000..79f26cf1a8
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.15.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5cf50a4baabfb18854c4ab3f04dcf14c654dbfef793a1b85e5e5712521dfcfe
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.16.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.16.body
new file mode 100644
index 0000000000..59775280df
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.16.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d4c7030120e6285b10477a024891668a797efbe3f845c7d3998964a131798d0
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.17.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.17.body
new file mode 100644
index 0000000000..6f7639e982
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.17.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f8d9689bf56f29f6c2f5523703fd6f3304d2154b6e900186dbcdfcf66937e15
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.18.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.18.body
new file mode 100644
index 0000000000..671acae3a0
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.18.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e16be9cc31da7de9ed702128bf1906cc23710061a5a37d02a9e6db56c0de098
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.19.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.19.body
new file mode 100644
index 0000000000..7080bc794e
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.19.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3da7ae7b899799a05e17c414a0e185d0cdc423d3d3ac4c156bee4b15b722034
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.2.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.2.body
new file mode 100644
index 0000000000..874a6876f0
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.2.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36e23c772fa810c33a3e6ee01634393d7b8588d9425ac1158a7379dcc83c2ea0
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.20.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.20.body
new file mode 100644
index 0000000000..d564c06b35
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.20.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f3f3691419f7ad8af36dc0ab0b044794860188821ab4593ccbf2c74480a3e00
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.21.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.21.body
new file mode 100644
index 0000000000..aa0b7940d9
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.21.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5086b91d5720f0cd0ddba7a415649f602c592e705e3b52607caf432d58c3ac4
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.22.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.22.body
new file mode 100644
index 0000000000..b03a5f9082
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.22.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c92cc2832365add134f64f17bfe7975723bb0a1974a68fb26c8fae9efea5c6b5
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.23.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.23.body
new file mode 100644
index 0000000000..a8ccae3829
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.23.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0d7d2aab93f0f0a00959da7e4de781b0cacbaa9ad9c6fc429359c57c81cd2c8
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.24.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.24.body
new file mode 100644
index 0000000000..989e8df73b
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.24.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c523c85196e7b3d327fe7fc6e7de7e4f4da464b5d3af61a89cdd1f5b21fd300a
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.25.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.25.body
new file mode 100644
index 0000000000..80d8d9a547
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.25.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce1f175938725fd25980f138fc49e0c34385f6ee888019fc6ae863c6d0570056
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.26.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.26.body
new file mode 100644
index 0000000000..8ae55dba6d
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.26.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58d28a52397324002181c5e03524226079b4f07662915cdee6475e9a5251d058
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.27.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.27.body
new file mode 100644
index 0000000000..691360751d
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.27.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa8691e609d609a5b183446eb7999312b218e56298c506c4c67fc5dad9692d5b
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.28.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.28.body
new file mode 100644
index 0000000000..53dcc91ff4
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.28.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cc4e315feb391b84d269dfff43f4e98e877fc24ac64e04249e8ea2f2d755a97
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.29.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.29.body
new file mode 100644
index 0000000000..e95aef6d2e
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.29.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc2dd163e1b36930634f2de2fb0b4e6d29b2590c0e43d39efa2ea90c461d669
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.3.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.3.body
new file mode 100644
index 0000000000..527b219234
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.3.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca080e4e213033983b14e5142ced874e73446103623a3108225a979de1faaec9
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.30.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.30.body
new file mode 100644
index 0000000000..1f01647562
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.30.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4703e8b9851f654120becbb6fe171f0205320570f5c436631dba8656744e9f85
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.31.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.31.body
new file mode 100644
index 0000000000..a26af18719
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.31.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec6a53aef1db8abe2a151305544cb42a44216ec416c79136d66473680458cddc
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.32.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.32.body
new file mode 100644
index 0000000000..1db9cc4107
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.32.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbd789dcc76c733bfa3bfdc5912233e43c065e473b4ccb4dee0c2cd5ba97a07d
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.33.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.33.body
new file mode 100644
index 0000000000..c4c27b930e
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.33.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9d1f3449fc85e2cea901f87b77b1a827e92d66108fbe213df960ed015e8030d
+size 838
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.34.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.34.body
new file mode 100644
index 0000000000..721925edd4
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.34.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37b3e2227502e234c7756b2e99db9def690934614a74b3aa224d8a9d9aa33208
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.35.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.35.body
new file mode 100644
index 0000000000..36a4fd45ca
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.35.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a38669a459b184fe799f77b0ccaed38e93fa7d9c724bcb1446c6ba8a1dc7972
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.36.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.36.body
new file mode 100644
index 0000000000..3b75f2a14e
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.36.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c350b27c99510c44e327f415ef62f4acef4c7977f8f992ad074b6893633fb4b7
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.37.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.37.body
new file mode 100644
index 0000000000..a5f4f4aa11
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.37.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8b4337c1515c94efcc9a9df1ffdc8c39fbaa2994bf0e6a2a5dddaebc7d10b83
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.38.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.38.body
new file mode 100644
index 0000000000..b3132f7709
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.38.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d0ea6a4c73d03543749f5010e0c8bfa2dedb891177705b6d1b66356a06a9c0d
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.39.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.39.body
new file mode 100644
index 0000000000..47e8e15b3e
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.39.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0138ca7c11a078c23bae12216b6161ca57338f442eb22cd94fff1bca03a7ea86
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.4.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.4.body
new file mode 100644
index 0000000000..c6fd6c046d
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.4.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:989a7a4e21b4ab82e3e5d73f836c4d191455eebfc9c7a8ae5b685966338c9703
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.40.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.40.body
new file mode 100644
index 0000000000..234ac747f7
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.40.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:426bbee9d4d5e43716168e5f6c8f260e128eb6bf3dae96193a6e626061e4aacb
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.41.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.41.body
new file mode 100644
index 0000000000..c8b6897c02
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.41.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6a960efadd6f99be13f7484210b870ffbdbf6c7acb1506ca8c53ebf239d472f
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.42.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.42.body
new file mode 100644
index 0000000000..53821a424c
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.42.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41043709f06e5f68120bc4dcd540b37c15ba8ec80a07d6a26bd0b295e302909f
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.43.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.43.body
new file mode 100644
index 0000000000..e51b3d2dda
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.43.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82e8bc2619c0d586388b1723363886f75a5ee62428c74627b600b5852891c08d
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.44.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.44.body
new file mode 100644
index 0000000000..4676c04185
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.44.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:213e7192e9c09e445bf5028522894c95d4839f02448ce178d0c188e8a502e864
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.45.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.45.body
new file mode 100644
index 0000000000..f4f316c4d9
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.45.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d9e208c67a6c3ad4386c2c64c6890faa683ee14b0a713e5479986935da7c26b
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.46.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.46.body
new file mode 100644
index 0000000000..8714134c48
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.46.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c15b45933f0abc16b9a83369d6ccf3fabe50de1d6f78c88200a9b58504a0bc9
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.47.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.47.body
new file mode 100644
index 0000000000..8ef91dec2b
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.47.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:562c92ac51b2038474daa86e364e0d6e71512e166cb2f755139b999c4302a61b
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.48.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.48.body
new file mode 100644
index 0000000000..02d26ca887
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.48.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caf5ecefb819c557e1859aeebd18f12cd8401d7c11932b91f5649f0a67b49822
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.49.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.49.body
new file mode 100644
index 0000000000..49fc1b52b2
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.49.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f31031a3705556006b168d18a9c9dc68332cb5fa13d46a3502cb4bb02791a2ab
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.5.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.5.body
new file mode 100644
index 0000000000..687905baf9
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.5.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df1d43e9e6916d1f08875df40b58d724e2093e6ff7740e8534c099f7ae5c21f7
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.50.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.50.body
new file mode 100644
index 0000000000..0f7720e711
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.50.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78591c40979b365ffbcf07ec5e47383b1653cbc34a93b5838e98ef90061a8269
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.51.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.51.body
new file mode 100644
index 0000000000..61631241c8
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.51.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45ccd17d7907ebb7cf38049b5efb967eb461def9e29214b88a4d89130cf54177
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.52.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.52.body
new file mode 100644
index 0000000000..02ba923617
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.52.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0c1c0b282da3a9b0834903c99ef9f986124b68505e93a00b30177a2ac83576b
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.53.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.53.body
new file mode 100644
index 0000000000..c38e1305c3
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.53.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:feef2268d3fde724eebb2179b43b6e140c505e15171b4d2833512926cf938dca
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.54.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.54.body
new file mode 100644
index 0000000000..c483a712d4
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.54.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5fedcfffe03ea2afc2eb334fe381cb8fe0f84495e24944733b597755f71c801
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.55.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.55.body
new file mode 100644
index 0000000000..37a031ca3e
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.55.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d39a804619ce06d8cd0ab0f9b5b165d2093f30ed7535c709817fc66947bd0d4e
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.56.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.56.body
new file mode 100644
index 0000000000..288e0203f5
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.56.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a2f77da3ec1a00cad0208c40cf49776d2f0edd8a91b021cc018e1af8fd10e37
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.57.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.57.body
new file mode 100644
index 0000000000..775386b1ac
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.57.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b806af5a740714d9f03947e2bd37d17649c2736fcdbce7048b480c18f7910014
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.58.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.58.body
new file mode 100644
index 0000000000..07b9eea9f2
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.58.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc7750c57c01ea03ffb3952605ff54bd31d3b8b428d2605cbca78f3943950641
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.59.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.59.body
new file mode 100644
index 0000000000..954f11650a
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.59.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3ee83ee62eca8cf3448612a10c36522681f6854c581c37f4eb44c61fe11fc73
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.6.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.6.body
new file mode 100644
index 0000000000..3f0b54ba90
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.6.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44d3fa02443f59ba264f405d95e84683c0fd75abeb002ad328627d607ad16892
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.60.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.60.body
new file mode 100644
index 0000000000..de537e3c98
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.60.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dbb1516ecbd3d0962c8dd678b377e3786ac5234428cfd7c8c0a464d6f1af0ee
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.61.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.61.body
new file mode 100644
index 0000000000..5d5eacb50e
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.61.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fde526625654d2dc0f339a4af949d8f697964b520ce073ebcecff571c6edffad
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.62.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.62.body
new file mode 100644
index 0000000000..e3aecbe233
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.62.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b10703cab024ed7b3ce8134a0fcd8b9d5cf7b34d424405b26796344b704917de
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.63.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.63.body
new file mode 100644
index 0000000000..b36d8588d1
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.63.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73109af79f1bddcb289b3e43c5baa19c4228c4c43489b39ea39bb4b31cc95795
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.64.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.64.body
new file mode 100644
index 0000000000..31024d4a82
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.64.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cf92043c0106286c0971613eb9e001e6d747611bf6a468314eb43b9ab146a5f
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.65.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.65.body
new file mode 100644
index 0000000000..8d9287bcfd
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.65.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e9d593f164b8e8d0797e4e17b0bf857f6388e841bb51d41e03397e98fca7fda
+size 357
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.7.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.7.body
new file mode 100644
index 0000000000..a1d63703f6
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.7.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37694aeb534444ab02ced6e95f2fa07e39b0f7f4609132fcfd8fe878f199497c
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.8.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.8.body
new file mode 100644
index 0000000000..669085aa1f
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.8.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b309a79ea3ec05487b6087b0fdebe3ec7edb0139ce8815b06dad0550f21018f
+size 1439
diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.9.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.9.body
new file mode 100644
index 0000000000..43c885fb3b
--- /dev/null
+++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.9.body
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb10fcd6e33a82e457159392d6b4526921b5e9b683e2c6b9603d7e3e383a970f
+size 1439
diff --git a/tests/test_inference_stage.py b/tests/test_inference_stage.py
index 7c92d3588e..412d77077d 100755
--- a/tests/test_inference_stage.py
+++ b/tests/test_inference_stage.py
@@ -40,7 +40,8 @@ def _mk_message(count=1, mess_count=1, offset=0, mess_offset=0):
     m.mess_offset = mess_offset
     m.mess_count = mess_count
     m.probs = cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]])
-    m.get_input.return_value = cp.array([[0, 1, 2], [0, 1, 2]])
+    m.seq_ids = cp.array([list(range(count)), list(range(count)), list(range(count))])
+    m.get_input.return_value = cp.array([[0, 1, 2], [0, 1, 2], [0, 1, 2]])
     return m
 
 
@@ -138,6 +139,7 @@ def test_py_inf_fn_on_next(mock_ops, mock_future, config):
     mock_slice = mock.MagicMock()
     mock_slice.mess_count = 1
     mock_slice.count = 1
+    mock_slice.seq_ids = mock_message.seq_ids
     mock_message.get_slice.return_value = mock_slice
 
     output_message = on_next(mock_message)
diff --git a/tests/test_inference_worker.py b/tests/test_inference_worker.py
index b02d7dacbe..2ed41a96cd 100755
--- a/tests/test_inference_worker.py
+++ b/tests/test_inference_worker.py
@@ -38,14 +38,26 @@ def test_build_output_message(config):
     pq = ProducerConsumerQueue()
     iw = IW(pq)
 
+    mock_message = mock.MagicMock()
+    mock_message.count = 10
+    mock_message.mess_offset = 11
+    mock_message.mess_count = 2
+    mock_message.offset = 12
+
+    response = iw.build_output_message(mock_message)
+    assert response.count == 2
+    assert response.mess_offset == 11
+    assert response.mess_count == 2
+    assert response.offset == 0
+
     mock_message = mock.MagicMock()
     mock_message.count = 2
     mock_message.mess_offset = 11
-    mock_message.mess_count = 10
+    mock_message.mess_count = 2
     mock_message.offset = 12
 
     response = iw.build_output_message(mock_message)
     assert response.count == 2
     assert response.mess_offset == 11
-    assert response.mess_count == 10
-    assert response.offset == 12
+    assert response.mess_count == 2
+    assert response.offset == 0
diff --git a/tests/test_sid.py b/tests/test_sid.py
index ebe67bea1f..0aac67a751 100755
--- a/tests/test_sid.py
+++ b/tests/test_sid.py
@@ -120,10 +120,7 @@ def async_infer(callback=None, **k):
     assert results.diff_rows == 1333
 
 
-@pytest.mark.slow
-@pytest.mark.use_cpp
-@pytest.mark.usefixtures("launch_mock_triton")
-def test_minibert_cpp(config, tmp_path):
+def _run_minibert_cpp(config, tmp_path, model_name, truncated):
     config.mode = PipelineModes.NLP
     config.class_labels = [
         "address",
@@ -154,14 +151,11 @@ def test_minibert_cpp(config, tmp_path):
     pipe.add_stage(
         PreprocessNLPStage(config,
                            vocab_hash_file=vocab_file_name,
-                           truncation=True,
+                           truncation=truncated,
                            do_lower_case=True,
                            add_special_tokens=False))
     pipe.add_stage(
-        TritonInferenceStage(config,
-                             model_name='sid-minibert-onnx',
-                             server_url='localhost:8001',
-                             force_convert_inputs=True))
+        TritonInferenceStage(config, model_name=model_name, server_url='localhost:8001', force_convert_inputs=True))
     pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf"))
     pipe.add_stage(AddClassificationsStage(config, threshold=0.5, prefix="si_"))
     pipe.add_stage(
@@ -170,5 +164,20 @@ def test_minibert_cpp(config, tmp_path):
     pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False))
 
     pipe.run()
-    results = calc_error_val(results_file_name)
+    return calc_error_val(results_file_name)
+
+
+@pytest.mark.slow
+@pytest.mark.use_cpp
+@pytest.mark.usefixtures("launch_mock_triton")
+def test_minibert_cpp_truncated(config, tmp_path):
+    results = _run_minibert_cpp(config, tmp_path, 'sid-minibert-onnx', True)
     assert results.diff_rows == 1204
+
+
+@pytest.mark.slow
+@pytest.mark.use_cpp
+@pytest.mark.usefixtures("launch_mock_triton")
+def test_minibert_cpp(config, tmp_path):
+    results = _run_minibert_cpp(config, tmp_path, 'sid-minibert-onnx-no-trunc', False)
+    assert results.diff_rows == 18