From 99d767f41d925ca623fff0c43e81eeaddd055b1c Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Mon, 8 Aug 2022 12:47:28 -0700 Subject: [PATCH] Fixes issues with NLP pipelines when data is not truncated (#316) * Fixes typos in `TensorObject::read_element` as suggested in description in #305 * In cases where the length of the output results does not match the length of the dataframe the `seq_ids` array is used to perform reduction of the rows, such that if rows 5, 6 & 7 of the output results map to row 5 in the dataframe, the column-wise max of those rows is stored in the response output. * Add new method `MatxUtil::reduce_max` to perform reduction. fixes #305 Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/316 --- .../include/morpheus/objects/dev_mem_info.hpp | 4 +- .../morpheus/objects/tensor_object.hpp | 10 +- .../include/morpheus/utilities/matx_util.hpp | 20 ++ .../_lib/src/messages/multi_inference.cpp | 17 +- morpheus/_lib/src/messages/multi_response.cpp | 8 +- morpheus/_lib/src/stages/triton_inference.cpp | 97 +++++++-- morpheus/_lib/src/utilities/matx_util.cu | 92 +++++++++ morpheus/_lib/tests/CMakeLists.txt | 2 + morpheus/_lib/tests/test_matx_util.cpp | 186 ++++++++++++++++++ morpheus/_lib/tests/test_morpheus.cpp | 27 +++ morpheus/_lib/tests/test_morpheus.hpp | 5 + morpheus/_lib/tests/test_multi_slices.cpp | 7 - .../inference/auto_encoder_inference_stage.py | 9 +- morpheus/stages/inference/inference_stage.py | 16 +- tests/benchmarks/test_bench_e2e_pipelines.py | 2 +- .../sid-minibert-onnx-no-trunc/GET.mock | 5 + .../config/GET.mock | 5 + .../infer/POST.mock | 24 +++ .../sid-minibert-onnx-no-trunc/ready/GET.mock | 3 + .../sid-no-trunc/sid_infer_resp.1.body | 3 + .../sid-no-trunc/sid_infer_resp.10.body | 3 + .../sid-no-trunc/sid_infer_resp.11.body | 3 + .../sid-no-trunc/sid_infer_resp.12.body | 3 + 
.../sid-no-trunc/sid_infer_resp.13.body | 3 + .../sid-no-trunc/sid_infer_resp.14.body | 3 + .../sid-no-trunc/sid_infer_resp.15.body | 3 + .../sid-no-trunc/sid_infer_resp.16.body | 3 + .../sid-no-trunc/sid_infer_resp.17.body | 3 + .../sid-no-trunc/sid_infer_resp.18.body | 3 + .../sid-no-trunc/sid_infer_resp.19.body | 3 + .../sid-no-trunc/sid_infer_resp.2.body | 3 + .../sid-no-trunc/sid_infer_resp.20.body | 3 + .../sid-no-trunc/sid_infer_resp.21.body | 3 + .../sid-no-trunc/sid_infer_resp.22.body | 3 + .../sid-no-trunc/sid_infer_resp.23.body | 3 + .../sid-no-trunc/sid_infer_resp.24.body | 3 + .../sid-no-trunc/sid_infer_resp.25.body | 3 + .../sid-no-trunc/sid_infer_resp.26.body | 3 + .../sid-no-trunc/sid_infer_resp.27.body | 3 + .../sid-no-trunc/sid_infer_resp.28.body | 3 + .../sid-no-trunc/sid_infer_resp.29.body | 3 + .../sid-no-trunc/sid_infer_resp.3.body | 3 + .../sid-no-trunc/sid_infer_resp.30.body | 3 + .../sid-no-trunc/sid_infer_resp.31.body | 3 + .../sid-no-trunc/sid_infer_resp.32.body | 3 + .../sid-no-trunc/sid_infer_resp.33.body | 3 + .../sid-no-trunc/sid_infer_resp.34.body | 3 + .../sid-no-trunc/sid_infer_resp.35.body | 3 + .../sid-no-trunc/sid_infer_resp.36.body | 3 + .../sid-no-trunc/sid_infer_resp.37.body | 3 + .../sid-no-trunc/sid_infer_resp.38.body | 3 + .../sid-no-trunc/sid_infer_resp.39.body | 3 + .../sid-no-trunc/sid_infer_resp.4.body | 3 + .../sid-no-trunc/sid_infer_resp.40.body | 3 + .../sid-no-trunc/sid_infer_resp.41.body | 3 + .../sid-no-trunc/sid_infer_resp.42.body | 3 + .../sid-no-trunc/sid_infer_resp.43.body | 3 + .../sid-no-trunc/sid_infer_resp.44.body | 3 + .../sid-no-trunc/sid_infer_resp.45.body | 3 + .../sid-no-trunc/sid_infer_resp.46.body | 3 + .../sid-no-trunc/sid_infer_resp.47.body | 3 + .../sid-no-trunc/sid_infer_resp.48.body | 3 + .../sid-no-trunc/sid_infer_resp.49.body | 3 + .../sid-no-trunc/sid_infer_resp.5.body | 3 + .../sid-no-trunc/sid_infer_resp.50.body | 3 + .../sid-no-trunc/sid_infer_resp.51.body | 3 + 
.../sid-no-trunc/sid_infer_resp.52.body | 3 + .../sid-no-trunc/sid_infer_resp.53.body | 3 + .../sid-no-trunc/sid_infer_resp.54.body | 3 + .../sid-no-trunc/sid_infer_resp.55.body | 3 + .../sid-no-trunc/sid_infer_resp.56.body | 3 + .../sid-no-trunc/sid_infer_resp.57.body | 3 + .../sid-no-trunc/sid_infer_resp.58.body | 3 + .../sid-no-trunc/sid_infer_resp.59.body | 3 + .../sid-no-trunc/sid_infer_resp.6.body | 3 + .../sid-no-trunc/sid_infer_resp.60.body | 3 + .../sid-no-trunc/sid_infer_resp.61.body | 3 + .../sid-no-trunc/sid_infer_resp.62.body | 3 + .../sid-no-trunc/sid_infer_resp.63.body | 3 + .../sid-no-trunc/sid_infer_resp.64.body | 3 + .../sid-no-trunc/sid_infer_resp.65.body | 3 + .../sid-no-trunc/sid_infer_resp.7.body | 3 + .../sid-no-trunc/sid_infer_resp.8.body | 3 + .../sid-no-trunc/sid_infer_resp.9.body | 3 + tests/test_inference_stage.py | 4 +- tests/test_inference_worker.py | 18 +- tests/test_sid.py | 29 ++- 87 files changed, 714 insertions(+), 71 deletions(-) create mode 100644 morpheus/_lib/tests/test_matx_util.cpp create mode 100644 morpheus/_lib/tests/test_morpheus.cpp create mode 100644 tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/GET.mock create mode 100644 tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/config/GET.mock create mode 100644 tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/infer/POST.mock create mode 100644 tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/ready/GET.mock create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.1.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.10.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.11.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.12.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.13.body create mode 100644 
tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.14.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.15.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.16.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.17.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.18.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.19.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.2.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.20.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.21.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.22.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.23.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.24.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.25.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.26.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.27.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.28.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.29.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.3.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.30.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.31.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.32.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.33.body create mode 100644 
tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.34.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.35.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.36.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.37.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.38.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.39.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.4.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.40.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.41.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.42.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.43.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.44.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.45.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.46.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.47.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.48.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.49.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.5.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.50.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.51.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.52.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.53.body create mode 100644 
tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.54.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.55.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.56.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.57.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.58.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.59.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.6.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.60.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.61.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.62.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.63.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.64.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.65.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.7.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.8.body create mode 100644 tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.9.body diff --git a/morpheus/_lib/include/morpheus/objects/dev_mem_info.hpp b/morpheus/_lib/include/morpheus/objects/dev_mem_info.hpp index e72d86680c..9c0f2e0f42 100644 --- a/morpheus/_lib/include/morpheus/objects/dev_mem_info.hpp +++ b/morpheus/_lib/include/morpheus/objects/dev_mem_info.hpp @@ -42,7 +42,9 @@ struct DevMemInfo size_t offset; /** - * TODO(Documentation) + * @brief Returns raw pointer to underlying buffer offset by the `offset` + * + * @return void* */ void *data() const; }; diff --git a/morpheus/_lib/include/morpheus/objects/tensor_object.hpp 
b/morpheus/_lib/include/morpheus/objects/tensor_object.hpp index 7986b01c69..766e4a1cc3 100644 --- a/morpheus/_lib/include/morpheus/objects/tensor_object.hpp +++ b/morpheus/_lib/include/morpheus/objects/tensor_object.hpp @@ -476,8 +476,10 @@ struct TensorObject final auto stride = this->get_stride(); auto shape = this->get_shape(); + CHECK(shape.size() == N) << "Length of idx must match lengh of shape"; + CHECK(std::transform_reduce( - stride.begin(), stride.end(), std::begin(idx), 0, std::logical_and<>(), std::less<>())) + shape.begin(), shape.end(), std::begin(idx), 1, std::logical_and<>(), std::greater<>())) << "Index is outsize of the bounds of the tensor. Index=" << detail::array_to_str(std::begin(idx), std::begin(idx) + N) << ", Size=" << detail::array_to_str(shape.begin(), shape.end()) << ""; @@ -504,8 +506,10 @@ struct TensorObject final auto stride = this->get_stride(); auto shape = this->get_shape(); - CHECK(std::transform_reduce( - stride.begin(), stride.end(), std::begin(idx), 0, std::logical_and<>(), std::less<>())) + CHECK(shape.size() == N) << "Length of idx must match lengh of shape"; + + CHECK( + std::transform_reduce(shape.begin(), shape.end(), std::begin(idx), 1, std::logical_and<>(), std::less<>())) << "Index is outsize of the bounds of the tensor. 
Index=" << detail::array_to_str(std::begin(idx), std::begin(idx) + N) << ", Size=" << detail::array_to_str(shape.begin(), shape.end()) << ""; diff --git a/morpheus/_lib/include/morpheus/utilities/matx_util.hpp b/morpheus/_lib/include/morpheus/utilities/matx_util.hpp index 05e559ba27..25b5109704 100644 --- a/morpheus/_lib/include/morpheus/utilities/matx_util.hpp +++ b/morpheus/_lib/include/morpheus/utilities/matx_util.hpp @@ -24,6 +24,7 @@ #include #include +#include namespace morpheus { struct MatxUtil @@ -63,5 +64,24 @@ struct MatxUtil const std::vector &stride, double thresh_val, bool by_row); + + /** + * @brief Returns a buffer with `output_shape` containing the max value from values in `input` mapped according to + * `seq_ids`. + * Ex given a hypothetical input of: + * + * input = [5, 2, 8, 9, 8, 2, 1] + * seq_ids = [0, 0, 0, 1, 2, 3, 3] + * + * Will return: + * [8, 9, 8, 2] + * @return std::shared_ptr + */ + static std::shared_ptr reduce_max(const DevMemInfo &input, + const std::vector &seq_ids, + size_t seq_id_offset, + const std::vector &input_shape, + const std::vector &input_stride, + const std::vector &output_shape); }; } // namespace morpheus diff --git a/morpheus/_lib/src/messages/multi_inference.cpp b/morpheus/_lib/src/messages/multi_inference.cpp index 936382d453..24c4d8a013 100644 --- a/morpheus/_lib/src/messages/multi_inference.cpp +++ b/morpheus/_lib/src/messages/multi_inference.cpp @@ -73,8 +73,6 @@ void MultiInferenceMessage::get_slice_impl(std::shared_ptr new_mes std::size_t start, std::size_t stop) const { - CHECK(this->mess_count == this->count) << "At this time, mess_count and count must be the same for slicing"; - auto sliced_message = DCHECK_NOTNULL(std::dynamic_pointer_cast(new_message)); sliced_message->offset = start; @@ -82,7 +80,7 @@ void MultiInferenceMessage::get_slice_impl(std::shared_ptr new_mes // If we have more inference rows than message rows, we need to use the seq_ids to figure out the slicing. 
This // will be slow and should be avoided at all costs - if (this->memory->has_input("seq_ids") && this->count != this->mess_count) + if (this->count != this->mess_count && this->memory->has_input("seq_ids")) { auto seq_ids = this->get_input("seq_ids"); @@ -146,12 +144,6 @@ std::size_t MultiInferenceMessageInterfaceProxy::count(MultiInferenceMessage &se pybind11::object MultiInferenceMessageInterfaceProxy::get_input(MultiInferenceMessage &self, const std::string &name) { const auto &py_tensor = CupyUtil::tensor_to_cupy(self.get_input(name)); - - // // Need to get just our portion. TODO(MDD): THis should be handled in get_input - // py::object sliced = py_tensor[py::make_tuple( - // py::slice(py::int_(self.offset), py::int_(self.offset + self.count), py::none()), - // py::slice(py::none(), py::none(), py::none()))]; - return py_tensor; } @@ -159,13 +151,6 @@ std::shared_ptr MultiInferenceMessageInterfaceProxy::get_ std::size_t start, std::size_t stop) { - // py::object seq_ids = CupyUtil::tensor_to_cupy(self.get_input("seq_ids"), m); - - // int mess_start = seq_ids[py::make_tuple(start, 0)].attr("item")().cast(); - // int mess_stop = seq_ids[py::make_tuple(stop - 1, 0)].attr("item")().cast() + 1; - - // return std::make_shared( - // self.meta, mess_start, mess_stop - mess_start, self.memory, start, stop - start); return self.get_slice(start, stop); } } // namespace morpheus diff --git a/morpheus/_lib/src/messages/multi_response.cpp b/morpheus/_lib/src/messages/multi_response.cpp index 8ad7cd0109..7b12506bbf 100644 --- a/morpheus/_lib/src/messages/multi_response.cpp +++ b/morpheus/_lib/src/messages/multi_response.cpp @@ -91,13 +91,17 @@ void MultiResponseMessage::get_slice_impl(std::shared_ptr new_mess std::size_t start, std::size_t stop) const { - CHECK(this->mess_count == this->count) << "At this time, mess_count and count must be the same for slicing"; - auto sliced_message = DCHECK_NOTNULL(std::dynamic_pointer_cast(new_message)); sliced_message->offset = start; 
sliced_message->count = stop - start; + // Currently our output lengths should always match mess_count, and even if they didn't we wouldn't have any way to + // associate rows in the output with rows in the dataframe. Note on the input side we have the seq_ids array to + // but we don't have any equivelant for the output. + DCHECK(this->count == this->mess_count) + << "Number of rows in response output does not match number of messages in DF"; + // Pass onto the base DerivedMultiMessage::get_slice_impl(new_message, start, stop); } diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index 838379df35..0e6c4eb7f0 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -54,6 +54,12 @@ void InferenceClientStage__check_triton_errors(triton::client::Error status, throw std::runtime_error(err_msg); } } + +template +inline IndexT get_elem_count(const std::vector &shape) +{ + return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>()); +} } // namespace namespace morpheus { @@ -87,34 +93,51 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator() return input.subscribe(rxcpp::make_observer( [this, &output, &client](sink_type_t x) { - auto reponse_memory = std::make_shared(x->count); + // When our tensor lengths are longer than our dataframe we will need to use the seq_ids + // array to lookup how the values should map back into the dataframe + const bool needs_seq_ids = x->mess_count != x->count; + auto reponse_memory = std::make_shared(x->mess_count); // Create the output memory blocks for (auto &model_output : m_model_outputs) { - auto total_shape = model_output.shape; + std::vector total_shape{model_output.shape.begin(), model_output.shape.end()}; - // First dimension will always end up being the number of rows - total_shape[0] = x->count; - - auto elem_count = std::accumulate(total_shape.begin(), total_shape.end(), 1, 
std::multiplies<>()); + // First dimension will always end up being the number of rows in the dataframe + total_shape[0] = static_cast(x->mess_count); + auto elem_count = get_elem_count(total_shape); // Create the output memory auto output_buffer = std::make_shared( elem_count * model_output.datatype.item_size(), rmm::cuda_stream_per_thread); reponse_memory->tensors[model_output.mapped_name] = Tensor::create( - std::move(output_buffer), - model_output.datatype, - std::vector{static_cast(total_shape[0]), static_cast(total_shape[1])}, - std::vector{}, - 0); + std::move(output_buffer), model_output.datatype, total_shape, std::vector{}, 0); } // This will be the final output of all mini-batches auto response = std::make_shared( x->meta, x->mess_offset, x->mess_count, std::move(reponse_memory), 0, reponse_memory->count); + std::unique_ptr> host_seq_ids{nullptr}; + if (needs_seq_ids) + { + // Take a copy of the sequence Ids allowing us to map rows in the response to rows in the dataframe + // The output tensors we store in `reponse_memory` will all be of the same length as the the + // dataframe. seq_ids has three columns, but we are only interested in the first column. 
+ auto seq_ids = x->get_input("seq_ids"); + const auto item_size = seq_ids.dtype().item_size(); + + host_seq_ids = std::make_unique>(x->count); + SRF_CHECK_CUDA(cudaMemcpy2D(host_seq_ids->data(), + item_size, + seq_ids.data(), + seq_ids.stride(0) * item_size, + item_size, + host_seq_ids->size(), + cudaMemcpyDeviceToHost)); + } + for (size_t i = 0; i < x->count; i += m_max_batch_size) { triton::client::InferInput *input1; @@ -122,8 +145,24 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator() size_t start = i; size_t stop = std::min(i + m_max_batch_size, x->count); - sink_type_t mini_batch_input = x->get_slice(start, stop); - source_type_t mini_batch_output = response->get_slice(start, stop); + sink_type_t mini_batch_input = x->get_slice(start, stop); + + size_t out_start = start; + size_t out_stop = stop; + if (needs_seq_ids) + { + out_start = (*host_seq_ids)[out_start]; + if (out_stop < host_seq_ids->size()) + { + out_stop = (*host_seq_ids)[out_stop]; + } + else + { + out_stop = x->mess_count; + } + } + + source_type_t mini_batch_output = response->get_slice(out_start, out_stop); // Iterate on the model inputs in case the model takes less than what tensors are available std::vector, std::vector>> @@ -199,12 +238,34 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator() SRF_CHECK_CUDA( cudaMemcpy(output_buffer->data(), output_ptr, output_ptr_size, cudaMemcpyHostToDevice)); + if (needs_seq_ids && output_shape[0] != mini_batch_output->count) + { + // Since we are working with slices of both the input and the output, the seq_ids have + // already been applied to the output's start & stop, so we only need to reduce the + // response tensort when the size doesn't match our output + std::vector mapped_output_shape{output_shape}; + mapped_output_shape[0] = mini_batch_output->count; + + size_t element_count = get_elem_count(output_shape); + + // Triton results are always in row-major as required by the KServe protocol + // 
https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#tensor-data + std::vector stride{output_shape[1], 1}; + output_buffer = MatxUtil::reduce_max( + DevMemInfo{element_count, model_output.datatype.type_id(), output_buffer, 0}, + *host_seq_ids, + mini_batch_input->offset, + output_shape, + stride, + mapped_output_shape); + output_shape = std::move(mapped_output_shape); + } + // If we need to do logits, do that here if (m_needs_logits) { - size_t element_count = - std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<>()); - output_buffer = MatxUtil::logits( + size_t element_count = get_elem_count(output_shape); + output_buffer = MatxUtil::logits( DevMemInfo{element_count, model_output.datatype.type_id(), output_buffer, 0}); } @@ -212,8 +273,8 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator() model_output.mapped_name, Tensor::create(std::move(output_buffer), model_output.datatype, - std::vector{static_cast(output_shape[0]), - static_cast(output_shape[1])}, + std::vector{static_cast(output_shape[0]), + static_cast(output_shape[1])}, std::vector{}, 0)); } diff --git a/morpheus/_lib/src/utilities/matx_util.cu b/morpheus/_lib/src/utilities/matx_util.cu index 5edabbfba8..dabc5d74b6 100644 --- a/morpheus/_lib/src/utilities/matx_util.cu +++ b/morpheus/_lib/src/utilities/matx_util.cu @@ -243,6 +243,45 @@ namespace morpheus { } }; + struct MatxUtil__MatxReduceMax { + matx::index_t num_input_rows; + matx::index_t num_cols; + std::vector input_stride; + matx::index_t num_output_rows; + void *input_data; + void *output_data; + rmm::cuda_stream_view stream; + + template()> * = nullptr> + void operator()(std::size_t start, std::size_t stop, int32_t output_idx) { + throw std::invalid_argument("Unsupported conversion"); + } + + template()> * = nullptr> + void operator()(std::size_t start, std::size_t stop, int32_t output_idx) { + auto input_count = stop - start; + matx::tensorShape_t<2> 
input_shape({static_cast(input_count), num_cols}); + matx::tensorShape_t<1> output_shape({num_cols}); + + matx::index_t output_stride[2] = {input_stride[0], input_stride[1]}; + if (output_stride[0] == 1) + { + output_stride[1] = num_output_rows; + } + + auto input_ptr = static_cast(input_data) + (start * input_stride[0]); + auto output_ptr = static_cast(output_data) + (output_idx * output_stride[0]); + + matx::tensor_t input_tensor(input_ptr, input_shape, {input_stride[0], input_stride[1]}); + matx::tensor_t output_tensor(output_ptr, output_shape, {output_stride[1]}); + + // We need to transpose the input such that rmax will reduce the rows + // Matx performs reductions over the innermost dimensions. + // see https://nvidia.github.io/MatX/api/reduce.html + matx::rmax(output_tensor, input_tensor.Permute({1, 0}), stream.value()); + } + }; + // Component public implementations // ************ MatxUtil************************* // std::shared_ptr MatxUtil::cast(const DevMemInfo &input, TypeId output_type) { @@ -337,4 +376,57 @@ namespace morpheus { return output; } + + std::shared_ptr + MatxUtil::reduce_max(const DevMemInfo &input, + const std::vector &seq_ids, + size_t seq_id_offset, + const std::vector &input_shape, + const std::vector &input_stride, + const std::vector &output_shape) + { + auto dtype = DType(input.type_id); + auto elem_size = dtype.item_size(); + auto cudf_type = cudf::data_type{dtype.cudf_type_id()}; + auto num_input_rows = input_shape[0]; + auto num_input_cols = input_shape[1]; + + std::vectormatx_stride{input_stride[0], input_stride[1]}; + std::size_t output_element_count = output_shape[0] * output_shape[1]; + std::size_t output_buff_size = elem_size * output_element_count; + + DCHECK(output_element_count <= input.element_count) << "Output buffer size should be less than or equal to the input"; + DCHECK(num_input_cols == output_shape[1]) << "Number of input and output columns must match"; + + auto output = std::make_shared(output_buff_size, + 
input.buffer->stream(), + input.buffer->memory_resource()); + + MatxUtil__MatxReduceMax matx_reduce_max{num_input_rows, num_input_cols, matx_stride, output_shape[0], input.data(), output->data(), output->stream()}; + + std::size_t start = 0; + auto output_offset = seq_ids[seq_id_offset]; + for (std::size_t i=0; i < num_input_rows; ++i) + { + auto idx = seq_ids[i+seq_id_offset]; + if (idx != seq_ids[start+seq_id_offset]) + { + cudf::type_dispatcher(cudf_type, + matx_reduce_max, + start, + i, + seq_ids[start+seq_id_offset]-output_offset); + start = i; + } + } + + cudf::type_dispatcher(cudf_type, + matx_reduce_max, + start, + num_input_rows, + seq_ids[start+seq_id_offset]-output_offset); + + srf::enqueue_stream_sync_event(output->stream()).get(); + return output; + } } diff --git a/morpheus/_lib/tests/CMakeLists.txt b/morpheus/_lib/tests/CMakeLists.txt index 89d7e29fb5..4e5217574e 100644 --- a/morpheus/_lib/tests/CMakeLists.txt +++ b/morpheus/_lib/tests/CMakeLists.txt @@ -19,6 +19,8 @@ list(APPEND CMAKE_MESSAGE_CONTEXT "tests") add_executable(test_libmorpheus # test_cuda.cu test_main.cpp + test_matx_util.cpp + test_morpheus.cpp test_multi_slices.cpp test_tensor.cpp test_type_util_detail.cpp diff --git a/morpheus/_lib/tests/test_matx_util.cpp b/morpheus/_lib/tests/test_matx_util.cpp new file mode 100644 index 0000000000..50091f5aaa --- /dev/null +++ b/morpheus/_lib/tests/test_matx_util.cpp @@ -0,0 +1,186 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "./test_morpheus.hpp" // IWYU pragma: associated + +#include "morpheus/utilities/matx_util.hpp" +#include "morpheus/utilities/type_util.hpp" +#include "morpheus/utilities/type_util_detail.hpp" + +#include +#include +#include +#include + +#include // for std::getenv +#include +#include +#include + +using namespace morpheus; + +TEST_CLASS(MatxUtil); + +TEST_F(TestMatxUtil, ReduceMax1d) +{ + // Test mimics example from the method's docstring + std::vector input{5, 2, 8, 9, 8, 2, 1}; + std::vector seq_ids{0, 0, 0, 1, 2, 3, 3}; + std::vector expected_output{8, 9, 8, 2}; + + DataType dtype(TypeId::FLOAT32); + + auto input_buffer = + std::make_shared(input.size() * dtype.item_size(), rmm::cuda_stream_per_thread); + + SRF_CHECK_CUDA(cudaMemcpy(input_buffer->data(), input.data(), input_buffer->size(), cudaMemcpyHostToDevice)); + + DevMemInfo dm{input.size(), dtype.type_id(), input_buffer, 0}; + std::vector input_shape{static_cast(input.size()), 1}; + std::vector output_shape{static_cast(expected_output.size()), 1}; + auto output_buffer = MatxUtil::reduce_max(dm, seq_ids, 0, input_shape, {1, 0}, output_shape); + + std::vector output(expected_output.size()); + SRF_CHECK_CUDA(cudaMemcpy(output.data(), output_buffer->data(), output_buffer->size(), cudaMemcpyDeviceToHost)); + + EXPECT_EQ(output, expected_output); +} + +TEST_F(TestMatxUtil, ReduceMax2dRowMajor) +{ + // clang-format off + // disabling clang-format to illustrate row-major layout + std::vector input{ + 0.1, 0.7, 0.7, 0.7, + 1.0, 0.9, 0.5, 0.9, + 1.0, 0.6, 0.7, 0.9, + 1.0, 0.2, 0.2, 
0.9, + 0.5, 0.8, 0.6, 0.0, + 0.3, 0.4, 0.1, 0.4, + 0.9, 0.3, 1.0, 0.6, + 0.5, 0.5, 0.6, 0.8, + 0.0, 0.3, 0.5, 0.6, + 0.6, 1.0, 0.8, 0.7, + 0.8, 0.8, 1.0, 0.6, + 0.1, 0.9, 0.1, 0.3}; + + // reducing 12 rows down to 5 + std::vector seq_ids{0, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4}; + + std::vector expected_output{ + 1.0, 0.9, 0.7, 0.9, + 1.0, 0.6, 0.7, 0.9, + 1.0, 0.8, 1.0, 0.9, + 0.8, 1.0, 1.0, 0.8, + 0.1, 0.9, 0.1, 0.3}; + // clang-format on + + // Copy data from table into one big buffer + std::size_t num_cols = 4; + std::size_t num_rows = 12; + std::size_t expected_rows = expected_output.size() / num_cols; + + EXPECT_EQ(num_cols * num_rows, input.size()); + EXPECT_EQ(expected_rows, 5); + DataType dtype(TypeId::FLOAT64); + EXPECT_EQ(dtype.item_size(), sizeof(double)); + + std::size_t buff_size = input.size() * dtype.item_size(); + auto input_buffer = std::make_shared(buff_size, rmm::cuda_stream_per_thread); + + SRF_CHECK_CUDA(cudaMemcpy(input_buffer->data(), input.data(), input_buffer->size(), cudaMemcpyHostToDevice)); + + DevMemInfo dm{input.size(), dtype.type_id(), input_buffer, 0}; + std::vector input_shape{static_cast(num_rows), static_cast(num_cols)}; + std::vector output_shape{static_cast(expected_rows), static_cast(num_cols)}; + auto output_buffer = + MatxUtil::reduce_max(dm, seq_ids, 0, input_shape, {static_cast(num_cols), 1}, output_shape); + + EXPECT_EQ(output_buffer->size(), expected_rows * num_cols * dtype.item_size()); + + std::vector output(expected_rows * num_cols); + SRF_CHECK_CUDA(cudaMemcpy(output.data(), output_buffer->data(), output_buffer->size(), cudaMemcpyDeviceToHost)); + + EXPECT_EQ(output.size(), expected_output.size()); + for (std::size_t i = 0; i < output.size(); ++i) + { + EXPECT_DOUBLE_EQ(output[i], expected_output[i]); + } +} + +TEST_F(TestMatxUtil, ReduceMax2dColMajor) +{ + std::filesystem::path morpheus_root{std::getenv("MORPHEUS_ROOT")}; + auto input_file = morpheus_root / "tests/tests_data/filter_probs.csv"; + + auto table_m = 
load_table_from_csv(input_file); + auto num_rows = table_m.tbl->num_rows(); + auto num_cols = table_m.tbl->num_columns(); + + EXPECT_EQ(num_rows, 20); + EXPECT_EQ(num_cols, 4); + + // Copy data from table into one big buffer + auto dtype = DType::from_cudf(table_m.tbl->get_column(0).type().id()); + std::size_t buff_size = num_cols * num_rows * dtype.item_size(); + + EXPECT_EQ(dtype.item_size(), sizeof(double)); + auto input_buffer = std::make_shared(buff_size, rmm::cuda_stream_per_thread); + + std::size_t offset{0}; + for (cudf::size_type i = 0; i < num_cols; ++i) + { + auto cv = table_m.tbl->get_column(i).view(); + SRF_CHECK_CUDA(cudaMemcpy(static_cast(input_buffer->data()) + offset, + cv.data(), + num_rows * dtype.item_size(), + cudaMemcpyDeviceToDevice)); + + offset += num_rows * dtype.item_size(); + } + + EXPECT_EQ(offset, buff_size); + + // reducing 20 rows down to 12 + std::vector seq_ids{0, 0, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 7, 7, 7, 8, 9, 9, 10, 11}; + // disabling formatting so I can enter the literal values by column + // clang-format off + std::vector expected_output{0.1, 1.0, 1.0, 1.0, 0.5, 0.3, 0.9, 0.5, 0.0, 0.6, 0.8, 0.1, + 0.7, 0.9, 0.6, 0.2, 0.8, 0.4, 0.3, 0.5, 0.3, 1.0, 0.8, 0.9, + 0.7, 0.5, 0.7, 0.2, 0.6, 0.1, 1.0, 0.6, 0.5, 0.8, 1.0, 0.1, + 0.7, 0.9, 0.9, 0.9, 0.0, 0.4, 0.6, 0.8, 0.6, 0.7, 0.6, 0.3}; + // clang-format on + const std::size_t expected_rows = 12; + EXPECT_EQ(expected_rows * num_cols, expected_output.size()); + + DevMemInfo dm{static_cast(num_rows * num_cols), dtype.type_id(), input_buffer, 0}; + std::vector input_shape{static_cast(num_rows), static_cast(num_cols)}; + std::vector output_shape{static_cast(expected_rows), static_cast(num_cols)}; + auto output_buffer = + MatxUtil::reduce_max(dm, seq_ids, 0, input_shape, {1, static_cast(num_rows)}, output_shape); + + EXPECT_EQ(output_buffer->size(), expected_rows * num_cols * dtype.item_size()); + + std::vector output(expected_rows * num_cols); + SRF_CHECK_CUDA(cudaMemcpy(output.data(), 
output_buffer->data(), output_buffer->size(), cudaMemcpyDeviceToHost)); + + EXPECT_EQ(output.size(), expected_output.size()); + for (std::size_t i = 0; i < output.size(); ++i) + { + EXPECT_DOUBLE_EQ(output[i], expected_output[i]); + } +} diff --git a/morpheus/_lib/tests/test_morpheus.cpp b/morpheus/_lib/tests/test_morpheus.cpp new file mode 100644 index 0000000000..3c41db3f1e --- /dev/null +++ b/morpheus/_lib/tests/test_morpheus.cpp @@ -0,0 +1,27 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "test_morpheus.hpp" + +#include +#include + +cudf::io::table_with_metadata load_table_from_csv(std::string filename) +{ + auto options = cudf::io::csv_reader_options::builder(cudf::io::source_info{filename}); + return cudf::io::read_csv(options.build()); +} diff --git a/morpheus/_lib/tests/test_morpheus.hpp b/morpheus/_lib/tests/test_morpheus.hpp index 1b8c9d2667..50c97c8cba 100644 --- a/morpheus/_lib/tests/test_morpheus.hpp +++ b/morpheus/_lib/tests/test_morpheus.hpp @@ -17,9 +17,14 @@ #pragma once +#include #include // IWYU pragma: keep #include // IWYU pragma: keep +#include + +cudf::io::table_with_metadata load_table_from_csv(std::string filename); + #define TEST_CLASS(name) \ class Test##name : public ::testing::Test \ {} diff --git a/morpheus/_lib/tests/test_multi_slices.cpp b/morpheus/_lib/tests/test_multi_slices.cpp index 28d0a8e803..d92bdac887 100644 --- a/morpheus/_lib/tests/test_multi_slices.cpp +++ b/morpheus/_lib/tests/test_multi_slices.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -29,12 +28,6 @@ #include #include -cudf::io::table_with_metadata load_table_from_csv(std::string filename) -{ - auto options = cudf::io::csv_reader_options::builder(cudf::io::source_info{filename}); - return cudf::io::read_csv(options.build()); -} - TEST_CLASS(Masking); TEST_F(TestMasking, Ranges) diff --git a/morpheus/stages/inference/auto_encoder_inference_stage.py b/morpheus/stages/inference/auto_encoder_inference_stage.py index 3f7a5a6646..c65b427977 100644 --- a/morpheus/stages/inference/auto_encoder_inference_stage.py +++ b/morpheus/stages/inference/auto_encoder_inference_stage.py @@ -56,17 +56,18 @@ def build_output_message(self, x: MultiInferenceAEMessage) -> MultiResponseAEMes Response message with autoencoder results calculated from inference results. 
""" - output_dims = self.calc_output_dims(x) + dims = self.calc_output_dims(x) + output_dims = (x.mess_count, *dims[1:]) - memory = ResponseMemoryProbs(count=x.count, probs=cp.zeros(output_dims)) + memory = ResponseMemoryProbs(count=output_dims[0], probs=cp.zeros(output_dims)) # Override the default to return the response AE output_message = MultiResponseAEMessage(meta=x.meta, mess_offset=x.mess_offset, mess_count=x.mess_count, memory=memory, - offset=x.offset, - count=x.count, + offset=0, + count=memory.count, user_id=x.user_id) return output_message diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index ca65ae1586..48f3471619 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -70,16 +70,17 @@ def build_output_message(self, x: MultiInferenceMessage) -> MultiResponseProbsMe Response message with probabilities calculated from inference results. """ - output_dims = self.calc_output_dims(x) + dims = self.calc_output_dims(x) + output_dims = (x.mess_count, *dims[1:]) - memory = ResponseMemoryProbs(count=x.count, probs=cp.zeros(output_dims)) + memory = ResponseMemoryProbs(count=output_dims[0], probs=cp.zeros(output_dims)) output_message = MultiResponseProbsMessage(meta=x.meta, mess_offset=x.mess_offset, mess_count=x.mess_count, memory=memory, - offset=x.offset, - count=x.count) + offset=0, + count=memory.count) return output_message @abstractmethod @@ -384,10 +385,15 @@ def _convert_one_response(memory: ResponseMemory, inf: MultiInferenceMessage, re probs = memory.get_output("probs") + seq_offset = inf.seq_ids[0, 0].item() + seq_count = inf.seq_ids[-1, 0].item() + 1 - seq_offset + # Two scenarios: if (inf.mess_count == inf.count): + assert seq_count == res.count + # In message and out message have same count. 
Just use probs as is - probs[inf.offset:inf.count + inf.offset, :] = res.probs + probs[seq_offset:seq_offset + seq_count, :] = res.probs else: assert inf.count == res.count diff --git a/tests/benchmarks/test_bench_e2e_pipelines.py b/tests/benchmarks/test_bench_e2e_pipelines.py index 0cd09b5905..87c14ba1c4 100644 --- a/tests/benchmarks/test_bench_e2e_pipelines.py +++ b/tests/benchmarks/test_bench_e2e_pipelines.py @@ -38,7 +38,7 @@ from morpheus.stages.preprocess.preprocess_fil_stage import PreprocessFILStage from morpheus.stages.preprocess.preprocess_nlp_stage import PreprocessNLPStage from morpheus.stages.preprocess.train_ae_stage import TrainAEStage -from morpheus.utils.logging import configure_logging +from morpheus.utils.logger import configure_logging from utils import TEST_DIRS e2e_config_file = os.path.join(TEST_DIRS.morpheus_root, "tests/benchmarks/e2e_test_configs.json") diff --git a/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/GET.mock b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/GET.mock new file mode 100644 index 0000000000..85e9132d7b --- /dev/null +++ b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/GET.mock @@ -0,0 +1,5 @@ +HTTP/1.1 200 OK +Content-Type: application/json +Content-Length: 269 + +{"name":"sid-minibert-onnx","versions":["1"],"platform":"onnxruntime_onnx","inputs":[{"name":"input_ids","datatype":"INT32","shape":[-1,256]},{"name":"attention_mask","datatype":"INT32","shape":[-1,256]}],"outputs":[{"name":"output","datatype":"FP32","shape":[-1,10]}]} diff --git a/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/config/GET.mock b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/config/GET.mock new file mode 100644 index 0000000000..b999dcaabd --- /dev/null +++ b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/config/GET.mock @@ -0,0 +1,5 @@ +HTTP/1.1 200 OK +Content-Type: application/json +Content-Length: 1430 + 
+{"name":"sid-minibert-onnx-no-trunc","platform":"onnxruntime_onnx","backend":"onnxruntime","version_policy":{"latest":{"num_versions":1}},"max_batch_size":32,"input":[{"name":"input_ids","data_type":"TYPE_INT32","format":"FORMAT_NONE","dims":[256],"is_shape_tensor":false,"allow_ragged_batch":false},{"name":"attention_mask","data_type":"TYPE_INT32","format":"FORMAT_NONE","dims":[256],"is_shape_tensor":false,"allow_ragged_batch":false}],"output":[{"name":"output","data_type":"TYPE_FP32","dims":[10],"label_filename":"","is_shape_tensor":false}],"batch_input":[],"batch_output":[],"optimization":{"priority":"PRIORITY_DEFAULT","execution_accelerators":{"gpu_execution_accelerator":[{"name":"tensorrt","parameters":{"max_workspace_size_bytes":"1073741824","precision_mode":"FP16"}}],"cpu_execution_accelerator":[]},"input_pinned_memory":{"enable":true},"output_pinned_memory":{"enable":true},"gather_kernel_buffer_threshold":0,"eager_batching":false},"dynamic_batching":{"preferred_batch_size":[1,4,8,16,32],"max_queue_delay_microseconds":50000,"preserve_ordering":false,"priority_levels":0,"default_priority_level":0,"priority_queue_policy":{}},"instance_group":[{"name":"sid-minibert-onnx","kind":"KIND_GPU","count":1,"gpus":[0,1,2,3,4,5,6,7],"secondary_devices":[],"profile":[],"passive":false,"host_policy":""}],"default_model_filename":"model.onnx","cc_model_filenames":{},"metric_tags":{},"parameters":{},"model_warmup":[]} diff --git a/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/infer/POST.mock b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/infer/POST.mock new file mode 100644 index 0000000000..ade4d64f25 --- /dev/null +++ b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/infer/POST.mock @@ -0,0 +1,24 @@ +HTTP/1.1 200 OK +Content-Type: application/octet-stream +{{#inject}}(()=>{ + if(!this.counter) { + this.counter=0; + } + + this.counter+=1; + this.filename = "payloads/sid-no-trunc/sid_infer_resp." 
+ this.counter + ".body" + + let inf_header_content_length = 157; + if (this.counter === 33) { + inf_header_content_length = 156; + } else if (this.counter === 65) { + inf_header_content_length = 155; + } + + // This seems like the only way to pass a variable to the file helper + request._nv_morpheus_params = {counter: this.counter, filename: this.filename}; + + return "Inference-Header-Content-Length: " + inf_header_content_length; +})();{{/inject}} + +{{file path=request._nv_morpheus_params.filename}} diff --git a/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/ready/GET.mock b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/ready/GET.mock new file mode 100644 index 0000000000..ac53519000 --- /dev/null +++ b/tests/mock_triton_server/mocks/v2/models/sid-minibert-onnx-no-trunc/ready/GET.mock @@ -0,0 +1,3 @@ +HTTP/1.1 200 OK +Content-Length: 0 +Content-Type: text/plain diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.1.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.1.body new file mode 100644 index 0000000000..73e526fd7b --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.1.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73051bc868b96c2df61f9b4b20579fe42dd47ff3f93d406f715b6e38e107b42b +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.10.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.10.body new file mode 100644 index 0000000000..ec53f8c810 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.10.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cf163270a505b6047049eb417a2274c4f637eb7ceabab758143737f4accb881 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.11.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.11.body new file mode 100644 index 
0000000000..d7548df7f6 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.11.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3310e0959db4747cbbea8d9a435e0141e8a9af15b3caf2fa4c4ae1145c6cb1c +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.12.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.12.body new file mode 100644 index 0000000000..56ffbf61a5 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.12.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca87e261d602921f0266a4ae30193dc8c845706c04412765d62d94249252d5b9 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.13.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.13.body new file mode 100644 index 0000000000..409b74357c --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.13.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14df69085de8e854cbc30a04454b836e1b9c8c13a6081185df38a58b9d49b085 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.14.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.14.body new file mode 100644 index 0000000000..19babce566 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.14.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea40d505a28803a6178dc351a01dc497e17bf0ae481c986860d77afdcdd44c4 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.15.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.15.body new file mode 100644 index 0000000000..79f26cf1a8 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.15.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a5cf50a4baabfb18854c4ab3f04dcf14c654dbfef793a1b85e5e5712521dfcfe +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.16.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.16.body new file mode 100644 index 0000000000..59775280df --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.16.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d4c7030120e6285b10477a024891668a797efbe3f845c7d3998964a131798d0 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.17.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.17.body new file mode 100644 index 0000000000..6f7639e982 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.17.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f8d9689bf56f29f6c2f5523703fd6f3304d2154b6e900186dbcdfcf66937e15 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.18.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.18.body new file mode 100644 index 0000000000..671acae3a0 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.18.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e16be9cc31da7de9ed702128bf1906cc23710061a5a37d02a9e6db56c0de098 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.19.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.19.body new file mode 100644 index 0000000000..7080bc794e --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.19.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3da7ae7b899799a05e17c414a0e185d0cdc423d3d3ac4c156bee4b15b722034 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.2.body 
b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.2.body new file mode 100644 index 0000000000..874a6876f0 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.2.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e23c772fa810c33a3e6ee01634393d7b8588d9425ac1158a7379dcc83c2ea0 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.20.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.20.body new file mode 100644 index 0000000000..d564c06b35 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.20.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3f3691419f7ad8af36dc0ab0b044794860188821ab4593ccbf2c74480a3e00 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.21.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.21.body new file mode 100644 index 0000000000..aa0b7940d9 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.21.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5086b91d5720f0cd0ddba7a415649f602c592e705e3b52607caf432d58c3ac4 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.22.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.22.body new file mode 100644 index 0000000000..b03a5f9082 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.22.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c92cc2832365add134f64f17bfe7975723bb0a1974a68fb26c8fae9efea5c6b5 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.23.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.23.body new file mode 100644 index 0000000000..a8ccae3829 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.23.body @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:a0d7d2aab93f0f0a00959da7e4de781b0cacbaa9ad9c6fc429359c57c81cd2c8 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.24.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.24.body new file mode 100644 index 0000000000..989e8df73b --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.24.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c523c85196e7b3d327fe7fc6e7de7e4f4da464b5d3af61a89cdd1f5b21fd300a +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.25.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.25.body new file mode 100644 index 0000000000..80d8d9a547 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.25.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce1f175938725fd25980f138fc49e0c34385f6ee888019fc6ae863c6d0570056 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.26.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.26.body new file mode 100644 index 0000000000..8ae55dba6d --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.26.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58d28a52397324002181c5e03524226079b4f07662915cdee6475e9a5251d058 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.27.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.27.body new file mode 100644 index 0000000000..691360751d --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.27.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa8691e609d609a5b183446eb7999312b218e56298c506c4c67fc5dad9692d5b +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.28.body 
b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.28.body new file mode 100644 index 0000000000..53dcc91ff4 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.28.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cc4e315feb391b84d269dfff43f4e98e877fc24ac64e04249e8ea2f2d755a97 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.29.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.29.body new file mode 100644 index 0000000000..e95aef6d2e --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.29.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc2dd163e1b36930634f2de2fb0b4e6d29b2590c0e43d39efa2ea90c461d669 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.3.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.3.body new file mode 100644 index 0000000000..527b219234 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.3.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca080e4e213033983b14e5142ced874e73446103623a3108225a979de1faaec9 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.30.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.30.body new file mode 100644 index 0000000000..1f01647562 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.30.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4703e8b9851f654120becbb6fe171f0205320570f5c436631dba8656744e9f85 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.31.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.31.body new file mode 100644 index 0000000000..a26af18719 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.31.body @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:ec6a53aef1db8abe2a151305544cb42a44216ec416c79136d66473680458cddc +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.32.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.32.body new file mode 100644 index 0000000000..1db9cc4107 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.32.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd789dcc76c733bfa3bfdc5912233e43c065e473b4ccb4dee0c2cd5ba97a07d +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.33.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.33.body new file mode 100644 index 0000000000..c4c27b930e --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.33.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d1f3449fc85e2cea901f87b77b1a827e92d66108fbe213df960ed015e8030d +size 838 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.34.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.34.body new file mode 100644 index 0000000000..721925edd4 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.34.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37b3e2227502e234c7756b2e99db9def690934614a74b3aa224d8a9d9aa33208 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.35.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.35.body new file mode 100644 index 0000000000..36a4fd45ca --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.35.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a38669a459b184fe799f77b0ccaed38e93fa7d9c724bcb1446c6ba8a1dc7972 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.36.body 
b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.36.body new file mode 100644 index 0000000000..3b75f2a14e --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.36.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c350b27c99510c44e327f415ef62f4acef4c7977f8f992ad074b6893633fb4b7 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.37.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.37.body new file mode 100644 index 0000000000..a5f4f4aa11 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.37.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b4337c1515c94efcc9a9df1ffdc8c39fbaa2994bf0e6a2a5dddaebc7d10b83 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.38.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.38.body new file mode 100644 index 0000000000..b3132f7709 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.38.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0ea6a4c73d03543749f5010e0c8bfa2dedb891177705b6d1b66356a06a9c0d +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.39.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.39.body new file mode 100644 index 0000000000..47e8e15b3e --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.39.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0138ca7c11a078c23bae12216b6161ca57338f442eb22cd94fff1bca03a7ea86 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.4.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.4.body new file mode 100644 index 0000000000..c6fd6c046d --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.4.body @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:989a7a4e21b4ab82e3e5d73f836c4d191455eebfc9c7a8ae5b685966338c9703 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.40.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.40.body new file mode 100644 index 0000000000..234ac747f7 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.40.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:426bbee9d4d5e43716168e5f6c8f260e128eb6bf3dae96193a6e626061e4aacb +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.41.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.41.body new file mode 100644 index 0000000000..c8b6897c02 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.41.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6a960efadd6f99be13f7484210b870ffbdbf6c7acb1506ca8c53ebf239d472f +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.42.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.42.body new file mode 100644 index 0000000000..53821a424c --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.42.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41043709f06e5f68120bc4dcd540b37c15ba8ec80a07d6a26bd0b295e302909f +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.43.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.43.body new file mode 100644 index 0000000000..e51b3d2dda --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.43.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e8bc2619c0d586388b1723363886f75a5ee62428c74627b600b5852891c08d +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.44.body 
b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.44.body new file mode 100644 index 0000000000..4676c04185 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.44.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:213e7192e9c09e445bf5028522894c95d4839f02448ce178d0c188e8a502e864 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.45.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.45.body new file mode 100644 index 0000000000..f4f316c4d9 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.45.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d9e208c67a6c3ad4386c2c64c6890faa683ee14b0a713e5479986935da7c26b +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.46.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.46.body new file mode 100644 index 0000000000..8714134c48 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.46.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c15b45933f0abc16b9a83369d6ccf3fabe50de1d6f78c88200a9b58504a0bc9 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.47.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.47.body new file mode 100644 index 0000000000..8ef91dec2b --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.47.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:562c92ac51b2038474daa86e364e0d6e71512e166cb2f755139b999c4302a61b +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.48.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.48.body new file mode 100644 index 0000000000..02d26ca887 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.48.body @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:caf5ecefb819c557e1859aeebd18f12cd8401d7c11932b91f5649f0a67b49822 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.49.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.49.body new file mode 100644 index 0000000000..49fc1b52b2 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.49.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f31031a3705556006b168d18a9c9dc68332cb5fa13d46a3502cb4bb02791a2ab +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.5.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.5.body new file mode 100644 index 0000000000..687905baf9 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.5.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1d43e9e6916d1f08875df40b58d724e2093e6ff7740e8534c099f7ae5c21f7 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.50.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.50.body new file mode 100644 index 0000000000..0f7720e711 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.50.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78591c40979b365ffbcf07ec5e47383b1653cbc34a93b5838e98ef90061a8269 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.51.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.51.body new file mode 100644 index 0000000000..61631241c8 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.51.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ccd17d7907ebb7cf38049b5efb967eb461def9e29214b88a4d89130cf54177 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.52.body 
b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.52.body new file mode 100644 index 0000000000..02ba923617 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.52.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0c1c0b282da3a9b0834903c99ef9f986124b68505e93a00b30177a2ac83576b +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.53.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.53.body new file mode 100644 index 0000000000..c38e1305c3 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.53.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feef2268d3fde724eebb2179b43b6e140c505e15171b4d2833512926cf938dca +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.54.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.54.body new file mode 100644 index 0000000000..c483a712d4 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.54.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5fedcfffe03ea2afc2eb334fe381cb8fe0f84495e24944733b597755f71c801 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.55.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.55.body new file mode 100644 index 0000000000..37a031ca3e --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.55.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d39a804619ce06d8cd0ab0f9b5b165d2093f30ed7535c709817fc66947bd0d4e +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.56.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.56.body new file mode 100644 index 0000000000..288e0203f5 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.56.body @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:0a2f77da3ec1a00cad0208c40cf49776d2f0edd8a91b021cc018e1af8fd10e37 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.57.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.57.body new file mode 100644 index 0000000000..775386b1ac --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.57.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b806af5a740714d9f03947e2bd37d17649c2736fcdbce7048b480c18f7910014 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.58.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.58.body new file mode 100644 index 0000000000..07b9eea9f2 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.58.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7750c57c01ea03ffb3952605ff54bd31d3b8b428d2605cbca78f3943950641 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.59.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.59.body new file mode 100644 index 0000000000..954f11650a --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.59.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ee83ee62eca8cf3448612a10c36522681f6854c581c37f4eb44c61fe11fc73 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.6.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.6.body new file mode 100644 index 0000000000..3f0b54ba90 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.6.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44d3fa02443f59ba264f405d95e84683c0fd75abeb002ad328627d607ad16892 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.60.body 
b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.60.body new file mode 100644 index 0000000000..de537e3c98 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.60.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dbb1516ecbd3d0962c8dd678b377e3786ac5234428cfd7c8c0a464d6f1af0ee +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.61.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.61.body new file mode 100644 index 0000000000..5d5eacb50e --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.61.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde526625654d2dc0f339a4af949d8f697964b520ce073ebcecff571c6edffad +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.62.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.62.body new file mode 100644 index 0000000000..e3aecbe233 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.62.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b10703cab024ed7b3ce8134a0fcd8b9d5cf7b34d424405b26796344b704917de +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.63.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.63.body new file mode 100644 index 0000000000..b36d8588d1 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.63.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73109af79f1bddcb289b3e43c5baa19c4228c4c43489b39ea39bb4b31cc95795 +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.64.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.64.body new file mode 100644 index 0000000000..31024d4a82 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.64.body @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:8cf92043c0106286c0971613eb9e001e6d747611bf6a468314eb43b9ab146a5f +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.65.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.65.body new file mode 100644 index 0000000000..8d9287bcfd --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.65.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e9d593f164b8e8d0797e4e17b0bf857f6388e841bb51d41e03397e98fca7fda +size 357 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.7.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.7.body new file mode 100644 index 0000000000..a1d63703f6 --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.7.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37694aeb534444ab02ced6e95f2fa07e39b0f7f4609132fcfd8fe878f199497c +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.8.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.8.body new file mode 100644 index 0000000000..669085aa1f --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.8.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b309a79ea3ec05487b6087b0fdebe3ec7edb0139ce8815b06dad0550f21018f +size 1439 diff --git a/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.9.body b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.9.body new file mode 100644 index 0000000000..43c885fb3b --- /dev/null +++ b/tests/mock_triton_server/payloads/sid-no-trunc/sid_infer_resp.9.body @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb10fcd6e33a82e457159392d6b4526921b5e9b683e2c6b9603d7e3e383a970f +size 1439 diff --git a/tests/test_inference_stage.py b/tests/test_inference_stage.py index 7c92d3588e..412d77077d 
100755 --- a/tests/test_inference_stage.py +++ b/tests/test_inference_stage.py @@ -40,7 +40,8 @@ def _mk_message(count=1, mess_count=1, offset=0, mess_offset=0): m.mess_offset = mess_offset m.mess_count = mess_count m.probs = cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]]) - m.get_input.return_value = cp.array([[0, 1, 2], [0, 1, 2]]) + m.seq_ids = cp.array([list(range(count)), list(range(count)), list(range(count))]) + m.get_input.return_value = cp.array([[0, 1, 2], [0, 1, 2], [0, 1, 2]]) return m @@ -138,6 +139,7 @@ def test_py_inf_fn_on_next(mock_ops, mock_future, config): mock_slice = mock.MagicMock() mock_slice.mess_count = 1 mock_slice.count = 1 + mock_slice.seq_ids = mock_message.seq_ids mock_message.get_slice.return_value = mock_slice output_message = on_next(mock_message) diff --git a/tests/test_inference_worker.py b/tests/test_inference_worker.py index b02d7dacbe..2ed41a96cd 100755 --- a/tests/test_inference_worker.py +++ b/tests/test_inference_worker.py @@ -38,14 +38,26 @@ def test_build_output_message(config): pq = ProducerConsumerQueue() iw = IW(pq) + mock_message = mock.MagicMock() + mock_message.count = 10 + mock_message.mess_offset = 11 + mock_message.mess_count = 2 + mock_message.offset = 12 + + response = iw.build_output_message(mock_message) + assert response.count == 2 + assert response.mess_offset == 11 + assert response.mess_count == 2 + assert response.offset == 0 + mock_message = mock.MagicMock() mock_message.count = 2 mock_message.mess_offset = 11 - mock_message.mess_count = 10 + mock_message.mess_count = 2 mock_message.offset = 12 response = iw.build_output_message(mock_message) assert response.count == 2 assert response.mess_offset == 11 - assert response.mess_count == 10 - assert response.offset == 12 + assert response.mess_count == 2 + assert response.offset == 0 diff --git a/tests/test_sid.py b/tests/test_sid.py index ebe67bea1f..0aac67a751 100755 --- a/tests/test_sid.py +++ b/tests/test_sid.py @@ -120,10 +120,7 @@ def 
async_infer(callback=None, **k): assert results.diff_rows == 1333 -@pytest.mark.slow -@pytest.mark.use_cpp -@pytest.mark.usefixtures("launch_mock_triton") -def test_minibert_cpp(config, tmp_path): +def _run_minibert_cpp(config, tmp_path, model_name, truncated): config.mode = PipelineModes.NLP config.class_labels = [ "address", @@ -154,14 +151,11 @@ def test_minibert_cpp(config, tmp_path): pipe.add_stage( PreprocessNLPStage(config, vocab_hash_file=vocab_file_name, - truncation=True, + truncation=truncated, do_lower_case=True, add_special_tokens=False)) pipe.add_stage( - TritonInferenceStage(config, - model_name='sid-minibert-onnx', - server_url='localhost:8001', - force_convert_inputs=True)) + TritonInferenceStage(config, model_name=model_name, server_url='localhost:8001', force_convert_inputs=True)) pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) pipe.add_stage(AddClassificationsStage(config, threshold=0.5, prefix="si_")) pipe.add_stage( @@ -170,5 +164,20 @@ def test_minibert_cpp(config, tmp_path): pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False)) pipe.run() - results = calc_error_val(results_file_name) + return calc_error_val(results_file_name) + + +@pytest.mark.slow +@pytest.mark.use_cpp +@pytest.mark.usefixtures("launch_mock_triton") +def test_minibert_cpp_truncated(config, tmp_path): + results = _run_minibert_cpp(config, tmp_path, 'sid-minibert-onnx', True) assert results.diff_rows == 1204 + + +@pytest.mark.slow +@pytest.mark.use_cpp +@pytest.mark.usefixtures("launch_mock_triton") +def test_minibert_cpp(config, tmp_path): + results = _run_minibert_cpp(config, tmp_path, 'sid-minibert-onnx-no-trunc', False) + assert results.diff_rows == 18