diff --git a/src/c++/perf_analyzer/client_backend/mock_client_backend.h b/src/c++/perf_analyzer/client_backend/mock_client_backend.h index 890dc21a5..dca2a403b 100644 --- a/src/c++/perf_analyzer/client_backend/mock_client_backend.h +++ b/src/c++/perf_analyzer/client_backend/mock_client_backend.h @@ -147,6 +147,50 @@ class MockClientStats { std::atomic num_active_infer_calls{0}; + // Struct tracking shared memory method calls + // + struct SharedMemoryStats { + std::atomic num_unregister_all_shared_memory_calls{0}; + std::atomic num_register_system_shared_memory_calls{0}; + std::atomic num_register_cuda_shared_memory_calls{0}; + std::atomic num_register_cuda_memory_calls{0}; + std::atomic num_register_system_memory_calls{0}; + std::atomic num_create_shared_memory_region_calls{0}; + std::atomic num_map_shared_memory_calls{0}; + std::atomic num_close_shared_memory_calls{0}; + std::atomic num_unlink_shared_memory_region_calls{0}; + std::atomic num_unmap_shared_memory_calls{0}; + + // bool operator==(const SharedMemoryStats& lhs, const SharedMemoryStats& + // rhs) + bool operator==(const SharedMemoryStats& rhs) const + { + if (this->num_unregister_all_shared_memory_calls == + rhs.num_unregister_all_shared_memory_calls && + this->num_register_system_shared_memory_calls == + rhs.num_register_system_shared_memory_calls && + this->num_register_cuda_shared_memory_calls == + rhs.num_register_cuda_shared_memory_calls && + this->num_register_cuda_memory_calls == + rhs.num_register_cuda_memory_calls && + this->num_register_system_memory_calls == + rhs.num_register_system_memory_calls && + this->num_create_shared_memory_region_calls == + rhs.num_create_shared_memory_region_calls && + this->num_map_shared_memory_calls == + rhs.num_map_shared_memory_calls && + this->num_close_shared_memory_calls == + rhs.num_close_shared_memory_calls && + this->num_unlink_shared_memory_region_calls == + rhs.num_unlink_shared_memory_region_calls && + this->num_unmap_shared_memory_calls == + rhs.num_unmap_shared_memory_calls) { + return true; + } + return false; + } + }; + std::chrono::milliseconds response_delay{0}; bool start_stream_enable_stats_value{false}; @@ -154,6 +198,7 @@ class MockClientStats { std::vector> request_timestamps; SeqStatus sequence_status; + SharedMemoryStats memory_stats; std::vector>> recorded_inputs{}; @@ -302,6 +347,75 @@ class MockClientBackend : public ClientBackend { return Error::Success; } + Error UnregisterAllSharedMemory() override + { + stats_->memory_stats.num_unregister_all_shared_memory_calls++; + return Error::Success; + } + + Error RegisterSystemSharedMemory( + const std::string& name, const std::string& key, + const size_t byte_size) override + { + stats_->memory_stats.num_register_system_shared_memory_calls++; + return Error::Success; + } + + Error RegisterCudaSharedMemory( + const std::string& name, const cudaIpcMemHandle_t& handle, + const size_t byte_size) override + { + stats_->memory_stats.num_register_cuda_shared_memory_calls++; + return Error::Success; + } + + Error RegisterCudaMemory( + const std::string& name, void* handle, const size_t byte_size) override + { + stats_->memory_stats.num_register_cuda_memory_calls++; + return Error::Success; + } + + Error RegisterSystemMemory( + const std::string& name, void* memory_ptr, + const size_t byte_size) override + { + stats_->memory_stats.num_register_system_memory_calls++; + return Error::Success; + } + + Error CreateSharedMemoryRegion( + std::string shm_key, size_t byte_size, int* shm_fd) override + { + stats_->memory_stats.num_create_shared_memory_region_calls++; + return Error::Success; + } + + Error MapSharedMemory( + int shm_fd, size_t offset, size_t byte_size, void** shm_addr) override + { + stats_->memory_stats.num_map_shared_memory_calls++; + return Error::Success; + } + + Error CloseSharedMemory(int shm_fd) override + { + stats_->memory_stats.num_close_shared_memory_calls++; + return Error::Success; + } + + Error UnlinkSharedMemoryRegion(std::string shm_key) override + { + stats_->memory_stats.num_unlink_shared_memory_region_calls++; + return Error::Success; + } + + Error UnmapSharedMemory(void* shm_addr, size_t byte_size) override + { + stats_->memory_stats.num_unmap_shared_memory_calls++; + return Error::Success; + } + private: void LaunchAsyncMockRequest(const InferOptions options, OnCompleteFn callback) { diff --git a/src/c++/perf_analyzer/concurrency_manager.cc b/src/c++/perf_analyzer/concurrency_manager.cc index ca9915c35..d6d439b72 100644 --- a/src/c++/perf_analyzer/concurrency_manager.cc +++ b/src/c++/perf_analyzer/concurrency_manager.cc @@ -40,11 +40,9 @@ cb::Error ConcurrencyManager::Create( const bool async, const bool streaming, const int32_t batch_size, const size_t max_threads, const size_t max_concurrency, - const size_t sequence_length, const size_t string_length, - const std::string& string_data, const bool zero_input, - std::vector& user_data, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const uint64_t start_sequence_id, const uint64_t sequence_id_range, + const size_t sequence_length, const SharedMemoryType shared_memory_type, + const size_t output_shm_size, const uint64_t start_sequence_id, + const uint64_t sequence_id_range, const std::shared_ptr& parser, const std::shared_ptr& factory, std::unique_ptr* manager) @@ -52,8 +50,7 @@ ConcurrencyManager::Create( std::unique_ptr local_manager(new ConcurrencyManager( async, streaming, batch_size, max_threads, max_concurrency, sequence_length, shared_memory_type, output_shm_size, start_sequence_id, - sequence_id_range, string_length, string_data, zero_input, user_data, - parser, factory)); + sequence_id_range, parser, factory)); *manager = std::move(local_manager); @@ -65,16 +62,13 @@ ConcurrencyManager::ConcurrencyManager( const size_t max_threads, const size_t max_concurrency, const size_t sequence_length, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const uint64_t start_sequence_id, - const uint64_t sequence_id_range, const size_t string_length, - const std::string& string_data, const bool zero_input, - std::vector& user_data, + const uint64_t sequence_id_range, const std::shared_ptr& parser, const std::shared_ptr& factory) : LoadManager( async, streaming, batch_size, max_threads, sequence_length, shared_memory_type, output_shm_size, start_sequence_id, - sequence_id_range, string_length, string_data, zero_input, user_data, - parser, factory), + sequence_id_range, parser, factory), execute_(true), max_concurrency_(max_concurrency) { if (on_sequence_model_) { diff --git a/src/c++/perf_analyzer/concurrency_manager.h b/src/c++/perf_analyzer/concurrency_manager.h index 24facdb00..c6a20e59f 100644 --- a/src/c++/perf_analyzer/concurrency_manager.h +++ b/src/c++/perf_analyzer/concurrency_manager.h @@ -79,11 +79,9 @@ class ConcurrencyManager : public LoadManager { static cb::Error Create( const bool async, const bool streaming, const int32_t batch_size, const size_t max_threads, const size_t max_concurrency, - const size_t sequence_length, const size_t string_length, - const std::string& string_data, const bool zero_input, - std::vector& user_data, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const uint64_t start_sequence_id, const uint64_t sequence_id_range, + const size_t sequence_length, const SharedMemoryType shared_memory_type, + const size_t output_shm_size, const uint64_t start_sequence_id, + const uint64_t sequence_id_range, const std::shared_ptr& parser, const std::shared_ptr& factory, std::unique_ptr* manager); @@ -100,9 +98,7 @@ class ConcurrencyManager : public LoadManager { const size_t max_threads, const size_t max_concurrency, const size_t sequence_length, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const uint64_t start_sequence_id, - const uint64_t sequence_id_range, const size_t string_length, - const std::string& string_data, const bool zero_input, - std::vector& user_data, + const uint64_t sequence_id_range, const std::shared_ptr& parser, const std::shared_ptr& factory); diff --git a/src/c++/perf_analyzer/custom_load_manager.cc b/src/c++/perf_analyzer/custom_load_manager.cc index 455303202..78139d872 100644 --- a/src/c++/perf_analyzer/custom_load_manager.cc +++ b/src/c++/perf_analyzer/custom_load_manager.cc @@ -36,11 +36,9 @@ CustomLoadManager::Create( const uint64_t measurement_window_ms, const std::string& request_intervals_file, const int32_t batch_size, const size_t max_threads, const uint32_t num_of_sequences, - const size_t sequence_length, const size_t string_length, - const std::string& string_data, const bool zero_input, - std::vector& user_data, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const uint64_t start_sequence_id, const uint64_t sequence_id_range, + const size_t sequence_length, const SharedMemoryType shared_memory_type, + const size_t output_shm_size, const uint64_t start_sequence_id, + const uint64_t sequence_id_range, const std::shared_ptr& parser, const std::shared_ptr& factory, std::unique_ptr* manager) @@ -49,7 +47,7 @@ CustomLoadManager::Create( async, streaming, request_intervals_file, batch_size, measurement_window_ms, max_threads, num_of_sequences, sequence_length, shared_memory_type, output_shm_size, start_sequence_id, sequence_id_range, - string_length, string_data, zero_input, user_data, parser, factory)); + parser, factory)); *manager = std::move(local_manager); @@ -63,16 +61,13 @@ CustomLoadManager::CustomLoadManager( const uint32_t num_of_sequences, const size_t sequence_length, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t string_length, const std::string& string_data, - const bool zero_input, std::vector& user_data, const std::shared_ptr& parser, const std::shared_ptr& factory) : RequestRateManager( async, streaming, Distribution::CUSTOM, batch_size, measurement_window_ms, max_threads, num_of_sequences, sequence_length, shared_memory_type, output_shm_size, start_sequence_id, - sequence_id_range, string_length, string_data, zero_input, user_data, - parser, factory), + sequence_id_range, parser, factory), request_intervals_file_(request_intervals_file) { } diff --git a/src/c++/perf_analyzer/custom_load_manager.h b/src/c++/perf_analyzer/custom_load_manager.h index 90d719497..341a13885 100644 --- a/src/c++/perf_analyzer/custom_load_manager.h +++ b/src/c++/perf_analyzer/custom_load_manager.h @@ -76,11 +76,9 @@ class CustomLoadManager : public RequestRateManager { const uint64_t measurement_window_ms, const std::string& request_intervals_file, const int32_t batch_size, const size_t max_threads, const uint32_t num_of_sequences, - const size_t sequence_length, const size_t string_length, - const std::string& string_data, const bool zero_input, - std::vector& user_data, - const SharedMemoryType shared_memory_type, const size_t output_shm_size, - const uint64_t start_sequence_id, const uint64_t sequence_id_range, + const size_t sequence_length, const SharedMemoryType shared_memory_type, + const size_t output_shm_size, const uint64_t start_sequence_id, + const uint64_t sequence_id_range, const std::shared_ptr& parser, const std::shared_ptr& factory, std::unique_ptr* manager); @@ -105,8 +103,6 @@ class CustomLoadManager : public RequestRateManager { const uint32_t num_of_sequences, const size_t sequence_length, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t string_length, const std::string& string_data, - const bool zero_input, std::vector& user_data, const std::shared_ptr& parser, const std::shared_ptr& factory); diff --git a/src/c++/perf_analyzer/load_manager.cc b/src/c++/perf_analyzer/load_manager.cc index b90865450..43f8a1bb9 100644 --- a/src/c++/perf_analyzer/load_manager.cc +++ b/src/c++/perf_analyzer/load_manager.cc @@ -188,8 +188,6 @@ LoadManager::LoadManager( const size_t max_threads, const size_t sequence_length, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t string_length, const std::string& string_data, - const bool zero_input, std::vector& user_data, const std::shared_ptr& parser, const std::shared_ptr& factory) : async_(async), streaming_(streaming), batch_size_(batch_size), @@ -202,8 +200,14 @@ LoadManager::LoadManager( on_sequence_model_ = ((parser_->SchedulerType() == ModelParser::SEQUENCE) || (parser_->SchedulerType() == ModelParser::ENSEMBLE_SEQUENCE)); +} - data_loader_.reset(new DataLoader(batch_size)); +void +LoadManager::InitManager( + const size_t string_length, const std::string& string_data, + const bool zero_input, std::vector& user_data) +{ + data_loader_.reset(new DataLoader(batch_size_)); auto status = InitManagerInputs(string_length, string_data, zero_input, user_data); @@ -451,40 +455,51 @@ LoadManager::InitSharedMemory() RETURN_IF_ERROR(CreateMemoryRegion( region_name, shared_memory_type_, alloc_size, reinterpret_cast(&input_shm_ptr))); - if (shared_memory_type_ == SharedMemoryType::SYSTEM_SHARED_MEMORY) { - // Populate the region with data - size_t count = 0; - size_t offset = 0; - size_t max_count = input.second.is_shape_tensor_ ? 1 : batch_size_; - while (count < max_count) { - memcpy(input_shm_ptr + offset, data_ptrs[count], byte_size[count]); - offset += byte_size[count]; - count++; - } - } else { + RETURN_IF_ERROR(CopySharedMemory( + input_shm_ptr, data_ptrs, byte_size, input.second.is_shape_tensor_, + region_name)); + } + } + } + return cb::Error::Success; +} + +cb::Error +LoadManager::CopySharedMemory( + uint8_t* input_shm_ptr, std::vector& data_ptrs, + std::vector& byte_size, bool is_shape_tensor, + std::string& region_name) +{ + if (shared_memory_type_ == SharedMemoryType::SYSTEM_SHARED_MEMORY) { + // Populate the region with data + size_t count = 0; + size_t offset = 0; + size_t max_count = is_shape_tensor ? 1 : batch_size_; + while (count < max_count) { + memcpy(input_shm_ptr + offset, data_ptrs[count], byte_size[count]); + offset += byte_size[count]; + count++; + } + } else { #ifdef TRITON_ENABLE_GPU - // Populate the region with data - size_t count = 0; - size_t offset = 0; - size_t max_count = input.second.is_shape_tensor_ ? 1 : batch_size_; - while (count < max_count) { - cudaError_t cuda_err = cudaMemcpy( - (void*)(input_shm_ptr + offset), (void*)data_ptrs[count], - byte_size[count], cudaMemcpyHostToDevice); - if (cuda_err != cudaSuccess) { - return cb::Error( - "Failed to copy data to cuda shared memory for " + - region_name + " : " + - std::string(cudaGetErrorString(cuda_err)), - pa::GENERIC_ERROR); - } - offset += byte_size[count]; - count++; - } -#endif // TRITON_ENABLE_GPU - } + // Populate the region with data + size_t count = 0; + size_t offset = 0; + size_t max_count = is_shape_tensor ? 1 : batch_size_; + while (count < max_count) { + cudaError_t cuda_err = cudaMemcpy( + (void*)(input_shm_ptr + offset), (void*)data_ptrs[count], + byte_size[count], cudaMemcpyHostToDevice); + if (cuda_err != cudaSuccess) { + return cb::Error( + "Failed to copy data to cuda shared memory for " + region_name + + " : " + std::string(cudaGetErrorString(cuda_err)), + pa::GENERIC_ERROR); } + offset += byte_size[count]; + count++; } +#endif // TRITON_ENABLE_GPU } return cb::Error::Success; } diff --git a/src/c++/perf_analyzer/load_manager.h b/src/c++/perf_analyzer/load_manager.h index 94acd5aa0..d3045f22e 100644 --- a/src/c++/perf_analyzer/load_manager.h +++ b/src/c++/perf_analyzer/load_manager.h @@ -41,6 +41,17 @@ class LoadManager { public: virtual ~LoadManager(); + /// Initialize the Manager class to set up shared memory and inputs + /// \param string_length The length of the random strings to be generated + /// for string inputs. + /// \param string_data The string to be used as string inputs for model. + /// \param zero_input Whether to use zero for model inputs. + /// \param user_data The vector containing path/paths to user-provided data + /// that can be a directory or path to a json data file. + void InitManager( + const size_t string_length, const std::string& string_data, + const bool zero_input, std::vector& user_data); + /// Check if the load manager is working as expected. /// \return cb::Error object indicating success or failure. cb::Error CheckHealth(); @@ -77,8 +88,6 @@ class LoadManager { const size_t max_threads, const size_t sequence_length, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const uint64_t start_sequence_id, const uint64_t sequence_id_range, - const size_t string_length, const std::string& string_data, - const bool zero_input, std::vector& user_data, const std::shared_ptr& parser, const std::shared_ptr& factory); @@ -105,6 +114,19 @@ class LoadManager { const std::string& shm_region_name, const SharedMemoryType& memory_type, const size_t byte_size, void** ptr); + /// \brief Helper function to handle copying shared memory to the correct + /// memory region + /// \param input_shm_ptr Pointer to the shared memory for a specific input + /// \param data_ptrs Pointer to the data for the batch + /// \param byte_size Size of the data being copied + /// \param is_shape_tensor Is the input a shape tensor + /// \param region_name Name of the shared memory region + /// \return cb::Error object indicating success or failure + virtual cb::Error CopySharedMemory( + uint8_t* input_shm_ptr, std::vector& data_ptrs, + std::vector& byte_size, bool is_shape_tensor, + std::string& region_name); + /// Stops all the worker threads generating the request load. void StopWorkerThreads(); diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc index 1d698a734..1b607435a 100644 --- a/src/c++/perf_analyzer/perf_analyzer.cc +++ b/src/c++/perf_analyzer/perf_analyzer.cc @@ -202,11 +202,9 @@ PerfAnalyzer::CreateAnalyzerObjects() pa::ConcurrencyManager::Create( params_->async, params_->streaming, params_->batch_size, params_->max_threads, params_->max_concurrency, - params_->sequence_length, params_->string_length, - params_->string_data, params_->zero_input, params_->user_data, - params_->shared_memory_type, params_->output_shm_size, - params_->start_sequence_id, params_->sequence_id_range, parser_, - factory, &manager), + params_->sequence_length, params_->shared_memory_type, + params_->output_shm_size, params_->start_sequence_id, + params_->sequence_id_range, parser_, factory, &manager), "failed to create concurrency manager"); } else if (params_->using_request_rate_range) { @@ -223,11 +221,9 @@ PerfAnalyzer::CreateAnalyzerObjects() params_->async, params_->streaming, params_->measurement_window_ms, params_->request_distribution, params_->batch_size, params_->max_threads, params_->num_of_sequences, - params_->sequence_length, params_->string_length, - params_->string_data, params_->zero_input, params_->user_data, - params_->shared_memory_type, params_->output_shm_size, - params_->start_sequence_id, params_->sequence_id_range, parser_, - factory, &manager), + params_->sequence_length, params_->shared_memory_type, + params_->output_shm_size, params_->start_sequence_id, + params_->sequence_id_range, parser_, factory, &manager), "failed to create request rate manager"); } else { @@ -244,14 +240,16 @@ PerfAnalyzer::CreateAnalyzerObjects() params_->async, params_->streaming, params_->measurement_window_ms, params_->request_intervals_file, params_->batch_size, params_->max_threads, params_->num_of_sequences, - params_->sequence_length, params_->string_length, - params_->string_data, params_->zero_input, params_->user_data, - params_->shared_memory_type, params_->output_shm_size, - params_->start_sequence_id, params_->sequence_id_range, parser_, - factory, &manager), + params_->sequence_length, params_->shared_memory_type, + params_->output_shm_size, params_->start_sequence_id, + params_->sequence_id_range, parser_, factory, &manager), "failed to create custom load manager"); } + manager->InitManager( + params_->string_length, params_->string_data, params_->zero_input, + params_->user_data); + FAIL_IF_ERR( pa::InferenceProfiler::Create( params_->verbose, params_->stability_threshold, diff --git a/src/c++/perf_analyzer/request_rate_manager.cc b/src/c++/perf_analyzer/request_rate_manager.cc index 738221061..bcf56bb26 100644 --- a/src/c++/perf_analyzer/request_rate_manager.cc +++ b/src/c++/perf_analyzer/request_rate_manager.cc @@ -41,8 +41,6 @@ RequestRateManager::Create( const uint64_t measurement_window_ms, Distribution request_distribution, const int32_t batch_size, const size_t max_threads, const uint32_t num_of_sequences, const size_t sequence_length, - const size_t string_length, const std::string& string_data, - const bool zero_input, std::vector& user_data, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const uint64_t start_sequence_id, const uint64_t sequence_id_range, const std::shared_ptr& parser, @@ -52,8 +50,7 @@ RequestRateManager::Create( std::unique_ptr local_manager(new RequestRateManager( async, streaming, request_distribution, batch_size, measurement_window_ms, max_threads, num_of_sequences, sequence_length, shared_memory_type, - output_shm_size, start_sequence_id, sequence_id_range, string_length, - string_data, zero_input, user_data, parser, factory)); + output_shm_size, start_sequence_id, sequence_id_range, parser, factory)); *manager = std::move(local_manager); @@ -66,16 +63,13 @@ RequestRateManager::RequestRateManager( const size_t max_threads, const uint32_t num_of_sequences, const size_t sequence_length, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const uint64_t start_sequence_id, - const uint64_t sequence_id_range, const size_t string_length, - const std::string& string_data, const bool zero_input, - std::vector& user_data, + const uint64_t sequence_id_range, const std::shared_ptr& parser, const std::shared_ptr& factory) : LoadManager( async, streaming, batch_size, max_threads, sequence_length, shared_memory_type, output_shm_size, start_sequence_id, - sequence_id_range, string_length, string_data, zero_input, user_data, - parser, factory), + sequence_id_range, parser, factory), request_distribution_(request_distribution), execute_(false) { if (on_sequence_model_) { diff --git a/src/c++/perf_analyzer/request_rate_manager.h b/src/c++/perf_analyzer/request_rate_manager.h index 50a2a5f7c..d70dd053a 100644 --- a/src/c++/perf_analyzer/request_rate_manager.h +++ b/src/c++/perf_analyzer/request_rate_manager.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. +// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -88,8 +88,6 @@ class RequestRateManager : public LoadManager { const uint64_t measurement_window_ms, Distribution request_distribution, const int32_t batch_size, const size_t max_threads, const uint32_t num_of_sequences, const size_t sequence_length, - const size_t string_length, const std::string& string_data, - const bool zero_input, std::vector& user_data, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const uint64_t start_sequence_id, const uint64_t sequence_id_range, const std::shared_ptr& parser, @@ -113,9 +111,7 @@ class RequestRateManager : public LoadManager { const size_t max_threads, const uint32_t num_of_sequences, const size_t sequence_length, const SharedMemoryType shared_memory_type, const size_t output_shm_size, const uint64_t start_sequence_id, - const uint64_t sequence_id_range, const size_t string_length, - const std::string& string_data, const bool zero_input, - std::vector& user_data, + const uint64_t sequence_id_range, const std::shared_ptr& parser, const std::shared_ptr& factory); diff --git a/src/c++/perf_analyzer/test_concurrency_manager.cc b/src/c++/perf_analyzer/test_concurrency_manager.cc index 8a1246fba..21b17f20b 100644 --- a/src/c++/perf_analyzer/test_concurrency_manager.cc +++ b/src/c++/perf_analyzer/test_concurrency_manager.cc @@ -44,10 +44,12 @@ class TestConcurrencyManager : public TestLoadManagerBase, params.async, params.streaming, params.batch_size, params.max_threads, params.max_concurrency, params.sequence_length, params.shared_memory_type, params.output_shm_size, - params.start_sequence_id, params.sequence_id_range, - params.string_length, params.string_data, params.zero_input, - params.user_data, GetParser(), GetFactory()) + params.start_sequence_id, params.sequence_id_range, GetParser(), + GetFactory()) { + InitManager( + params.string_length, params.string_data, params.zero_input, + params.user_data); } /// Test that the correct Infer function is called in the backend diff --git a/src/c++/perf_analyzer/test_load_manager.cc b/src/c++/perf_analyzer/test_load_manager.cc index 934f8a22b..7d07bf25a 100644 --- a/src/c++/perf_analyzer/test_load_manager.cc +++ b/src/c++/perf_analyzer/test_load_manager.cc @@ -44,9 +44,8 @@ class TestLoadManager : public TestLoadManagerBase, public LoadManager { params.async, params.streaming, params.batch_size, params.max_threads, params.sequence_length, params.shared_memory_type, params.output_shm_size, - params.start_sequence_id, params.sequence_id_range, - params.string_length, params.string_data, params.zero_input, - params.user_data, GetParser(), GetFactory()) + params.start_sequence_id, params.sequence_id_range, GetParser(), + GetFactory()) { } diff --git a/src/c++/perf_analyzer/test_load_manager_base.h b/src/c++/perf_analyzer/test_load_manager_base.h index b6f199660..d727eca37 100644 --- a/src/c++/perf_analyzer/test_load_manager_base.h +++ b/src/c++/perf_analyzer/test_load_manager_base.h @@ -107,6 +107,14 @@ class TestLoadManagerBase { } } + + void CheckSharedMemory( + const cb::MockClientStats::SharedMemoryStats& expected_stats) + { + auto actual_stats = GetStats(); + CHECK(expected_stats == actual_stats->memory_stats); + } + void CheckSequences(uint64_t expected_num_seq) { auto stats = GetStats(); @@ -206,5 +214,4 @@ class TestLoadManagerBase { return params; } }; - }} // namespace triton::perfanalyzer diff --git a/src/c++/perf_analyzer/test_request_rate_manager.cc b/src/c++/perf_analyzer/test_request_rate_manager.cc index 16e724ee7..5d50f99ee 100644 --- a/src/c++/perf_analyzer/test_request_rate_manager.cc +++ b/src/c++/perf_analyzer/test_request_rate_manager.cc @@ -111,10 +111,44 @@ class TestRequestRateManager : public TestLoadManagerBase, params.batch_size, params.measurement_window_ms, params.max_threads, params.num_of_sequences, params.sequence_length, params.shared_memory_type, params.output_shm_size, - params.start_sequence_id, params.sequence_id_range, - params.string_length, params.string_data, params.zero_input, - params.user_data, GetParser(), GetFactory()) + params.start_sequence_id, params.sequence_id_range, GetParser(), + GetFactory()) { + InitManager( + params.string_length, params.string_data, params.zero_input, + params.user_data); + } + + + /// Constructor that adds an arg to pass in the model parser and does NOT call + /// the InitManager code. This enables InitManager to be overloaded and mocked + /// out. + /// + TestRequestRateManager( + PerfAnalyzerParameters params, const std::shared_ptr& parser, + bool is_sequence_model = false, bool is_decoupled_model = false, + bool use_mock_infer = false) + : use_mock_infer_(use_mock_infer), + TestLoadManagerBase(params, is_sequence_model, is_decoupled_model), + RequestRateManager( + params.async, params.streaming, params.request_distribution, + params.batch_size, params.measurement_window_ms, params.max_threads, + params.num_of_sequences, params.sequence_length, + params.shared_memory_type, params.output_shm_size, + params.start_sequence_id, params.sequence_id_range, parser, + GetFactory()) + { + } + + // Mocked version of the CopySharedMemory method in loadmanager. + // This is strictly for testing to mock out the memcpy calls + // + cb::Error CopySharedMemory( + uint8_t* input_shm_ptr, std::vector& data_ptrs, + std::vector& byte_size, bool is_shape_tensor, + std::string& region_name) override + { + return cb::Error::Success; } std::shared_ptr MakeWorker( @@ -303,6 +337,15 @@ class TestRequestRateManager : public TestLoadManagerBase, CheckSequences(params_.num_of_sequences); } + /// Test that the shared memory methods are called correctly + /// + void TestSharedMemory(uint request_rate, uint duration_ms) + { + ChangeRequestRate(request_rate); + std::this_thread::sleep_for(std::chrono::milliseconds(duration_ms)); + StopWorkerThreads(); + } + std::shared_ptr& parser_{LoadManager::parser_}; std::shared_ptr& data_loader_{LoadManager::data_loader_}; bool& using_json_data_{LoadManager::using_json_data_}; @@ -317,6 +360,7 @@ class TestRequestRateManager : public TestLoadManagerBase, size_t& max_threads_{LoadManager::max_threads_}; bool& async_{LoadManager::async_}; bool& streaming_{LoadManager::streaming_}; + bool& using_shared_memory_{LoadManager::using_shared_memory_}; std::uniform_int_distribution& distribution_{ LoadManager::distribution_}; @@ -639,4 +683,109 @@ TEST_CASE( CHECK(recorded_inputs[3][0].second == 4); } +/// Check that the using_shared_memory_ is being set correctly +/// +TEST_CASE("Check setting of InitSharedMemory") +{ + PerfAnalyzerParameters params; + bool is_sequence = false; + bool is_decoupled = false; + bool use_mock_infer = true; + + SUBCASE("No shared memory") + { + params.shared_memory_type = NO_SHARED_MEMORY; + TestRequestRateManager trrm( + params, is_sequence, is_decoupled, use_mock_infer); + CHECK(false == trrm.using_shared_memory_); + } + + SUBCASE("System shared memory") + { + params.shared_memory_type = SYSTEM_SHARED_MEMORY; + TestRequestRateManager trrm( + params, is_sequence, is_decoupled, use_mock_infer); + CHECK(true == trrm.using_shared_memory_); + } +} + +/// Verify Shared Memory api calls +/// +TEST_CASE("Shared memory methods") +{ + PerfAnalyzerParameters params; + bool is_sequence = false; + bool is_decoupled = false; + bool use_mock_infer = true; + uint request_rate = 500; + uint duration_ms = 1000; + + std::shared_ptr mmp{ + std::make_shared(false, false)}; + ModelTensor model_tensor{}; + model_tensor.datatype_ = "INT32"; + model_tensor.is_optional_ = false; + model_tensor.is_shape_tensor_ = false; + model_tensor.name_ = "INPUT0"; + model_tensor.shape_ = {1}; + mmp->inputs_ = std::make_shared(); + (*mmp->inputs_)[model_tensor.name_] = model_tensor; + + std::shared_ptr mdl{std::make_shared()}; + const std::string json_str{R"( + { + "data": [ + { + "INPUT0": [2123456789] + } + ] + } + )"}; + + mdl->ReadDataFromStr(json_str, mmp->Inputs(), mmp->Outputs()); + + cb::MockClientStats::SharedMemoryStats expected_stats; + + SUBCASE("System shared memory usage") + { + params.shared_memory_type = SYSTEM_SHARED_MEMORY; + TestRequestRateManager trrm( + params, mmp, is_sequence, is_decoupled, use_mock_infer); + trrm.InitManager( + params.string_length, params.string_data, params.zero_input, + params.user_data); + + expected_stats.num_unregister_all_shared_memory_calls = 1; + expected_stats.num_register_system_shared_memory_calls = 1; + expected_stats.num_create_shared_memory_region_calls = 1; + expected_stats.num_map_shared_memory_calls = 1; + trrm.CheckSharedMemory(expected_stats); + } + + SUBCASE("Cuda shared memory usage") + { + params.shared_memory_type = CUDA_SHARED_MEMORY; + TestRequestRateManager trrm( + params, mmp, is_sequence, is_decoupled, use_mock_infer); + trrm.InitManager( + params.string_length, params.string_data, params.zero_input, + params.user_data); + + expected_stats.num_unregister_all_shared_memory_calls = 1; + expected_stats.num_register_cuda_shared_memory_calls = 1; + trrm.CheckSharedMemory(expected_stats); + } + + SUBCASE("No shared memory usage") + { + params.shared_memory_type = NO_SHARED_MEMORY; + TestRequestRateManager trrm( + params, mmp, is_sequence, is_decoupled, use_mock_infer); + trrm.InitManager( + params.string_length, params.string_data, params.zero_input, + params.user_data); + + trrm.CheckSharedMemory(expected_stats); + } +} }} // namespace triton::perfanalyzer