diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..9b4ed4c49 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @stanleytsang-amd @umfranzw @RobsonRLemos @lawruble13 diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 9cdf2d670..0e0a252eb 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -6,7 +6,13 @@ version: 2 updates: - package-ecosystem: "pip" # See documentation for possible values - directory: "/docs/.sphinx" # Location of package manifests + directory: "/docs/sphinx" # Location of package manifests open-pull-requests-limit: 10 schedule: interval: "daily" + labels: + - "documentation" + - "dependencies" + - "ci:docs-only" + reviewers: + - "samjwu" diff --git a/.gitignore b/.gitignore index 42ae29137..24bb3fafb 100644 --- a/.gitignore +++ b/.gitignore @@ -4,12 +4,6 @@ build*/ ### clangd. ### /.cache -### Docs dirs ### -doc/html/ -doc/xml/ -doc/latex/ -doc/*.tag - # Created by https://www.gitignore.io/api/c++,cmake ### C++ ### diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 4192a0418..9e6678abe 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,7 +10,7 @@ formats: [htmlzip, pdf, epub] python: install: - - requirements: docs/.sphinx/requirements.txt + - requirements: docs/sphinx/requirements.txt build: os: ubuntu-22.04 diff --git a/CHANGELOG.md b/CHANGELOG.md index d903ab5a2..baefd45a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ Documentation for rocPRIM is available at [https://rocm.docs.amd.com/projects/rocPRIM/en/latest/](https://rocm.docs.amd.com/projects/rocPRIM/en/latest/). 
-## [Unreleased rocPRIM-3.0.0 for ROCm 6.1.0] +## Unreleased rocPRIM-3.1.0 for ROCm 6.1.0 ### Additions @@ -35,48 +35,28 @@ Documentation for rocPRIM is available at * Build issues with `rmake.py` on Windows when using VS 2017 15.8 or later (due to a breaking fix with extended aligned storage) -## rocPRIM-2.13.1 for ROCm 5.7.0 +## rocPRIM-3.0.0 for ROCm 6.0.0 ### Additions +- `block_sort::sort()` overload for keys and values with a dynamic size, for all block sort algorithms. Additionally, all `block_sort::sort()` overloads with a dynamic size are now supported for `block_sort_algorithm::merge_sort` and `block_sort_algorithm::bitonic_sort`. +- New two-way partition primitive `partition_two_way` which can write to two separate iterators. -* `block_sort::sort()` overload for keys and values with a dynamic size, for all block sort algorithms - * All `block_sort::sort()` overloads with a dynamic size are now supported for - `block_sort_algorithm::merge_sort` and `block_sort_algorithm::bitonic_sort` -* New two-way partition primitive (`partition_two_way`) that can write to two separate iterators -* Added config tuning and dynamic dispatch to the `device_adjacent_difference` algorithm -* New `rocprim::group_elect` warp intrinsic, which chooses one lane from the lanes enabled by a mask - -### Changes +### Optimizations +- Improved the performance of `partition`. 
-* Removed erroneous implementation of device-level `inclusive_scan` and `exclusive_scan` (the prior - default implementation that uses `lookback-scan` is now the only available implementation) -* The benchmark metric indicating the bytes processed for `exclusive_scan_by_key` and - `inclusive_scan_by_key` has been changed to incorporate the key type; the benchmark log has been - changed so that these algorithms are reported as `scan` and `scan_by_key` instead of - `scan_exclusive` and `scan_inclusive` -* Improved the performance of `partition` -* `merge_sort_block_sort` always uses stable merge sort because it's faster than the fallback - implementation -* The `rocprim::match_any` interface has a new parameter (`valid`) to enable and disable lanes; the - default value is true, so it doesn't change the previous behavior +### Fixes +- Fixed `rocprim::MatchAny` for devices with 64-bit warp size. The function `rocprim::MatchAny` is deprecated and `rocprim::match_any` is preferred instead. -### Deprecations +## rocPRIM-2.13.1 for ROCm 5.7.0 -* `radix_sort_config` for device-level Radix Sort (it no longer matches the algorithm's parameters); use - `radix_sort_config_v2` instead -* `scan_config` and `scan_by_key_config` for device-level scans (they no longer match the algorithm's - parameters); use`scan_config_v2` and `scan_by_key_config_v2` instead) +### Changes +- Deprecated configuration `radix_sort_config` for device-level radix sort as it no longer matches the algorithm's parameters. New configuration `radix_sort_config_v2` is preferred instead. +- Removed erroneous implementation of device-level `inclusive_scan` and `exclusive_scan`. The prior default implementation using lookback-scan now is the only available implementation. +- The benchmark metric indicating the bytes processed for `exclusive_scan_by_key` and `inclusive_scan_by_key` has been changed to incorporate the key type. 
Furthermore, the benchmark log has been changed such that these algorithms are reported as `scan` and `scan_by_key` instead of `scan_exclusive` and `scan_inclusive`. +- Deprecated configurations `scan_config` and `scan_by_key_config` for device-level scans, as they no longer match the algorithm's parameters. New configurations `scan_config_v2` and `scan_by_key_config_v2` are preferred instead. ### Fixes - -* Build issue caused by a missing header in `thread/thread_search.hpp` -* `rocprim::MatchAny` for devices with 64-bit warp size (`rocprim::MatchAny` is deprecated and is - replaced with `rocprim::match_any`) -* Fixed `device_adjacent_difference` using more shared memory than required -* Fixed a compilation error when `ROCPRIM_DISABLE_DPP` is defined -* Improved robustness for detecting GPU architecture features - * Explicitly listing each architecture is no longer required - * Fixed compilation failures when targeting devices are not known by rocPRIM +- Fixed build issue caused by missing header in `thread/thread_search.hpp`. ## rocPRIM-2.13.0 for ROCm 5.5.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index 40c8d8cf9..b2725db26 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -114,7 +114,7 @@ if(USE_HIP_CPU) endif() # Setup VERSION -set(VERSION_STRING "2.13.1") +set(VERSION_STRING "3.1.0") rocm_setup_version(VERSION ${VERSION_STRING}) # Print configuration summary diff --git a/README.md b/README.md index c9abe5910..ec23889eb 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ develop performant GPU-accelerated code on AMD ROCm platforms. 
* CMake (3.16 or later) * AMD [ROCm](https://rocm.docs.amd.com/en/latest/) platform (1.8.2 or later) * Including - [HIP-clang](https://github.com/ROCm-Developer-Tools/HIP/blob/master/INSTALL.md#hip-clang) + [HIP-clang](https://github.com/ROCm/HIP/blob/master/INSTALL.md#hip-clang) compiler * C++14 * Python 3.6 or higher (HIP on Windows only, required only for install script) @@ -37,7 +37,7 @@ To build our documentation locally, use the following code: cd rocPRIM; cd docs # Install Python dependencies -python3 -m pip install -r .sphinx/requirements.txt +python3 -m pip install -r sphinx/requirements.txt # Build the documentation python3 -m sphinx -T -E -b html -d _build/doctrees -D language=en . _build/html @@ -54,7 +54,7 @@ You can build and install rocPRIM on Linux or Windows. * Linux: ```shell - git clone https://github.com/ROCmSoftwarePlatform/rocPRIM.git + git clone https://github.com/ROCm/rocPRIM.git # Go to rocPRIM directory, create and go to the build directory. cd rocPRIM; mkdir build; cd build @@ -101,7 +101,7 @@ You can build and install rocPRIM on Linux or Windows. We've added initial support for HIP on Windows; to install, use the provided `rmake.py` python script: ```shell - git clone https://github.com/ROCmSoftwarePlatform/rocPRIM.git + git clone https://github.com/ROCm/rocPRIM.git cd rocPRIM # the -i option will install rocPRIM to C:\hipSDK by default @@ -289,17 +289,17 @@ algorithms optimized for gfx803 GCN version, or to `900` for gfx900. ## hipCUB -[hipCUB](https://github.com/ROCmSoftwarePlatform/hipCUB/) is a thin wrapper library on top of -[rocPRIM](https://github.com/ROCmSoftwarePlatform/rocPRIM) or +[hipCUB](https://github.com/ROCm/hipCUB/) is a thin wrapper library on top of +[rocPRIM](https://github.com/ROCm/rocPRIM) or [CUB](https://github.com/NVlabs/cub). You can use it to port projects that use the CUB library to the -[HIP](https://github.com/ROCm-Developer-Tools/HIP) layer and run them on AMD hardware. 
In the +[HIP](https://github.com/ROCm/HIP) layer and run them on AMD hardware. In the [ROCm](https://rocm.docs.amd.com/en/latest/) environment, hipCUB uses the rocPRIM library as a backend; on CUDA platforms, it uses CUB as a backend. ## Support You can report bugs and feature requests through our GitHub -[issue tracker](https://github.com/ROCmSoftwarePlatform/rocPRIM/issues). +[issue tracker](https://github.com/ROCm/rocPRIM/issues). ## Contributions and license diff --git a/benchmark/benchmark_block_sort.parallel.hpp b/benchmark/benchmark_block_sort.parallel.hpp index 9bb56186a..dfdc6e5b9 100644 --- a/benchmark/benchmark_block_sort.parallel.hpp +++ b/benchmark/benchmark_block_sort.parallel.hpp @@ -187,11 +187,11 @@ struct block_sort_benchmark : public config_autotune_interface static constexpr unsigned int warmup_size = 5; static constexpr bool debug_synchronous = false; - auto dispatch_block_sort(std::false_type /*stable_sort*/, + static auto dispatch_block_sort(std::false_type /*stable_sort*/, size_t size, const hipStream_t stream, KeyType* d_input, - KeyType* d_output) const + KeyType* d_output) { hipLaunchKernelGGL( HIP_KERNEL_NAME( @@ -204,11 +204,11 @@ struct block_sort_benchmark : public config_autotune_interface d_output); } - auto dispatch_block_sort(std::true_type /*stable_sort*/, + static auto dispatch_block_sort(std::true_type /*stable_sort*/, size_t size, const hipStream_t stream, KeyType* d_input, - KeyType* d_output) const + KeyType* d_output) { hipLaunchKernelGGL(HIP_KERNEL_NAME(stable_sort_kernel struct operation { ROCPRIM_HOST_DEVICE inline - void operator()(T (&)[ItemsPerThread], void* = nullptr, unsigned int = 0, T* = nullptr) + void operator()(T (&)[ItemsPerThread], void* = nullptr, unsigned int = 0, T* = nullptr) const { // No operation } @@ -80,7 +80,7 @@ struct operation ROCPRIM_HOST_DEVICE inline void operator()(T (&input)[ItemsPerThread], void* shared_storage = nullptr, unsigned int shared_storage_size = 0, - T* global_mem_output = 
nullptr) + T* global_mem_output = nullptr) const { (void) shared_storage; (void) shared_storage_size; @@ -105,7 +105,7 @@ struct operation ROCPRIM_HOST_DEVICE inline void operator()(T (&input)[ItemsPerThread], void* shared_storage = nullptr, unsigned int shared_storage_size = 0, - T* global_mem_output = nullptr) + T* global_mem_output = nullptr) const { (void) global_mem_output; using block_scan_type = typename rocprim::block_scan< @@ -419,19 +419,10 @@ void run_benchmark_memcpy(benchmark::State& state, size_t size, const hipStream_t stream) { - std::vector input; - if(std::is_floating_point::value) - { - input = get_random_data(size, (T)-1000, (T)+1000); - } - else - { - input = get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } + // Allocate device buffers + // Note: since this benchmark only tests performance by memcpying between device buffers, + // we don't really need to transfer data into these from the host - whatever happens + // to be in device memory will do. 
T * d_input; T * d_output; HIP_CHECK(hipMalloc(reinterpret_cast(&d_input), size * sizeof(T))); diff --git a/benchmark/benchmark_device_radix_sort_onesweep.parallel.hpp b/benchmark/benchmark_device_radix_sort_onesweep.parallel.hpp index 82fd2faff..02320e17b 100644 --- a/benchmark/benchmark_device_radix_sort_onesweep.parallel.hpp +++ b/benchmark/benchmark_device_radix_sort_onesweep.parallel.hpp @@ -49,6 +49,8 @@ constexpr const char* radix_rank_algorithm_name(rp::block_radix_rank_algorithm a return "block_radix_rank_algorithm::basic_memoize"; case rp::block_radix_rank_algorithm::match: return "block_radix_rank_algorithm::match"; } + + return ""; // unknown algorithm } template @@ -403,7 +405,7 @@ struct device_radix_sort_onesweep_benchmark_generator RadixRankAlgorithm, std::enable_if_t<(!is_buildable())>> { - void operator()(std::vector>&) {} + void operator()(std::vector>&) const {} }; template diff --git a/benchmark/benchmark_device_select.cpp b/benchmark/benchmark_device_select.cpp index 79cc025cf..a0cff17cc 100644 --- a/benchmark/benchmark_device_select.cpp +++ b/benchmark/benchmark_device_select.cpp @@ -51,7 +51,6 @@ void run_flagged_benchmark(benchmark::State& state, { std::vector input; std::vector flags = get_random_data01(size, true_probability); - std::vector selected_count_output(1); if(std::is_floating_point::value) { input = get_random_data(size, T(-1000), T(1000)); @@ -181,7 +180,6 @@ void run_selectop_benchmark(benchmark::State& state, float true_probability) { std::vector input = get_random_data(size, T(0), T(1000)); - std::vector selected_count_output(1); auto select_op = [true_probability] __device__ (const T& value) -> bool { @@ -308,7 +306,6 @@ void run_unique_benchmark(benchmark::State& state, input[i] = op(acc, input01[i]); } } - std::vector selected_count_output(1); auto equality_op = rocprim::equal_to(); T * d_input; diff --git a/benchmark/benchmark_utils.hpp b/benchmark/benchmark_utils.hpp index fe6b9a077..2d67ff3fa 100644 --- 
a/benchmark/benchmark_utils.hpp +++ b/benchmark/benchmark_utils.hpp @@ -467,7 +467,7 @@ struct bench_naming static format storage = human; return storage; } - static void set_format(std::string argument) + static void set_format(const std::string& argument) { format result = human; if(argument == "json") diff --git a/benchmark/benchmark_warp_exchange.cpp b/benchmark/benchmark_warp_exchange.cpp index 2abbe389d..997c2e357 100644 --- a/benchmark/benchmark_warp_exchange.cpp +++ b/benchmark/benchmark_warp_exchange.cpp @@ -52,7 +52,7 @@ struct BlockedToStripedOp ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void operator()(warp_exchange_type warp_exchange, T (&items)[ItemsPerThread], - typename warp_exchange_type::storage_type& storage) + typename warp_exchange_type::storage_type& storage) const { warp_exchange.blocked_to_striped(items, items, storage); } @@ -68,7 +68,7 @@ struct StripedToBlockedOp ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void operator()(warp_exchange_type warp_exchange, T (&items)[ItemsPerThread], - typename warp_exchange_type::storage_type& storage) + typename warp_exchange_type::storage_type& storage) const { warp_exchange.striped_to_blocked(items, items, storage); } @@ -84,7 +84,7 @@ struct BlockedToStripedShuffleOp ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void operator()(warp_exchange_type warp_exchange, T (&items)[ItemsPerThread], - typename warp_exchange_type::storage_type& /*storage*/) + typename warp_exchange_type::storage_type& /*storage*/) const { warp_exchange.blocked_to_striped_shuffle(items, items); } @@ -100,7 +100,7 @@ struct StripedToBlockedShuffleOp ROCPRIM_DEVICE ROCPRIM_FORCE_INLINE void operator()(warp_exchange_type warp_exchange, T (&items)[ItemsPerThread], - typename warp_exchange_type::storage_type& /*storage*/) + typename warp_exchange_type::storage_type& /*storage*/) const { warp_exchange.striped_to_blocked_shuffle(items, items); } @@ -118,7 +118,7 @@ struct ScatterToStripedOp void operator()(warp_exchange_type warp_exchange, T 
(&thread_data)[ItemsPerThread], const OffsetT (&ranks)[ItemsPerThread], - typename warp_exchange_type::storage_type& storage) + typename warp_exchange_type::storage_type& storage) const { warp_exchange.scatter_to_striped(thread_data, thread_data, ranks, storage); } diff --git a/benchmark/cmdparser.hpp b/benchmark/cmdparser.hpp index 0dfc73ca2..ffee10ecb 100644 --- a/benchmark/cmdparser.hpp +++ b/benchmark/cmdparser.hpp @@ -96,7 +96,7 @@ namespace cli { CmdBase(name, alternative, description, required, dominant, ArgumentCountChecker::Variadic) { } - virtual bool parse(std::ostream& output, std::ostream& error) { + virtual bool parse(std::ostream& output, std::ostream& error) override { try { CallbackArgs args { arguments, output, error }; value = callback(args); @@ -106,7 +106,7 @@ namespace cli { } } - virtual std::string print_value() const { + virtual std::string print_value() const override { return ""; } @@ -118,10 +118,10 @@ namespace cli { class CmdArgument final : public CmdBase { public: explicit CmdArgument(const std::string& name, const std::string& alternative, const std::string& description, bool required, bool dominant) : - CmdBase(name, alternative, description, required, dominant, ArgumentCountChecker::Variadic) { + CmdBase(name, alternative, description, required, dominant, ArgumentCountChecker::Variadic), value(T()) { } - virtual bool parse(std::ostream&, std::ostream&) { + virtual bool parse(std::ostream&, std::ostream&) override { try { value = Parser::parse(arguments, value); return true; @@ -130,7 +130,7 @@ namespace cli { } } - virtual std::string print_value() const { + virtual std::string print_value() const override { return stringify(value); } @@ -306,7 +306,7 @@ namespace cli { } template - void set_optional(const std::string& name, const std::string& alternative, T defaultValue, const std::string& description = "", bool dominant = false) { + void set_optional(const std::string& name, const std::string& alternative, const T& defaultValue, 
const std::string& description = "", bool dominant = false) { auto command = new CmdArgument { name, alternative, description, false, dominant }; command->value = defaultValue; _commands.push_back(command); diff --git a/docs/.gitignore b/docs/.gitignore index ff4315605..08e9fe6fa 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,9 +1,6 @@ /_build/ /_doxygen/ -/_images/ -/_static/ -/_templates/ -/.doxygen/docBin -/.doxygen/rocPRIM.tag -/.sphinx/_toc.yml -/api +/doxygen/html +/doxygen/xml +/doxygen/*.tag +/sphinx/_toc.yml diff --git a/docs/.sphinx/requirements.in b/docs/.sphinx/requirements.in deleted file mode 100644 index 781cd3ac3..000000000 --- a/docs/.sphinx/requirements.in +++ /dev/null @@ -1 +0,0 @@ -rocm-docs-core>=0.20.0 diff --git a/docs/conf.py b/docs/conf.py index 63415f374..d70124f3c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,12 +4,32 @@ # list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html +import re + from rocm_docs import ROCmDocs -docs_core = ROCmDocs("rocPRIM Documentation") -docs_core.run_doxygen() +with open('../CMakeLists.txt', encoding='utf-8') as f: + match = re.search(r'.*\bset\(VERSION_STRING\s+\"?([0-9.]+)[^0-9.]+', f.read()) + if not match: + raise ValueError("VERSION not found!") + version_number = match[1] +left_nav_title = f"rocPRIM {version_number} Documentation" + +# for PDF output on Read the Docs +project = "rocPRIM Documentation" +author = "Advanced Micro Devices, Inc." +copyright = "Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved." 
+version = version_number +release = version_number + +external_toc_path = "./sphinx/_toc.yml" + +docs_core = ROCmDocs(left_nav_title) +docs_core.run_doxygen(doxygen_root="doxygen", doxygen_path="doxygen/xml") docs_core.setup() +external_projects_current_project = "rocprim" + for sphinx_var in ROCmDocs.SPHINX_VARS: globals()[sphinx_var] = getattr(docs_core, sphinx_var) diff --git a/docs/.doxygen/Doxyfile b/docs/doxygen/Doxyfile similarity index 99% rename from docs/.doxygen/Doxyfile rename to docs/doxygen/Doxyfile index a7950c5ac..fe2180aa8 100644 --- a/docs/.doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -58,7 +58,7 @@ PROJECT_LOGO = # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. -OUTPUT_DIRECTORY = docBin +OUTPUT_DIRECTORY = . # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and diff --git a/docs/.doxygen/blockmodule.dox b/docs/doxygen/blockmodule.dox similarity index 100% rename from docs/.doxygen/blockmodule.dox rename to docs/doxygen/blockmodule.dox diff --git a/docs/.doxygen/devicemodule.dox b/docs/doxygen/devicemodule.dox similarity index 100% rename from docs/.doxygen/devicemodule.dox rename to docs/doxygen/devicemodule.dox diff --git a/docs/.doxygen/glossary.dox b/docs/doxygen/glossary.dox similarity index 100% rename from docs/.doxygen/glossary.dox rename to docs/doxygen/glossary.dox diff --git a/docs/.doxygen/intrinsicsmodule.dox b/docs/doxygen/intrinsicsmodule.dox similarity index 100% rename from docs/.doxygen/intrinsicsmodule.dox rename to docs/doxygen/intrinsicsmodule.dox diff --git a/docs/.doxygen/iteratormodule.dox b/docs/doxygen/iteratormodule.dox similarity index 100% rename from docs/.doxygen/iteratormodule.dox rename to docs/doxygen/iteratormodule.dox diff --git a/docs/.doxygen/mainpage.dox b/docs/doxygen/mainpage.dox similarity index 100% rename from 
docs/.doxygen/mainpage.dox rename to docs/doxygen/mainpage.dox diff --git a/docs/.doxygen/primitivesmodule.dox b/docs/doxygen/primitivesmodule.dox similarity index 100% rename from docs/.doxygen/primitivesmodule.dox rename to docs/doxygen/primitivesmodule.dox diff --git a/docs/.doxygen/utilsmodule.dox b/docs/doxygen/utilsmodule.dox similarity index 100% rename from docs/.doxygen/utilsmodule.dox rename to docs/doxygen/utilsmodule.dox diff --git a/docs/.doxygen/warpmodule.dox b/docs/doxygen/warpmodule.dox similarity index 100% rename from docs/.doxygen/warpmodule.dox rename to docs/doxygen/warpmodule.dox diff --git a/docs/license.rst b/docs/license.rst new file mode 100644 index 000000000..60fbe8594 --- /dev/null +++ b/docs/license.rst @@ -0,0 +1,4 @@ +License +======= + +.. include:: ../LICENSE.txt diff --git a/docs/.sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in similarity index 87% rename from docs/.sphinx/_toc.yml.in rename to docs/sphinx/_toc.yml.in index 8b76e5609..48466c9b1 100644 --- a/docs/.sphinx/_toc.yml.in +++ b/docs/sphinx/_toc.yml.in @@ -12,3 +12,6 @@ subtrees: - file: iterators - file: intrinsics - file: glossary + - caption: About + entries: + - file: license diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in new file mode 100644 index 000000000..2dc317aca --- /dev/null +++ b/docs/sphinx/requirements.in @@ -0,0 +1 @@ +rocm-docs-core==0.31.0 diff --git a/docs/.sphinx/requirements.txt b/docs/sphinx/requirements.txt similarity index 98% rename from docs/.sphinx/requirements.txt rename to docs/sphinx/requirements.txt index 012d0392f..279bb23e1 100644 --- a/docs/.sphinx/requirements.txt +++ b/docs/sphinx/requirements.txt @@ -40,7 +40,7 @@ fastjsonschema==2.16.3 # via rocm-docs-core gitdb==4.0.10 # via gitpython -gitpython==3.1.37 +gitpython==3.1.41 # via rocm-docs-core idna==3.4 # via requests @@ -50,7 +50,7 @@ importlib-metadata==6.8.0 # via sphinx importlib-resources==6.1.0 # via rocm-docs-core -jinja2==3.1.2 +jinja2==3.1.3 # via # 
myst-parser # sphinx @@ -100,7 +100,7 @@ requests==2.31.0 # via # pygithub # sphinx -rocm-docs-core==0.30.0 +rocm-docs-core==0.31.0 # via -r requirements.in smmap==5.0.0 # via gitdb diff --git a/rocprim/include/rocprim/thread/thread_load.hpp b/rocprim/include/rocprim/thread/thread_load.hpp index 1f22b7da9..d10f6cf2c 100644 --- a/rocprim/include/rocprim/thread/thread_load.hpp +++ b/rocprim/include/rocprim/thread/thread_load.hpp @@ -90,11 +90,16 @@ ROCPRIM_DEVICE __forceinline__ T AsmThreadLoad(void * ptr) // [HIP-CPU] MSVC: erronous inline assembly specification (Triggers error C2059: syntax error: 'volatile') #ifndef __HIP_CPU_RT__ -#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) +#if defined(__gfx940__) || defined(__gfx941__) ROCPRIM_ASM_THREAD_LOAD_GROUP(load_ca, "sc0", ""); ROCPRIM_ASM_THREAD_LOAD_GROUP(load_cg, "sc1", ""); ROCPRIM_ASM_THREAD_LOAD_GROUP(load_cv, "sc0 sc1", "vmcnt"); ROCPRIM_ASM_THREAD_LOAD_GROUP(load_volatile, "sc0 sc1", "vmcnt"); +#elif defined(__gfx942__) +ROCPRIM_ASM_THREAD_LOAD_GROUP(load_ca, "sc0", ""); +ROCPRIM_ASM_THREAD_LOAD_GROUP(load_cg, "sc0 nt", ""); +ROCPRIM_ASM_THREAD_LOAD_GROUP(load_cv, "sc0", "vmcnt"); +ROCPRIM_ASM_THREAD_LOAD_GROUP(load_volatile, "sc0", "vmcnt"); #else ROCPRIM_ASM_THREAD_LOAD_GROUP(load_ca, "glc", ""); ROCPRIM_ASM_THREAD_LOAD_GROUP(load_cg, "glc slc", ""); diff --git a/rocprim/include/rocprim/thread/thread_store.hpp b/rocprim/include/rocprim/thread/thread_store.hpp index d03173b4d..917f6bf01 100644 --- a/rocprim/include/rocprim/thread/thread_store.hpp +++ b/rocprim/include/rocprim/thread/thread_store.hpp @@ -92,11 +92,16 @@ ROCPRIM_DEVICE __forceinline__ void AsmThreadStore(void * ptr, T val) // [HIP-CPU] MSVC: erronous inline assembly specification (Triggers error C2059: syntax error: 'volatile') #ifndef __HIP_CPU_RT__ -#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) -ROCPRIM_ASM_THREAD_STORE_GROUP(store_wb, "sc0 sc1", ""); // TODO: gfx942 validation +#if 
defined(__gfx940__) || defined(__gfx941__) +ROCPRIM_ASM_THREAD_STORE_GROUP(store_wb, "sc0 sc1", ""); ROCPRIM_ASM_THREAD_STORE_GROUP(store_cg, "sc0 sc1", ""); ROCPRIM_ASM_THREAD_STORE_GROUP(store_wt, "sc0 sc1", "vmcnt"); ROCPRIM_ASM_THREAD_STORE_GROUP(store_volatile, "sc0 sc1", "vmcnt"); +#elif defined(__gfx942__) +ROCPRIM_ASM_THREAD_STORE_GROUP(store_wb, "sc0", ""); +ROCPRIM_ASM_THREAD_STORE_GROUP(store_cg, "sc0 nt", ""); +ROCPRIM_ASM_THREAD_STORE_GROUP(store_wt, "sc0", "vmcnt"); +ROCPRIM_ASM_THREAD_STORE_GROUP(store_volatile, "sc0", "vmcnt"); #else ROCPRIM_ASM_THREAD_STORE_GROUP(store_wb, "glc", ""); ROCPRIM_ASM_THREAD_STORE_GROUP(store_cg, "glc slc", ""); diff --git a/test/hip/test_hip_async_copy.cpp b/test/hip/test_hip_async_copy.cpp index ad74657c6..20ea96b68 100644 --- a/test/hip/test_hip_async_copy.cpp +++ b/test/hip/test_hip_async_copy.cpp @@ -51,12 +51,12 @@ struct PinnedAllocator HIP_CHECK(hipHostFree(ptr)); } - bool operator==(const PinnedAllocator&) + bool operator==(const PinnedAllocator&) const { return true; } - bool operator!=(const PinnedAllocator& other) + bool operator!=(const PinnedAllocator& other) const { return !(*this == other); } diff --git a/test/rocprim/bounds_checking_iterator.hpp b/test/rocprim/bounds_checking_iterator.hpp index bde07ebe8..fd2568a66 100644 --- a/test/rocprim/bounds_checking_iterator.hpp +++ b/test/rocprim/bounds_checking_iterator.hpp @@ -91,7 +91,7 @@ class bounds_checking_iterator } ROCPRIM_HOST_DEVICE inline - reference operator[](difference_type n) const + reference operator[](const difference_type& n) const { if(((ptr_ + n) < start_ptr_) || ((ptr_ + n) >= start_ptr_ + size_)) { @@ -101,28 +101,28 @@ class bounds_checking_iterator } ROCPRIM_HOST_DEVICE inline - bounds_checking_iterator operator+(difference_type distance) const + bounds_checking_iterator operator+(const difference_type& distance) const { auto i = ptr_ + distance; return bounds_checking_iterator(i, start_ptr_, out_of_bounds_flag_, size_); } 
ROCPRIM_HOST_DEVICE inline - bounds_checking_iterator& operator+=(difference_type distance) + bounds_checking_iterator& operator+=(const difference_type& distance) { ptr_ += distance; return *this; } ROCPRIM_HOST_DEVICE inline - bounds_checking_iterator operator-(difference_type distance) const + bounds_checking_iterator operator-(const difference_type& distance) const { auto i = ptr_ - distance; return bounds_checking_iterator(i, start_ptr_, out_of_bounds_flag_, size_); } ROCPRIM_HOST_DEVICE inline - bounds_checking_iterator& operator-=(difference_type distance) + bounds_checking_iterator& operator-=(const difference_type& distance) { ptr_ -= distance; return *this; diff --git a/test/rocprim/identity_iterator.hpp b/test/rocprim/identity_iterator.hpp index 5144da2b9..f5f6c9a3d 100644 --- a/test/rocprim/identity_iterator.hpp +++ b/test/rocprim/identity_iterator.hpp @@ -83,34 +83,34 @@ class identity_iterator } ROCPRIM_HOST_DEVICE inline - reference operator[](difference_type n) const + reference operator[](const difference_type& n) const { return *(ptr_ + n); } ROCPRIM_HOST_DEVICE inline - identity_iterator operator+(difference_type distance) const + identity_iterator operator+(const difference_type& distance) const { auto i = ptr_ + distance; return identity_iterator(i); } ROCPRIM_HOST_DEVICE inline - identity_iterator& operator+=(difference_type distance) + identity_iterator& operator+=(const difference_type& distance) { ptr_ += distance; return *this; } ROCPRIM_HOST_DEVICE inline - identity_iterator operator-(difference_type distance) const + identity_iterator operator-(const difference_type& distance) const { auto i = ptr_ - distance; return identity_iterator(i); } ROCPRIM_HOST_DEVICE inline - identity_iterator& operator-=(difference_type distance) + identity_iterator& operator-=(const difference_type& distance) { ptr_ -= distance; return *this; diff --git a/test/rocprim/test_block_discontinuity.kernels.hpp b/test/rocprim/test_block_discontinuity.kernels.hpp 
index 5849ac74c..0f2f79dd8 100644 --- a/test/rocprim/test_block_discontinuity.kernels.hpp +++ b/test/rocprim/test_block_discontinuity.kernels.hpp @@ -27,7 +27,7 @@ template struct custom_flag_op1 { ROCPRIM_HOST_DEVICE - bool operator()(const T& a, const T& b, unsigned int b_index) + bool operator()(const T& a, const T& b, unsigned int b_index) const { return (a == b) || (b_index % 10 == 0); } diff --git a/test/rocprim/test_block_reduce.kernels.hpp b/test/rocprim/test_block_reduce.kernels.hpp index b7257605f..7e42c102c 100644 --- a/test/rocprim/test_block_reduce.kernels.hpp +++ b/test/rocprim/test_block_reduce.kernels.hpp @@ -145,7 +145,7 @@ struct static_run_valid { static void run(std::vector& output, std::vector& output_reductions, - std::vector& expected_reductions, + const std::vector& expected_reductions, T* device_output, T* device_output_reductions, const unsigned int valid_items, diff --git a/test/rocprim/test_device_adjacent_difference.cpp b/test/rocprim/test_device_adjacent_difference.cpp index 158315fd1..522eaa5b7 100644 --- a/test/rocprim/test_device_adjacent_difference.cpp +++ b/test/rocprim/test_device_adjacent_difference.cpp @@ -387,11 +387,11 @@ class check_output_iterator : current_index_(0), incorrect_flag_(incorrect_flag), counter_(counter) {} - __device__ bool operator==(const check_output_iterator& rhs) + __device__ bool operator==(const check_output_iterator& rhs) const { return current_index_ == rhs.current_index_; } - __device__ bool operator!=(const check_output_iterator& rhs) + __device__ bool operator!=(const check_output_iterator& rhs) const { return !(*this == rhs); } @@ -399,7 +399,7 @@ class check_output_iterator { return reference(incorrect_flag_, current_index_, counter_); } - __device__ reference operator[](const difference_type distance) + __device__ reference operator[](const difference_type distance) const { return *(*this + distance); } diff --git a/test/rocprim/test_device_partition.cpp b/test/rocprim/test_device_partition.cpp 
index 42ef2bb57..44a2c92bc 100644 --- a/test/rocprim/test_device_partition.cpp +++ b/test/rocprim/test_device_partition.cpp @@ -871,11 +871,11 @@ class check_modulo_iterator : current_index_(0), modulo_(modulo), size_(size), incorrect_flag_(incorrect_flag) {} - ROCPRIM_HOST_DEVICE bool operator==(const check_modulo_iterator& rhs) + ROCPRIM_HOST_DEVICE bool operator==(const check_modulo_iterator& rhs) const { return current_index_ == rhs.current_index_; } - ROCPRIM_HOST_DEVICE bool operator!=(const check_modulo_iterator& rhs) + ROCPRIM_HOST_DEVICE bool operator!=(const check_modulo_iterator& rhs) const { return !(*this == rhs); } @@ -883,16 +883,16 @@ class check_modulo_iterator { return value_type(current_index_, modulo_, size_, incorrect_flag_); } - ROCPRIM_HOST_DEVICE reference operator[](const difference_type distance) const + ROCPRIM_HOST_DEVICE reference operator[](const difference_type& distance) const { return *(*this + distance); } - ROCPRIM_HOST_DEVICE check_modulo_iterator& operator+=(const difference_type rhs) + ROCPRIM_HOST_DEVICE check_modulo_iterator& operator+=(const difference_type& rhs) { current_index_ += rhs; return *this; } - ROCPRIM_HOST_DEVICE check_modulo_iterator& operator-=(const difference_type rhs) + ROCPRIM_HOST_DEVICE check_modulo_iterator& operator-=(const difference_type& rhs) { current_index_ -= rhs; return *this; @@ -901,11 +901,11 @@ class check_modulo_iterator { return current_index_ - rhs.current_index_; } - ROCPRIM_HOST_DEVICE check_modulo_iterator operator+(const difference_type rhs) const + ROCPRIM_HOST_DEVICE check_modulo_iterator operator+(const difference_type& rhs) const { return check_modulo_iterator(*this) += rhs; } - ROCPRIM_HOST_DEVICE check_modulo_iterator operator-(const difference_type rhs) const + ROCPRIM_HOST_DEVICE check_modulo_iterator operator-(const difference_type& rhs) const { return check_modulo_iterator(*this) -= rhs; } diff --git a/test/rocprim/test_device_scan.cpp b/test/rocprim/test_device_scan.cpp 
index 3b29bea48..8511ec6d6 100644 --- a/test/rocprim/test_device_scan.cpp +++ b/test/rocprim/test_device_scan.cpp @@ -938,12 +938,12 @@ class single_index_iterator { __host__ __device__ single_index_iterator& operator=(const single_index_iterator&) = default; // clang-format off - __host__ __device__ bool operator==(const single_index_iterator& rhs) { return index_ == rhs.index_; } - __host__ __device__ bool operator!=(const single_index_iterator& rhs) { return !(this == rhs); } + __host__ __device__ bool operator==(const single_index_iterator& rhs) const { return index_ == rhs.index_; } + __host__ __device__ bool operator!=(const single_index_iterator& rhs) const { return !(*this == rhs); } __host__ __device__ reference operator*() { return value_type{value_, index_ == expected_index_}; } - __host__ __device__ reference operator[](const difference_type distance) { return *(*this + distance); } + __host__ __device__ reference operator[](const difference_type distance) const { return *(*this + distance); } __host__ __device__ single_index_iterator& operator+=(const difference_type rhs) { index_ += rhs; return *this; } __host__ __device__ single_index_iterator& operator-=(const difference_type rhs) { index_ -= rhs; return *this; } @@ -1233,11 +1233,11 @@ class check_run_iterator ROCPRIM_HOST_DEVICE check_run_iterator(const args_t args) : current_index_(0), args_(args) {} - ROCPRIM_HOST_DEVICE bool operator==(const check_run_iterator& rhs) + ROCPRIM_HOST_DEVICE bool operator==(const check_run_iterator& rhs) const { return current_index_ == rhs.current_index_; } - ROCPRIM_HOST_DEVICE bool operator!=(const check_run_iterator& rhs) + ROCPRIM_HOST_DEVICE bool operator!=(const check_run_iterator& rhs) const { return !(*this == rhs); } @@ -1245,7 +1245,7 @@ class check_run_iterator { return value_type{current_index_, args_}; } - ROCPRIM_HOST_DEVICE reference operator[](const difference_type distance) + ROCPRIM_HOST_DEVICE reference operator[](const difference_type distance) 
const { return *(*this + distance); } diff --git a/test/rocprim/test_intrinsics.cpp b/test/rocprim/test_intrinsics.cpp index 138705677..10029094c 100644 --- a/test/rocprim/test_intrinsics.cpp +++ b/test/rocprim/test_intrinsics.cpp @@ -448,14 +448,14 @@ TYPED_TEST(RocprimIntrinsicsTests, ShuffleIndex) // Calculate expected results on host std::vector expected(size, test_type_helper::zero()); - for(size_t i = 0; i < input.size()/logical_warp_size; i++) + for(size_t j = 0; j < input.size()/logical_warp_size; j++) { - int src_lane = src_lanes[i]; - for(size_t j = 0; j < logical_warp_size; j++) + int src_lane = src_lanes[j]; + for(size_t k = 0; k < logical_warp_size; k++) { - size_t index = j + logical_warp_size * i; + size_t index = k + logical_warp_size * j; if(src_lane >= int(logical_warp_size) || src_lane < 0) src_lane = index; - expected[index] = input[src_lane + logical_warp_size * i]; + expected[index] = input[src_lane + logical_warp_size * j]; } } @@ -493,9 +493,9 @@ TYPED_TEST(RocprimIntrinsicsTests, ShuffleIndex) ) ); - for(size_t i = 0; i < output.size(); i++) + for(size_t j = 0; j < output.size(); j++) { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + ASSERT_EQ(output[j], expected[j]) << "where index = " << j; } } hipFree(device_data); @@ -952,7 +952,7 @@ TYPED_TEST(RocprimIntrinsicsTests, WarpPermute) template __global__ void match_any_kernel(max_lane_mask_type* output, - unsigned int* input, + const unsigned int* input, max_lane_mask_type active_lanes, max_lane_mask_type lane_predicates) { @@ -1075,7 +1075,7 @@ TEST(RocprimIntrinsicsTests, MatchAny) } __global__ void - ballot_kernel(max_lane_mask_type* output, unsigned int* input, max_lane_mask_type active_lanes) + ballot_kernel(max_lane_mask_type* output, const unsigned int* input, max_lane_mask_type active_lanes) { const unsigned int index = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/test/rocprim/test_utils_custom_test_types.hpp b/test/rocprim/test_utils_custom_test_types.hpp 
index 431547271..71c8c5d77 100644 --- a/test/rocprim/test_utils_custom_test_types.hpp +++ b/test/rocprim/test_utils_custom_test_types.hpp @@ -67,10 +67,9 @@ struct custom_test_type template ROCPRIM_HOST_DEVICE inline - custom_test_type(const custom_test_type& other) + custom_test_type(const custom_test_type& other) : + x(static_cast(other.x)), y(static_cast(other.y)) { - x = static_cast(other.x); - y = static_cast(other.y); } ROCPRIM_HOST_DEVICE inline diff --git a/test/rocprim/test_utils_sort_comparator.hpp b/test/rocprim/test_utils_sort_comparator.hpp index fa7be26ce..3f9358fe8 100644 --- a/test/rocprim/test_utils_sort_comparator.hpp +++ b/test/rocprim/test_utils_sort_comparator.hpp @@ -149,7 +149,7 @@ struct key_comparator struct key_value_comparator { - bool operator()(const std::pair& lhs, const std::pair& rhs) + bool operator()(const std::pair& lhs, const std::pair& rhs) const { return key_comparator()(lhs.first, rhs.first); } diff --git a/test/rocprim/test_warp_exchange.cpp b/test/rocprim/test_warp_exchange.cpp index ba985a904..769d6764a 100644 --- a/test/rocprim/test_warp_exchange.cpp +++ b/test/rocprim/test_warp_exchange.cpp @@ -56,7 +56,7 @@ struct BlockedToStripedOp ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&thread_data)[ItemsPerThread], - typename warp_exchange_type::storage_type& storage) + typename warp_exchange_type::storage_type& storage) const { warp_exchange.blocked_to_striped(thread_data, thread_data, storage); } @@ -72,7 +72,7 @@ struct BlockedToStripedShuffleOp ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&thread_data)[ItemsPerThread], - typename warp_exchange_type::storage_type& /*storage*/) + typename warp_exchange_type::storage_type& /*storage*/) const { warp_exchange.blocked_to_striped_shuffle(thread_data, thread_data); } @@ -88,7 +88,7 @@ struct StripedToBlockedOp ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T 
(&thread_data)[ItemsPerThread], - typename warp_exchange_type::storage_type& storage) + typename warp_exchange_type::storage_type& storage) const { warp_exchange.striped_to_blocked(thread_data, thread_data, storage); } @@ -104,7 +104,7 @@ struct StripedToBlockedShuffleOp ROCPRIM_DEVICE ROCPRIM_INLINE void operator()(warp_exchange_type warp_exchange, T (&thread_data)[ItemsPerThread], - typename warp_exchange_type::storage_type& /*storage*/) + typename warp_exchange_type::storage_type& /*storage*/) const { warp_exchange.striped_to_blocked_shuffle(thread_data, thread_data); } @@ -122,7 +122,7 @@ struct ScatterToStripedOp void operator()(warp_exchange_type warp_exchange, T (&thread_data)[ItemsPerThread], OffsetT (&positions)[ItemsPerThread], - typename warp_exchange_type::storage_type& storage) + typename warp_exchange_type::storage_type& storage) const { warp_exchange.scatter_to_striped(thread_data, thread_data, positions, storage); } diff --git a/test/rocprim/test_warp_reduce.hpp b/test/rocprim/test_warp_reduce.hpp index 02da74dd3..ca3a705be 100644 --- a/test/rocprim/test_warp_reduce.hpp +++ b/test/rocprim/test_warp_reduce.hpp @@ -67,7 +67,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceSum) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. 
Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -187,7 +187,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, AllReduceSum) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -312,7 +312,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceSumValid) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -434,7 +434,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, AllReduceSumValid) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. 
Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -557,7 +557,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, ReduceCustomStruct) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -690,7 +690,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, HeadSegmentedReduceSum) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -846,7 +846,7 @@ typed_test_def(RocprimWarpReduceTests, name_suffix, TailSegmentedReduceSum) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. 
Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } diff --git a/test/rocprim/test_warp_scan.hpp b/test/rocprim/test_warp_scan.hpp index 4573b22f8..7eb807f23 100644 --- a/test/rocprim/test_warp_scan.hpp +++ b/test/rocprim/test_warp_scan.hpp @@ -67,7 +67,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScan) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -190,7 +190,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScanReduce) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -335,7 +335,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveScan) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. 
Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -461,7 +461,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ExclusiveReduceScan) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -614,7 +614,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, Scan) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -773,7 +773,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, ScanReduce) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. 
Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -953,7 +953,7 @@ typed_test_def(RocprimWarpScanTests, name_suffix, InclusiveScanCustomType) if( (logical_warp_size > current_device_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } diff --git a/test/rocprim/test_warp_sort.hpp b/test/rocprim/test_warp_sort.hpp index b1da8d1b5..85addaa4e 100644 --- a/test/rocprim/test_warp_sort.hpp +++ b/test/rocprim/test_warp_sort.hpp @@ -55,7 +55,7 @@ typed_test_def(RocprimWarpSortShuffleBasedTests, name_suffix, Sort) !rocprim::detail::is_power_of_two(logical_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); } @@ -154,7 +154,7 @@ typed_test_def(RocprimWarpSortShuffleBasedTests, name_suffix, SortKeyInt) !rocprim::detail::is_power_of_two(logical_warp_size) || (current_device_warp_size != ws32 && current_device_warp_size != ws64) ) // Only WarpSize 32 and 64 is supported { - printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %d. Skipping test\n", + printf("Unsupported test warp size/computed block size: %zu/%zu. Current device warp size: %u. Skipping test\n", logical_warp_size, block_size, current_device_warp_size); GTEST_SKIP(); }