Skip to content

Commit

Permalink
CUTLASS 2.5
Browse files Browse the repository at this point in the history
  • Loading branch information
kerrmudgeon committed Feb 26, 2021
1 parent ccb697b commit 0e13748
Show file tree
Hide file tree
Showing 771 changed files with 15,479 additions and 1,720 deletions.
15 changes: 14 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
# NVIDIA CUTLASS Changelog

# CUTLASS 2.x

## [2.5.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.5.0) (2021-02-26)
* Tensor reductions
* User-supplied reduction operations across one or more dimensions of tensors with affine layouts
* Optimizations for vectorized memory accesses
* Large tensor support, up to 2^63 elements (however, each dimension is limited to an extent of 2^31)
* Fused inlined operations on Convolution input
* Vector broadcast and transformation on Convolution input
* Optimizations for 3-D convolution
* Tile iterators using precomputed delta table for three spatial dimensions
* Performance parity with 2-D convolution implementation


## [2.4.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.4.0) (2020-11-19)
* Implicit GEMM convolution kernels supporting CUDA and Tensor Cores on NVIDIA GPUs
* Operators: forward (Fprop), backward data gradient (Dgrad), and backward weight gradient (Wgrad) convolution
Expand Down Expand Up @@ -126,7 +139,7 @@

## Copyright

Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.

```
Redistribution and use in source and binary forms, with or without modification, are permitted
Expand Down
82 changes: 54 additions & 28 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down Expand Up @@ -32,7 +32,7 @@ endif()

message(STATUS "CMake Version: ${CMAKE_VERSION}")

project(CUTLASS VERSION 2.4.0 LANGUAGES CXX)
project(CUTLASS VERSION 2.5.0 LANGUAGES CXX)
include(${CMAKE_CURRENT_SOURCE_DIR}/CUDA.cmake)

find_package(Doxygen QUIET)
Expand Down Expand Up @@ -67,6 +67,8 @@ else()
set(CUTLASS_ENABLE_TOOLS_INIT ON)
endif()

set(CUTLASS_TEST_UNIT_ENABLE_WARNINGS OFF CACHE BOOL "Enable warnings on waived unit tests.")

set(CUTLASS_ENABLE_EXAMPLES ${CUTLASS_ENABLE_EXAMPLES_INIT} CACHE BOOL "Enable CUTLASS Examples")
set(CUTLASS_ENABLE_TOOLS ${CUTLASS_ENABLE_TOOLS_INIT} CACHE BOOL "Enable CUTLASS Tools")
set(CUTLASS_ENABLE_LIBRARY ${CUTLASS_ENABLE_TOOLS} CACHE BOOL "Enable CUTLASS Library")
Expand Down Expand Up @@ -114,10 +116,6 @@ if (POLICY CMP0076)
cmake_policy(SET CMP0076 NEW)
endif()

if( NOT CMAKE_SIZEOF_VOID_P EQUAL 8 )
message(FATAL_ERROR "CUTLASS requires a 64-bit compiler!")
endif()

include(GNUInstallDirs)

link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs)
Expand Down Expand Up @@ -257,6 +255,17 @@ if (NOT CMAKE_BUILD_TYPE MATCHES "Release")
list(APPEND CUTLASS_CUDA_NVCC_FLAGS -lineinfo)
endif()

# Log the compiler-specific CUDA flags at configure time. Only the flag set that
# corresponds to the selected CUDA compiler is reported, and nothing is printed
# when that flag variable is unset, empty, or otherwise evaluates to false.
if(NOT CUDA_COMPILER MATCHES "[Cc]lang")
  # CUDA_COMPILER values that do not match clang use the NVCC flag set.
  if(CUTLASS_CUDA_NVCC_FLAGS)
    message(STATUS "Using NVCC flags: ${CUTLASS_CUDA_NVCC_FLAGS}")
  endif()
else()
  if(CUTLASS_CUDA_CLANG_FLAGS)
    message(STATUS "Using CLANG flags: ${CUTLASS_CUDA_CLANG_FLAGS}")
  endif()
endif()

if(CUDA_COMPILER MATCHES "[Cc]lang")
if( NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang" )
message(FATAL_ERROR "Clang CUDA compilation requires Clang CXX compilation. Currently CMAKE_CXX_COMPILER is ${CMAKE_CXX_COMPILER_ID}" )
Expand Down Expand Up @@ -318,20 +327,35 @@ function(cutlass_apply_cuda_gencode_flags TARGET)

endfunction()

# Cache the flags so they are available when the function below is called anywhere globally.
#
# Each CUTLASS_CUDA_*FLAGS* variable is copied into a "__"-prefixed INTERNAL cache entry.
# Cache entries are readable from any directory scope, whereas the normal variables are only
# visible in the scope that set them and the directories processed beneath it. INTERNAL
# entries are always overwritten (INTERNAL implies FORCE), so the copies are refreshed each
# time this file is processed.

set(__CUTLASS_CUDA_FLAGS ${CUTLASS_CUDA_FLAGS} CACHE INTERNAL "")
set(__CUTLASS_CUDA_FLAGS_RELEASE ${CUTLASS_CUDA_FLAGS_RELEASE} CACHE INTERNAL "")
set(__CUTLASS_CUDA_FLAGS_RELWITHDEBINFO ${CUTLASS_CUDA_FLAGS_RELWITHDEBINFO} CACHE INTERNAL "")
set(__CUTLASS_CUDA_FLAGS_DEBUG ${CUTLASS_CUDA_FLAGS_DEBUG} CACHE INTERNAL "")
set(__CUTLASS_CUDA_CLANG_FLAGS ${CUTLASS_CUDA_CLANG_FLAGS} CACHE INTERNAL "")
set(__CUTLASS_CUDA_CLANG_FLAGS_RELEASE ${CUTLASS_CUDA_CLANG_FLAGS_RELEASE} CACHE INTERNAL "")
set(__CUTLASS_CUDA_CLANG_FLAGS_RELWITHDEBINFO ${CUTLASS_CUDA_CLANG_FLAGS_RELWITHDEBINFO} CACHE INTERNAL "")
set(__CUTLASS_CUDA_CLANG_FLAGS_DEBUG ${CUTLASS_CUDA_CLANG_FLAGS_DEBUG} CACHE INTERNAL "")
set(__CUTLASS_CUDA_NVCC_FLAGS ${CUTLASS_CUDA_NVCC_FLAGS} CACHE INTERNAL "")
set(__CUTLASS_CUDA_NVCC_FLAGS_RELEASE ${CUTLASS_CUDA_NVCC_FLAGS_RELEASE} CACHE INTERNAL "")
set(__CUTLASS_CUDA_NVCC_FLAGS_RELWITHDEBINFO ${CUTLASS_CUDA_NVCC_FLAGS_RELWITHDEBINFO} CACHE INTERNAL "")
set(__CUTLASS_CUDA_NVCC_FLAGS_DEBUG ${CUTLASS_CUDA_NVCC_FLAGS_DEBUG} CACHE INTERNAL "")

function(cutlass_apply_standard_compile_options TARGET)

if(CUDA_COMPILER MATCHES "[Cc]lang")
set(CUDA_COMPILE_LANGUAGE CXX)
set(_FLAGS ${CUTLASS_CUDA_FLAGS} ${CUTLASS_CUDA_CLANG_FLAGS})
set(_FLAGS_RELEASE ${CUTLASS_CUDA_FLAGS_RELEASE} ${CUTLASS_CUDA_CLANG_FLAGS_RELEASE})
set(_FLAGS_RELWITHDEBINFO ${CUTLASS_CUDA_FLAGS_RELWITHDEBINFO} ${CUTLASS_CUDA_CLANG_FLAGS_RELWITHDEBINFO})
set(_FLAGS_DEBUG ${CUTLASS_CUDA_FLAGS_DEBUG} ${CUTLASS_CUDA_CLANG_FLAGS_DEBUG})
set(_FLAGS ${__CUTLASS_CUDA_FLAGS} ${__CUTLASS_CUDA_CLANG_FLAGS})
set(_FLAGS_RELEASE ${__CUTLASS_CUDA_FLAGS_RELEASE} ${__CUTLASS_CUDA_CLANG_FLAGS_RELEASE})
set(_FLAGS_RELWITHDEBINFO ${__CUTLASS_CUDA_FLAGS_RELWITHDEBINFO} ${__CUTLASS_CUDA_CLANG_FLAGS_RELWITHDEBINFO})
set(_FLAGS_DEBUG ${__CUTLASS_CUDA_FLAGS_DEBUG} ${__CUTLASS_CUDA_CLANG_FLAGS_DEBUG})
else()
set(CUDA_COMPILE_LANGUAGE CUDA)
set(_FLAGS ${CUTLASS_CUDA_FLAGS} ${CUTLASS_CUDA_NVCC_FLAGS})
set(_FLAGS_RELEASE ${CUTLASS_CUDA_FLAGS_RELEASE} ${CUTLASS_CUDA_NVCC_FLAGS_RELEASE})
set(_FLAGS_RELWITHDEBINFO ${CUTLASS_CUDA_FLAGS_RELWITHDEBINFO} ${CUTLASS_CUDA_NVCC_FLAGS_RELWITHDEBINFO})
set(_FLAGS_DEBUG ${CUTLASS_CUDA_FLAGS_DEBUG} ${CUTLASS_CUDA_NVCC_FLAGS_DEBUG})
set(_FLAGS ${__CUTLASS_CUDA_FLAGS} ${__CUTLASS_CUDA_NVCC_FLAGS})
set(_FLAGS_RELEASE ${__CUTLASS_CUDA_FLAGS_RELEASE} ${__CUTLASS_CUDA_NVCC_FLAGS_RELEASE})
set(_FLAGS_RELWITHDEBINFO ${__CUTLASS_CUDA_FLAGS_RELWITHDEBINFO} ${__CUTLASS_CUDA_NVCC_FLAGS_RELWITHDEBINFO})
set(_FLAGS_DEBUG ${__CUTLASS_CUDA_FLAGS_DEBUG} ${__CUTLASS_CUDA_NVCC_FLAGS_DEBUG})
endif()

target_compile_options(
Expand Down Expand Up @@ -464,20 +488,6 @@ endif()

################################################################################

include(${CMAKE_CURRENT_SOURCE_DIR}/cuBLAS.cmake)

if (CUTLASS_ENABLE_CUBLAS)
target_compile_definitions(CUTLASS INTERFACE CUTLASS_ENABLE_CUBLAS=1)
endif()

include(${CMAKE_CURRENT_SOURCE_DIR}/cuDNN.cmake)

if (CUTLASS_ENABLE_CUDNN)
target_compile_definitions(CUTLASS INTERFACE CUTLASS_ENABLE_CUDNN=1)
endif()

################################################################################

include(CTest)
enable_testing()
if (NOT TARGET test_all)
Expand All @@ -497,6 +507,22 @@ install(DIRECTORY DESTINATION ${CUTLASS_TEST_INSTALL_BINDIR})
install(DIRECTORY DESTINATION ${CUTLASS_TEST_INSTALL_LIBDIR})
install(DIRECTORY DESTINATION ${CUTLASS_TEST_INSTALL_PREFIX}/ctest)

################################################################################

include(${CMAKE_CURRENT_SOURCE_DIR}/cuBLAS.cmake)

if (CUTLASS_ENABLE_CUBLAS)
target_compile_definitions(CUTLASS INTERFACE CUTLASS_ENABLE_CUBLAS=1)
endif()

include(${CMAKE_CURRENT_SOURCE_DIR}/cuDNN.cmake)

if (CUTLASS_ENABLE_CUDNN)
target_compile_definitions(CUTLASS INTERFACE CUTLASS_ENABLE_CUDNN=1)
endif()

################################################################################

set(CUTLASS_CTEST_TEMPLATE_FILE ${CMAKE_CURRENT_LIST_DIR}/cmake/CTestTestfile.config.cmake)
set(CUTLASS_CTEST_GENERATED_FILES "" CACHE INTERNAL "")

Expand Down
4 changes: 2 additions & 2 deletions CUDA.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down Expand Up @@ -204,7 +204,7 @@ include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
# paths by default, so we add it explicitly here.

function(cutlass_correct_source_file_language_property)
if(CUDA_COMPILER MATCHES "clang")
if(CUDA_COMPILER MATCHES "[Cc]lang")
foreach(File ${ARGN})
if(File MATCHES ".*\.cu$")
set_source_files_properties(${File} PROPERTIES LANGUAGE CXX)
Expand Down
15 changes: 10 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
![ALT](/media/images/gemm-hierarchy-with-epilogue-no-labels.png "Complete CUDA GEMM decomposition")

# CUTLASS 2.4
# CUTLASS 2.5

_CUTLASS 2.4 - November 2020_
_CUTLASS 2.5 - February 2021_

CUTLASS is a collection of CUDA C++ template abstractions for implementing
high-performance matrix-multiplication (GEMM) at all levels and scales within CUDA.
Expand Down Expand Up @@ -34,20 +34,25 @@ See the [Quick Start Guide](/media/docs/quickstart.md) to get started quickly.
See the [functionality listing](/media/docs/functionality.md) for the list of operations
supported at each level of the execution model hierarchy.

# What's New in CUTLASS 2.5
CUTLASS 2.5 is a minor update to CUTLASS adding:
- Tensor reductions
- Fused inlined operations on Convolution input
- Optimizations for 3-D convolution
- See the [CHANGELOG](CHANGELOG.md) for more details.

# What's New in CUTLASS 2.4
CUTLASS 2.4 is a significant update to CUTLASS adding:
- 1-D, 2-D, and 3-D convolution targeting Tensor and CUDA cores for NVIDIA Ampere, Turing, and Volta GPU architectures
- CUTLASS profiler support for convolution
- [Documentation](/media/docs/implicit_gemm_convolution.md) describing Implicit GEMM Convolution algorithm and implementation
- See the [CHANGELOG](CHANGELOG.md) for more details.

# What's New in CUTLASS 2.3

CUTLASS 2.3 is a minor update to CUTLASS adding:
- GEMMs targeting structured [Sparse Tensor Cores](test/unit/gemm/device/gemm_f16n_f16n_f32t_tensor_op_f32_sparse_sm80.cu) in NVIDIA Ampere Architecture GPUs
- Fast SGEMM kernels targeting GeForce RTX 30-series CUDA Cores
- Intended to be compiled with [CUDA 11.1 Toolkit](https://developer.nvidia.com/cuda-toolkit)
- See the [CHANGELOG](CHANGELOG.md) for more details.

# What's New in CUTLASS 2.2

Expand Down Expand Up @@ -508,7 +513,7 @@ The official list of CUTLASS developers and contributors is available here: [CON

# Copyright

Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.

```
Redistribution and use in source and binary forms, with or without modification, are permitted
Expand Down
2 changes: 1 addition & 1 deletion cmake/nop.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion cuBLAS.cmake
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion cuDNN.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/00_basic_gemm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/00_basic_gemm/basic_gemm.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/01_cutlass_utilities/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/01_cutlass_utilities/cutlass_utilities.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/02_dump_reg_shmem/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/02_dump_reg_shmem/dump_reg_shmem.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
*modification, are permitted provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/03_visualize_layout/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/03_visualize_layout/options.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/03_visualize_layout/register_layout.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/03_visualize_layout/register_layout.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/03_visualize_layout/visualize_layout.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/03_visualize_layout/visualize_layout.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/04_tile_iterator/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/04_tile_iterator/tile_iterator.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/05_batched_gemm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/05_batched_gemm/batched_gemm.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/06_splitK_gemm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/06_splitK_gemm/splitk_gemm.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down
2 changes: 1 addition & 1 deletion examples/07_volta_tensorop_gemm/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are permitted
# provided that the following conditions are met:
Expand Down
8 changes: 6 additions & 2 deletions examples/07_volta_tensorop_gemm/volta_tensorop_gemm.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***************************************************************************************************
* Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
Expand Down Expand Up @@ -284,8 +284,12 @@ int run() {
// Instantiate CUTLASS kernel depending on templates
Gemm gemm_op;

// Check whether the problem size is supported
cutlass::Status status = gemm_op.can_implement(arguments);
CUTLASS_CHECK(status);

// Initialize CUTLASS kernel with arguments and workspace pointer
cutlass::Status status = gemm_op.initialize(arguments, workspace.get());
status = gemm_op.initialize(arguments, workspace.get());
CUTLASS_CHECK(status);

// Launch initialized CUTLASS kernel
Expand Down
Loading

0 comments on commit 0e13748

Please sign in to comment.