Skip to content

Commit

Permalink
CUTLASS 2.4 (Implicit GEMM convolution) (NVIDIA#147)
Browse files Browse the repository at this point in the history
CUTLASS 2.4 (Implicit GEMM Convolution)

Co-authored-by: Manish Gupta <manigupta@nvidia.com>, Haicheng Wu <haichengw@nvidia.com>, Dustyn Blasig <dblasig@nvidia.com>, Andrew Kerr <akerr@nvidia.com>
  • Loading branch information
manishucsd and kerrmudgeon authored Nov 20, 2020
1 parent c2b80ad commit 6615010
Show file tree
Hide file tree
Showing 224 changed files with 43,925 additions and 1,047 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,17 @@
# NVIDIA CUTLASS Changelog

# CUTLASS 2.x
## [2.4.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.4.0) (2020-11-19)
* Implicit GEMM convolution kernels supporting CUDA and Tensor Cores on NVIDIA GPUs
* Operators: forward (Fprop), backward data gradient (Dgrad), and backward weight gradient (Wgrad) convolution
* Data type: FP32, complex<FP32>, Tensor Float 32 (TF32), BFloat16 (BF16), Float16, Int4, Int8, Int32
* Spatial dimensions: 1-D, 2-D, and 3-D
* Layout: NHWC, NCxHWx
* Implicit GEMM convolution components:
* Global memory iterators supporting fprop, dgrad, and wgrad
* `MmaMultistage` for implicit GEMM convolution for NVIDIA Ampere architecture
* `MmaPipeline` for implicit GEMM convolution for NVIDIA Volta and Turing architectures
* [Documentation](/media/docs/implicit_gemm_convolution.md) describing Implicit GEMM Convolution algorithm and implementation

## [2.3.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.3.0) (2020-09-23)
* [NVIDIA Ampere Architecture features](https://devblogs.nvidia.com/nvidia-ampere-architecture-in-depth/)
Expand Down
194 changes: 186 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ endif()

message(STATUS "CMake Version: ${CMAKE_VERSION}")

project(CUTLASS VERSION 2.3.0 LANGUAGES CXX)
project(CUTLASS VERSION 2.4.0 LANGUAGES CXX)
include(${CMAKE_CURRENT_SOURCE_DIR}/CUDA.cmake)

find_package(Doxygen QUIET)
Expand Down Expand Up @@ -137,7 +137,12 @@ if (NOT (CMAKE_BUILD_TYPE OR CONFIGURATION_TYPES))
endif()

set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CUTLASS_LIBRARY_DEBUG_POSTFIX ".debug" CACHE STRING "Default postfix value for debug libraries")
if (DEFINED CMAKE_DEBUG_POSTFIX)
set(CUTLASS_LIBRARY_DEBUG_POSTFIX_INIT ${CMAKE_DEBUG_POSTFIX})
else()
set(CUTLASS_LIBRARY_DEBUG_POSTFIX_INIT .debug)
endif()
set(CUTLASS_LIBRARY_DEBUG_POSTFIX ${CUTLASS_LIBRARY_DEBUG_POSTFIX_INIT} CACHE STRING "Default postfix value for debug libraries")

if(WIN32)
# On Windows we link against the shared (DLL) runtime. Change gtest settings to match this.
Expand Down Expand Up @@ -192,7 +197,6 @@ endif()
set(CUTLASS_DEBUG_TRACE_LEVEL "0" CACHE STRING "Level of debug tracing to perform.")
list(APPEND CUTLASS_CUDA_NVCC_FLAGS -DCUTLASS_DEBUG_TRACE_LEVEL=${CUTLASS_DEBUG_TRACE_LEVEL})


set(CUTLASS_ENABLE_TENSOR_CORE_MMA ${CUTLASS_ENABLE_TENSOR_CORE_MMA_DEFAULT} CACHE BOOL
"Enable PTX mma instruction for collective matrix multiply operations.")

Expand Down Expand Up @@ -466,21 +470,195 @@ if (CUTLASS_ENABLE_CUBLAS)
target_compile_definitions(CUTLASS INTERFACE CUTLASS_ENABLE_CUBLAS=1)
endif()

include(${CMAKE_CURRENT_SOURCE_DIR}/cuDNN.cmake)

if (CUTLASS_ENABLE_CUDNN)
target_compile_definitions(CUTLASS INTERFACE CUTLASS_ENABLE_CUDNN=1)
endif()

################################################################################

if(CUTLASS_ENABLE_TOOLS)
include(CTest)
enable_testing()
if (NOT TARGET test_all)
add_custom_target(test_all)
endif()

set(CUTLASS_INSTALL_TESTS ON CACHE BOOL "Install test executables")
set(CUTLASS_TEST_EXECUTION_ENVIRONMENT "" CACHE BOOL "Environment in which to invoke unit test executables")

set(CMAKE_TEST_INSTALL_PREFIX test CACHE STRING "Test root install location, relative to CMAKE_INSTALL_PREFIX.")
set(CUTLASS_TEST_INSTALL_PREFIX ${CMAKE_TEST_INSTALL_PREFIX}/cutlass CACHE STRING "Test root install location, relative to CMAKE_INSTALL_PREFIX.")
set(CUTLASS_TEST_INSTALL_BINDIR ${CUTLASS_TEST_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR} CACHE STRING "Test root install location, relative to CMAKE_INSTALL_PREFIX.")
set(CUTLASS_TEST_INSTALL_LIBDIR ${CUTLASS_TEST_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR} CACHE STRING "Test root install location, relative to CMAKE_INSTALL_PREFIX.")

install(DIRECTORY DESTINATION ${CUTLASS_TEST_INSTALL_PREFIX})
install(DIRECTORY DESTINATION ${CUTLASS_TEST_INSTALL_BINDIR})
install(DIRECTORY DESTINATION ${CUTLASS_TEST_INSTALL_LIBDIR})
install(DIRECTORY DESTINATION ${CUTLASS_TEST_INSTALL_PREFIX}/ctest)

set(CUTLASS_CTEST_TEMPLATE_FILE ${CMAKE_CURRENT_LIST_DIR}/cmake/CTestTestfile.config.cmake)
set(CUTLASS_CTEST_GENERATED_FILES "" CACHE INTERNAL "")

function(cutlass_add_executable_tests NAME TARGET)
#
# Generates test rules for `make test`, `make test_all`, and `ctest` invoked from either the
# <CMAKE_BINARY_DIR> or the <CMAKE_INSTALL_PREFIX>/<CUTLASS_TEST_INSTALL_PREFIX> after installation.
#
# NAME: The base name for the test. Can be run with `make <NAME>` or `ctest -R 'c<NAME>'`.
# TARGET: The target corresponding to the executable under test.
# DISABLE_EXECUTABLE_INSTALL_RULE: An option, if given, that disables creating an install rule for TARGET.
# DEPENDS: A list of targets or files on which this test is dependent.
# DEPENDEES: A list of targets which should depend on this test.
# TEST_COMMAND_OPTIONS: A list of variables (i.e. by reference params) which contain command line arguments
# to pass to the test executable. A unique test with suffix _0, _1, ... is generated for each set of
# options given. If this option is not used, a single test with no arguments is generated.
#

set(options DISABLE_EXECUTABLE_INSTALL_RULE)
set(oneValueArgs)
set(multiValueArgs DEPENDS DEPENDEES TEST_COMMAND_OPTIONS)
cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

if (NOT __DISABLE_EXECUTABLE_INSTALL_RULE AND CUTLASS_INSTALL_TESTS)

# file(RELATIVE_PATH CMAKE_CURRENT_BINARY_RELATIVE_DIR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR})

install(
TARGETS ${TARGET}
RUNTIME DESTINATION ${CUTLASS_TEST_INSTALL_BINDIR}
)

endif()

if (NOT __TEST_COMMAND_OPTIONS)
set(__TEST_COMMAND_OPTIONS " ")
endif()

list(LENGTH __TEST_COMMAND_OPTIONS CMD_COUNT)
set(CMD_IDX 0)

if (CMD_COUNT GREATER 1)
add_custom_target(${NAME} DEPENDS ${TARGET} ${__DEPENDS})
foreach(DEPENDEE ${__DEPENDEES})
add_dependencies(${DEPENDEE} ${NAME})
endforeach()
endif()

foreach(CMD_OPTIONS ${__TEST_COMMAND_OPTIONS})

if (CMD_COUNT GREATER 1)
set(TEST_NAME ${NAME}_${CMD_IDX})
else()
set(TEST_NAME ${NAME})
endif()

# The following rigmarole is needed to deal with spaces and possible quotes in
# command line arguments. The options are passed "by reference" as the actual
# variable names holding the real options. We then expand these in a way that
# preserves any quotes. Note, they have to be in this order for it to work for
# all the use cases below.

set(CMD_OPTIONS ${${CMD_OPTIONS}})
list(JOIN CMD_OPTIONS " " TEST_COMMAND_OPTIONS)
separate_arguments(CMD_OPTIONS)

add_custom_target(
${TEST_NAME}
COMMAND
${CUTLASS_TEST_EXECUTION_ENVIRONMENT} $<TARGET_FILE:${TARGET}> ${CMD_OPTIONS}
DEPENDS
${TARGET}
)

if (CMD_COUNT GREATER 1)
add_dependencies(${NAME} ${TEST_NAME})
endif()

foreach(DEPENDEE ${__DEPENDEES})
add_dependencies(${DEPENDEE} ${TEST_NAME})
endforeach()

add_test(
NAME c${TEST_NAME}
COMMAND ${CUTLASS_TEST_EXECUTION_ENVIRONMENT} $<TARGET_FILE:${TARGET}> ${CMD_OPTIONS}
)

if (CUTLASS_INSTALL_TESTS)

# To run the tests from an install package with tests enabled, we need to generate test files
# that don't rely on the current directory structure in build.

set(TEST_NAME c${TEST_NAME})
set(TEST_EXE $<TARGET_FILE_NAME:${TARGET}>)
set(TEST_EXE_WORKING_DIRECTORY ./${CMAKE_INSTALL_BINDIR})
configure_file("${CUTLASS_CTEST_TEMPLATE_FILE}" "${CMAKE_PROJECT_DIR}${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.${TEST_NAME}.config.cmake" @ONLY)

file(GENERATE
OUTPUT "${CMAKE_PROJECT_DIR}${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.${TEST_NAME}.cmake"
INPUT "${CMAKE_PROJECT_DIR}${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.${TEST_NAME}.config.cmake"
)

install(
FILES "${CMAKE_PROJECT_DIR}${CMAKE_CURRENT_BINARY_DIR}/CTestTestfile.${TEST_NAME}.cmake"
DESTINATION ${CUTLASS_TEST_INSTALL_PREFIX}/ctest/
)

set(CUTLASS_CTEST_GENERATED_FILES ${CUTLASS_CTEST_GENERATED_FILES};ctest/CTestTestfile.${TEST_NAME}.cmake CACHE INTERNAL "")

endif()

math(EXPR CMD_IDX "${CMD_IDX} + 1")

endforeach()

endfunction()

if (CUTLASS_ENABLE_TOOLS)
add_subdirectory(tools)
if (CUTLASS_ENABLE_PROFILER)
add_dependencies(test_all test_profiler)
endif()
endif()
if(CUTLASS_ENABLE_EXAMPLES)
if (CUTLASS_ENABLE_EXAMPLES)
add_subdirectory(examples)
add_dependencies(test_all test_examples)
endif()

if(CUTLASS_ENABLE_TESTS)
include(CTest)
enable_testing()
if (CUTLASS_ENABLE_TESTS)
add_subdirectory(test)
add_dependencies(test_all test_unit)
endif()

if (CUTLASS_INSTALL_TESTS)

file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/cmake")

file(WRITE "${CMAKE_BINARY_DIR}/cmake/CTestTestfile.cmake" "# Generated File\n")
foreach(GENERATED_FILE ${CUTLASS_CTEST_GENERATED_FILES})
file(APPEND "${CMAKE_BINARY_DIR}/cmake/CTestTestfile.cmake" "include(${GENERATED_FILE})\n")
endforeach()

install(
FILES "${CMAKE_BINARY_DIR}/cmake/CTestTestfile.cmake"
DESTINATION ${CUTLASS_TEST_INSTALL_PREFIX}/
)

endif()

#? install(
#? FILES ${CMAKE_BINARY_DIR}/CTestTestfile.cmake
#? DESTINATION ${CUTLASS_TEST_INSTALL_PREFIX}/
#? )
#?
#? install(
#? DIRECTORY
#? ${CMAKE_BINARY_DIR}/tools
#? ${CMAKE_BINARY_DIR}/test
#? DESTINATION ${CUTLASS_TEST_INSTALL_PREFIX}/
#? FILES_MATCHING PATTERN "CTestTestfile.cmake"
#? )

################################################################################

install(
Expand Down
Loading

0 comments on commit 6615010

Please sign in to comment.