Skip to content

Commit

Permalink
Merge pull request NVIDIA#24 from NVIDIA/cutlass_1.1
Browse files Browse the repository at this point in the history
Cutlass 1.1
  • Loading branch information
kerrmudgeon authored Sep 20, 2018
2 parents cf0301e + 0826572 commit d85f6a1
Show file tree
Hide file tree
Showing 1,315 changed files with 92,843 additions and 16,834 deletions.
18 changes: 17 additions & 1 deletion changelog.md → CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,22 @@
# NVIDIA CUTLASS Changelog

## [1.0.1](https://github.com/NVIDIA/cutlass/releases/tag/v1.0.1) (2018-06-11)

## 1.1.0 (2018-09-19)
* Turing Features
* WMMA GEMM targeting TensorCores - INT8, INT4, 1-bit
* Batched Strided GEMM
* Threadblock rasterization strategies
* Improved performance for adverse problem sizes and data layouts
* Extended CUTLASS Core components
* Tensor views support arbitrary matrix and tensor layouts
* Zip iterators for structuring multiple data streams
* Enhanced CUTLASS utilities
* Reference code for tensor operations in host and device code
* Added HostMatrix<> for simplified matrix creation
* Examples
* Basic GEMM, tensor views, CUTLASS utilities, batched GEMM, WMMA GEMM

## 1.0.1 (2018-06-11)

* Intra-threadblock reduction added for small threadblock tile sizes
* sgemm_64x128x16, sgemm_128x128x16, sgemm_128x64x16, sgemm_128x32x16, sgemm_64x64x16, sgemm_64x32x16
Expand Down
91 changes: 68 additions & 23 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,21 @@ endif()
find_package(CUDA)
find_package(Doxygen QUIET)

###################################################################################################
#
# Configure CMake variables
#
###################################################################################################

# Locate the cuBLAS library, looking first in the library directories of the CUDA toolkit
# (lib64 and lib/x64 below CUDA_TOOLKIT_ROOT_DIR). The result is stored in CUBLAS_LIBRARY.
find_library(CUBLAS_LIBRARY
  NAMES
    cublas
  HINTS
    ${CUDA_TOOLKIT_ROOT_DIR}/lib64
    ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
)

# By default we want to build in Release mode to ensure that we're getting best performance.
#
# The default is applied only when no build type has been chosen and the generator is not a
# multi-config one. Multi-config generators list their configurations in
# CMAKE_CONFIGURATION_TYPES; CONFIGURATION_TYPES is still honored for callers that set it.
if (NOT (CMAKE_BUILD_TYPE OR CMAKE_CONFIGURATION_TYPES OR CONFIGURATION_TYPES))
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose build level" FORCE)
  # We support Debug, RelWithDebInfo, and Release builds; offer exactly these as choices
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "RelWithDebInfo" "Release")
endif()

if(WIN32)
Expand All @@ -68,27 +78,59 @@ if(WIN32)
endif()

# Windows-only NVCC flags. Options prefixed with -Xcompiler are forwarded to the host compiler.
# Each flag is appended exactly once.
if (WIN32)
  # Enable more warnings and treat as errors
  string(APPEND NVCC_FLAGS " -Xcompiler /W3 -Xcompiler /WX")

  # Disable warning on Unicode characters
  string(APPEND NVCC_FLAGS " -Xcompiler /wd4819")

  # Disable excess x86 floating point precision that can lead to results being labeled incorrectly
  string(APPEND NVCC_FLAGS " -Xcompiler /fp:strict")

  # Verbose option. The variable is tested by name so that an unset or unusual value cannot be
  # re-interpreted as another variable name or produce an empty condition.
  if (CUTLASS_NVCC_VERBOSE)
    string(APPEND NVCC_FLAGS " -v")
  endif()
endif()

# Configure CUDA options
#
# User-tunable cache entries. Each entry is declared exactly once: set(... CACHE ...) without
# FORCE leaves an already existing cache entry untouched, so a repeated declaration of the same
# name with a different default would never take effect.
set(CUTLASS_NVCC_ARCHS "50;60;61;70;75" CACHE STRING "The SM architectures to build code for.")
set(CUTLASS_NVCC_EMBED_CUBIN ON CACHE BOOL "Embed compiled CUDA kernel binaries into executables.")
set(CUTLASS_NVCC_EMBED_PTX ON CACHE BOOL "Embed compiled PTX into executables.")
set(CUTLASS_NVCC_KEEP OFF CACHE BOOL "Keep intermediate files generated by NVCC.")

#
# AddressSanitizer support (https://github.com/google/sanitizers/wiki/AddressSanitizer)
#
# NOTE: an ASan-instrumented binary that uses CUDA must be run with this environment variable:
#
#   ASAN_OPTIONS=protect_shadow_gap=0:replace_intrin=0:detect_leaks=0
#
# If it is missing, the run may fail with output similar to:
#
#   *** Error: Could not detect active GPU device ID [out of memory]
#   ...
#   ==9149==ERROR: LeakSanitizer: detected memory leaks
#   ...
#
if (ENABLE_ASAN)
  # Compile with the address sanitizer enabled and with frame pointers kept
  string(APPEND NVCC_FLAGS
    " --compiler-options -fsanitize=address"
    " --compiler-options -fno-omit-frame-pointer")

  # The sanitizer flag is also required when linking executables
  string(APPEND CMAKE_EXE_LINKER_FLAGS " -fsanitize=address")
endif()

###################################################################################################
#
# Configure CUDA build options
#
###################################################################################################

# Set NVCC arguments
#
# For every architecture listed in CUTLASS_NVCC_ARCHS, append one -gencode clause per enabled
# embedding mode:
#   code=sm_XX       when CUTLASS_NVCC_EMBED_CUBIN is on
#   code=compute_XX  when CUTLASS_NVCC_EMBED_PTX is on
# No clause is appended unconditionally, so switching an option off really removes its clause
# and no clause is emitted twice.
foreach(ARCH ${CUTLASS_NVCC_ARCHS})
  if (CUTLASS_NVCC_EMBED_CUBIN)
    string(APPEND NVCC_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
  endif()
  if (CUTLASS_NVCC_EMBED_PTX)
    string(APPEND NVCC_FLAGS " -gencode arch=compute_${ARCH},code=compute_${ARCH}")
  endif()
endforeach()

# Optionally keep the intermediate files generated by NVCC
if (CUTLASS_NVCC_KEEP)
  string(APPEND NVCC_FLAGS " -keep")
endif()
Expand All @@ -99,23 +141,22 @@ else()
string(APPEND NVCC_FLAGS " -lineinfo")
endif()

# On UNIX platforms, forward -Wconversion to the host compiler
if (UNIX)
  string(APPEND NVCC_FLAGS " -Xcompiler -Wconversion")
endif()

# Per-configuration flags.
# Release: optimize, and define NDEBUG to disable assertions
string(APPEND NVCC_FLAGS_RELEASE " -O3" " -DNDEBUG")
# RelWithDebInfo: optimize
string(APPEND NVCC_FLAGS_RELWITHDEBINFO " -O3")
# Debug: pass -g
string(APPEND NVCC_FLAGS_DEBUG " -g")

# Hand the accumulated NVCC flag strings to the build, using the variable family that matches
# the selected CUDA integration.
if (NOT CUTLASS_NATIVE_CUDA)
  # CUDA_NVCC_FLAGS* variables
  set(CUDA_NVCC_FLAGS ${NVCC_FLAGS})
  set(CUDA_NVCC_FLAGS_RELEASE ${NVCC_FLAGS_RELEASE})
  set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ${NVCC_FLAGS_RELWITHDEBINFO})
  set(CUDA_NVCC_FLAGS_DEBUG ${NVCC_FLAGS_DEBUG})
else()
  # CMAKE_CUDA_FLAGS* variables
  set(CMAKE_CUDA_FLAGS "${NVCC_FLAGS}")
  set(CMAKE_CUDA_FLAGS_RELEASE "${NVCC_FLAGS_RELEASE}")
  set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "${NVCC_FLAGS_RELWITHDEBINFO}")
  set(CMAKE_CUDA_FLAGS_DEBUG "${NVCC_FLAGS_DEBUG}")
endif()

Expand All @@ -128,6 +169,11 @@ file(GLOB CUTLASS_GEMM RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/gemm/*.h)
file(GLOB CUTLASS_UTIL RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/util/*.h)
file(GLOB CUTLASS_DEVICE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/device/*.h)
file(GLOB CUTLASS_CORE RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} cutlass/*.h)
###################################################################################################
#
# Define build targets
#
###################################################################################################

source_group("cutlass\\gemm" FILES ${CUTLASS_GEMM})
source_group("cutlass\\util" FILES ${CUTLASS_UTIL})
Expand Down Expand Up @@ -156,9 +202,9 @@ add_custom_target(cutlass_ide SOURCES
if (DOXYGEN_FOUND)
# Graph generation in the documentation depends on the dot executable:
#  - without dot, the option is forced to OFF, overriding any cached value;
#  - with dot, the option defaults to ON but an existing cached value is kept.
if (NOT DOXYGEN_DOT_EXECUTABLE)
  set(CUTLASS_ENABLE_DOXYGEN_DOT OFF CACHE BOOL "Use dot to generate graphs in the doxygen documentation." FORCE)
else()
  set(CUTLASS_ENABLE_DOXYGEN_DOT ON CACHE BOOL "Use dot to generate graphs in the doxygen documentation.")
endif()

if (CUTLASS_ENABLE_DOXYGEN_DOT)
Expand All @@ -177,6 +223,5 @@ if (DOXYGEN_FOUND)
)
endif()


# Descend into the tools and examples subdirectories
add_subdirectory(tools)
add_subdirectory(examples)
Loading

0 comments on commit d85f6a1

Please sign in to comment.