Skip to content

Commit 5e3f3e6

Browse files
DickJC123 authored and piiswrong committed
Changed make to support more gpu archs, multiple toolkits, reduce lib size. (apache#6588)
* Updated make to support more gpu archs, tolerate multiple toolkit versions, reduce lib size.
* Moved CUDA_ARCH setting to Makefile, removed from all make/*.mk files.
1 parent 0985c2e commit 5e3f3e6

File tree

4 files changed

+31
-23
lines changed

4 files changed

+31
-23
lines changed

Makefile

+31-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ifndef NNVM_PATH
1919
endif
2020

2121
ifndef DLPACK_PATH
22-
DLPACK_PATH = $(ROOTDIR)/dlpack
22+
DLPACK_PATH = $(ROOTDIR)/dlpack
2323
endif
2424

2525
ifneq ($(USE_OPENMP), 1)
@@ -58,7 +58,7 @@ LDFLAGS = -pthread $(MSHADOW_LDFLAGS) $(DMLC_LDFLAGS)
5858
ifeq ($(DEBUG), 1)
5959
NVCCFLAGS = -std=c++11 -Xcompiler -D_FORCE_INLINES -g -G -O0 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
6060
else
61-
NVCCFLAGS = -std=c++11 -Xcompiler -D_FORCE_INLINES -g -O3 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
61+
NVCCFLAGS = -std=c++11 -Xcompiler -D_FORCE_INLINES -O3 -ccbin $(CXX) $(MSHADOW_NVCCFLAGS)
6262
endif
6363

6464
# CFLAGS for profiler
@@ -129,6 +129,35 @@ ifneq ($(USE_CUDA_PATH), NONE)
129129
NVCC=$(USE_CUDA_PATH)/bin/nvcc
130130
endif
131131

132+
# Sets 'CUDA_ARCH', which determines the GPU architectures supported
133+
# by the compiled kernels. Users can edit the KNOWN_CUDA_ARCHS list below
134+
# to remove archs they don't wish to support to speed compilation, or they
135+
# can pre-set the CUDA_ARCH args in config.mk for full control.
136+
#
137+
# For archs in this list, nvcc will create a fat-binary that will include
138+
# the binaries (SASS) for all architectures supported by the installed version
139+
# of the cuda toolkit, plus the assembly (PTX) for the most recent such architecture.
140+
# If these kernels are then run on a newer-architecture GPU, the binary will
141+
# be JIT-compiled by the updated driver from the included PTX.
142+
ifeq ($(USE_CUDA), 1)
143+
ifeq ($(origin CUDA_ARCH), undefined)
144+
KNOWN_CUDA_ARCHS := 30 35 50 52 60 61
145+
# Run nvcc on a zero-length file to check architecture-level support.
146+
# Create args to include SASS in the fat binary for supported levels.
147+
CUDA_ARCH := $(foreach arch,$(KNOWN_CUDA_ARCHS), \
148+
$(shell $(NVCC) -arch=sm_$(arch) -E --x cu /dev/null >/dev/null 2>&1 && \
149+
echo -gencode arch=compute_$(arch),code=sm_$(arch)))
150+
# Convert a trailing "code=sm_NN" to "code=[sm_NN,compute_NN]" to also
151+
# include the PTX of the most recent arch in the fat-binaries for
152+
# forward compatibility with newer GPUs.
153+
CUDA_ARCH := $(shell echo $(CUDA_ARCH) | sed 's/sm_\([0-9]*\)$$/[sm_\1,compute_\1]/')
154+
# Add fat binary compression if supported by nvcc.
155+
COMPRESS := --fatbin-options -compress-all
156+
CUDA_ARCH += $(shell $(NVCC) -cuda $(COMPRESS) --x cu /dev/null -o /dev/null >/dev/null 2>&1 && \
157+
echo $(COMPRESS))
158+
endif
159+
endif
160+
132161
# ps-lite
133162
PS_PATH=$(ROOTDIR)/ps-lite
134163
DEPS_PATH=$(shell pwd)/deps

make/config.mk

-7
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,6 @@ USE_CUDA_PATH = NONE
5454
# whether to use the CuDNN R3 library
5555
USE_CUDNN = 0
5656

57-
# CUDA architecture setting: going with all of them.
58-
# For CUDA < 6.0, comment the *_50 lines for compatibility.
59-
CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
60-
-gencode arch=compute_35,code=sm_35 \
61-
-gencode arch=compute_50,code=sm_50 \
62-
-gencode arch=compute_50,code=compute_50
63-
6457
# whether to use CUDA runtime compilation for writing kernels in a native language (i.e. Python)
6558
USE_NVRTC = 0
6659

make/osx.mk

-7
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,6 @@ USE_CUDA = 0
4848
# USE_CUDA_PATH = /usr/local/cuda
4949
USE_CUDA_PATH = NONE
5050

51-
# CUDA architecture setting: going with all of them.
52-
# For CUDA < 6.0, comment the *_50 lines for compatibility.
53-
CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
54-
-gencode arch=compute_35,code=sm_35 \
55-
-gencode arch=compute_50,code=sm_50 \
56-
-gencode arch=compute_50,code=compute_50
57-
5851
# whether to use the CUDNN R3 library
5952
USE_CUDNN = 0
6053

make/pip_linux_cpu.mk

-7
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,6 @@ USE_CUDA_PATH = NONE
5050
# whether to use the CuDNN R3 library
5151
USE_CUDNN = 0
5252

53-
# CUDA architecture setting: going with all of them.
54-
# For CUDA < 6.0, comment the *_50 lines for compatibility.
55-
CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
56-
-gencode arch=compute_35,code=sm_35 \
57-
-gencode arch=compute_50,code=sm_50 \
58-
-gencode arch=compute_50,code=compute_50
59-
6053
# whether to use CUDA runtime compilation for writing kernels in a native language (i.e. Python)
6154
USE_NVRTC = 0
6255

0 commit comments

Comments
 (0)