-
Notifications
You must be signed in to change notification settings - Fork 25
/
CMakeLists.txt
83 lines (64 loc) · 3.05 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Project bootstrap for the extra_losses shared library.
#
# 2.8 stays the oldest accepted CMake. The "...3.10" upper bound opts in to
# policy behaviour up to 3.10 on newer releases, which silences the
# "compatibility with CMake < 3.5" deprecation and keeps the file usable with
# CMake 4.x (which rejects a bare VERSION 2.8). CMake older than 3.12 ignores
# the "...3.10" suffix and behaves exactly as with VERSION 2.8.
cmake_minimum_required(VERSION 2.8...3.10)
project(extra_losses)

# Legacy FindCUDA module: provides CUDA_COMPILE, CUDA_NVCC_FLAGS and
# CUDA_LIBRARIES, all of which are used further down in this file.
find_package(CUDA REQUIRED)
# Ask Python for the working directory of the configure step. The result is
# stored in CWD and later handed to NVCC as the --keep-dir location.
execute_process(
  COMMAND python3.5 -c "import os; print(os.getcwd(), end='', flush=True)"
  OUTPUT_VARIABLE CWD
)

# If the interpreter is missing or failed, CWD would be empty and the later
# "--keep-dir ${CWD}" would swallow the next NVCC option. Fall back to the
# build directory instead of silently producing a broken command line.
if("${CWD}" STREQUAL "")
  message(WARNING
    "python3.5 did not report a working directory; "
    "using ${CMAKE_CURRENT_BINARY_DIR} instead")
  set(CWD "${CMAKE_CURRENT_BINARY_DIR}")
endif()

message(STATUS "Found CWD: ${CWD}")
# Detect the compute capability of GPU 0 and turn it into an NVCC "gencode"
# option (without the leading dash, which is added where the flag is used).
#
# The embedded Python script queries the device name through nvidia-smi and
# looks it up in a name -> capability table. Unknown devices, or a machine
# without nvidia-smi, fall back to capability 61.
execute_process(
  COMMAND python3.5 -c "
import subprocess

device_capability_map = {
    'Tesla V100': '70',
    'Tesla P100': '60',
    'Tesla P40': '61',
    'Tesla P4': '61',
    'Tesla M60': '52',
    'Tesla M40': '52',
    'Tesla K80': '37',
    'Tesla K40': '35',
    'Tesla K20': '35',
    'Tesla K10': '30',
    'Tesla C2075': '20',
    'Tesla C2050': '20',
    'Tesla C2070': '20',
    'GeForce GTX 1080 Ti': '61',
}

cap = '61'
try:
    process = subprocess.Popen('nvidia-smi -i 0 --query-gpu=name --format=csv'.split(), stdout=subprocess.PIPE)
    output, _ = process.communicate()
    output = str(output)
except OSError:
    # nvidia-smi is not installed or not executable: keep the default.
    output = ''

for k, v in device_capability_map.items():
    if k in output:
        cap = v
        break

print('gencode arch=compute_' + cap + ',code=sm_' + cap, end='')
"
  OUTPUT_VARIABLE GPU_CAPABILITY
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

# Guard against a missing/broken interpreter: an empty or malformed value
# would otherwise reach NVCC as a bare "-" argument.
if(NOT "${GPU_CAPABILITY}" MATCHES "^gencode arch=compute_[0-9]+,code=sm_[0-9]+$")
  message(WARNING
    "Could not detect the GPU compute capability; defaulting to sm_61")
  set(GPU_CAPABILITY "gencode arch=compute_61,code=sm_61")
endif()

message(STATUS "Found GPU_CAPABILITY: ${GPU_CAPABILITY}")
# Options appended to whatever CUDA_NVCC_FLAGS already holds. They are built
# from three pieces so each group can be read (and edited) on its own:
#   - where NVCC keeps its intermediate files (CWD, detected above),
#   - the target architecture (GPU_CAPABILITY, detected above),
#   - the fixed options shared by every CUDA source in this project.
#
# To pin the architecture instead of auto-detecting it, replace the
# architecture piece with a literal, e.g.
#   "-gencode arch=compute_61,code=sm_61"
set(EXTRA_LOSSES_NVCC_KEEP "--keep --keep-dir ${CWD}")
set(EXTRA_LOSSES_NVCC_ARCH "-${GPU_CAPABILITY}")
set(EXTRA_LOSSES_NVCC_COMMON
    "-D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr -DNDEBUG")

set(CUDA_NVCC_FLAGS
    "${CUDA_NVCC_FLAGS} ${EXTRA_LOSSES_NVCC_KEEP} ${EXTRA_LOSSES_NVCC_ARCH} ${EXTRA_LOSSES_NVCC_COMMON}")
# Host C++ compiler flags, appended to any flags already present.
# OpenMP_CXX_FLAGS is spliced in as-is; it expands to nothing unless it has
# been populated before this point.
set(EXTRA_LOSSES_CXX_LANGUAGE "-std=c++11 -O2 ${OpenMP_CXX_FLAGS}")
set(EXTRA_LOSSES_CXX_CODEGEN "-Wall -fPIC")
set(EXTRA_LOSSES_CXX_DEFINES
    "-D_GLIBCXX_USE_CXX11_ABI=0 -DGOOGLE_CUDA=1 -DNDEBUG")

set(CMAKE_CXX_FLAGS
    "${CMAKE_CXX_FLAGS} ${EXTRA_LOSSES_CXX_LANGUAGE} ${EXTRA_LOSSES_CXX_CODEGEN} ${EXTRA_LOSSES_CXX_DEFINES}")
# TensorFlow dependencies
#
# Query the installed TensorFlow package for its header and library
# directories. TF_CPP_MIN_LOG_LEVEL=3 is set before the import to keep
# TensorFlow's own logging quiet while the paths are printed.
execute_process(
  COMMAND python3.5 -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(tf.sysconfig.get_include(), end='', flush=True)"
  OUTPUT_VARIABLE TF_INC
  RESULT_VARIABLE TF_INC_STATUS
)
execute_process(
  COMMAND python3.5 -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(tf.sysconfig.get_lib(), end='', flush=True)"
  OUTPUT_VARIABLE TF_LIB
  RESULT_VARIABLE TF_LIB_STATUS
)

# Without these two paths nothing below can compile or link, so stop at
# configure time with a clear message instead of failing later in the build.
if(NOT "${TF_INC_STATUS}" STREQUAL "0" OR "${TF_INC}" STREQUAL "")
  message(FATAL_ERROR
    "Could not query the TensorFlow include directory with python3.5 "
    "(result: ${TF_INC_STATUS})")
endif()
if(NOT "${TF_LIB_STATUS}" STREQUAL "0" OR "${TF_LIB}" STREQUAL "")
  message(FATAL_ERROR
    "Could not query the TensorFlow library directory with python3.5 "
    "(result: ${TF_LIB_STATUS})")
endif()

message(STATUS "Found TF_INC: ${TF_INC}")
message(STATUS "Found TF_INC_EXTERNAL: ${TF_INC}/external/nsync/public")
message(STATUS "Found TF_LIB: ${TF_LIB}")

# Paths are quoted so that installations under directories containing spaces
# are passed through as a single argument.
include_directories("${TF_INC}")
include_directories("${TF_INC}/external/nsync/public")
link_directories("${TF_LIB}")
# approach 1 (disabled, kept for reference): build the CUDA kernels as their
# own shared library and link it into a separate l_softmax library.
# CUDA_ADD_LIBRARY(l_softmax_gpu SHARED l_softmax_op.cu OPTIONS -I${TF_INC}/tensorflow/stream_executor/cuda -I/usr/local)
# ADD_LIBRARY(l_softmax SHARED
# l_softmax_op.h
# l_softmax_op.cc
# )
# TARGET_LINK_LIBRARIES(l_softmax tensorflow_framework ${CUDA_LIBRARIES} l_softmax_gpu)

# approach 2 (active): compile each .cu file to an object file with NVCC and
# put those objects, together with the C++ sources, into one shared library.
#
# Note: CMake variables must be written as ${TF_INC}; a bare $TF_INC is not
# expanded and would reach NVCC as the literal text "$TF_INC".
cuda_compile(L_SOFTMAX_CU_O l_softmax_op.cu
  MODULE
  OPTIONS "-I${TF_INC}" -I/usr/local
)
cuda_compile(L_SOFTMAX_GRAD_CU_O l_softmax_grad_op.cu
  MODULE
  OPTIONS "-I${TF_INC}" -I/usr/local
)

add_library(extra_losses SHARED
  ${L_SOFTMAX_CU_O}
  ${L_SOFTMAX_GRAD_CU_O}
  l_softmax_op.h
  l_softmax_op.cc
  l_softmax_grad_op.cc
  common.h
  common.cc
)

# tensorflow_framework is resolved through the link directory registered for
# TF_LIB; CUDA_LIBRARIES comes from find_package(CUDA).
target_link_libraries(extra_losses tensorflow_framework ${CUDA_LIBRARIES})