Skip to content

Commit d324ed7

Browse files
authored
Merge pull request #1698 from Xreki/build_arm
Add cross-compiling support for arm architecture.
2 parents b22cd96 + 2ae3dd0 commit d324ed7

37 files changed

+887
-207
lines changed

CMakeLists.txt

+28-7
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,26 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License
1414

15-
cmake_minimum_required(VERSION 3.0)
16-
17-
project(paddle CXX C)
18-
1915
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake")
2016
set(PROJ_ROOT ${CMAKE_SOURCE_DIR})
2117

18+
include(system)
19+
20+
if(ANDROID)
21+
cmake_minimum_required(VERSION 3.7)
22+
else()
23+
cmake_minimum_required(VERSION 3.0)
24+
endif()
25+
26+
project(paddle CXX C)
27+
2228
find_package(Sphinx)
23-
find_package(CUDA QUIET)
29+
if(NOT CMAKE_CROSSCOMPILING)
30+
find_package(CUDA QUIET)
31+
endif(NOT CMAKE_CROSSCOMPILING)
2432
find_package(Git REQUIRED)
2533
find_package(Threads REQUIRED)
2634

27-
include(system)
2835
include(simd)
2936

3037
################################ Configurations #######################################
@@ -51,6 +58,21 @@ if(NOT CMAKE_BUILD_TYPE)
5158
FORCE)
5259
endif()
5360

61+
if(ANDROID)
62+
if("${CMAKE_SYSTEM_VERSION}" VERSION_LESS "21")  # quoted so an empty version still yields a well-formed if()
63+
message(FATAL_ERROR "Unsupport standalone toolchains with Android API level lower than 21")
64+
endif()
65+
66+
set(WITH_GPU OFF CACHE STRING
67+
"Disable GPU when cross-compiling for Android" FORCE)
68+
set(WITH_AVX OFF CACHE STRING
69+
"Disable AVX when cross-compiling for Android" FORCE)
70+
set(WITH_PYTHON OFF CACHE STRING
71+
"Disable PYTHON when cross-compiling for Android" FORCE)
72+
set(WITH_RDMA OFF CACHE STRING
73+
"Disable RDMA when cross-compiling for Android" FORCE)
74+
endif(ANDROID)
75+
5476
set(THIRD_PARTY_PATH "${PROJ_ROOT}/third_party" CACHE STRING
5577
"A path setting third party libraries download & build directories.")
5678
########################################################################################
@@ -75,7 +97,6 @@ include(flags) # set paddle compile flags
7597
include(cudnn) # set cudnn libraries
7698
include(version) # set PADDLE_VERSION
7799
include(coveralls) # set code coverage
78-
79100
include(configure) # add paddle env configuration
80101

81102
include_directories("${PROJ_ROOT}")

cmake/cblas.cmake

+20-8
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ set(CBLAS_FOUND OFF)
1919
set(INTEL_ROOT "/opt/intel" CACHE PATH "Folder contains intel libs")
2020
set(MKL_ROOT ${INTEL_ROOT}/mkl CACHE PATH "Folder contains MKL")
2121

22-
find_path(MKL_INCLUDE_DIR mkl.h PATHS
22+
find_path(MKL_INC_DIR mkl.h PATHS
2323
${MKL_ROOT}/include)
24-
find_path(MKL_INCLUDE_DIR mkl_lapacke.h PATHS
24+
find_path(MKL_LAPACK_INC_DIR mkl_lapacke.h PATHS
2525
${MKL_ROOT}/include)
2626
find_library(MKL_CORE_LIB NAMES mkl_core PATHS
2727
${MKL_ROOT}/lib
@@ -34,15 +34,19 @@ find_library(MKL_INTEL_LP64 NAMES mkl_intel_lp64 PATHS
3434
${MKL_ROOT}/lib/intel64)
3535

3636

37-
if(MKL_INCLUDE_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
37+
if(MKL_INC_DIR AND MKL_CORE_LIB AND MKL_SEQUENTIAL_LIB AND MKL_INTEL_LP64)
3838
set(CBLAS_PROVIDER MKL)
39-
set(CBLAS_INC_DIR ${MKL_INCLUDE_DIR})
39+
set(CBLAS_INC_DIR ${MKL_INC_DIR})
4040
set(CBLAS_LIBRARIES ${MKL_INTEL_LP64}
4141
${MKL_SEQUENTIAL_LIB}
4242
${MKL_CORE_LIB})
4343
add_definitions(-DPADDLE_USE_MKL)
4444
message(STATUS "Found MKL (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
4545
set(CBLAS_FOUND ON)
46+
if(MKL_LAPACK_INC_DIR)  # test the variable by name; if(${VAR}) would treat the expanded path as a variable name and be false
47+
add_definitions(-DPADDLE_USE_LAPACK)
48+
message(STATUS "Found lapack in MKL (include: ${MKL_LAPACK_INC_DIR})")
49+
endif()
4650
return() # return file.
4751
endif()
4852

@@ -68,13 +72,17 @@ find_library(ATLAS_CBLAS_LIB NAMES cblas libcblas.so.3
6872
find_library(ATLAS_LIB NAMES lapack_atlas liblapack_atlas.so.3
6973
PATHS ${ATLAS_LIB_SEARCH_PATHS})
7074

71-
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB)
75+
if(ATLAS_INC_DIR AND ATLAS_CBLAS_LIB AND ATLAS_LIB AND NOT CBLAS_FOUND)
7276
set(CBLAS_PROVIDER ATLAS)
73-
set(CBLAS_INC_DIR ${ATLAS_INC_DIR} ${ATLAS_CLAPACK_INC_DIR})
77+
set(CBLAS_INC_DIR ${ATLAS_INC_DIR})
7478
set(CBLAS_LIBRARIES ${ATLAS_LIB} ${ATLAS_CBLAS_LIB})
7579
add_definitions(-DPADDLE_USE_ATLAS)
76-
message(STATUS "Found Atlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
80+
message(STATUS "Found ATLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
7781
set(CBLAS_FOUND ON)
82+
if(ATLAS_CLAPACK_INC_DIR)
83+
add_definitions(-DPADDLE_USE_LAPACK)
84+
message(STATUS "Found lapack in ATLAS (include: ${ATLAS_CLAPACK_INC_DIR})")
85+
endif()
7886
return()
7987
endif()
8088

@@ -103,8 +111,12 @@ if(OPENBLAS_INC_DIR AND OPENBLAS_LIB)
103111
set(CBLAS_PROVIDER OPENBLAS)
104112
set(CBLAS_INC_DIR ${OPENBLAS_INC_DIR})
105113
set(CBLAS_LIBRARIES ${OPENBLAS_LIB})
106-
message(STATUS "Found OpenBlas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
114+
message(STATUS "Found OpenBLAS (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
107115
set(CBLAS_FOUND ON)
116+
if(OPENBLAS_LAPACKE_INC_DIR)
117+
add_definitions(-DPADDLE_USE_LAPACK)
118+
message(STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})")
119+
endif()
108120
return()
109121
endif()
110122

cmake/configure.cmake

+11-12
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@ if(NOT WITH_PROFILER)
3232
add_definitions(-DPADDLE_DISABLE_PROFILER)
3333
endif(NOT WITH_PROFILER)
3434

35+
if(NOT CMAKE_CROSSCOMPILING)
36+
if(WITH_AVX AND AVX_FOUND)
37+
set(SIMD_FLAG ${AVX_FLAG})
38+
elseif(SSE3_FOUND)
39+
set(SIMD_FLAG ${SSE3_FLAG})
40+
endif()
41+
endif()
42+
3543
if(NOT WITH_GPU)
3644
add_definitions(-DPADDLE_ONLY_CPU)
3745
add_definitions(-DHPPL_STUB_FUNC)
@@ -48,21 +56,12 @@ else()
4856
message(FATAL_ERROR "Paddle need cudnn to compile")
4957
endif()
5058

51-
if(WITH_AVX)
52-
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${AVX_FLAG}")
53-
else(WITH_AVX)
54-
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SSE3_FLAG}")
55-
endif(WITH_AVX)
59+
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
5660

5761
# Include cuda and cudnn
5862
include_directories(${CUDNN_INCLUDE_DIR})
5963
include_directories(${CUDA_TOOLKIT_INCLUDE})
6064
endif(NOT WITH_GPU)
6165

62-
if(WITH_AVX)
63-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${AVX_FLAG}")
64-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${AVX_FLAG}")
65-
else(WITH_AVX)
66-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SSE3_FLAG}")
67-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SSE3_FLAG}")
68-
endif(WITH_AVX)
66+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SIMD_FLAG}")
67+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAG}")

cmake/cudnn.cmake

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
if(NOT WITH_GPU)
2+
return()
3+
endif()
4+
15
set(CUDNN_ROOT "" CACHE PATH "CUDNN ROOT")
26
find_path(CUDNN_INCLUDE_DIR cudnn.h
37
PATHS ${CUDNN_ROOT} ${CUDNN_ROOT}/include

cmake/external/gflags.cmake

+2
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ ExternalProject_Add(
3333
UPDATE_COMMAND ""
3434
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
3535
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
36+
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
37+
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
3638
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
3739
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
3840
CMAKE_ARGS -DBUILD_TESTING=OFF

cmake/external/glog.cmake

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ ExternalProject_Add(
3535
UPDATE_COMMAND ""
3636
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
3737
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
38+
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
39+
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
3840
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
3941
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
4042
CMAKE_ARGS -DWITH_GFLAGS=ON

cmake/external/gtest.cmake

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ IF(WITH_TESTING)
4343
UPDATE_COMMAND ""
4444
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
4545
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
46+
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
47+
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
4648
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR}
4749
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON
4850
CMAKE_ARGS -DBUILD_GMOCK=ON

cmake/external/openblas.cmake

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ IF(NOT ${CBLAS_FOUND})
5454
"you need to set gfortran compiler: cmake .. -DCMAKE_Fortran_COMPILER=...")
5555
ENDIF(NOT CMAKE_Fortran_COMPILER)
5656

57+
ADD_DEFINITIONS(-DPADDLE_USE_LAPACK)
58+
5759
ExternalProject_Add(
5860
openblas
5961
${EXTERNAL_PROJECT_LOG_ARGS}

cmake/external/python.cmake

+4-4
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,9 @@ ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
219219

220220
ENDIF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
221221

222-
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
223-
INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
224-
225-
IF(NOT WITH_PYTHON)
222+
IF(WITH_PYTHON)
223+
INCLUDE_DIRECTORIES(${PYTHON_INCLUDE_DIR})
224+
INCLUDE_DIRECTORIES(${PYTHON_NUMPY_INCLUDE_DIR})
225+
ELSE()
226226
SET(PYTHON_LIBRARIES "")
227227
ENDIF()

cmake/external/warpctc.cmake

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ ExternalProject_Add(
5050
UPDATE_COMMAND ""
5151
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
5252
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
53+
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
54+
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
5355
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
5456
CMAKE_ARGS -DWITH_GPU=${WITH_GPU}
5557
CMAKE_ARGS -DWITH_OMP=${USE_OMP}

cmake/external/zlib.cmake

+3-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ SET(ZLIB_INCLUDE_DIR "${ZLIB_INSTALL_DIR}/include" CACHE PATH "zlib include dire
2222
IF(WIN32)
2323
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/zlibstatic.lib" CACHE FILEPATH "zlib library." FORCE)
2424
ELSE(WIN32)
25-
set(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
25+
SET(ZLIB_LIBRARIES "${ZLIB_INSTALL_DIR}/lib/libz.a" CACHE FILEPATH "zlib library." FORCE)
2626
ENDIF(WIN32)
2727

2828
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR})
@@ -36,6 +36,8 @@ ExternalProject_Add(
3636
UPDATE_COMMAND ""
3737
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
3838
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
39+
CMAKE_ARGS -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
40+
CMAKE_ARGS -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
3941
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${ZLIB_INSTALL_DIR}
4042
CMAKE_ARGS -DBUILD_SHARED_LIBS=OFF
4143
CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON

cmake/flags.cmake

+13-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
include(CheckCXXCompilerFlag)
33
include(CheckCCompilerFlag)
44
include(CheckCXXSymbolExists)
5+
include(CheckTypeSize)
56

67
function(CheckCompilerCXX11Flag)
78
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
@@ -25,7 +26,7 @@ function(CheckCompilerCXX11Flag)
2526
endfunction()
2627

2728
CheckCompilerCXX11Flag()
28-
LIST(APPEND CMAKE_CXX_FLAGS -std=c++11)
29+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
2930

3031
# safe_set_flag
3132
#
@@ -83,6 +84,17 @@ if(NOT UINT64_MAX_EXISTS)
8384
endif()
8485
endif()
8586

87+
SET(CMAKE_EXTRA_INCLUDE_FILES "pthread.h")
88+
CHECK_TYPE_SIZE(pthread_spinlock_t SPINLOCK_FOUND)
89+
CHECK_TYPE_SIZE(pthread_barrier_t BARRIER_FOUND)
90+
if(SPINLOCK_FOUND)
91+
add_definitions(-DPADDLE_USE_PTHREAD_SPINLOCK)
92+
endif(SPINLOCK_FOUND)
93+
if(BARRIER_FOUND)
94+
add_definitions(-DPADDLE_USE_PTHREAD_BARRIER)
95+
endif(BARRIER_FOUND)
96+
SET(CMAKE_EXTRA_INCLUDE_FILES "")
97+
8698
# Common flags. the compiler flag used for C/C++ sources whenever release or debug
8799
# It does not matter whether gcc supports this flag.
88100
set(COMMON_FLAGS

cmake/simd.cmake

+4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# so that PaddlePaddle can unleash the vectorization power of multicore.
33

44
INCLUDE(CheckCXXSourceRuns)
5+
INCLUDE(CheckCXXSourceCompiles)
56

67
IF(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
78
set(MMX_FLAG "-mmmx")
@@ -17,6 +18,8 @@ ELSEIF(MSVC)
1718
SET(AVX2_FLAG "/arch:AVX2")
1819
ENDIF()
1920

21+
set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS})
22+
2023
# Check MMX
2124
set(CMAKE_REQUIRED_FLAGS ${MMX_FLAG})
2225
CHECK_CXX_SOURCE_RUNS("
@@ -73,4 +76,5 @@ int main()
7376
return 0;
7477
}" AVX2_FOUND)
7578

79+
set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
7680
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND)

cmake/system.cmake

+6
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,12 @@ MARK_AS_ADVANCED(HOST_SYSTEM CPU_CORES)
6767
MESSAGE(STATUS "Found Paddle host system: ${HOST_SYSTEM}")
6868
MESSAGE(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")
6969

70+
IF(DEFINED CMAKE_SYSTEM_NAME)
71+
IF(${CMAKE_SYSTEM_NAME} STREQUAL "Android")
72+
SET(ANDROID TRUE)
73+
ENDIF()
74+
ENDIF()
75+
7076
# external dependencies log output
7177
SET(EXTERNAL_PROJECT_LOG_ARGS
7278
LOG_DOWNLOAD 0 # Wrap download in script to log output

cmake/util.cmake

+4
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ function(link_paddle_exe TARGET_NAME)
9090
${RDMA_LD_FLAGS}
9191
${RDMA_LIBS})
9292

93+
if(ANDROID)
94+
target_link_libraries(${TARGET_NAME} log)
95+
endif(ANDROID)
96+
9397
add_dependencies(${TARGET_NAME} ${external_project_dependencies})
9498
endfunction()
9599

paddle/cuda/include/hl_cpu_matrix_kernel.cuh

+4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,11 @@ limitations under the License. */
1717

1818
#include <stdio.h>
1919
#include "hl_base.h"
20+
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
21+
#include "hl_neon_matrix_kernel.cuh"
22+
#else
2023
#include "hl_sse_matrix_kernel.cuh"
24+
#endif
2125

2226
/**
2327
* @brief cpu element wise unary operator.

paddle/cuda/include/hl_matrix_base.cuh

+2
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ typedef BaseOp SSESquaredDiff;
6666
typedef BaseOp SSEFirst;
6767
typedef BaseOp SSESecond;
6868
typedef BaseOp SSEClassificationError;
69+
#elif defined(__ARM_NEON__) || defined(__ARM_NEON)  // ARM NEON target; accept both macro spellings
70+
#include "hl_matrix_base_neon.cuh"
6971
#else
7072
#include "hl_matrix_base_sse.cuh"
7173
#endif

0 commit comments

Comments
 (0)