Skip to content

Commit cc0645c

Browse files
authored
Merge branch 'develop' into offload_scheduler
2 parents 65cb396 + 7cb4953 commit cc0645c

File tree

428 files changed

+19635
-4441
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

428 files changed

+19635
-4441
lines changed

CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ if(APPLE AND WITH_ARM)
1717
cmake_minimum_required(VERSION 3.19.2)
1818
cmake_policy(VERSION 3.19.2)
1919
else(APPLE AND WITH_ARM)
20-
cmake_minimum_required(VERSION 3.10)
20+
cmake_minimum_required(VERSION 3.15)
2121
cmake_policy(VERSION 3.10)
2222
endif(APPLE AND WITH_ARM)
23+
# Needed so that get_property(... PROPERTY LOCATION) can read the location of a static lib
24+
# https://cmake.org/cmake/help/v3.0/policy/CMP0026.html?highlight=cmp0026
25+
cmake_policy(SET CMP0026 OLD)
2326
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
2427
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
2528
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})

cmake/external/boost.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ set(BOOST_URL "http://paddlepaddledeps.bj.bcebos.com/${BOOST_TAR}.tar.gz" CACH
3232
MESSAGE(STATUS "BOOST_VERSION: ${BOOST_VER}, BOOST_URL: ${BOOST_URL}")
3333

3434
set(BOOST_PREFIX_DIR ${THIRD_PARTY_PATH}/boost)
35-
3635
set(BOOST_INCLUDE_DIR "${THIRD_PARTY_PATH}/boost/src/extern_boost" CACHE PATH "boost include directory." FORCE)
3736
set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1)
3837
include_directories(${BOOST_INCLUDE_DIR})

cmake/external/paddle2onnx.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,15 @@ set(PADDLE2ONNX_OPTIONAL_ARGS
5353
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
5454
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
5555
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
56+
-DCMAKE_CXX_STANDARD=14
5657
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
5758
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
5859
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
5960
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
6061
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
6162
-DONNX_CUSTOM_PROTOC_PATH=${PROTOC_BIN_PATH}
6263
-DWITH_STATIC=OFF
64+
-DMSVC_STATIC_CRT=${MSVC_STATIC_CRT}
6365
-DCMAKE_INSTALL_PREFIX=${PADDLE2ONNX_INSTALL_DIR}
6466
-DCMAKE_INSTALL_LIBDIR=${PADDLE2ONNX_INSTALL_DIR}/${LIBDIR}
6567
-DCMAKE_POSITION_INDEPENDENT_CODE=ON

cmake/external/xpu.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ endif()
1717
# ubuntu and centos: use output by XDNN API team
1818
if(NOT DEFINED XPU_XDNN_BASE_URL)
1919
SET(XPU_XDNN_BASE_URL_WITHOUT_DATE "https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
20-
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220412")
20+
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220425")
2121
else()
2222
SET(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}")
2323
endif()

cmake/flags.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@ if(NOT APPLE)
163163
set(COMMON_FLAGS
164164
${COMMON_FLAGS}
165165
-Wno-format-truncation # Warning in boost gcc 8.2
166-
-Wno-error=cast-function-type # Warning in boost gcc 8.2
167166
-Wno-error=parentheses # Warning in boost gcc 8.2
168167
-Wno-error=catch-value # Warning in boost gcc 8.2
169168
-Wno-error=nonnull-compare # Warning in boost gcc 8.2

cmake/generic.cmake

Lines changed: 63 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,36 @@ function(create_static_lib TARGET_NAME)
176176
endif()
177177
endfunction()
178178

179+
# create_dummy_static_lib(<target> LIBS <lib>... [DEPS <dep>...] [LIMIT <n>])
#
# Splits LIBS (duplicates removed) into consecutive groups, merges every group
# into an intermediate static library named <target>_static_<i> (i starts at 1)
# through merge_static_libs(), and finally declares <target> via cc_library()
# with all intermediate libraries as its DEPS.
#
#   LIBS   libraries to merge.
#   DEPS   optional; linked into every intermediate library.
#   LIMIT  grouping threshold. A group is closed once it holds MORE than LIMIT
#          entries (so a group contains up to LIMIT + 1 libraries) or once the
#          last library has been appended. When LIMIT is omitted, all
#          libraries end up in one single group.
function(create_dummy_static_lib TARGET_NAME)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs LIBS DEPS LIMIT)
  cmake_parse_arguments(merge "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Guarded: there is nothing to de-duplicate when no LIBS were passed.
  if(merge_LIBS)
    list(REMOVE_DUPLICATES merge_LIBS)
  endif()
  list(LENGTH merge_LIBS libs_len)

  # Without LIMIT the comparison below would be evaluated with an empty
  # operand; fall back to "everything in one group".
  if("${merge_LIMIT}" STREQUAL "")
    set(limit ${libs_len})
  else()
    set(limit ${merge_LIMIT})
  endif()

  # A function scope starts as a copy of the caller's variables, so reset the
  # accumulators explicitly instead of appending to whatever was inherited.
  set(merge_list "")
  set(${TARGET_NAME}_list "")

  set(index 1)   # suffix of the next intermediate library
  set(offset 1)  # 1-based position of the current lib within merge_LIBS
  foreach(lib ${merge_LIBS})
    list(APPEND merge_list ${lib})
    list(LENGTH merge_list listlen)
    # Close the current group when it exceeds the limit or at the last lib.
    if((listlen GREATER limit) OR (offset EQUAL libs_len))
      message("Merge and generate static library: ${TARGET_NAME}_static_${index}")
      merge_static_libs(${TARGET_NAME}_static_${index} ${merge_list})
      if(merge_DEPS)
        target_link_libraries(${TARGET_NAME}_static_${index} ${merge_DEPS})
      endif()
      set(merge_list "")
      list(APPEND ${TARGET_NAME}_list ${TARGET_NAME}_static_${index})
      math(EXPR index "${index}+1")
    endif()
    math(EXPR offset "${offset}+1")
  endforeach()

  cc_library(${TARGET_NAME} DEPS ${${TARGET_NAME}_list})
endfunction()
208+
179209
function(merge_static_libs TARGET_NAME)
180210
set(libs ${ARGN})
181211
list(REMOVE_DUPLICATES libs)
@@ -193,92 +223,61 @@ function(merge_static_libs TARGET_NAME)
193223
# also help to track dependencies.
194224
set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
195225

196-
if(APPLE) # Use OSX's libtool to merge archives
197-
# Make the generated dummy source file depended on all static input
198-
# libs. If input lib changes,the source file is touched
199-
# which causes the desired effect (relink).
200-
add_custom_command(OUTPUT ${target_SRCS}
201-
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
202-
DEPENDS ${libs})
203-
204-
# Generate dummy static lib
205-
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
206-
207-
target_link_libraries(${TARGET_NAME} ${libs_deps})
226+
# Make the generated dummy source file depended on all static input
227+
# libs. If an input lib changes, the source file is touched,
228+
# which causes the desired effect (relink).
229+
add_custom_command(OUTPUT ${target_SRCS}
230+
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
231+
DEPENDS ${libs})
232+
233+
# Generate dummy static lib
234+
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
235+
target_link_libraries(${TARGET_NAME} ${libs_deps})
208236

237+
# OSX: use 'libtool' to merge archives
238+
if(APPLE)
209239
foreach(lib ${libs})
210240
# Get the file names of the libraries to be merged
211241
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
212242
endforeach()
213243
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
244+
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
214245
COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
215246
COMMAND /usr/bin/libtool -static -o "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles}
216247
)
217-
endif(APPLE)
218-
if(LINUX) # general UNIX: use "ar" to extract objects and re-add to a common lib
219-
set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)
220-
221-
foreach(lib ${libs})
222-
set(objlistfile ${target_DIR}/${lib}.objlist) # list of objects in the input library
223-
set(objdir ${target_DIR}/${lib}.objdir)
224-
225-
add_custom_command(OUTPUT ${objdir}
226-
COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
227-
DEPENDS ${lib})
248+
endif()
228249

229-
add_custom_command(OUTPUT ${objlistfile}
230-
COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
231-
COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile}
232-
DEPENDS ${lib} ${objdir}
233-
WORKING_DIRECTORY ${objdir})
250+
# LINUX: use "ar" to extract objects and re-add to a common lib
251+
if(LINUX)
252+
set(mri_file ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.mri CACHE INTERNAL "phi_static.mri file")
253+
get_property(ABS_MERGE_LIB_PATH TARGET ${TARGET_NAME} PROPERTY LOCATION)
254+
file(WRITE ${mri_file} "create ${ABS_MERGE_LIB_PATH}\n")
234255

235-
list(APPEND target_OBJS "${objlistfile}")
256+
foreach(lib ${libs})
257+
get_property(ABS_LIB_PATH TARGET ${lib} PROPERTY LOCATION)
258+
file(APPEND ${mri_file} "addlib ${ABS_LIB_PATH}\n")
236259
endforeach()
237-
238-
# Make the generated dummy source file depended on all static input
239-
# libs. If input lib changes,the source file is touched
240-
# which causes the desired effect (relink).
241-
add_custom_command(OUTPUT ${target_SRCS}
242-
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
243-
DEPENDS ${libs} ${target_OBJS})
244-
245-
# Generate dummy staic lib
246-
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
247-
248-
target_link_libraries(${TARGET_NAME} ${libs_deps})
249-
250-
# Get the file name of the generated library
251-
set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
260+
file(APPEND ${mri_file} "save\nend\n")
252261

253262
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
254-
COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
255-
COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
256-
WORKING_DIRECTORY ${target_DIR})
257-
endif(LINUX)
258-
if(WIN32) # windows do not support gcc/nvcc combined compiling. Use msvc lib.exe to merge libs.
259-
# Make the generated dummy source file depended on all static input
260-
# libs. If input lib changes,the source file is touched
261-
# which causes the desired effect (relink).
262-
add_custom_command(OUTPUT ${target_SRCS}
263-
COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
264-
DEPENDS ${libs})
265-
# Generate dummy staic lib
266-
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:merge_static_libs")
267-
268-
target_link_libraries(${TARGET_NAME} ${libs_deps})
263+
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.a"
264+
COMMAND ${CMAKE_AR} -M < ${mri_file}
265+
COMMAND ${CMAKE_RANLIB} "$<TARGET_FILE:${TARGET_NAME}>")
266+
endif()
269267

268+
# Windows does not support gcc/nvcc combined compiling. Use msvc 'lib.exe' to merge libs.
269+
if(WIN32)
270270
foreach(lib ${libs})
271-
# Get the file names of the libraries to be merged
272271
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
273272
endforeach()
274-
# msvc will put libarary in directory of "/Release/xxxlib" by default
275-
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
273+
# The msvc compiler puts the library in the "/Release/xxxlib" directory by default
276274
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
275+
COMMENT "Merge and generate static lib: lib${TARGET_NAME}.lib"
277276
COMMAND cmake -E make_directory $<TARGET_FILE_DIR:${TARGET_NAME}>
278277
COMMAND lib /OUT:$<TARGET_FILE:${TARGET_NAME}> ${libfiles}
279278
)
280-
endif(WIN32)
281-
endfunction(merge_static_libs)
279+
endif()
280+
endfunction()
282281

283282
function(check_coverage_opt TARGET_NAME SRCS)
284283
if(WITH_COVERAGE AND WITH_INCREMENTAL_COVERAGE)
@@ -1076,4 +1075,3 @@ function(math_library TARGET)
10761075
cc_library(${TARGET} SRCS ${cc_srcs} DEPS ${math_library_DEPS} ${math_common_deps})
10771076
endif()
10781077
endfunction()
1079-

cmake/third_party.cmake

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -357,10 +357,8 @@ if (WITH_PSCORE)
357357
include(external/libmct) # download, build, install libmct
358358
list(APPEND third_party_deps extern_libmct)
359359

360-
if (WITH_HETERPS)
361-
include(external/rocksdb) # download, build, install libmct
362-
list(APPEND third_party_deps extern_rocksdb)
363-
endif()
360+
include(external/rocksdb) # download, build, install rocksdb
361+
list(APPEND third_party_deps extern_rocksdb)
364362
endif()
365363

366364
if(WITH_XBYAK)
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,21 @@
1-
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi phi_api eager_api)
2-
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi phi_api string_helper)
1+
cc_library(processgroup SRCS ProcessGroup.cc DEPS phi_api eager_api)
2+
cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup phi_api string_helper)
33
cc_library(nccl_tool SRCS NCCLTools.cc DEPS place cuda_stream enforce collective_helper device_context)
44

55
if (WITH_DISTRIBUTE)
6-
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi phi_api eager_api gloo_wrapper)
6+
cc_library(processgroup_gloo SRCS ProcessGroupGloo.cc DEPS phi_api eager_api gloo_wrapper)
77
endif()
88

99
if(WITH_NCCL)
10-
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
10+
cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
1111
if (WITH_DISTRIBUTE AND WITH_PSCORE)
12-
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
12+
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc NCCLTools.cc Common.cc DEPS place cuda_stream enforce collective_helper device_context phi_api eager_api)
1313
endif()
1414
endif()
1515

1616
if(WITH_ASCEND_CL)
17-
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
17+
cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
1818
if (WITH_DISTRIBUTE AND WITH_PSCORE)
19-
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
19+
cc_library(processgroup_heter SRCS ProcessGroupHeter.cc HCCLTools.cc Common.cc DEPS place npu_stream enforce collective_helper device_context phi_api eager_api)
2020
endif()
2121
endif()

paddle/fluid/distributed/collective/ProcessGroupHeter.cc

100644100755
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::AllReduce(
116116
HeterClient* client_ =
117117
HeterClient::GetInstance({switch_endpoint_}, {}, 0).get();
118118
auto dense_cpu_tensor = cpu_tensors[0];
119-
std::vector<int> send_size;
119+
std::vector<int64_t> send_size;
120120
send_size.push_back(dense_cpu_tensor.numel());
121121
int ret = client_->Send(
122122
gid_, {dense_cpu_tensor.name()}, send_size, dense_cpu_tensor.data(),
@@ -212,7 +212,7 @@ std::shared_ptr<ProcessGroup::Task> ProcessGroupHeter::Broadcast(
212212
HeterClient::GetInstance({switch_endpoint_}, {}, 0).get();
213213
auto dense_cpu_tensor = cpu_tensors[0];
214214
if (gloo_rank_ == 0) {
215-
std::vector<int> send_size;
215+
std::vector<int64_t> send_size;
216216
send_size.push_back(dense_cpu_tensor.numel());
217217
int ret = client_->Send(
218218
gid_, {dense_cpu_tensor.name()}, send_size,
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
#include <queue>
17+
#include <unordered_map>
18+
19+
namespace paddle {
20+
namespace distributed {
21+
// Keeps, for every shard, a bounded min-heap holding the largest values
// pushed to that shard, and reports an aggregate of the per-shard heap tops.
class TopkCalculator {
 public:
  // shard_num: number of shards; valid shard ids are 0 .. shard_num - 1.
  // k:         total capacity, divided evenly between the shards (each shard
  //            keeps at least one value).
  // A non-positive shard_num creates no shards at all: push() then always
  // returns false and top() returns 0 (instead of dividing by zero).
  TopkCalculator(int shard_num, size_t k)
      : _shard_num(shard_num), _total_max_size(k), _shard_max_size(1) {
    if (shard_num <= 0) {
      return;
    }
    _shard_max_size = _total_max_size / static_cast<size_t>(shard_num);
    if (_shard_max_size < 1) {
      _shard_max_size = 1;
    }
    for (int i = 0; i < shard_num; ++i) {
      _mpq.emplace(i, MinHeap());
    }
  }
  ~TopkCalculator() {}

  // Offers `value` to the heap of `shard_id`.
  // Returns false when the shard id is unknown, true otherwise (whether or
  // not the value was actually retained).
  bool push(int shard_id, double value) {
    auto it = _mpq.find(shard_id);
    if (it == _mpq.end()) {
      return false;
    }
    auto &pq = it->second;
    if (pq.size() < _shard_max_size) {
      pq.push(value);
    } else if (pq.top() < value) {
      // Heap is full: evict the smallest retained value in favour of the
      // larger newcomer.
      pq.pop();
      pq.push(value);
    }
    return true;
  }

  // TODO: run one more heap sort to merge the results of the individual
  // shards.
  // Returns the sum of every non-empty shard's smallest retained value,
  // divided by the shard count and truncated to int. Returns 0 when there
  // are no shards.
  int top() const {
    if (_mpq.empty()) {
      return 0;
    }
    double total = 0;
    for (const auto &item : _mpq) {
      const auto &pq = item.second;
      if (!pq.empty()) {
        total += pq.top();
      }
    }
    return static_cast<int>(total / _shard_num);
  }

 private:
  // Min-heap: top() is the smallest value currently retained.
  using MinHeap = std::priority_queue<double, std::vector<double>,
                                      std::greater<double>>;

  std::unordered_map<int, MinHeap> _mpq;  // shard id -> heap of that shard
  int _shard_num;
  size_t _total_max_size;
  size_t _shard_max_size;  // per-shard capacity, always >= 1
};
68+
69+
} // namespace distributed
70+
} // namespace paddle

0 commit comments

Comments
 (0)