Merged
28 commits
1e70140
Fix bug of reduce_sum op. When input.numel() > INT32_MAX, its result
GhostScreaming Sep 14, 2022
e1f08a2
Merge branch 'reduce_sum' of https://github.com/GhostScreaming/Paddle…
GhostScreaming Sep 14, 2022
ff1bfbc
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Sep 17, 2022
5b7bc39
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Sep 26, 2022
f5acb14
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Oct 13, 2022
8ae9384
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Oct 18, 2022
2ff2de4
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Oct 21, 2022
91c0ae4
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Nov 7, 2022
a2511ff
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Nov 11, 2022
8f9332a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Dec 7, 2022
506f00c
Remove climits.
GhostScreaming Dec 7, 2022
7fba456
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Dec 7, 2022
d368607
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Jan 4, 2023
1b0eaea
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Mar 2, 2023
bfda680
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Mar 22, 2023
034e157
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Apr 12, 2023
3c11844
Merge branch 'develop' of https://github.com/GhostScreaming/Paddle in…
GhostScreaming Apr 17, 2023
ee14ddd
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Apr 21, 2023
a9f1a45
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Apr 21, 2023
d75838a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Jun 2, 2023
2ba1921
Merge branch 'develop' of https://github.com/GhostScreaming/Paddle in…
GhostScreaming Jun 9, 2023
a48d2c8
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Jun 9, 2023
75fbed5
Fix problem of pickle and NCCL_P2P_DISABLE in distributed testcases in
GhostScreaming Jun 9, 2023
49089f1
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Jun 13, 2023
0ed0f85
Fix problem of TimeOut of distributed testcases under cuda12.
GhostScreaming Jun 13, 2023
5bab68a
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
GhostScreaming Jun 14, 2023
2461e86
Remove useless modification.
GhostScreaming Jun 14, 2023
a13ea77
Remove useless modification.
GhostScreaming Jun 14, 2023
10 changes: 5 additions & 5 deletions test/auto_parallel/CMakeLists.txt
@@ -24,7 +24,7 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
py_test_modules(test_optimization_tuner_api MODULES
test_optimization_tuner_api)
set_tests_properties(test_optimization_tuner_api
- PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 100)
+ PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 120)
py_test_modules(test_converter MODULES test_converter)
set_tests_properties(test_converter PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE"
TIMEOUT 50)
@@ -48,10 +48,10 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 50)
py_test_modules(test_pass_sharding MODULES test_pass_sharding)
set_tests_properties(test_pass_sharding
- PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 50)
+ PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE" TIMEOUT 100)
py_test_modules(test_pass_amp MODULES test_pass_amp)
set_tests_properties(test_pass_amp PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE"
- TIMEOUT 50)
+ TIMEOUT 80)
py_test_modules(test_amp_o2_pass MODULES test_amp_o2_pass)
set_tests_properties(test_amp_o2_pass PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE"
TIMEOUT 50)
@@ -85,11 +85,11 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
py_test_modules(test_tuning_recompute MODULES test_tuning_recompute)
set_tests_properties(test_tuning_recompute PROPERTIES TIMEOUT 300)
py_test_modules(test_fused_linear_pass MODULES test_fused_linear_pass)
- set_tests_properties(test_fused_linear_pass PROPERTIES TIMEOUT 20)
+ set_tests_properties(test_fused_linear_pass PROPERTIES TIMEOUT 40)
py_test_modules(test_align_tool MODULES test_align_tool)
set_tests_properties(test_align_tool PROPERTIES TIMEOUT 20)
py_test_modules(test_pass_base_list MODULES test_pass_base_list)
- set_tests_properties(test_pass_base_list PROPERTIES TIMEOUT 20)
+ set_tests_properties(test_pass_base_list PROPERTIES TIMEOUT 40)
py_test_modules(test_fuse_adamw_pass MODULES test_fuse_adamw_pass)
set_tests_properties(test_fuse_adamw_pass PROPERTIES TIMEOUT 20)
py_test_modules(test_rule_based_tuner_o2 MODULES test_rule_based_tuner_o2)
14 changes: 7 additions & 7 deletions test/collective/CMakeLists.txt
@@ -107,7 +107,7 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
test_collective_alltoall_single_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_alltoall_single_api
- PROPERTIES TIMEOUT "120" LABELS "RUN_TYPE=DIST")
+ PROPERTIES TIMEOUT "160" LABELS "RUN_TYPE=DIST")
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
@@ -137,10 +137,10 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
if(${CUDA_ARCH_NAME} STREQUAL "Ampere")
set_tests_properties(test_collective_broadcast_api
- PROPERTIES TIMEOUT "360" LABELS "RUN_TYPE=DIST")
+ PROPERTIES TIMEOUT "500" LABELS "RUN_TYPE=DIST")
else()
set_tests_properties(test_collective_broadcast_api
- PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
+ PROPERTIES TIMEOUT "450" LABELS "RUN_TYPE=DIST")
endif()
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
@@ -178,7 +178,7 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
test_collective_isend_irecv_api MODULES test_collective_isend_irecv_api
ENVS "http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_isend_irecv_api
- PROPERTIES TIMEOUT "120" LABELS "RUN_TYPE=DIST")
+ PROPERTIES TIMEOUT "160" LABELS "RUN_TYPE=DIST")
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
@@ -240,10 +240,10 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
if(${CUDA_ARCH_NAME} STREQUAL "Ampere")
set_tests_properties(test_collective_reduce_scatter_api
- PROPERTIES TIMEOUT "210" LABELS "RUN_TYPE=DIST")
+ PROPERTIES TIMEOUT "360" LABELS "RUN_TYPE=DIST")
else()
set_tests_properties(test_collective_reduce_scatter_api
- PROPERTIES TIMEOUT "150" LABELS "RUN_TYPE=DIST")
+ PROPERTIES TIMEOUT "250" LABELS "RUN_TYPE=DIST")
endif()
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
@@ -258,7 +258,7 @@ if((WITH_GPU OR WITH_ROCM) AND (LINUX))
test_collective_scatter_api MODULES test_collective_scatter_api ENVS
"http_proxy=;https_proxy=;PYTHONPATH=..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_collective_scatter_api
- PROPERTIES TIMEOUT "180" LABELS "RUN_TYPE=DIST")
+ PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
endif()
if((WITH_GPU OR WITH_ROCM) AND (LINUX))
py_test_modules(
8 changes: 4 additions & 4 deletions test/collective/fleet/CMakeLists.txt
@@ -237,7 +237,7 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
START_BASH
../../legacy_test/dist_test.sh
TIMEOUT
"120"
"160"
LABELS
"RUN_TYPE=DIST"
ENVS
@@ -682,13 +682,13 @@ if(LOCAL_ALL_ARCH AND LOCAL_ALL_PLAT)
START_BASH
../../legacy_test/dist_test.sh
TIMEOUT
"120"
"240"
LABELS
"RUN_TYPE=DIST"
ENVS
"PADDLE_DIST_UT_PORT=21272;http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python"
)
- set_tests_properties(test_ir_pass_pipeline PROPERTIES TIMEOUT "120")
+ set_tests_properties(test_ir_pass_pipeline PROPERTIES TIMEOUT "240")
endif()
if((WITH_GPU OR WITH_ROCM) AND LOCAL_ALL_PLAT)
bash_test_modules(
@@ -922,7 +922,7 @@ if((WITH_GPU) AND (LINUX))
test_dygraph_dist_save_load MODULES test_dygraph_dist_save_load ENVS
"http_proxy=;https_proxy=;PYTHONPATH=../..:${PADDLE_BINARY_DIR}/python")
set_tests_properties(test_dygraph_dist_save_load
- PROPERTIES TIMEOUT "200" LABELS "RUN_TYPE=DIST")
+ PROPERTIES TIMEOUT "300" LABELS "RUN_TYPE=DIST")
endif()
if((WITH_GPU) AND (LINUX))
py_test_modules(
5 changes: 4 additions & 1 deletion test/collective/fleet/dygraph_group_sharded_stage3.py
@@ -366,7 +366,10 @@ def test_stage2_stage3():
# bfp16
nccl_version = core.nccl_version()

- if nccl_version >= 21000:
+ if (
+     nccl_version >= 21000
+     and paddle.device.cuda.get_device_properties().major >= 8
+ ):
stage2_params = train_mlp(
mlp11,
sharding_stage=2,
Original file line number Diff line number Diff line change
@@ -215,7 +215,10 @@ def test_stage3_offload():

# bfp16 offload
nccl_version = core.nccl_version()
- if nccl_version >= 21000:
+ if (
+     nccl_version >= 21000
+     and paddle.device.cuda.get_device_properties().major >= 8
+ ):
stage3_params = train_mlp(mlp7, use_pure_fp16=True, use_bfp16=True)
stage3_params_offload = train_mlp(
mlp8, use_pure_fp16=True, offload=True, use_bfp16=True
5 changes: 4 additions & 1 deletion test/collective/fleet/hybrid_parallel_mp_bf16.py
@@ -60,5 +60,8 @@ def train_batch(self, batch, model, optimizer, is_mp):


if __name__ == "__main__":
- if check_nccl_version_for_bf16():
+ if (
+     check_nccl_version_for_bf16()
+     and paddle.device.cuda.get_device_properties().major >= 8
+ ):
unittest.main()
5 changes: 4 additions & 1 deletion test/collective/fleet/hybrid_parallel_pp_bf16.py
@@ -165,5 +165,8 @@ def test_pp_model(self):


if __name__ == "__main__":
- if check_nccl_version_for_bf16():
+ if (
+     check_nccl_version_for_bf16()
+     and paddle.device.cuda.get_device_properties().major >= 8
+ ):
unittest.main()
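The same guard appears in each of the bf16 tests above: the NCCL version check is paired with a GPU compute-capability check, since bfloat16 kernels and collectives need NCCL 2.10+ and an Ampere-or-newer device (compute capability 8.0+). A minimal standalone sketch of that pattern follows, for context only; the `can_run_bf16_cases` helper name and the `paddle.framework` import path are assumptions for illustration, not part of this PR.

import paddle
from paddle.framework import core  # assumed import path; the tests above already have `core` in scope

def can_run_bf16_cases():
    # Require a CUDA build with at least one visible GPU before probing device properties.
    if not paddle.device.is_compiled_with_cuda() or paddle.device.cuda.device_count() == 0:
        return False
    # NCCL encodes its version as major*10000 + minor*100 + patch, so 21000 is NCCL 2.10.0.
    nccl_ok = core.nccl_version() >= 21000
    # bfloat16 needs compute capability 8.0 or higher (Ampere or newer).
    device_ok = paddle.device.cuda.get_device_properties().major >= 8
    return nccl_ok and device_ok

if __name__ == "__main__":
    print("bf16 cases enabled" if can_run_bf16_cases() else "bf16 cases skipped")

The CMake changes in this PR are orthogonal to this guard: they only raise CTest TIMEOUT values for distributed test targets that run longer under CUDA 12.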
2 changes: 1 addition & 1 deletion test/distributed_passes/CMakeLists.txt
@@ -29,6 +29,6 @@ endif()
foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS "NVIDIA_TF32_OVERRIDE=0")
list(APPEND DIST_TEST_OPS ${TEST_OP})
- set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 200)
+ set_tests_properties(${TEST_OP} PROPERTIES TIMEOUT 250)
set_tests_properties(${TEST_OP} PROPERTIES LABELS "RUN_TYPE=DIST")
endforeach()
2 changes: 1 addition & 1 deletion test/legacy_test/CMakeLists.txt
@@ -1310,4 +1310,4 @@ set_tests_properties(test_reduce_op_static_build PROPERTIES TIMEOUT 500)
set_tests_properties(test_sync_batch_norm_op_static_build
PROPERTIES LABELS "RUN_TYPE=DIST")
set_tests_properties(test_sync_batch_norm_op_static_build PROPERTIES TIMEOUT
- 160)
+ 250)