Merged
Changes from all commits
527 commits
c9a334e
add VecCastCUDAKernel (#30296)
zhangting2020 Jan 15, 2021
1d7bf1d
Update voc dataset url (#30450)
LielinJiang Jan 15, 2021
c5ffad1
[oneDNN] Refactor fuse pass helper functions to one place. (#30460)
arogowie-intel Jan 16, 2021
11e78eb
Modify the calculation logic of LambOptimizer (#29313)
gfwm2013 Jan 17, 2021
5d8d463
Collect weight threshold for lstm op in post_training_quantization (#…
juncaipeng Jan 18, 2021
d8a9ba5
fix random seed in nll_loss unittest test=develop (#30468)
lijianshe02 Jan 18, 2021
16ba0ab
Recompute Offload: fixed bug in memcpy (#30484)
JZ-LIANG Jan 18, 2021
18ecd43
Avoid bug on 'MAC python3.5/6'. (#30485)
hbwx24 Jan 18, 2021
5e5c282
fix range op crash in dygraph xpu place (#30469)
taixiurong Jan 18, 2021
bd97192
if pybind.cc changed, generate total report, test=develop (#30514)
wanghuancoder Jan 18, 2021
8489d4f
optimize batch_norm & pool op for kunlun (#30490)
QingshuChen Jan 18, 2021
843dc3c
[Kunlun]PR3: add xpu executor, multi xpu card train function optimiza…
vslyu Jan 18, 2021
bdae7ed
Fix potential port conflicts. (#30508)
gongweibao Jan 18, 2021
34bf8df
avoid calling cast twice (#30527)
zhangting2020 Jan 18, 2021
40ede12
Ascend Framework Part1: OP & Wrapper (#30281)
hutuxian Jan 18, 2021
e207fe6
Ascend Framework Part2: pybind files (#30410)
hutuxian Jan 18, 2021
9fec161
Ascend Framework Part3: Ascend Parser (#30391)
hutuxian Jan 18, 2021
c0fb03a
Supplement PR29988(https://github.com/PaddlePaddle/Paddle/pull/29988)…
hbwx24 Jan 18, 2021
ce6777f
Fix bug of supporting channelwise dygraph quantized model, test=devel…
juncaipeng Jan 18, 2021
59ad6ff
delete empty line of pybing.cc, test=develop (#30529)
wanghuancoder Jan 19, 2021
7a0a576
fix adamw lr_to_coeff is fixed when dygraph (#30526)
wangxicoding Jan 19, 2021
66c514c
[2.0 API] device guard (#30307)
zhangting2020 Jan 19, 2021
28eb7b6
fix logs dir error with auto retry, test=document_fix (#30466)
Avin0323 Jan 19, 2021
7043b8c
support layer_norm fp16 in dygraph amp (#30430)
zhiqiu Jan 19, 2021
d849ecc
update kunlun dependence for aarch64 & sunway platform (#30516)
QingshuChen Jan 19, 2021
00554b3
fix error message of Inplace strategy (#30520)
pangyoki Jan 19, 2021
81217a9
unify calling cudaSetDevice (#30470)
zhiqiu Jan 19, 2021
f30d005
Fix the compiling error of update_loss_scaling when using cuda9. (#30…
wzzju Jan 19, 2021
fb20ec9
fix bug of multicard grad ncclAllReduce (#30553)
zhwesky2010 Jan 19, 2021
549855a
add rmsprop_op_xpu test=kunlun (#30493)
ykkk2333 Jan 19, 2021
572c466
[Prepare for MultiProcess xpu] unified gen nccl id, refine imperative…
wangxicoding Jan 19, 2021
ff25c5b
Fix bug: GetAttrValue should deal with attr with attrType vector<doub…
liym27 Jan 19, 2021
bbea5a1
The new unit test cannot have the same name as the existing unit test…
XieYunshen Jan 19, 2021
d1b25ed
add some RecordEvent, for dygraph timeline (#30299)
wanghuancoder Jan 20, 2021
5067e3a
[Dy2Static]Enhance check of TracedLayers out vars (#30576)
Aurelius84 Jan 20, 2021
c9e78a2
add trainers for pserver (#30523)
seiriosPlus Jan 20, 2021
9dd71c7
disable test_analyzer_detect (#30541)
luotao1 Jan 20, 2021
621bc4f
[Dy2static]Fix paddle prefix in is_paddle_api (#30569)
Aurelius84 Jan 20, 2021
8126a41
fix the bug of all_reduce pipeline gradient multiple times (#30437)
Jan 20, 2021
27a5c0c
fix layers train eval bug (#30580)
wanghuancoder Jan 20, 2021
2d5758c
update. (#30585)
jiweibo Jan 20, 2021
1386200
Add fleet amp_init() (#30572)
huangxu96 Jan 20, 2021
358106f
make abs op support complex types (#30375)
MingMingShangTian Jan 20, 2021
9077347
use nvtx push pop in timeline (#30567)
wanghuancoder Jan 20, 2021
9674e44
optimize windows CI, clear tp cache,polish code,improve level of msvc…
zhwesky2010 Jan 20, 2021
ca33821
Extend the timeout of unit test 'test_static_save_load' (#30599)
hbwx24 Jan 20, 2021
e5b0d9e
[Kunlun] Add condition_variable and notify() in BindThreadedSSAGraphE…
vslyu Jan 20, 2021
7e671c0
optimize unity build (#30195)
Avin0323 Jan 20, 2021
5013c67
fix softmax bug for multi_card in kunlun (#30600)
QingshuChen Jan 20, 2021
10271dd
support reduce_max op on kunlun (#30581)
tangzhiyi11 Jan 20, 2021
430f844
Fix the error of save_quantized_model (#30583)
gfwm2013 Jan 20, 2021
dfdb035
- Disabling oneDNN inplace pass (#30588)
jczaja Jan 20, 2021
7e9f336
update document of paddle.vision.dataset, test=document (#30414)
Jan 20, 2021
4a9de93
Fix the bug in fleet amp_init. (#30606)
wzzju Jan 21, 2021
33bf6eb
revert external gflags, test=develop (#30623)
Shixiaowei02 Jan 21, 2021
1bebc09
solve build gpu task core (#30626)
Thunderbrook Jan 21, 2021
cf9bdb9
extend trt ut timeout threshold (#30537)
Jan 21, 2021
1f5841c
[ROCM] update cmake and dockerfile, test=develop (#30598)
qili93 Jan 21, 2021
9514b4a
Fix scatter grad bug (#30604)
ForFishes Jan 22, 2021
39fac84
delete the lite meta info because of ccache, test=develop (#30644)
Shixiaowei02 Jan 22, 2021
f400bd7
set WITH_INFERENCE_API_TEST=ON on Windows with GPU (#30090)
Avin0323 Jan 22, 2021
3792661
clean dockerfile (#30650)
tianshuo78520a Jan 25, 2021
fb7fbc7
fix abs bug and add abs test case (#30637)
MingMingShangTian Jan 25, 2021
8c5f158
remove PrettyTable dependence from paddle.flops (#30675)
yukavio Jan 25, 2021
ae0f88a
add DLA support:C++&&Python api (#30165)
shangzhizhou Jan 25, 2021
173660b
[oneDNN] Cache oneDNN stream not to recreate in each oneDNN op (#30358)
jczaja Jan 25, 2021
846ce40
[ROCM] update eigen cmake and patch, test=develop (#30602)
qili93 Jan 25, 2021
06a3e31
test=develop, fix test_lookahead (#30677)
123malin Jan 25, 2021
0eea5d7
post quantize support insert fake_quantize_dequantize node before the…
yingshengBD Jan 25, 2021
1642757
test=develop, fix nonzero astuple=true (#30647)
123malin Jan 25, 2021
a28a202
fix test_gen_nccl_id_op failed (#30686)
wangxicoding Jan 25, 2021
5bf25d1
More precise mkldnn kernel rules in GetExpectedKernelType (#29840)
arlesniak Jan 25, 2021
1a13626
polish printing dtype (#30682)
zhiqiu Jan 26, 2021
d834f4e
Added vanilla LSTM and LSTM with peepholes oneDNN fp32 kernel (#30661)
jakpiase Jan 26, 2021
7fbc68a
update, test=develop (#30692)
Jan 26, 2021
824a79d
Revert "Added vanilla LSTM and LSTM with peepholes oneDNN fp32 kernel…
luotao1 Jan 26, 2021
5ace20f
modified conv+bn fuse pass to fix wrong mask in mask rcnn (#30704)
alncat Jan 27, 2021
13ef444
[Dy2Stat] Fix error message when the message has more than one lines.…
liym27 Jan 27, 2021
67abfc1
[Kunlun] fix dead lock for exec_op_count_ (#30718)
vslyu Jan 27, 2021
f8da553
REUPLOAD Added vanilla LSTM and LSTM with peepholes oneDNN fp32 kerne…
jakpiase Jan 27, 2021
fef3654
upgrade gather_tree to core.ops (#30697)
FrostML Jan 27, 2021
a87d78f
update gather_tree doc (#30693)
FrostML Jan 27, 2021
caf3680
fix bugs in transformer predict in xpu place (#30730)
taixiurong Jan 28, 2021
3491acf
Split unittest. (#30727)
hbwx24 Jan 28, 2021
78d37c3
【Paddle.Fleet】Fix brpc get hostname (#30703)
MrChengmo Jan 28, 2021
5b59499
fixed compilation error on gcc 4.8.x due to the usage of isfinite (#3…
alncat Jan 28, 2021
a12b6bb
add readme in whl package (#30726)
tianshuo78520a Jan 28, 2021
46989e8
Fix python3 incompatibility issues (#30698)
lidanqing-vv Jan 28, 2021
fc00240
A fix for oneDNN matmul kernel. Fixes issue #30309 (#30723)
Jan 28, 2021
f89da4a
[ROCM] update fluid platform for rocm35 (part1), test=develop (#30639)
qili93 Jan 28, 2021
3fa2e2c
update readme links (#30756)
TCChenlong Jan 29, 2021
65a9744
fix paddle.static.acc and auc sample code bug, test=document_fix (#30…
LiuChiachi Jan 29, 2021
2c974cc
【CustomOp】support setup.py to compile custom op (#30753)
Aurelius84 Jan 29, 2021
3858f45
rm Singleton of reducer (#30775)
ForFishes Jan 29, 2021
53d01af
Fix the nan bug when passing all zero values into clip_by_norm_op. (#…
wzzju Jan 29, 2021
31ed9c9
Fleet distributed strategy support pure fp16 (#30754)
wangxicoding Jan 31, 2021
c35a988
fix malloc L3 failed bug for kunlun (#30745)
QingshuChen Feb 1, 2021
5b1ab51
Change PR-CI-PY3 cc version (#30771)
tianshuo78520a Feb 1, 2021
69875dc
[ROCM] update fluid memory for rocm35 (part1), test=develop (#30758)
qili93 Feb 1, 2021
d3fac0e
fix int64 bug (#30780)
MrChengmo Feb 1, 2021
cb66c53
dump to cpu (#30750)
Thunderbrook Feb 1, 2021
db87087
Optimize the encoder of Transformer. (#30439)
xiemoyuan Feb 1, 2021
200ee33
fix unittest random error (#30808)
shangzhizhou Feb 1, 2021
b08ae36
ci compilation depends on a stable release (#30755)
jiweibo Feb 1, 2021
b909450
fix trt plugin clone and initialize bugs in TRT7.1+ (#30709)
shangzhizhou Feb 2, 2021
04532b8
Update Xbyak to v5.81 (#30809)
wozna Feb 2, 2021
3a3ff75
Fix unittest random failed of test_datasets (#30804)
LielinJiang Feb 2, 2021
b1026f6
【kunlun】dygraph supports multi xpu card training (#30671)
wangxicoding Feb 3, 2021
4f066e3
Layer normalization fuse pass. (#30721)
arogowie-intel Feb 3, 2021
2cb55ef
fix WITH_XPU_BKCL in CMakeLists.txt (#30854)
vslyu Feb 3, 2021
e49d074
[CustomOp] Support install as Package and Add load interface (#30798)
Aurelius84 Feb 3, 2021
05d2b7a
Update paddle.static.Print with paddle2.0 api (#30846)
Joejiong Feb 3, 2021
2ac4143
support xpu with analysis predictor, test=develop (#30832)
Shixiaowei02 Feb 3, 2021
5c8455d
try again if kunlun memory malloc failed (#30855)
QingshuChen Feb 3, 2021
666efc2
Call new cudnn batch norm API regardless of data type and data layout…
AshburnLee Feb 3, 2021
6e1e036
Implement cuda kernel for index_sample. (#30380)
JamesLim-sy Feb 3, 2021
b7560a5
fix the broadcast for the large second input (#30818)
wawltor Feb 3, 2021
3024271
remove numpy array check in single-process dataloader. test=develop (…
heavengate Feb 3, 2021
ac2e2e6
add clip_by_norm on kunlun, *test=kunlun (#30862)
wangleilei001 Feb 3, 2021
635e168
Update README_cn.md (#30867)
fluffyrita Feb 4, 2021
4b2d52a
Update README.md (#30873)
GT-ZhangAcer Feb 4, 2021
e97905c
improve performance of momentum (#30881)
zhangting2020 Feb 4, 2021
35c5b23
use iwyu clean include second time, test=develop (#30829)
wanghuancoder Feb 4, 2021
6e3856d
fix xpu dygraph place (#30868)
wangxicoding Feb 4, 2021
e6095bc
fix split trt plugin initialize (#30875)
shangzhizhou Feb 4, 2021
73cdea0
Add bf16 fast performance verification (#30551)
wozna Feb 4, 2021
abfa822
[oneDNN]Extended adaptive pooling support for oneDNN pool kernel (#30…
jczaja Feb 4, 2021
a80fe67
Change cmake/third_party files for CI (#30833)
tianshuo78520a Feb 5, 2021
092a2b1
More UT for LayerNormFuse pass (#30891)
arogowie-intel Feb 5, 2021
79fa8fb
rm test_datasets from file parallel_UT_relu.py (#30907)
LielinJiang Feb 5, 2021
71acde9
Use correct master weights in AdamW. (#30895)
wzzju Feb 5, 2021
24873f4
dyngraph (#30892)
taixiurong Feb 5, 2021
aab3a30
add include for heterbox_trainer.cc, develop=test (#30910)
wanghuancoder Feb 5, 2021
90d9211
let LayerList could add [None], test=develop (#30911)
wanghuancoder Feb 5, 2021
bef46cc
[Kunlun]fix include files of gen_comm_id_helper.cc (#30917)
vslyu Feb 5, 2021
39f41cb
Performance optimization for dynamic setitem: Call op set_value to sp…
liym27 Feb 5, 2021
4a8b8b4
[Kunlun] add gen_bkcl_id_op, support multi XPU cards training using m…
vslyu Feb 5, 2021
c98f144
add truncated gaussian random (#30922)
MrChengmo Feb 5, 2021
9e527d9
[oneDNN] Added basic changes for elementwise_add_grad bf16 (#30925)
jczaja Feb 6, 2021
5ded39f
fix cpplint cfg, test=develop (#30924)
wanghuancoder Feb 7, 2021
34f1628
[ROCM] update fluid platform for rocm39 (part2), test=develop (#30774)
qili93 Feb 7, 2021
823f499
fix a bug of Sequential::__getitem__ (#30899)
wanghuancoder Feb 7, 2021
2e93233
Add WITH_XPU_BKCL in Kunlun-CI (#30919)
tianshuo78520a Feb 7, 2021
99bd16e
bug fix of xpu lite engine, test=develop (#30918)
Shixiaowei02 Feb 7, 2021
99bf622
op benchmark ci retry with specfied id (#30743)
Avin0323 Feb 7, 2021
87197f8
[kunlun]fix sync in multi kunlun xpu dygraph training. (#30943)
vslyu Feb 8, 2021
97f7a70
Add error message for slice op(#30851)
liym27 Feb 8, 2021
12c15be
[Static setitem] Support index is ellipsis for setitem in static mode…
liym27 Feb 8, 2021
15297a0
fix depends of kunlun bkcl (#30945)
QingshuChen Feb 8, 2021
93c1d9e
[ROCM] update fluid platform for rocm39 (part3), test=develop (#30913)
qili93 Feb 8, 2021
3ba6980
Fix LayerNorm tester for gcc4.8 (#30962)
arogowie-intel Feb 9, 2021
010f2ca
try to fix reader and signal test failed (#30960)
chenwhql Feb 9, 2021
8e72e03
Update gast requirement, test=develop (#30932)
zhhsplendid Feb 9, 2021
14d039e
Fix the problem that the number of ops executed by xpu is wrong (#30961)
wangxicoding Feb 9, 2021
dae3e1f
Solve inconsistent order in each card in dynamic graph (#30931)
ForFishes Feb 9, 2021
9b3c80c
update eigen version on Windows (#30573)
Avin0323 Feb 9, 2021
52edaec
modify dockerfile: support cuda11 and delete gcc8.2 in cpu version (#…
pangyoki Feb 9, 2021
f5ca2db
support label with float input of cross_entropy, test=develop (#30929)
chajchaj Feb 9, 2021
5c03327
fix bug of Linux UT parallel level (#30971)
zhwesky2010 Feb 9, 2021
f649442
New custom operator extension mechanism (#30690)
chenwhql Feb 10, 2021
8ab29f4
delay timeout of unnittest 'test_static_save_load'. (#30975)
hbwx24 Feb 10, 2021
20e300e
fix lrn bug in reshape size, test=develop (#30968)
huangjun12 Feb 11, 2021
5653c3a
[CustomOp] Check Compiler ABI compatibility (#30869)
Aurelius84 Feb 18, 2021
2497f43
Handle missing symlink method on Windows (#31006)
Feb 18, 2021
c137578
Add Support for Tuple in for Loop (#30998)
zhhsplendid Feb 18, 2021
cbbe127
Refine fake_interface Error Message (#30981)
zhhsplendid Feb 18, 2021
caf9d39
Add Conv Transpose BF16 (#30877)
wozna Feb 18, 2021
4c9f96c
[CustomOp] Support Compile multi ops at same time (#30920)
Aurelius84 Feb 18, 2021
9b54fe4
add trt transpose and flatten converter (#31022)
Feb 18, 2021
f0ee159
enable exhaustive_search for forward and backward algos when dtype is…
zhangting2020 Feb 18, 2021
75f8123
fix regex error & simplify marco name (#31031)
chenwhql Feb 19, 2021
5b26747
add offset parameter in roi_align,generate_proposals.etc ops (#30864)
yghstill Feb 19, 2021
c4ddc3a
fix dataloader collate return list mix tensor and numpy array (#30904)
heavengate Feb 19, 2021
cf8b8f9
resolve memory leak in cudnn8.0 (#31029)
jiweibo Feb 19, 2021
01ccfbc
update trt error message when input height or width is -1 (#31019)
jiweibo Feb 19, 2021
39aeaa1
fix jetson problem (#30939)
jiweibo Feb 19, 2021
0020d91
fix python pass builder error. (#30946)
jiweibo Feb 19, 2021
9401173
Remove scale loss before reduce in dygraph (#30807)
ForFishes Feb 19, 2021
615d8a2
Modify relu native implementation 2 (#30996)
Feb 19, 2021
f2dc29a
[CustomOp] Support output dtypes in generated Python API (#31045)
Aurelius84 Feb 19, 2021
4dbe16c
[CustomOp] Refine name argument in setup (#31049)
Aurelius84 Feb 19, 2021
f746564
Added reshape grad bf16 (#31035)
jczaja Feb 19, 2021
ef627ac
Fix that convert_var_shape doesn't support slice like [0:], test=deve…
zhhsplendid Feb 20, 2021
6df1ca5
add detail about states index in rnn result, test=document_fix (#31048)
joey12300 Feb 20, 2021
eb3050f
[ROCM] update fluid inference for rocm (part1), test=develop (#31018)
qili93 Feb 20, 2021
5b367da
[static setitem] Support the index is Tensor; step>1; step<0 .(#30949)
liym27 Feb 20, 2021
a2170a0
change fleet reviewer (#31069)
seiriosPlus Feb 20, 2021
463eae0
update paddle_fluid.so to paddle_inference.so (#30850)
jiweibo Feb 20, 2021
628451a
hide useless headers and add complex support (#31074)
JiabinYang Feb 20, 2021
4424aac
export paddle.static.normalize_program method. (#31072)
T8T9 Feb 20, 2021
16b4260
test=develop, save/load, shrink (#30625)
123malin Feb 20, 2021
d5323da
add squeeze_op/unsqueeze_op on kunlun;fix conv op and parallel execut…
tangzhiyi11 Feb 20, 2021
6beeafe
[CustomOp] Add more dispatch marco for users (#31058)
chenwhql Feb 20, 2021
6b3371e
Remove PE special profiler (#30886)
MrChengmo Feb 20, 2021
b95eb38
fix the bug in backward OP of index_sample. (#31026)
JamesLim-sy Feb 22, 2021
1d99663
[ROCM] update fluid imperative for rocm (part1), test=develop (#31017)
qili93 Feb 22, 2021
2168f08
add optional for param attr args, test=document_fix (#31105)
chenwhql Feb 22, 2021
adaec00
[2.0Custom OP]Support New Custom OP on Windows (#31063)
zhwesky2010 Feb 22, 2021
a5c56d8
update trt int8 calibrator to IEntropyCalibratorV2 (#31060)
shangzhizhou Feb 22, 2021
3342963
[ROCM] update fluid platform for rocm39 (part4), test=develop (#30936)
qili93 Feb 22, 2021
8fe09fa
[ROCM] update fluid framework for rocm (part1), test=develop (#31009)
qili93 Feb 22, 2021
0e4b154
fix dist fleet ctr ut (#31087)
seiriosPlus Feb 22, 2021
cf43a32
[Dy2stat] Refactoring tensor_shape_transformer.py to Fix Change after…
zhhsplendid Feb 22, 2021
5096713
[ROCM] update fluid framework for rocm (part3), test=develop (#31011)
qili93 Feb 22, 2021
565354f
support save multi sparse table in one path (#31108)
Thunderbrook Feb 22, 2021
a60d93f
[ROCM] update fluid framework for rocm (part2), test=develop (#31010)
qili93 Feb 23, 2021
44ee251
fix UNIX cmake problem (#31113)
zhwesky2010 Feb 23, 2021
16fe11d
fix softmax cross entropy integer overflow (#30590)
ZHUI Feb 23, 2021
781df30
Unification of BF16 enablement process (#31034)
wozna Feb 23, 2021
364cfa2
fix windows for optimization of elementwise_add Op (#31068)
wangchaochaohu Feb 23, 2021
99fd981
fix flops api (#31081)
yukavio Feb 23, 2021
cced930
[ROCM] update fluid operators for rocm (part1), test=develop (#31077)
qili93 Feb 23, 2021
ee1801c
Save load/save pickle protocol (#31044)
hbwx24 Feb 23, 2021
edacb62
Optimization of Transformer API (#30957)
xiemoyuan Feb 23, 2021
24ba5ee
merge develop conflict (#31122)
yghstill Feb 23, 2021
d3f09ad
Update of onednn to 2.2 (#31067)
jczaja Feb 23, 2021
e60fd1f
[CustomOp] Split test and add inference test (#31078)
chenwhql Feb 23, 2021
5d6a8c7
added support for fake_quantize_dequantize_abs_max op in quantization…
alncat Feb 23, 2021
be61c2d
support build whl and inference library nightly,test=windows3 (#30616)
zhwesky2010 Feb 23, 2021
70131b4
add warning message when dtypes of operator are not same (#31136)
MingMingShangTian Feb 24, 2021
4b22055
[Custom OP]Fix problem of custom op unitests on Windows CI (#31114)
zhwesky2010 Feb 24, 2021
dce2db4
[CustomOp] Split build directory for each setup.py (#31124)
Aurelius84 Feb 24, 2021
d8fa65a
fix heter compile (#30518)
yaoxuefeng6 Feb 24, 2021
ee76ea7
[ROCM] update fluid collective op for rocm, test=develop (#31075)
qili93 Feb 24, 2021
ebbdf52
fix entry (#31079)
seiriosPlus Feb 24, 2021
1ce96fa
[CustomOp] Add new paddle custom op so (#31141)
chenwhql Feb 24, 2021
1531214
fix ut timeout (#31061)
XieYunshen Feb 24, 2021
c209751
change test_multiprocess_reader_exception cmake (#31174)
chenwhql Feb 24, 2021
00b09e8
[Paddle-TRT] support group_norm (#31040)
Feb 24, 2021
572cc8b
Update doc for 2.0 API and some callback (#31180)
qingqing01 Feb 24, 2021
406f4a7
[CustomOp] Support to specific extra_cflags and exctra_cuda_flags ind…
Aurelius84 Feb 24, 2021
ae2be49
Add cublas_handle() to expose cublas_handle to ops (#31157)
FrostML Feb 24, 2021
c4f279f
support multi node in heterps (#31102)
Thunderbrook Feb 24, 2021
a373aa7
fix the bug in expand_v2 op (#30984)
Feb 24, 2021
dc8dfba
align the default value of some configuration for fleet to that of si…
Feb 24, 2021
0f1fde5
fix the modification of set_expected_place (#31177)
zhiqiu Feb 24, 2021
ffbf713
modify custom op dependent from paddle_framework to paddle_custom_op …
zhwesky2010 Feb 24, 2021
e8cdb49
[CustomOp] Support attributes as func input in custom op (#31128)
chenwhql Feb 25, 2021
912022f
[CustomOp]Add cpp_extension en doc (#31187)
Aurelius84 Feb 25, 2021
2f11653
OneDNN hardswish integration (#30211)
jakpiase Feb 25, 2021
d116024
Add bf16 gru model test (#31158)
wozna Feb 25, 2021
f114c3f
fix the branch of code choose (#31200)
wangchaochaohu Feb 25, 2021
ca3b6bc
add cache for VariableWrapper (#30880)
MingMingShangTian Feb 25, 2021
d18c5e4
fix ignore_index check in softmax_with_cross_entropy (#31201)
yghstill Feb 25, 2021
9d24726
graph engine demo
seemingwang Feb 25, 2021
7753f38
upload unsaved changes
seemingwang Feb 26, 2021
5fa66ab
fix dependency error
seemingwang Feb 27, 2021
3e12386
fix shard_num problem
seemingwang Feb 28, 2021
acacce0
py client
seemingwang Mar 2, 2021
3f32bf1
remove lock and graph-type
seemingwang Mar 9, 2021
92 changes: 57 additions & 35 deletions CMakeLists.txt
@@ -29,11 +29,15 @@ include(generic) # simplify cmake module
find_package(CUDA QUIET)
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF)
option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN" OFF)
option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
option(WITH_ASCEND "Compile PaddlePaddle with ASCEND" OFF)
if (WITH_GPU AND WITH_XPU)
message(FATAL_ERROR "Error when compile GPU and XPU at the same time")
endif()
if (WITH_GPU AND WITH_ASCEND)
message(FATAL_ERROR "Error when compile GPU and ASCEND at the same time")
endif()
# cmake 3.12, 3.13, 3.14 will append gcc link options to nvcc, and nvcc doesn't recognize them.
if(WITH_GPU AND (${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.12) AND (${CMAKE_VERSION} VERSION_LESS 3.15))
message(FATAL_ERROR "cmake ${CMAKE_VERSION} is not supported when WITH_GPU=ON because of bug https://cmake.org/pipermail/cmake/2018-September/068195.html. "
@@ -63,12 +67,17 @@ if(WIN32)
set(CMAKE_SUPPRESS_REGENERATION ON)
set(CMAKE_STATIC_LIBRARY_PREFIX lib)

set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj")

if (MSVC_STATIC_CRT)
message(STATUS "Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
@@ -80,20 +89,16 @@ if(WIN32)
endforeach(flag_var)
endif()

# NOTE(Avin0323): Less parallel count result in faster compilation.
math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")
# windows build turn off warnings, use parallel compiling.
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO)
string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}")
# NOTE(Avin0323): Less parallel count result in faster compilation with
# Unity Build on GPU.
if(WITH_UNITY_BUILD AND WITH_GPU)
set(${flag_var} "${${flag_var}} /MP8")
else()
set(${flag_var} "${${flag_var}} /MP")
endif()
set(${flag_var} "${${flag_var}} /MP${PROCESS_MAX}")
endforeach(flag_var)
foreach(flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
set(${flag_var} "${${flag_var}} /w")
@@ -148,8 +153,9 @@ option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization and inference-lib generation" OFF)
################################ Internal Configurations #######################################
option(WITH_ROCM_PLATFORM "Compile PaddlePaddle with ROCM platform" OFF)
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
option(WITH_RCCL "Compile PaddlePaddle with RCCL support" OFF)
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(WITH_INCREMENTAL_COVERAGE "Generate coverage reports only for incremental code" OFF)
@@ -160,15 +166,18 @@ option(WITH_BOX_PS "Compile with box_ps support" OFF)
option(WITH_XBYAK "Compile with xbyak support" ON)
option(WITH_CONTRIB "Compile the third-party contributation" OFF)
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE})
option(WITH_INFERENCE_API_TEST "Test fluid inference C++ high-level api interface" OFF)
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE})
option(SANITIZER_TYPE "Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined" OFF)
option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF)
option(WITH_NCCL "Compile PaddlePaddle with NCCL support" ON)
option(WITH_XPU_BKCL "Compile PaddlePaddle with BAIDU KUNLUN XPU BKCL" OFF)
option(WITH_CRYPTO "Compile PaddlePaddle with crypto support" ON)
option(WITH_ARM "Compile PaddlePaddle with arm support" OFF)
option(WITH_SW "Compile PaddlePaddle with sw support" OFF)
option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
option(WITH_MUSL "Compile with musl libc instead of gblic" OFF)
option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF)

@@ -213,6 +222,13 @@ if (NOT WITH_GPU AND WITH_NCCL)
"Disable NCCL when compiling without GPU" FORCE)
endif()

if (NOT WITH_XPU AND WITH_XPU_BKCL)
MESSAGE(WARNING
"Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF.")
set(WITH_XPU_BKCL OFF CACHE STRING
"Disable BKCL when compiling without XPU" FORCE)
endif()

if(WITH_NCCL)
add_definitions("-DPADDLE_WITH_NCCL")
include(nccl)
@@ -246,17 +262,6 @@ endif()

include(third_party) # download, build, install third_party, Contains about 20+ dependencies

if(WITH_DISTRIBUTE)
if(WITH_GRPC)
message(STATUS "Use grpc framework.")
include(external/grpc)
else()
message(STATUS "Use brpc framework.")
include(external/leveldb)
include(external/brpc)
endif()
endif()

include(flags) # set paddle compile flags

if(WITH_PROFILER)
@@ -279,19 +284,29 @@ include(configure) # add paddle env configuration

include_directories("${PADDLE_SOURCE_DIR}")

if(NOT DEFINED ENV{ROCM_PATH})
set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed")
set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
if(WITH_ROCM)
include(hip)
endif(WITH_ROCM)

if (NOT WITH_ROCM AND WITH_RCCL)
MESSAGE(WARNING
"Disable RCCL when compiling without GPU. Force WITH_RCCL=OFF.")
set(WITH_NCCL OFF CACHE STRING
"Disable RCCL when compiling without GPU" FORCE)
endif()

if(WITH_RCCL)
add_definitions("-DPADDLE_WITH_RCCL")
include(rccl)
else()
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
if(WITH_ROCM)
MESSAGE(WARNING "If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used.")
endif()
endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})

if(WITH_ROCM_PLATFORM)
find_package(HIP)
include(hip)
endif(WITH_ROCM_PLATFORM)
if(WITH_NV_JETSON)
set(WITH_ARM ON CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE)
endif()

if(WITH_ARM)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
@@ -310,11 +325,18 @@ if (WITH_SW)
add_definitions(-DPADDLE_WITH_SW)
endif()

if (WITH_MIPS)
set(WITH_XBYAK OFF CACHE STRING "Disable XBYAK when compiling WITH_MIPS=ON" FORCE)
add_definitions(-DPADDLE_WITH_MIPS)
endif()

set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")

set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")

add_definitions(-DPADDLE_DLL_EXPORT)

if(ON_INFER)
# you can turn off the paddle fluid and inference lib by set ON_INFER=OFF
message(STATUS "On inference mode, will take place some specific optimization.")
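The option-dependency guards added in this diff (forcing WITH_XPU_BKCL off when WITH_XPU is absent, and WITH_RCCL off when WITH_ROCM is absent) follow a small, reusable CMake pattern: check the prerequisite, warn, and force the cached option off so later checks see a consistent value. The sketch below is a minimal standalone illustration of that pattern; the project name and the WITH_FOO/WITH_FOO_BAR options are placeholders, not part of this PR.

```cmake
# Minimal sketch of the "dependent option" guard used in the diff above.
# WITH_FOO / WITH_FOO_BAR are illustrative names, not options from this PR.
cmake_minimum_required(VERSION 3.10)
project(guard_demo NONE)

option(WITH_FOO     "Enable the FOO backend"                OFF)
option(WITH_FOO_BAR "Enable BAR support on the FOO backend" OFF)

# If the dependent feature is requested without its prerequisite,
# warn and override the cached value so the rest of the build is consistent.
if(NOT WITH_FOO AND WITH_FOO_BAR)
  message(WARNING
      "Disable WITH_FOO_BAR when compiling without WITH_FOO. Force WITH_FOO_BAR=OFF.")
  set(WITH_FOO_BAR OFF CACHE BOOL
      "Disable WITH_FOO_BAR when compiling without WITH_FOO" FORCE)
endif()

if(WITH_FOO_BAR)
  add_definitions("-DDEMO_WITH_FOO_BAR")
endif()
```

Configuring such a project with `-DWITH_FOO_BAR=ON` but without `-DWITH_FOO=ON` emits the warning and proceeds with the feature disabled, which mirrors how the BKCL and RCCL guards behave in the updated CMakeLists.txt.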