Skip to content

Commit 59508e3

Browse files
authored
Add v6e PCIe overload workaround flag (apple#955)
1 parent 028ecfd commit 59508e3

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

axlearn/common/compiler_options.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ def default_xla_options(
74 74   xla_tpu_scoped_vmem_limit_kib=98304,
75 75   # For megascale performance.
76 76   xla_jf_crs_combiner_threshold_count=10,
   77 + # TODO(hanzhi-zhou): temporary workaround to avoid PCIe overload when using multi-slice
   78 + # v6e training caused by allreduce over DCN. This flag doesn't impact performance.
   79 + xla_tpu_iova_dma_chunk_size_bytes=1048576,
77 80   )
78 81   options.update(
79 82   # Improved performance for v6e.

0 commit comments

Comments
 (0)