We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 028ecfd commit 59508e3 Copy full SHA for 59508e3
axlearn/common/compiler_options.py
@@ -74,6 +74,9 @@ def default_xla_options(
74
xla_tpu_scoped_vmem_limit_kib=98304,
75
# For megascale performance.
76
xla_jf_crs_combiner_threshold_count=10,
77
+ # TODO(hanzhi-zhou): temporary workaround for the PCIe overload caused by all-reduce over
78
+ # DCN during multi-slice v6e training. This flag doesn't impact performance.
79
+ xla_tpu_iova_dma_chunk_size_bytes=1048576,
80
)
81
options.update(
82
# Improved performance for v6e.
0 commit comments