We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d562c2e, commit faae7a7 (Copy full SHA for faae7a7)
csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp
@@ -135,10 +135,10 @@ class MLA {
135
max_splits = min(16, max_splits);
136
137
// TODO: This avoids a hang when the batch size is larger than 1 and
138
- // there is more than 4 kv_splits.
+ // there is more than 1 kv_split.
139
// Discuss with NVIDIA how this can be fixed.
140
if (B > 1) {
141
- max_splits = min(2, max_splits);
+ max_splits = min(1, max_splits);
142
}
143
144
// printf(" max_splits = %d\n", max_splits);
0 commit comments