Skip to content

Commit

Permalink
Turn off SLP vectorization for avx512 only (halide#7918)
Browse files Browse the repository at this point in the history
  • Loading branch information
abadams authored and ardier committed Mar 3, 2024
1 parent e6e2f8f commit 6ba7ec3
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/CodeGen_LLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1122,7 +1122,7 @@ void CodeGen_LLVM::optimize_module() {
PipelineTuningOptions pto;
pto.LoopInterleaving = do_loop_opt;
pto.LoopVectorization = do_loop_opt;
pto.SLPVectorization = true; // Note: SLP vectorization has no analogue in the Halide scheduling model
pto.SLPVectorization = use_slp_vectorization();
pto.LoopUnrolling = do_loop_opt;
// Clear ScEv info for all loops. Certain Halide applications spend a very
// long time compiling in forgetLoop, and prefer to forget everything
Expand Down
7 changes: 7 additions & 0 deletions src/CodeGen_LLVM.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,13 @@ class CodeGen_LLVM : public IRVisitor {
virtual bool use_pic() const;
// @}

/** Whether LLVM's SLP vectorization should be enabled. The Halide
 * scheduling model has no analogue of SLP vectorization, so the answer
 * is a heuristic that depends on the target. This implementation
 * always answers yes. */
virtual bool use_slp_vectorization() const {
    const bool enable_slp = true;
    return enable_slp;
}

/** Should indexing math be promoted to 64-bit on platforms with
* 64-bit pointers? */
virtual bool promote_indices() const {
Expand Down
16 changes: 16 additions & 0 deletions src/CodeGen_X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class CodeGen_X86 : public CodeGen_Posix {
bool use_soft_float_abi() const override;
int native_vector_bits() const override;

bool use_slp_vectorization() const override;

int vector_lanes_for_slice(const Type &t) const;

using CodeGen_Posix::visit;
Expand Down Expand Up @@ -1028,6 +1030,20 @@ int CodeGen_X86::vector_lanes_for_slice(const Type &t) const {
return slice_bits / t.bits();
}

bool CodeGen_X86::use_slp_vectorization() const {
    // On avx512 targets LLVM's SLP vectorizer emits gather intrinsics for
    // LUTs and boundary conditions, even though those are slower than
    // simply scalarizing, so SLP vectorization is disabled there and left
    // enabled for every other target.
    // See https://github.com/llvm/llvm-project/issues/70259
    //
    // TODO: Once that issue is fixed, make this conditional on the LLVM
    // version.
    const bool has_avx512 = target.has_feature(Target::AVX512);
    return !has_avx512;
}

} // namespace

std::unique_ptr<CodeGen_Posix> new_CodeGen_X86(const Target &target) {
Expand Down

0 comments on commit 6ba7ec3

Please sign in to comment.