-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[Loads] Support dereferenceable assumption with variable size. #128436
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-analysis Author: Florian Hahn (fhahn) Changes: Update isDereferenceableAndAlignedPointer to make use of dereferenceable assumptions with variable sizes via SCEV. To do so, factor out the logic to check via an assumption to a helper, and use SE to check if the access size is less than or equal to the dereferenceable size. Full diff: https://github.com/llvm/llvm-project/pull/128436.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
index f7a893708758c..8577fc72ecd0f 100644
--- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h
+++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
@@ -99,6 +99,7 @@ void fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result);
struct RetainedKnowledge {
Attribute::AttrKind AttrKind = Attribute::None;
uint64_t ArgValue = 0;
+ Value *IRArgValue = nullptr;
Value *WasOn = nullptr;
bool operator==(RetainedKnowledge Other) const {
return AttrKind == Other.AttrKind && WasOn == Other.WasOn &&
diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp
index c27bfa6f3cc2c..7366fabca3eeb 100644
--- a/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -114,6 +114,7 @@ llvm::getKnowledgeFromBundle(AssumeInst &Assume,
};
if (BOI.End - BOI.Begin > ABA_Argument)
Result.ArgValue = GetArgOr1(0);
+ Result.IRArgValue = getValueFromBundleOpInfo(Assume, BOI, ABA_Argument);
if (Result.AttrKind == Attribute::Alignment)
if (BOI.End - BOI.Begin > ABA_Argument + 1)
Result.ArgValue = MinAlign(Result.ArgValue, GetArgOr1(1));
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index b461c41d29e84..3b9df62f3a0bd 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -31,6 +31,35 @@ static bool isAligned(const Value *Base, Align Alignment,
return Base->getPointerAlignment(DL) >= Alignment;
}
+static bool isDereferenceableAndAlignedPointerViaAssumption(
+ const Value *Ptr, Align Alignment,
+ function_ref<bool(const RetainedKnowledge &RK)> CheckSize,
+ const DataLayout &DL, const Instruction *CtxI, AssumptionCache *AC,
+ const DominatorTree *DT) {
+ if (!CtxI || Ptr->canBeFreed())
+ return false;
+ /// Look through assumes to see if both dereferencability and alignment can
+ /// be proven by an assume if needed.
+ RetainedKnowledge AlignRK;
+ RetainedKnowledge DerefRK;
+ bool IsAligned = Ptr->getPointerAlignment(DL) >= Alignment;
+ return getKnowledgeForValue(
+ Ptr, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
+ [&](RetainedKnowledge RK, Instruction *Assume, auto) {
+ if (!isValidAssumeForContext(Assume, CtxI, DT))
+ return false;
+ if (RK.AttrKind == Attribute::Alignment)
+ AlignRK = std::max(AlignRK, RK);
+ if (RK.AttrKind == Attribute::Dereferenceable)
+ DerefRK = std::max(DerefRK, RK);
+ IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value();
+ if (IsAligned && DerefRK && CheckSize(DerefRK))
+ return true; // We have found what we needed so we stop looking
+ return false; // Other assumes may have better information. so
+ // keep looking
+ });
+}
+
/// Test if V is always a pointer to allocated and suitably aligned memory for
/// a simple load or store.
static bool isDereferenceableAndAlignedPointer(
@@ -174,33 +203,41 @@ static bool isDereferenceableAndAlignedPointer(
// information for values that cannot be freed in the function.
// TODO: More precisely check if the pointer can be freed between assumption
// and use.
- if (CtxI && !V->canBeFreed()) {
- /// Look through assumes to see if both dereferencability and alignment can
- /// be proven by an assume if needed.
- RetainedKnowledge AlignRK;
- RetainedKnowledge DerefRK;
- bool IsAligned = V->getPointerAlignment(DL) >= Alignment;
- if (getKnowledgeForValue(
- V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
- [&](RetainedKnowledge RK, Instruction *Assume, auto) {
- if (!isValidAssumeForContext(Assume, CtxI, DT))
- return false;
- if (RK.AttrKind == Attribute::Alignment)
- AlignRK = std::max(AlignRK, RK);
- if (RK.AttrKind == Attribute::Dereferenceable)
- DerefRK = std::max(DerefRK, RK);
- IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value();
- if (IsAligned && DerefRK &&
- DerefRK.ArgValue >= Size.getZExtValue())
- return true; // We have found what we needed so we stop looking
- return false; // Other assumes may have better information. so
- // keep looking
- }))
- return true;
+ if (CtxI) {
+ const Value *UO = getUnderlyingObjectAggressive(V);
+ if (!V->canBeFreed() || (UO && !UO->canBeFreed())) {
+ /// Look through assumes to see if both dereferencability and alignment
+ /// can be proven by an assume if needed.
+ RetainedKnowledge AlignRK;
+ RetainedKnowledge DerefRK;
+ bool IsAligned = V->getPointerAlignment(DL) >= Alignment;
+ if (getKnowledgeForValue(
+ V, {Attribute::Dereferenceable, Attribute::Alignment}, AC,
+ [&](RetainedKnowledge RK, Instruction *Assume, auto) {
+ if (!isValidAssumeForContext(Assume, CtxI, DT))
+ return false;
+ if (RK.AttrKind == Attribute::Alignment)
+ AlignRK = std::max(AlignRK, RK);
+ if (RK.AttrKind == Attribute::Dereferenceable)
+ DerefRK = std::max(DerefRK, RK);
+ IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value();
+ if (IsAligned && DerefRK &&
+ DerefRK.ArgValue >= Size.getZExtValue())
+ return true; // We have found what we needed so we stop
+ // looking
+ return false; // Other assumes may have better information. so
+ // keep looking
+ }))
+ return true;
+ }
}
- // If we don't know, assume the worst.
- return false;
+ return isDereferenceableAndAlignedPointerViaAssumption(
+ V, Alignment,
+ [Size](const RetainedKnowledge &RK) {
+ return RK.ArgValue >= Size.getZExtValue();
+ },
+ DL, CtxI, AC, DT);
}
bool llvm::isDereferenceableAndAlignedPointer(
@@ -317,8 +354,8 @@ bool llvm::isDereferenceableAndAlignedInLoop(
return false;
const SCEV *MaxBECount =
- Predicates ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
- : SE.getConstantMaxBackedgeTakenCount(L);
+ Predicates ? SE.getPredicatedSymbolicMaxBackedgeTakenCount(L, *Predicates)
+ : SE.getSymbolicMaxBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(MaxBECount))
return false;
@@ -334,9 +371,11 @@ bool llvm::isDereferenceableAndAlignedInLoop(
Value *Base = nullptr;
APInt AccessSize;
+ const SCEV *AccessSizeSCEV = nullptr;
if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) {
Base = NewBase->getValue();
AccessSize = MaxPtrDiff;
+ AccessSizeSCEV = PtrDiff;
} else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) {
if (MinAdd->getNumOperands() != 2)
return false;
@@ -360,12 +399,20 @@ bool llvm::isDereferenceableAndAlignedInLoop(
return false;
AccessSize = MaxPtrDiff + Offset->getAPInt();
+ AccessSizeSCEV = SE.getAddExpr(PtrDiff, Offset);
Base = NewBase->getValue();
} else
return false;
Instruction *HeaderFirstNonPHI = &*L->getHeader()->getFirstNonPHIIt();
- return isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
+ return isDereferenceableAndAlignedPointerViaAssumption(
+ Base, Alignment,
+ [&SE, PtrDiff](const RetainedKnowledge &RK) {
+ return SE.isKnownPredicate(CmpInst::ICMP_ULE, PtrDiff,
+ SE.getSCEV(RK.IRArgValue));
+ },
+ DL, HeaderFirstNonPHI, AC, &DT) ||
+ isDereferenceableAndAlignedPointer(Base, Alignment, AccessSize, DL,
HeaderFirstNonPHI, AC, &DT);
}
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
index d1cbe02192e31..344f4c5bb0d79 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-variable-size.ll
@@ -185,15 +185,32 @@ define void @deref_assumption_in_preheader_too_small_non_constant_trip_count_acc
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
@@ -268,15 +285,32 @@ define void @deref_assumption_in_preheader_too_small2_non_constant_trip_count_ac
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP5]], align 1
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i1> [[TMP4]], splat (i1 true)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
+; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
+; CHECK: [[PRED_LOAD_IF]]:
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
+; CHECK: [[PRED_LOAD_CONTINUE]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
+; CHECK-NEXT: br i1 [[TMP10]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_IF1]]:
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1
+; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
+; CHECK: [[PRED_LOAD_CONTINUE2]]:
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
|
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] | ||
; CHECK: [[PRED_LOAD_IF]]: | ||
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP0]] | ||
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm a bit confused. If you're increasing the number of places where we can treat the load as dereferenceable, why does it regress the code here to use conditional loads?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, the current code handles the assumption with variable sizes incorrectly due to getStartAndEndForAccess
wrapping; in this case we pass the unsigned max as trip count (-1) and the end wraps around to 0 (fixed by #128061)
46d4a8c
to
beabe4d
Compare
beabe4d
to
ce12cac
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks conceptually fine to me, but we should wait for #128061 to land first to clarify the wrapping situation.
b5a78c7
to
4d5792e
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have a test case for when deref comes from the assumption, but alignment comes from some other source?
if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) { | ||
Base = NewBase->getValue(); | ||
AccessSize = MaxPtrDiff; | ||
AccessSizeSCEV = PtrDiff; | ||
} else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OT - We may be able to strengthen this code using SCEV's getPointerBase and removePointerBase.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is a good point as a follow-up (I'll try it out). With a pre-loop before the vectorizable loop, I found it bailing out on the (MinAdd->getNumOperands() != 2)
, but I'm not sure if getPointerBase
will help.
4d5792e
to
131e865
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have a test case for when deref comes from the assumption, but alignment comes from some other source?
Thanks, such a test was indeed missing. While adding it I noticed that all the assumptions with variable sizes also had an "align" bundle, and for those the verifier would only check the first bundle element and not the rest.
I put up #145586 to adjust the verifier to not skip checking other bundles and to allow non-constants.
It's also included in this PR, in case people prefer it to be included here. W/o the change the verifier will reject assumptions with just a dereferenceable bundle with non-constant sizes.
131e865
to
ed7ff0b
Compare
For some reason, some of the checks for specific assume bundle elements exit early if the check passes, meaning we don't verify other entries. Replace the early returns with early continues. This also requires removing some tests that are currently rejected. They will be added back as part of #128436. PR: #145586
For some reason, some of the checks for specific assume bundle elements exit early if the check passes, meaning we don't verify other entries. Replace the early returns with early continues. This also requires removing some tests that are currently rejected. They will be added back as part of llvm/llvm-project#128436. PR: llvm/llvm-project#145586
ed7ff0b
to
c9b8020
Compare
Yes, I noticed this verifier bug when trying to use these bundles downstream. I'd added a pass which identifies dereferenceability and alignment for java arrays and it would fail without the alignment bundle in the assume. Could you pls add a test in |
Now that #128061 has landed, the code there also needs to be taught to use info from deref assumptions. I'd prefer to do that separately. |
if (const SCEVUnknown *NewBase = dyn_cast<SCEVUnknown>(AccessStart)) { | ||
Base = NewBase->getValue(); | ||
AccessSize = MaxPtrDiff; | ||
AccessSizeSCEV = PtrDiff; | ||
} else if (auto *MinAdd = dyn_cast<SCEVAddExpr>(AccessStart)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is a good point as a follow-up (I'll try it out). With a pre-loop before the vectorizable loop, I found it bailing out on the (MinAdd->getNumOperands() != 2)
, but I'm not sure if getPointerBase
will help.
return isDereferenceableAndAlignedPointerViaAssumption( | ||
Base, Alignment, | ||
[&SE, AccessSizeSCEV](const RetainedKnowledge &RK) { | ||
return SE.isKnownPredicate(CmpInst::ICMP_ULE, AccessSizeSCEV, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a potential to use isKnownPredicateAt
with Loop's predecessor as the Ctx instruction if it exists. I don't yet see a benefit for it, but just stating it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, there's probably a number of improvements we can make as follow ups!
For some reason, some of the checks for specific assume bundle elements exit early if the check passes, meaning we don't verify other entries. Replace the early returns with early continues. This also requires removing some tests that are currently rejected. They will be added back as part of llvm#128436. PR: llvm#145586
For some reason, some of the checks for specific assume bundle elements exit early if the check passes, meaning we don't verify other entries. Replace the early returns with early continues. This also requires removing some tests that are currently rejected. They will be added back as part of llvm#128436. PR: llvm#145586
c9b8020
to
66c0ed0
Compare
Adds additional test coverage for early-exit loops with deref assumptions, as suggested in #128436.
Update isDereferenceableAndAlignedPointer to make use of dereferenceable assumptions with variable sizes via SCEV. To do so, factor out the logic to check via an assumption to a helper, and use SE to check if the access size is less than or equal to the dereferenceable size.
66c0ed0
to
6b0f5e7
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ping :)
Could you pls add a test in
early_exit_legality.ll
to show that the dereferenceability and alignment bundles allow early exit vectorization without the need for statically allocated arrays (the existing tests in the patch show why predication wouldn’t be needed). It works as expected when I tried, will be useful just to showcase that additional benefit.
I added some deref assumption tests with early exits and constant sizes in 0afbf17 and added one with non-constant sizes to this patch
I'll share a patch soon that updates LAA to use info from deref assumptions in the code there.
return isDereferenceableAndAlignedPointerViaAssumption( | ||
Base, Alignment, | ||
[&SE, AccessSizeSCEV](const RetainedKnowledge &RK) { | ||
return SE.isKnownPredicate(CmpInst::ICMP_ULE, AccessSizeSCEV, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, there's probably a number of improvements we can make as follow ups!
…ions. Adds additional test coverage for early-exit loops with deref assumptions, as suggested in llvm/llvm-project#128436.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
DerefRK = std::max(DerefRK, RK); | ||
IsAligned |= AlignRK && AlignRK.ArgValue >= Alignment.value(); | ||
if (IsAligned && DerefRK && CheckSize(DerefRK)) | ||
return true; // We have found what we needed so we stop looking |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: You can just do
return IsAligned && DerefRK && CheckSize(DerefRK);
@@ -94,3 +94,50 @@ loop.end: | |||
%retval = phi i64 [ %index, %loop ], [ -1, %loop.inc ] | |||
ret i64 %retval | |||
} | |||
|
|||
define i64 @early_exit_alignment_and_deref_known_via_assumption(ptr noalias %p1, ptr noalias %p2, i64 %n) nofree nosync { | |||
; CHECK-LABEL: define i64 @early_exit_alignment_and_deref_known_via_assumption( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, this loop isn't vectorising now that we have the dereferenceable assumption so I'm not quite sure what this is actually testing? Is there more work required elsewhere or is it just because the test itself inhibits vectorisation?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like we bail out here:
// Try to get the access size.
const SCEV *PtrDiff = SE.getMinusSCEV(AccessEnd, AccessStart);
if (isa<SCEVCouldNotCompute>(PtrDiff))
return false;
in isDereferenceableAndAlignedInLoop
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like AccessEnd = inttoptr (i64 -1 to ptr)
and AccessStart = %p1
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In fact for every single test AccessEnd is inttoptr (i64 -1 to ptr)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like #128061 probably needs updating to use the new assumption?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @fhahn, just letting you know I'm working on a patch to fix this downstream. Hopefully have a PR ready soon!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As I mentioned in the earlier conversation, I have been working on a patch, will share soon
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shared #147047 which adds support for assumptions with constant sizes which is independent of the patch here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK no problem. I didn't see your comment about working on a fix separately. Nevermind!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No worries, just wanted to avoid working on the same thing unnecessarily :)
Update isDereferenceableAndAlignedPointer to make use of dereferenceable assumptions with variable sizes via SCEV.
To do so, factor out the logic to check via an assumption to a helper, and use SE to check if the access size is less than or equal to the dereferenceable size.