-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[SimplifyCFG] Don't use a mask for lookup tables generated from switches with an unreachable default case #94468
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SimplifyCFG] Don't use a mask for lookup tables generated from switches with an unreachable default case #94468
Conversation
@llvm/pr-subscribers-llvm-transforms Author: DaPorkchop_ (DaMatrix) ChangesWhen transforming a switch with holes into a lookup table, we currently use a mask to check if the current index is handled by the switch or if it is a hole. If it is a hole, we skip loading from the lookup table. Normally, if the switch's default case is unreachable this has no impact, as the mask test gets optimized away by subsequent passes. However, if the switch is large enough that the number of lookup table entries exceeds the target's register width, we won't be able to fit all the cases into a mask and the switch won't get transformed into a lookup table. If we know that the switch's default case is unreachable, we know that the mask is unnecessary and can skip constructing it entirely, which allows us to transform the switch into a lookup table. In the future, it might be interesting to consider allowing lookup table masks to be more than one register large (e.g. using a constant array of bit flags, similar to Patch is 62.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/94468.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index fe6ec8819ff99..1ad71b9a8838d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6743,8 +6743,15 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
TableSize =
(MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
+ // If the default destination is unreachable, or if the lookup table covers
+ // all values of the conditional variable, branch directly to the lookup table
+ // BB. Otherwise, check that the condition is within the case range.
+ bool DefaultIsReachable =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+
bool TableHasHoles = (NumResults < TableSize);
- bool NeedMask = (TableHasHoles && !HasDefaultResults);
+ bool HolesHaveUndefinedResults = (TableHasHoles && !HasDefaultResults);
+ bool NeedMask = (HolesHaveUndefinedResults && DefaultIsReachable);
if (NeedMask) {
// As an extra penalty for the validity test we require more cases.
if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
@@ -6766,12 +6773,6 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
"It is impossible for a switch to have more entries than the max "
"representable value of its input integer type's size.");
- // If the default destination is unreachable, or if the lookup table covers
- // all values of the conditional variable, branch directly to the lookup table
- // BB. Otherwise, check that the condition is within the case range.
- bool DefaultIsReachable =
- !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
-
// Create the BB that does the lookups.
Module &Mod = *CommonDest->getParent()->getParent();
BasicBlock *LookupBB = BasicBlock::Create(
@@ -6895,8 +6896,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
for (PHINode *PHI : PHIs) {
const ResultListTy &ResultList = ResultLists[PHI];
- // If using a bitmask, use any value to fill the lookup table holes.
- Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
+ // Use any value to fill the lookup table holes.
+ Constant *DV = HolesHaveUndefinedResults ? ResultLists[PHI][0].second
+ : DefaultResults[PHI];
StringRef FuncName = Fn->getName();
SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
DL, FuncName);
diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
index 893d1e0ff60b5..2ac94afd95910 100644
--- a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
@@ -11,11 +11,11 @@ define i32 @switch_of_powers(i32 %x) {
; RV64I-LABEL: @switch_of_powers(
; RV64I-NEXT: entry:
; RV64I-NEXT: switch i32 [[X:%.*]], label [[DEFAULT_CASE:%.*]] [
-; RV64I-NEXT: i32 1, label [[RETURN:%.*]]
-; RV64I-NEXT: i32 8, label [[BB2:%.*]]
-; RV64I-NEXT: i32 16, label [[BB3:%.*]]
-; RV64I-NEXT: i32 32, label [[BB4:%.*]]
-; RV64I-NEXT: i32 64, label [[BB5:%.*]]
+; RV64I-NEXT: i32 1, label [[RETURN:%.*]]
+; RV64I-NEXT: i32 8, label [[BB2:%.*]]
+; RV64I-NEXT: i32 16, label [[BB3:%.*]]
+; RV64I-NEXT: i32 32, label [[BB4:%.*]]
+; RV64I-NEXT: i32 64, label [[BB5:%.*]]
; RV64I-NEXT: ]
; RV64I: default_case:
; RV64I-NEXT: unreachable
@@ -34,10 +34,6 @@ define i32 @switch_of_powers(i32 %x) {
; RV64ZBB-LABEL: @switch_of_powers(
; RV64ZBB-NEXT: entry:
; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
-; RV64ZBB-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP0]] to i8
-; RV64ZBB-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]]
-; RV64ZBB-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
-; RV64ZBB-NEXT: call void @llvm.assume(i1 [[SWITCH_LOBIT]])
; RV64ZBB-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers, i32 0, i32 [[TMP0]]
; RV64ZBB-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
; RV64ZBB-NEXT: ret i32 [[SWITCH_LOAD]]
@@ -69,11 +65,11 @@ define i32 @switch_of_powers_reachable_default(i32 %x) {
; CHECK-LABEL: @switch_of_powers_reachable_default(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [
-; CHECK-NEXT: i32 1, label [[BB1:%.*]]
-; CHECK-NEXT: i32 8, label [[BB2:%.*]]
-; CHECK-NEXT: i32 16, label [[BB3:%.*]]
-; CHECK-NEXT: i32 32, label [[BB4:%.*]]
-; CHECK-NEXT: i32 64, label [[BB5:%.*]]
+; CHECK-NEXT: i32 1, label [[BB1:%.*]]
+; CHECK-NEXT: i32 8, label [[BB2:%.*]]
+; CHECK-NEXT: i32 16, label [[BB3:%.*]]
+; CHECK-NEXT: i32 32, label [[BB4:%.*]]
+; CHECK-NEXT: i32 64, label [[BB5:%.*]]
; CHECK-NEXT: ]
; CHECK: bb1:
; CHECK-NEXT: br label [[RETURN]]
@@ -116,11 +112,11 @@ define i32 @switch_of_non_powers(i32 %x) {
; CHECK-LABEL: @switch_of_non_powers(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT_CASE:%.*]] [
-; CHECK-NEXT: i32 0, label [[RETURN:%.*]]
-; CHECK-NEXT: i32 1, label [[BB2:%.*]]
-; CHECK-NEXT: i32 16, label [[BB3:%.*]]
-; CHECK-NEXT: i32 32, label [[BB4:%.*]]
-; CHECK-NEXT: i32 64, label [[BB5:%.*]]
+; CHECK-NEXT: i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT: i32 1, label [[BB2:%.*]]
+; CHECK-NEXT: i32 16, label [[BB3:%.*]]
+; CHECK-NEXT: i32 32, label [[BB4:%.*]]
+; CHECK-NEXT: i32 64, label [[BB5:%.*]]
; CHECK-NEXT: ]
; CHECK: default_case:
; CHECK-NEXT: unreachable
@@ -162,10 +158,10 @@ define i32 @unable_to_create_dense_switch(i32 %x) {
; CHECK-LABEL: @unable_to_create_dense_switch(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[X:%.*]], label [[DEFAULT_CASE:%.*]] [
-; CHECK-NEXT: i32 1, label [[RETURN:%.*]]
-; CHECK-NEXT: i32 2, label [[BB3:%.*]]
-; CHECK-NEXT: i32 4, label [[BB4:%.*]]
-; CHECK-NEXT: i32 4096, label [[BB5:%.*]]
+; CHECK-NEXT: i32 1, label [[RETURN:%.*]]
+; CHECK-NEXT: i32 2, label [[BB3:%.*]]
+; CHECK-NEXT: i32 4, label [[BB4:%.*]]
+; CHECK-NEXT: i32 4096, label [[BB5:%.*]]
; CHECK-NEXT: ]
; CHECK: default_case:
; CHECK-NEXT: unreachable
@@ -205,10 +201,10 @@ define i32 @unable_to_generate_lookup_table(i32 %x, i32 %y) {
; RV64I-LABEL: @unable_to_generate_lookup_table(
; RV64I-NEXT: entry:
; RV64I-NEXT: switch i32 [[Y:%.*]], label [[DEFAULT_CASE:%.*]] [
-; RV64I-NEXT: i32 1, label [[BB2:%.*]]
-; RV64I-NEXT: i32 2, label [[BB3:%.*]]
-; RV64I-NEXT: i32 8, label [[BB4:%.*]]
-; RV64I-NEXT: i32 64, label [[BB5:%.*]]
+; RV64I-NEXT: i32 1, label [[BB2:%.*]]
+; RV64I-NEXT: i32 2, label [[BB3:%.*]]
+; RV64I-NEXT: i32 8, label [[BB4:%.*]]
+; RV64I-NEXT: i32 64, label [[BB5:%.*]]
; RV64I-NEXT: ]
; RV64I: default_case:
; RV64I-NEXT: unreachable
@@ -239,10 +235,10 @@ define i32 @unable_to_generate_lookup_table(i32 %x, i32 %y) {
; RV64ZBB-NEXT: entry:
; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.cttz.i32(i32 [[Y:%.*]], i1 true)
; RV64ZBB-NEXT: switch i32 [[TMP0]], label [[DEFAULT_CASE:%.*]] [
-; RV64ZBB-NEXT: i32 0, label [[BB2:%.*]]
-; RV64ZBB-NEXT: i32 1, label [[BB3:%.*]]
-; RV64ZBB-NEXT: i32 3, label [[BB4:%.*]]
-; RV64ZBB-NEXT: i32 6, label [[BB5:%.*]]
+; RV64ZBB-NEXT: i32 0, label [[BB2:%.*]]
+; RV64ZBB-NEXT: i32 1, label [[BB3:%.*]]
+; RV64ZBB-NEXT: i32 3, label [[BB4:%.*]]
+; RV64ZBB-NEXT: i32 6, label [[BB5:%.*]]
; RV64ZBB-NEXT: ]
; RV64ZBB: default_case:
; RV64ZBB-NEXT: unreachable
@@ -314,10 +310,10 @@ define i128 @switch_with_long_condition(i128 %x) {
; CHECK-LABEL: @switch_with_long_condition(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i128 [[X:%.*]], label [[DEFAULT_CASE:%.*]] [
-; CHECK-NEXT: i128 1, label [[RETURN:%.*]]
-; CHECK-NEXT: i128 2, label [[BB3:%.*]]
-; CHECK-NEXT: i128 4, label [[BB4:%.*]]
-; CHECK-NEXT: i128 32, label [[BB5:%.*]]
+; CHECK-NEXT: i128 1, label [[RETURN:%.*]]
+; CHECK-NEXT: i128 2, label [[BB3:%.*]]
+; CHECK-NEXT: i128 4, label [[BB4:%.*]]
+; CHECK-NEXT: i128 32, label [[BB5:%.*]]
; CHECK-NEXT: ]
; CHECK: default_case:
; CHECK-NEXT: unreachable
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 9d6502072c16b..dae797068e409 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -308,10 +308,10 @@ define i32 @overflow(i32 %type) {
; CHECK-LABEL: @overflow(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[TYPE:%.*]], label [[IF_END:%.*]] [
-; CHECK-NEXT: i32 3, label [[SW_BB3:%.*]]
-; CHECK-NEXT: i32 -2147483645, label [[SW_BB3]]
-; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i32 3, label [[SW_BB3:%.*]]
+; CHECK-NEXT: i32 -2147483645, label [[SW_BB3]]
+; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb1:
; CHECK-NEXT: br label [[IF_END]]
@@ -931,11 +931,11 @@ define i96 @illegaltype(i32 %c) {
; CHECK-LABEL: @illegaltype(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT: i32 42, label [[RETURN:%.*]]
-; CHECK-NEXT: i32 43, label [[SW_BB1:%.*]]
-; CHECK-NEXT: i32 44, label [[SW_BB2:%.*]]
-; CHECK-NEXT: i32 45, label [[SW_BB3:%.*]]
-; CHECK-NEXT: i32 46, label [[SW_BB4:%.*]]
+; CHECK-NEXT: i32 42, label [[RETURN:%.*]]
+; CHECK-NEXT: i32 43, label [[SW_BB1:%.*]]
+; CHECK-NEXT: i32 44, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i32 45, label [[SW_BB3:%.*]]
+; CHECK-NEXT: i32 46, label [[SW_BB4:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb1:
; CHECK-NEXT: br label [[RETURN]]
@@ -1050,9 +1050,9 @@ define i32 @threecasesholes(i32 %c) {
; CHECK-LABEL: @threecasesholes(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT: i32 0, label [[RETURN:%.*]]
-; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT: i32 3, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT: i32 3, label [[SW_BB2:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb1:
; CHECK-NEXT: br label [[RETURN]]
@@ -1142,9 +1142,9 @@ define ptr @tls(i32 %x) {
; CHECK-LABEL: @tls(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[X:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT: i32 0, label [[RETURN:%.*]]
-; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb1:
; CHECK-NEXT: br label [[RETURN]]
@@ -1182,9 +1182,9 @@ define ptr @dllimport(i32 %x) {
; CHECK-LABEL: @dllimport(
; CHECK-NEXT: entry:
; CHECK-NEXT: switch i32 [[X:%.*]], label [[SW_DEFAULT:%.*]] [
-; CHECK-NEXT: i32 0, label [[RETURN:%.*]]
-; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
-; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT: i32 0, label [[RETURN:%.*]]
+; CHECK-NEXT: i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT: i32 2, label [[SW_BB2:%.*]]
; CHECK-NEXT: ]
; CHECK: sw.bb1:
; CHECK-NEXT: br label [[RETURN]]
@@ -1476,11 +1476,11 @@ define void @pr20210(i8 %x, i1 %y) {
; CHECK-NEXT: br i1 [[Y:%.*]], label [[SW:%.*]], label [[INTERMEDIATE:%.*]]
; CHECK: sw:
; CHECK-NEXT: switch i8 [[X:%.*]], label [[END:%.*]] [
-; CHECK-NEXT: i8 7, label [[INTERMEDIATE]]
-; CHECK-NEXT: i8 3, label [[INTERMEDIATE]]
-; CHECK-NEXT: i8 2, label [[INTERMEDIATE]]
-; CHECK-NEXT: i8 1, label [[INTERMEDIATE]]
-; CHECK-NEXT: i8 0, label [[INTERMEDIATE]]
+; CHECK-NEXT: i8 7, label [[INTERMEDIATE]]
+; CHECK-NEXT: i8 3, label [[INTERMEDIATE]]
+; CHECK-NEXT: i8 2, label [[INTERMEDIATE]]
+; CHECK-NEXT: i8 1, label [[INTERMEDIATE]]
+; CHECK-NEXT: i8 0, label [[INTERMEDIATE]]
; CHECK-NEXT: ]
; CHECK: intermediate:
; CHECK-NEXT: [[Z:%.*]] = zext i8 [[X]] to i32
@@ -1612,15 +1612,15 @@ define void @wineh_test(i64 %val) personality ptr @__CxxFrameHandler3 {
; CHECK-LABEL: @wineh_test(
; CHECK-NEXT: entry:
; CHECK-NEXT: invoke void @throw(i1 false)
-; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CLEANUP1:%.*]]
+; CHECK-NEXT: to label [[UNREACHABLE:%.*]] unwind label [[CLEANUP1:%.*]]
; CHECK: unreachable:
; CHECK-NEXT: unreachable
; CHECK: cleanup1:
; CHECK-NEXT: [[CLEANUPPAD1:%.*]] = cleanuppad within none []
; CHECK-NEXT: switch i64 [[VAL:%.*]], label [[CLEANUPDONE2:%.*]] [
-; CHECK-NEXT: i64 0, label [[CLEANUPDONE1:%.*]]
-; CHECK-NEXT: i64 1, label [[CLEANUPDONE1]]
-; CHECK-NEXT: i64 6, label [[CLEANUPDONE1]]
+; CHECK-NEXT: i64 0, label [[CLEANUPDONE1:%.*]]
+; CHECK-NEXT: i64 1, label [[CLEANUPDONE1]]
+; CHECK-NEXT: i64 6, label [[CLEANUPDONE1]]
; CHECK-NEXT: ]
; CHECK: cleanupdone1:
; CHECK-NEXT: cleanupret from [[CLEANUPPAD1]] unwind label [[CLEANUP2:%.*]]
@@ -1732,12 +1732,53 @@ define i32 @signed_overflow2(i8 %n) {
; CHECK-LABEL: @signed_overflow2(
; CHECK-NEXT: start:
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
-; CHECK-NEXT: switch i2 [[TRUNC]], label [[BB1:%.*]] [
-; CHECK-NEXT: i2 1, label [[BB6:%.*]]
-; CHECK-NEXT: i2 -2, label [[BB4:%.*]]
-; CHECK-NEXT: i2 -1, label [[BB5:%.*]]
+; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i2 [[TRUNC]], -2
+; CHECK-NEXT: [[SWITCH_TABLEIDX_ZEXT:%.*]] = zext i2 [[SWITCH_TABLEIDX]] to i3
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.signed_overflow2, i32 0, i3 [[SWITCH_TABLEIDX_ZEXT]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: ret i32 [[SWITCH_LOAD]]
+;
+start:
+ %trunc = trunc i8 %n to i2
+ switch i2 %trunc, label %bb1 [
+ i2 1, label %bb3
+ i2 -2, label %bb4
+ i2 -1, label %bb5
+ ]
+
+bb1: ; preds = %start
+ unreachable
+
+bb3: ; preds = %start
+ br label %bb6
+
+bb4: ; preds = %start
+ br label %bb6
+
+bb5: ; preds = %start
+ br label %bb6
+
+bb6: ; preds = %start, %bb3, %bb4, %bb5
+ %.sroa.0.0 = phi i32 [ 4444, %bb5 ], [ 3333, %bb4 ], [ 2222, %bb3 ]
+ ret i32 %.sroa.0.0
+}
+
+; This is the same as @signed_overflow2 except that the default case calls @exit(), so it
+; isn't treated as unreachable
+define i32 @signed_overflow3(i8 %n) {
+; CHECK-LABEL: @signed_overflow3(
+; CHECK-NEXT: start:
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i8 [[N:%.*]] to i2
+; CHECK-NEXT: switch i2 [[TRUNC]], label [[START_UNREACHABLEDEFAULT:%.*]] [
+; CHECK-NEXT: i2 1, label [[BB6:%.*]]
+; CHECK-NEXT: i2 -2, label [[BB4:%.*]]
+; CHECK-NEXT: i2 -1, label [[BB5:%.*]]
+; CHECK-NEXT: i2 0, label [[BB1:%.*]]
; CHECK-NEXT: ]
+; CHECK: start.unreachabledefault:
+; CHECK-NEXT: unreachable
; CHECK: bb1:
+; CHECK-NEXT: call void @exit(i32 1)
; CHECK-NEXT: unreachable
; CHECK: bb4:
; CHECK-NEXT: br label [[BB6]]
@@ -1756,6 +1797,7 @@ start:
]
bb1: ; preds = %start
+ call void @exit(i32 1)
unreachable
bb3: ; preds = %start
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll
new file mode 100644
index 0000000000000..349436bc4c1f1
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll
@@ -0,0 +1,1506 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
+; RUN: opt < %s -passes=simplifycfg -switch-to-lookup=true -keep-loops=false -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: opt < %s -passes='simplifycfg<no-keep-loops;switch-to-lookup>' -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i32:8:8-i16:16:16-i32:32:32-i32:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;.
+; CHECK: @switch.table.reachable_default_dense_0to63 = private unnamed_addr constant [64 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1], align 8
+; CHECK: @switch.table.unreachable_default_dense_0to63 = private unnamed_addr constant [64 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1], align 8
+; CHECK: @switch.table.reachable_default_holes_0to63 = private unnamed_addr constant [64 x i32] [i32 0, i32 7, i32 6, i32 0, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 0, i32 2, i32 1, i32 0, i32 7, i32 0, i32 5, i32 4, i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 5, i32 0, i32 3, i32 2, i32 1, i32 0, i32 0, i32 6, i32 5, i32 4, i32 3, i32 0, i32 1, i32 0, i32 7, i32 6, i32 0, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 0, i32 2, i32 1, i32 0, i32 7, i32 0, i32 5, i32 4, i32 3, i32 2, i32 1], align 8
+; CHECK: @switch.table.unreachable_default_holes_0to63 = private unnamed_addr constant [64 x i32] [i32 0, i32 7, i32 6, i32 0, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 0, i32 2, i32 1, i32 0, i32 7, i32 0, i32 5, i32 4, i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 5, i32 0, i32 3, i32 2, i32 1, i32 0, i32 0, i32 6, i32 5, i32 4, i32 3, i32 0, i32 1, i32 0, i32 7, i32 6, i32 0, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 0, i32 2, i32 1, i32 0, i32 7, i32 0, i32 5, i32 4, i32 3, i32 2, i32 1], align 8
+; CHECK: @switch.table.reachable_default_dense_0to64 = private unnamed_addr constant [65 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0], align 8
+; CHECK: @switch.table.unreachable_default_dense_0to64 = private unnamed_addr constant [65 x i32] [i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0], align 8
+; CHECK: @switch.table.unreachable_default_holes_0to64 = private unnamed_addr constant [65 x i32] ...
[truncated]
|
cef9fbd
to
05f6be0
Compare
Please rebase your PR, see: 07b9d23. |
bool TableHasHoles = (NumResults < TableSize); | ||
bool NeedMask = (TableHasHoles && !HasDefaultResults); | ||
bool HolesHaveUndefinedResults = (TableHasHoles && !HasDefaultResults); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This variable name is confusing to me. Could you explain it further and add comments?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've pushed some new comments explaining this in more detail.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe you could change it to AllHolesAreUndefined
, AllHolesArePoison
or AllHolesAreUnreachable
.
// If using a bitmask, use any value to fill the lookup table holes. | ||
Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; | ||
// Use any value to fill the lookup table holes. | ||
Constant *DV = HolesHaveUndefinedResults ? ResultLists[PHI][0].second |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When the default branch is undefined, we can use a poison value. This can be done as another PR, as it requires modifying the constructor of SwitchLookupTable
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But I'm not sure if this makes sense. :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe this makes sense. For example, we could treat [1, poison, 3, 4]
as [1, 2, 3, 4]
or merge [1, poison, 3, 4]
and [1, 2, poison, 4]
into [1, 2, 3, 4]
. If you're considering implementing it, please let me know. :)
llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table_big.ll
Outdated
Show resolved
Hide resolved
05f6be0
to
4e9d372
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. If no one else comments, I'll merge it after next Monday.
bool TableHasHoles = (NumResults < TableSize); | ||
bool NeedMask = (TableHasHoles && !HasDefaultResults); | ||
bool HolesHaveUndefinedResults = (TableHasHoles && !HasDefaultResults); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe you could change it to AllHolesAreUndefined
, AllHolesArePoison
or AllHolesAreUnreachable
.
// If using a bitmask, use any value to fill the lookup table holes. | ||
Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI]; | ||
// Use any value to fill the lookup table holes. | ||
Constant *DV = HolesHaveUndefinedResults ? ResultLists[PHI][0].second |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe this makes sense. For example, we could treat [1, poison, 3, 4]
as [1, 2, 3, 4]
or merge [1, poison, 3, 4]
and [1, 2, poison, 4]
into [1, 2, 3, 4]
. If you're considering implementing it, please let me know. :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM as well.
// If the table has holes but the default destination doesn't produce any | ||
// constant results, the lookup table entries corresponding to the holes will | ||
// contain undefined values. | ||
bool HolesHaveUndefinedResults = (TableHasHoles && !HasDefaultResults); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bool HolesHaveUndefinedResults = (TableHasHoles && !HasDefaultResults); | |
bool HolesHaveUndefinedResults = TableHasHoles && !HasDefaultResults; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My bad, I was keeping with the style of the existing code. I've removed these parens (and the other ones you pointed out)
// to the default case. | ||
// The mask is unnecessary if the table has holes but the default destination | ||
// is unreachable, as in that case the holes must also be unreachable. | ||
bool NeedMask = (HolesHaveUndefinedResults && DefaultIsReachable); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bool NeedMask = (HolesHaveUndefinedResults && DefaultIsReachable); | |
bool NeedMask = HolesHaveUndefinedResults && DefaultIsReachable; |
@@ -0,0 +1,534 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | |||
; RUN: opt < %s -passes=simplifycfg -switch-to-lookup=true -keep-loops=false -S | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think -keep-loops
is relevant here? We also don't need both RUN lines, they do the same thing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Alright, I've removed -keep-loops=false
and the second RUN
. I copied those lines from the original switch_to_lookup_table.ll
; I assumed there was some reason why both versions were being tested.
…ize and lookup tables with reachable/unreachable default cases
4e9d372
to
cbf2a2d
Compare
#94468 (comment) I like #94468 (comment) Yeah, I'm definitely interested in doing that, I'll get working on that as soon as this is merged. @dianqk |
…chable default branch to a lookup table This makes it possible to convert large switches (whose lookup table size would exceed the target register bit width) with holes and an unreachable default case into lookup tables.
cbf2a2d
to
8fd7b2d
Compare
Thanks. I prefer |
…hes with an unreachable default case (llvm#94468) When transforming a switch with holes into a lookup table, we currently use a mask to check if the current index is handled by the switch or if it is a hole. If it is a hole, we skip loading from the lookup table. Normally, if the switch's default case is unreachable this has no impact, as the mask test gets optimized away by subsequent passes. However, if the switch is large enough that the number of lookup table entries exceeds the target's register width, we won't be able to fit all the cases into a mask and the switch won't get transformed into a lookup table. If we know that the switch's default case is unreachable, we know that the mask is unnecessary and can skip constructing it entirely, which allows us to transform the switch into a lookup table. [Example](https://godbolt.org/z/7x7qfx8M1) In the future, it might be interesting to consider allowing lookup table masks to be more than one register large (e.g. using a constant array of bit flags, similar to `std::bitset`). Signed-off-by: Hafidz Muzakky <ais.muzakky@gmail.com>
When transforming a switch with holes into a lookup table, we currently use a mask to check if the current index is handled by the switch or if it is a hole. If it is a hole, we skip loading from the lookup table. Normally, if the switch's default case is unreachable this has no impact, as the mask test gets optimized away by subsequent passes. However, if the switch is large enough that the number of lookup table entries exceeds the target's register width, we won't be able to fit all the cases into a mask and the switch won't get transformed into a lookup table. If we know that the switch's default case is unreachable, we know that the mask is unnecessary and can skip constructing it entirely, which allows us to transform the switch into a lookup table.
Example
In the future, it might be interesting to consider allowing lookup table masks to be more than one register large (e.g. using a constant array of bit flags, similar to
std::bitset
).