Skip to content

Commit 6e5a085

Browse files
committed
[AMDGPU] Add BFX Formation Combines to RegBankCombiner
They're relatively safe to use there, I believe. The only new registers they may create are the constants for the BFX. For those, borrow the register bank from the source register. Fixes #140040
1 parent e5f2477 commit 6e5a085

File tree

9 files changed

+1332
-1438
lines changed

9 files changed

+1332
-1438
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4629,10 +4629,17 @@ bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
46294629
if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
46304630
return false;
46314631

4632+
const RegisterBank *RB = getRegBank(ShiftSrc);
4633+
46324634
MatchInfo = [=](MachineIRBuilder &B) {
46334635
auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
46344636
auto Cst2 = B.buildConstant(ExtractTy, Width);
46354637
B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4638+
4639+
if (RB) {
4640+
MRI.setRegBank(Cst1.getReg(0), *RB);
4641+
MRI.setRegBank(Cst2.getReg(0), *RB);
4642+
}
46364643
};
46374644
return true;
46384645
}
@@ -4667,10 +4674,18 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(MachineInstr &MI,
46674674
return false;
46684675

46694676
uint64_t Width = APInt(Size, AndImm).countr_one();
4677+
4678+
const RegisterBank *RB = getRegBank(ShiftSrc);
4679+
46704680
MatchInfo = [=](MachineIRBuilder &B) {
46714681
auto WidthCst = B.buildConstant(ExtractTy, Width);
46724682
auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
46734683
B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4684+
4685+
if (RB) {
4686+
MRI.setRegBank(WidthCst.getReg(0), *RB);
4687+
MRI.setRegBank(LSBCst.getReg(0), *RB);
4688+
}
46744689
};
46754690
return true;
46764691
}
@@ -4717,10 +4732,17 @@ bool CombinerHelper::matchBitfieldExtractFromShr(
47174732
const int64_t Pos = ShrAmt - ShlAmt;
47184733
const int64_t Width = Size - ShrAmt;
47194734

4735+
const RegisterBank *RB = getRegBank(ShlSrc);
4736+
47204737
MatchInfo = [=](MachineIRBuilder &B) {
47214738
auto WidthCst = B.buildConstant(ExtractTy, Width);
47224739
auto PosCst = B.buildConstant(ExtractTy, Pos);
47234740
B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4741+
4742+
if (RB) {
4743+
MRI.setRegBank(WidthCst.getReg(0), *RB);
4744+
MRI.setRegBank(PosCst.getReg(0), *RB);
4745+
}
47244746
};
47254747
return true;
47264748
}
@@ -4775,10 +4797,17 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
47754797
if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
47764798
return false;
47774799

4800+
const RegisterBank *RB = getRegBank(AndSrc);
4801+
47784802
MatchInfo = [=](MachineIRBuilder &B) {
47794803
auto WidthCst = B.buildConstant(ExtractTy, Width);
47804804
auto PosCst = B.buildConstant(ExtractTy, Pos);
47814805
B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4806+
4807+
if (RB) {
4808+
MRI.setRegBank(WidthCst.getReg(0), *RB);
4809+
MRI.setRegBank(PosCst.getReg(0), *RB);
4810+
}
47824811
};
47834812
return true;
47844813
}

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,5 +210,5 @@ def AMDGPURegBankCombiner : GICombiner<
210210
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
211211
identity_combines, redundant_and, constant_fold_cast_op,
212212
cast_of_cast_combines, sext_trunc, zext_of_shift_amount_combines,
213-
lower_uniform_sbfx, lower_uniform_ubfx]> {
213+
lower_uniform_sbfx, lower_uniform_ubfx, form_bitfield_extract]> {
214214
}

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 56 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -811,16 +811,15 @@ define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amou
811811
;
812812
; GFX8-LABEL: s_ashr_v2i16:
813813
; GFX8: ; %bb.0:
814-
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
815-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
816-
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
817-
; GFX8-NEXT: s_ashr_i32 s0, s0, s1
818-
; GFX8-NEXT: s_sext_i32_i16 s1, s2
819-
; GFX8-NEXT: s_ashr_i32 s1, s1, s3
820-
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
814+
; GFX8-NEXT: s_lshr_b32 s2, s1, 16
815+
; GFX8-NEXT: s_sext_i32_i16 s3, s0
816+
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
817+
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
818+
; GFX8-NEXT: s_ashr_i32 s1, s3, s1
821819
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
822-
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
823-
; GFX8-NEXT: s_or_b32 s0, s0, s1
820+
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
821+
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
822+
; GFX8-NEXT: s_or_b32 s0, s1, s0
824823
; GFX8-NEXT: ; return to shader part epilog
825824
;
826825
; GFX9-LABEL: s_ashr_v2i16:
@@ -1014,26 +1013,24 @@ define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg
10141013
;
10151014
; GFX8-LABEL: s_ashr_v4i16:
10161015
; GFX8: ; %bb.0:
1017-
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1018-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
1019-
; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1020-
; GFX8-NEXT: s_ashr_i32 s0, s0, s2
1021-
; GFX8-NEXT: s_sext_i32_i16 s2, s4
1022-
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1023-
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1024-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
1025-
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1026-
; GFX8-NEXT: s_ashr_i32 s1, s1, s3
1027-
; GFX8-NEXT: s_sext_i32_i16 s3, s5
1028-
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1029-
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1016+
; GFX8-NEXT: s_lshr_b32 s4, s2, 16
1017+
; GFX8-NEXT: s_sext_i32_i16 s6, s0
1018+
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1019+
; GFX8-NEXT: s_lshr_b32 s5, s3, 16
1020+
; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1021+
; GFX8-NEXT: s_sext_i32_i16 s4, s1
1022+
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1023+
; GFX8-NEXT: s_ashr_i32 s2, s6, s2
1024+
; GFX8-NEXT: s_ashr_i32 s1, s1, s5
10301025
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1031-
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1032-
; GFX8-NEXT: s_or_b32 s0, s0, s2
1033-
; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1026+
; GFX8-NEXT: s_ashr_i32 s3, s4, s3
1027+
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1028+
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
10341029
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1035-
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
1036-
; GFX8-NEXT: s_or_b32 s1, s1, s2
1030+
; GFX8-NEXT: s_or_b32 s0, s2, s0
1031+
; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
1032+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1033+
; GFX8-NEXT: s_or_b32 s1, s2, s1
10371034
; GFX8-NEXT: ; return to shader part epilog
10381035
;
10391036
; GFX9-LABEL: s_ashr_v4i16:
@@ -1223,46 +1220,42 @@ define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg
12231220
;
12241221
; GFX8-LABEL: s_ashr_v8i16:
12251222
; GFX8: ; %bb.0:
1226-
; GFX8-NEXT: s_lshr_b32 s8, s0, 16
1227-
; GFX8-NEXT: s_sext_i32_i16 s0, s0
1228-
; GFX8-NEXT: s_lshr_b32 s12, s4, 16
1229-
; GFX8-NEXT: s_ashr_i32 s0, s0, s4
1230-
; GFX8-NEXT: s_sext_i32_i16 s4, s8
1231-
; GFX8-NEXT: s_lshr_b32 s9, s1, 16
1232-
; GFX8-NEXT: s_ashr_i32 s4, s4, s12
1233-
; GFX8-NEXT: s_sext_i32_i16 s1, s1
1234-
; GFX8-NEXT: s_lshr_b32 s13, s5, 16
1235-
; GFX8-NEXT: s_ashr_i32 s1, s1, s5
1236-
; GFX8-NEXT: s_sext_i32_i16 s5, s9
1237-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1238-
; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1239-
; GFX8-NEXT: s_ashr_i32 s5, s5, s13
1240-
; GFX8-NEXT: s_sext_i32_i16 s2, s2
1223+
; GFX8-NEXT: s_lshr_b32 s8, s4, 16
1224+
; GFX8-NEXT: s_sext_i32_i16 s12, s0
1225+
; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010
1226+
; GFX8-NEXT: s_lshr_b32 s9, s5, 16
1227+
; GFX8-NEXT: s_ashr_i32 s0, s0, s8
1228+
; GFX8-NEXT: s_sext_i32_i16 s8, s1
1229+
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010
1230+
; GFX8-NEXT: s_lshr_b32 s10, s6, 16
1231+
; GFX8-NEXT: s_ashr_i32 s4, s12, s4
1232+
; GFX8-NEXT: s_ashr_i32 s5, s8, s5
1233+
; GFX8-NEXT: s_ashr_i32 s1, s1, s9
1234+
; GFX8-NEXT: s_sext_i32_i16 s8, s2
1235+
; GFX8-NEXT: s_bfe_i32 s2, s2, 0x100010
12411236
; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1242-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1243-
; GFX8-NEXT: s_lshr_b32 s14, s6, 16
1244-
; GFX8-NEXT: s_ashr_i32 s2, s2, s6
1245-
; GFX8-NEXT: s_sext_i32_i16 s6, s10
1246-
; GFX8-NEXT: s_or_b32 s0, s0, s4
1247-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1248-
; GFX8-NEXT: s_lshr_b32 s11, s3, 16
1249-
; GFX8-NEXT: s_ashr_i32 s6, s6, s14
1250-
; GFX8-NEXT: s_sext_i32_i16 s3, s3
1237+
; GFX8-NEXT: s_lshr_b32 s11, s7, 16
1238+
; GFX8-NEXT: s_ashr_i32 s6, s8, s6
1239+
; GFX8-NEXT: s_ashr_i32 s2, s2, s10
1240+
; GFX8-NEXT: s_sext_i32_i16 s8, s3
1241+
; GFX8-NEXT: s_bfe_i32 s3, s3, 0x100010
1242+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1243+
; GFX8-NEXT: s_lshl_b32 s0, s0, 16
12511244
; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1252-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1253-
; GFX8-NEXT: s_lshr_b32 s15, s7, 16
1254-
; GFX8-NEXT: s_ashr_i32 s3, s3, s7
1255-
; GFX8-NEXT: s_sext_i32_i16 s7, s11
1256-
; GFX8-NEXT: s_or_b32 s1, s1, s4
1257-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1258-
; GFX8-NEXT: s_ashr_i32 s7, s7, s15
1245+
; GFX8-NEXT: s_ashr_i32 s3, s3, s11
1246+
; GFX8-NEXT: s_or_b32 s0, s4, s0
1247+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
1248+
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
12591249
; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1260-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1261-
; GFX8-NEXT: s_or_b32 s2, s2, s4
1262-
; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1250+
; GFX8-NEXT: s_ashr_i32 s7, s8, s7
1251+
; GFX8-NEXT: s_or_b32 s1, s4, s1
1252+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s6
1253+
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
12631254
; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1264-
; GFX8-NEXT: s_lshl_b32 s4, s4, 16
1265-
; GFX8-NEXT: s_or_b32 s3, s3, s4
1255+
; GFX8-NEXT: s_or_b32 s2, s4, s2
1256+
; GFX8-NEXT: s_and_b32 s4, 0xffff, s7
1257+
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
1258+
; GFX8-NEXT: s_or_b32 s3, s4, s3
12661259
; GFX8-NEXT: ; return to shader part epilog
12671260
;
12681261
; GFX9-LABEL: s_ashr_v8i16:

0 commit comments

Comments
 (0)