Skip to content

Commit ef68de9

Browse files
committed
[NVPTX] Fix the error in a pattern match.
The replacement should've had BFE() as the arguments for the comparison, not the source register. While at it, tighten the patterns a bit, and expand them to cover variants with immediate arguments. Also change the default lowering of bfe() to use the unsigned variant, so the value of the upper bits is predictable.
1 parent 2fcfc97 commit ef68de9

File tree

2 files changed

+258
-198
lines changed

2 files changed

+258
-198
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 67 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1886,10 +1886,12 @@ multiclass PRMT<ValueType T, RegisterClass RC> {
18861886
}
18871887

18881888
let hasSideEffects = false in {
1889-
defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
1889+
// order is somewhat important here. signed/unsigned variants match
1890+
// the same patterns, so the first one wins.
18901891
defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>;
1891-
defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>;
1892+
defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
18921893
defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>;
1894+
defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>;
18931895

18941896
defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>;
18951897
defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>;
@@ -2259,27 +2261,69 @@ def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
22592261
(NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
22602262

22612263
// comparisons of i8 extracted with BFE as i32
2262-
def: Pat<(setgt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2263-
(SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGT)>;
2264-
def: Pat<(setge (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2265-
(SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGE)>;
2266-
def: Pat<(setlt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2267-
(SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLT)>;
2268-
def: Pat<(setle (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2269-
(SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLE)>;
2270-
2271-
def: Pat<(setugt (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2272-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHI)>;
2273-
def: Pat<(setuge (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2274-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHS)>;
2275-
def: Pat<(setult (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2276-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLO)>;
2277-
def: Pat<(setule (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2278-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLS)>;
2279-
def: Pat<(seteq (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2280-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpEQ)>;
2281-
def: Pat<(setne (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2282-
(SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpNE)>;
2264+
// It's faster to do comparison directly on i32 extracted by BFE,
2265+
// instead of the long conversion and sign extending.
2266+
def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2267+
(i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2268+
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>;
2269+
def: Pat<(setgt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2270+
(i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2271+
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>;
2272+
def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2273+
(i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2274+
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>;
2275+
def: Pat<(setge (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2276+
(i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2277+
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>;
2278+
def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2279+
(i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2280+
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>;
2281+
def: Pat<(setlt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2282+
(i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2283+
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLT)>;
2284+
def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2285+
(i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2286+
(SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>;
2287+
def: Pat<(setle (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2288+
(i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2289+
(SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>;
2290+
2291+
def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2292+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2293+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>;
2294+
def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2295+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2296+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>;
2297+
def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2298+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2299+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>;
2300+
def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2301+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2302+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>;
2303+
def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2304+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2305+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>;
2306+
def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2307+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2308+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>;
2309+
def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2310+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2311+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>;
2312+
def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2313+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2314+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>;
2315+
def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2316+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2317+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>;
2318+
def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2319+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2320+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>;
2321+
def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2322+
(i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2323+
(SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>;
2324+
def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2325+
(i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2326+
(SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>;
22832327

22842328
// i1 compare -> i32
22852329
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),

0 commit comments

Comments
 (0)