@@ -1886,10 +1886,12 @@ multiclass PRMT<ValueType T, RegisterClass RC> {
1886
1886
}
1887
1887
1888
1888
let hasSideEffects = false in {
1889
- defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
1889
+ // Order is somewhat important here. Signed/unsigned variants match
1890
+ // the same patterns, so the first one wins.
1890
1891
defm BFE_U32 : BFE<"bfe.u32", i32, Int32Regs>;
1891
- defm BFE_S64 : BFE<"bfe.s64 ", i64, Int64Regs >;
1892
+ defm BFE_S32 : BFE<"bfe.s32", i32, Int32Regs>;
1892
1893
defm BFE_U64 : BFE<"bfe.u64", i64, Int64Regs>;
1894
+ defm BFE_S64 : BFE<"bfe.s64", i64, Int64Regs>;
1893
1895
1894
1896
defm BFI_B32 : BFI<"bfi.b32", i32, Int32Regs, i32imm>;
1895
1897
defm BFI_B64 : BFI<"bfi.b64", i64, Int64Regs, i64imm>;
@@ -2259,27 +2261,69 @@ def : Pat<(setueq Int1Regs:$a, Int1Regs:$b),
2259
2261
(NOT1 (XORb1rr Int1Regs:$a, Int1Regs:$b))>;
2260
2262
2261
2263
// comparisons of i8 extracted with BFE as i32
2262
- def: Pat<(setgt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2263
- (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGT)>;
2264
- def: Pat<(setge (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2265
- (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpGE)>;
2266
- def: Pat<(setlt (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2267
- (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLT)>;
2268
- def: Pat<(setle (sext_inreg (trunc Int32Regs:$a), i8), (sext_inreg (trunc Int32Regs:$b), i8)),
2269
- (SETP_s32rr Int32Regs:$a, Int32Regs:$b, CmpLE)>;
2270
-
2271
- def: Pat<(setugt (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2272
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHI)>;
2273
- def: Pat<(setuge (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2274
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpHS)>;
2275
- def: Pat<(setult (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2276
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLO)>;
2277
- def: Pat<(setule (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2278
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpLS)>;
2279
- def: Pat<(seteq (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2280
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpEQ)>;
2281
- def: Pat<(setne (i16 (and (trunc Int32Regs:$a), 255)), (i16 (and (trunc Int32Regs:$b), 255))),
2282
- (SETP_u32rr Int32Regs:$a, Int32Regs:$b, CmpNE)>;
2264
+ // It's faster to do comparison directly on i32 extracted by BFE,
2265
+ // instead of the long conversion and sign extending.
2266
+ def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2267
+ (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2268
+ (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>;
2269
+ def: Pat<(setgt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2270
+ (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2271
+ (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>;
2272
+ def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2273
+ (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2274
+ (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>;
2275
+ def: Pat<(setge (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2276
+ (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2277
+ (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>;
2278
+ def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2279
+ (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2280
+ (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>;
2281
+ def: Pat<(setlt (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2282
+ (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2283
+ (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLT)>;
2284
+ def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8))), i8)),
2285
+ (i16 (sext_inreg (i16 (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8))), i8))),
2286
+ (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>;
2287
+ def: Pat<(setle (i16 (sext_inreg (trunc (bfe Int32Regs:$a, imm:$oa, 8)), i8)),
2288
+ (i16 (sext_inreg (trunc (bfe Int32Regs:$b, imm:$ob, 8)), i8))),
2289
+ (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>;
2290
+
2291
+ def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2292
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2293
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>;
2294
+ def: Pat<(setugt (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2295
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2296
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>;
2297
+ def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2298
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2299
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>;
2300
+ def: Pat<(setuge (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2301
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2302
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>;
2303
+ def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2304
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2305
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>;
2306
+ def: Pat<(setult (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2307
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2308
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>;
2309
+ def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2310
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2311
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>;
2312
+ def: Pat<(setule (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2313
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2314
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>;
2315
+ def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2316
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2317
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>;
2318
+ def: Pat<(seteq (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2319
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2320
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>;
2321
+ def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, Int32Regs:$oa, 8)), 255)),
2322
+ (i16 (and (trunc (bfe Int32Regs:$b, Int32Regs:$ob, 8)), 255))),
2323
+ (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>;
2324
+ def: Pat<(setne (i16 (and (trunc (bfe Int32Regs:$a, imm:$oa, 8)), 255)),
2325
+ (i16 (and (trunc (bfe Int32Regs:$b, imm:$ob, 8)), 255))),
2326
+ (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>;
2283
2327
2284
2328
// i1 compare -> i32
2285
2329
def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
0 commit comments