  DONE;
})

+;; AND tree reductions.
+;; Check that after a min pairwise reduction all the lanes are set (-1),
+;; i.e. every lane of the boolean vector is active.
+;;
+;; uminp v1.4s, v1.4s, v1.4s
+;; fmov x1, d1
+;; cmn x1, #1
+;; cset w0, eq
+;;
+(define_expand "reduc_sbool_and_scal_<mode>"
+  [(set (match_operand:QI 0 "register_operand")
+        (unspec:QI [(match_operand:VALLI 1 "register_operand")]
+                   UNSPEC_ANDV))]
+  "TARGET_SIMD"
+{
+  rtx tmp = operands[1];
+  /* 128-bit vectors need to be compressed to 64-bits first.  */
+  if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+    {
+      /* Always reduce using a V4SI.  */
+      rtx reduc = gen_lowpart (V4SImode, tmp);
+      rtx res = gen_reg_rtx (V4SImode);
+      emit_insn (gen_aarch64_uminpv4si (res, reduc, reduc));
+      emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
+    }
+  rtx val = gen_reg_rtx (DImode);
+  emit_move_insn (val, gen_lowpart (DImode, tmp));
+  rtx cc_reg = aarch64_gen_compare_reg (EQ, val, constm1_rtx);
+  rtx cmp = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
+  rtx tmp2 = gen_reg_rtx (SImode);
+  emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+  emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+  DONE;
+})
+
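For illustration, a minimal C sketch (the function name and predicate are hypothetical, not part of the patch) of the kind of "all lanes true" reduction this expander serves, assuming the vectorizer builds a boolean vector from the per-element comparison and AND-reduces it:

/* Hypothetical example: does every element satisfy the predicate?
   When vectorized, the comparison mask is AND-reduced, which the
   expander above can lower to uminp + fmov + cmn #1 + cset.  */
int
all_positive (const int *x, int n)
{
  int all = 1;
  for (int i = 0; i < n; i++)
    all &= (x[i] > 0);
  return all;
}
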
+;; IOR tree reductions.
+;; Check that after a max pairwise reduction at least one lane is non-zero.
+;;
+;; umaxp v1.4s, v1.4s, v1.4s
+;; fmov x1, d1
+;; cmp x1, 0
+;; cset w0, ne
+;;
+(define_expand "reduc_sbool_ior_scal_<mode>"
+  [(set (match_operand:QI 0 "register_operand")
+        (unspec:QI [(match_operand:VALLI 1 "register_operand")]
+                   UNSPEC_IORV))]
+  "TARGET_SIMD"
+{
+  rtx tmp = operands[1];
+  /* 128-bit vectors need to be compressed to 64-bits first.  */
+  if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
+    {
+      /* Always reduce using a V4SI.  */
+      rtx reduc = gen_lowpart (V4SImode, tmp);
+      rtx res = gen_reg_rtx (V4SImode);
+      emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
+      emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
+    }
+  rtx val = gen_reg_rtx (DImode);
+  emit_move_insn (val, gen_lowpart (DImode, tmp));
+  rtx cc_reg = aarch64_gen_compare_reg (NE, val, const0_rtx);
+  rtx cmp = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
+  rtx tmp2 = gen_reg_rtx (SImode);
+  emit_insn (gen_aarch64_cstoresi (tmp2, cmp, cc_reg));
+  emit_move_insn (operands[0], gen_lowpart (QImode, tmp2));
+  DONE;
+})
+
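Likewise, a hypothetical C sketch of an "any lane true" reduction that the IOR expander is aimed at, under the same assumption about the vectorized comparison mask:

/* Hypothetical example: does any element satisfy the predicate?
   The OR reduction of the comparison mask corresponds to the
   umaxp + fmov + cmp #0 + cset sequence emitted above.  */
int
any_positive (const int *x, int n)
{
  int any = 0;
  for (int i = 0; i < n; i++)
    any |= (x[i] > 0);
  return any;
}
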
+;; XOR tree reductions.
+;; The result is 1 when the number of active lanes in the boolean vector is
+;; odd, i.e. not a multiple of 2.  We use a normal addition reduction after
+;; masking each lane with 0x1.
+;;
+;; movi v1.16b, 0x1
+;; and v2.16b, v2.16b, v1.16b
+;; addv b3, v2.16b
+;; fmov w1, s3
+;; and w0, w1, 1
+;;
+(define_expand "reduc_sbool_xor_scal_<mode>"
+  [(set (match_operand:QI 0 "register_operand")
+        (unspec:QI [(match_operand:VALLI 1 "register_operand")]
+                   UNSPEC_XORV))]
+  "TARGET_SIMD"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+  rtx one_reg = force_reg (<MODE>mode, CONST1_RTX (<MODE>mode));
+  emit_move_insn (tmp, gen_rtx_AND (<MODE>mode, operands[1], one_reg));
+  rtx tmp2 = gen_reg_rtx (<VEL>mode);
+  emit_insn (gen_reduc_plus_scal_<mode> (tmp2, tmp));
+  rtx tmp3 = gen_reg_rtx (DImode);
+  emit_move_insn (tmp3, gen_rtx_AND (DImode,
+                                     lowpart_subreg (DImode, tmp2, <VEL>mode),
+                                     const1_rtx));
+  emit_move_insn (operands[0], gen_lowpart (QImode, tmp3));
+  DONE;
+})
+
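And a hypothetical C sketch of the parity-style reduction the XOR expander targets; the low bit of the ADDV sum of the 0/1-masked lanes is exactly this XOR:

/* Hypothetical example: is the number of matching elements odd?
   The XOR reduction of the mask equals the parity of the active
   lane count, computed above as (ADDV of masked lanes) & 1.  */
int
odd_matches (const int *x, int n)
{
  int parity = 0;
  for (int i = 0; i < n; i++)
    parity ^= (x[i] > 0);
  return parity;
}
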
;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
;; sign or zero-extends its elements.
(define_insn "aarch64_<su>addlv<mode>"