Skip to content

Commit 8baabb3

Browse files
author
wilco
committed
[ARM] Cleanup DImode shifts
Like the logical operations, expand all shifts early rather than only sometimes. The Neon shift expansions are never emitted (not even with -mneon-for-64bits), so they are not useful. So all the late expansions and Neon shift patterns can be removed, and shifts are more optimized as a result. Since some extend patterns use Neon DImode shifts, remove the Neon extend variants and related splits. A simple example now generates the same efficient code after this patch with -mfpu=neon and -mfpu=vfp (previously just the fact of having Neon enabled resulted in inefficient code for no reason). unsigned long long f(unsigned long long x, unsigned long long y) { return x & (y >> 33); } Before: strd r4, r5, [sp, #-8]! lsr r4, r3, #1 mov r5, #0 and r1, r1, r5 and r0, r0, r4 ldrd r4, r5, [sp] add sp, sp, #8 bx lr After: and r0, r0, r3, lsr #1 mov r1, #0 bx lr Bootstrap and regress OK on arm-none-linux-gnueabihf --with-cpu=cortex-a57 gcc/ * config/arm/iterators.md (qhs_extenddi_cstr): Update. (qhs_zextenddi_cstr): Likewise. * config/arm/arm.md (ashldi3): Always expand early. (ashrdi3): Likewise. (lshrdi3): Likewise. (zero_extend<mode>di2): Remove Neon variants. (extend<mode>di2): Likewise. * config/arm/neon.md (ashldi3_neon_noclobber): Remove. (signed_shift_di3_neon): Likewise. (unsigned_shift_di3_neon): Likewise. (ashrdi3_neon_imm_noclobber): Likewise. (lshrdi3_neon_imm_noclobber): Likewise. (<shift>di3_neon): Likewise. (split extend): Remove DI extend split patterns. gcc/testsuite/ * gcc.target/arm/neon-extend-1.c: Remove test. * gcc.target/arm/neon-extend-2.c: Remove test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@274824 138bc75d-0d04-0410-961f-82ee72b054a4
1 parent 5eddd27 commit 8baabb3

File tree

7 files changed

+48
-359
lines changed

7 files changed

+48
-359
lines changed

gcc/ChangeLog

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,20 @@
1+
2019-08-22 Wilco Dijkstra <wdijkstr@arm.com>
2+
3+
* config/arm/iterators.md (qhs_extenddi_cstr): Update.
4+
(qhs_zextenddi_cstr): Likewise.
5+
* config/arm/arm.md (ashldi3): Always expand early.
6+
(ashrdi3): Likewise.
7+
(lshrdi3): Likewise.
8+
(zero_extend<mode>di2): Remove Neon variants.
9+
(extend<mode>di2): Likewise.
10+
* config/arm/neon.md (ashldi3_neon_noclobber): Remove.
11+
(signed_shift_di3_neon): Likewise.
12+
(unsigned_shift_di3_neon): Likewise.
13+
(ashrdi3_neon_imm_noclobber): Likewise.
14+
(lshrdi3_neon_imm_noclobber): Likewise.
15+
(<shift>di3_neon): Likewise.
16+
(split extend): Remove DI extend split patterns.
17+
118
2019-08-22 Wilco Dijkstra <wdijkstr@arm.com>
219

320
* config/arm/arm.md (split and/eor/ior): Remove Neon check.

gcc/config/arm/arm.md

Lines changed: 24 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -3621,44 +3621,14 @@
36213621
(define_expand "ashldi3"
36223622
[(set (match_operand:DI 0 "s_register_operand")
36233623
(ashift:DI (match_operand:DI 1 "s_register_operand")
3624-
(match_operand:SI 2 "general_operand")))]
3624+
(match_operand:SI 2 "reg_or_int_operand")))]
36253625
"TARGET_32BIT"
36263626
"
3627-
if (TARGET_NEON)
3628-
{
3629-
/* Delay the decision whether to use NEON or core-regs until
3630-
register allocation. */
3631-
emit_insn (gen_ashldi3_neon (operands[0], operands[1], operands[2]));
3632-
DONE;
3633-
}
3634-
else
3635-
{
3636-
/* Only the NEON case can handle in-memory shift counts. */
3637-
if (!reg_or_int_operand (operands[2], SImode))
3638-
operands[2] = force_reg (SImode, operands[2]);
3639-
}
3640-
3641-
if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
3642-
; /* No special preparation statements; expand pattern as above. */
3643-
else
3644-
{
3645-
rtx scratch1, scratch2;
3646-
3647-
/* Ideally we should use iwmmxt here if we could know that operands[1]
3648-
ends up already living in an iwmmxt register. Otherwise it's
3649-
cheaper to have the alternate code being generated than moving
3650-
values to iwmmxt regs and back. */
3651-
3652-
/* Expand operation using core-registers.
3653-
'FAIL' would achieve the same thing, but this is a bit smarter. */
3654-
scratch1 = gen_reg_rtx (SImode);
3655-
scratch2 = gen_reg_rtx (SImode);
3656-
arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
3657-
operands[2], scratch1, scratch2);
3658-
DONE;
3659-
}
3660-
"
3661-
)
3627+
arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
3628+
operands[2], gen_reg_rtx (SImode),
3629+
gen_reg_rtx (SImode));
3630+
DONE;
3631+
")
36623632

36633633
(define_expand "ashlsi3"
36643634
[(set (match_operand:SI 0 "s_register_operand")
@@ -3681,35 +3651,11 @@
36813651
(match_operand:SI 2 "reg_or_int_operand")))]
36823652
"TARGET_32BIT"
36833653
"
3684-
if (TARGET_NEON)
3685-
{
3686-
/* Delay the decision whether to use NEON or core-regs until
3687-
register allocation. */
3688-
emit_insn (gen_ashrdi3_neon (operands[0], operands[1], operands[2]));
3689-
DONE;
3690-
}
3691-
3692-
if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
3693-
; /* No special preparation statements; expand pattern as above. */
3694-
else
3695-
{
3696-
rtx scratch1, scratch2;
3697-
3698-
/* Ideally we should use iwmmxt here if we could know that operands[1]
3699-
ends up already living in an iwmmxt register. Otherwise it's
3700-
cheaper to have the alternate code being generated than moving
3701-
values to iwmmxt regs and back. */
3702-
3703-
/* Expand operation using core-registers.
3704-
'FAIL' would achieve the same thing, but this is a bit smarter. */
3705-
scratch1 = gen_reg_rtx (SImode);
3706-
scratch2 = gen_reg_rtx (SImode);
3707-
arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1],
3708-
operands[2], scratch1, scratch2);
3709-
DONE;
3710-
}
3711-
"
3712-
)
3654+
arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1],
3655+
operands[2], gen_reg_rtx (SImode),
3656+
gen_reg_rtx (SImode));
3657+
DONE;
3658+
")
37133659

37143660
(define_expand "ashrsi3"
37153661
[(set (match_operand:SI 0 "s_register_operand")
@@ -3729,35 +3675,11 @@
37293675
(match_operand:SI 2 "reg_or_int_operand")))]
37303676
"TARGET_32BIT"
37313677
"
3732-
if (TARGET_NEON)
3733-
{
3734-
/* Delay the decision whether to use NEON or core-regs until
3735-
register allocation. */
3736-
emit_insn (gen_lshrdi3_neon (operands[0], operands[1], operands[2]));
3737-
DONE;
3738-
}
3739-
3740-
if (!CONST_INT_P (operands[2]) && TARGET_REALLY_IWMMXT)
3741-
; /* No special preparation statements; expand pattern as above. */
3742-
else
3743-
{
3744-
rtx scratch1, scratch2;
3745-
3746-
/* Ideally we should use iwmmxt here if we could know that operands[1]
3747-
ends up already living in an iwmmxt register. Otherwise it's
3748-
cheaper to have the alternate code being generated than moving
3749-
values to iwmmxt regs and back. */
3750-
3751-
/* Expand operation using core-registers.
3752-
'FAIL' would achieve the same thing, but this is a bit smarter. */
3753-
scratch1 = gen_reg_rtx (SImode);
3754-
scratch2 = gen_reg_rtx (SImode);
3755-
arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
3756-
operands[2], scratch1, scratch2);
3757-
DONE;
3758-
}
3759-
"
3760-
)
3678+
arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
3679+
operands[2], gen_reg_rtx (SImode),
3680+
gen_reg_rtx (SImode));
3681+
DONE;
3682+
")
37613683

37623684
(define_expand "lshrsi3"
37633685
[(set (match_operand:SI 0 "s_register_operand")
@@ -4782,30 +4704,30 @@
47824704
;; Zero and sign extension instructions.
47834705

47844706
(define_insn "zero_extend<mode>di2"
4785-
[(set (match_operand:DI 0 "s_register_operand" "=w,r,?r,w")
4707+
[(set (match_operand:DI 0 "s_register_operand" "=r,?r")
47864708
(zero_extend:DI (match_operand:QHSI 1 "<qhs_zextenddi_op>"
47874709
"<qhs_zextenddi_cstr>")))]
47884710
"TARGET_32BIT <qhs_zextenddi_cond>"
47894711
"#"
4790-
[(set_attr "length" "8,4,8,8")
4791-
(set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")
4712+
[(set_attr "length" "4,8")
4713+
(set_attr "arch" "*,*")
47924714
(set_attr "ce_count" "2")
47934715
(set_attr "predicable" "yes")
4794-
(set_attr "type" "multiple,mov_reg,multiple,multiple")]
4716+
(set_attr "type" "mov_reg,multiple")]
47954717
)
47964718

47974719
(define_insn "extend<mode>di2"
4798-
[(set (match_operand:DI 0 "s_register_operand" "=w,r,?r,?r,w")
4720+
[(set (match_operand:DI 0 "s_register_operand" "=r,?r,?r")
47994721
(sign_extend:DI (match_operand:QHSI 1 "<qhs_extenddi_op>"
48004722
"<qhs_extenddi_cstr>")))]
48014723
"TARGET_32BIT <qhs_sextenddi_cond>"
48024724
"#"
4803-
[(set_attr "length" "8,4,8,8,8")
4725+
[(set_attr "length" "4,8,8")
48044726
(set_attr "ce_count" "2")
48054727
(set_attr "shift" "1")
48064728
(set_attr "predicable" "yes")
4807-
(set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")
4808-
(set_attr "type" "multiple,mov_reg,multiple,multiple,multiple")]
4729+
(set_attr "arch" "*,a,t")
4730+
(set_attr "type" "mov_reg,multiple,multiple")]
48094731
)
48104732

48114733
;; Splits for all extensions to DImode

gcc/config/arm/iterators.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -741,8 +741,8 @@
741741
(define_mode_attr qhs_extenddi_op [(SI "s_register_operand")
742742
(HI "nonimmediate_operand")
743743
(QI "arm_reg_or_extendqisi_mem_op")])
744-
(define_mode_attr qhs_extenddi_cstr [(SI "r,0,r,r,r") (HI "r,0,rm,rm,r") (QI "r,0,rUq,rm,r")])
745-
(define_mode_attr qhs_zextenddi_cstr [(SI "r,0,r,r") (HI "r,0,rm,r") (QI "r,0,rm,r")])
744+
(define_mode_attr qhs_extenddi_cstr [(SI "0,r,r") (HI "0,rm,rm") (QI "0,rUq,rm")])
745+
(define_mode_attr qhs_zextenddi_cstr [(SI "0,r") (HI "0,rm") (QI "0,rm")])
746746

747747
;; Mode attributes used for fixed-point support.
748748
(define_mode_attr qaddsub_suf [(V4UQQ "8") (V2UHQ "16") (UQQ "8") (UHQ "16")

0 commit comments

Comments
 (0)