Skip to content

Commit a48912e

Browse files
author
Christophe Lyon
committed
arm: [MVE] Fix carry-in support for vadcq / vsbcq [PR122189]
The vadcq and vsbcq patterns had two problems:
- the adc / sbc part of the pattern did not mention the use of vfpcc
- the carry calculation part should use a different unspec code

In addition, the get_fpscr_nzcvqc and set_fpscr_nzcvqc patterns were over-cautious by using unspec_volatile when unspec is really what they need. Making them unspec makes it possible to remove redundant accesses to FPSCR_nzcvqc.

With unspec_volatile, we used to generate:

test_2:
    @ args = 0, pretend = 0, frame = 8
    @ frame_needed = 0, uses_anonymous_args = 0
    vmov.i32 q0, #0x1 @ v4si
    push {lr}
    sub sp, sp, #12
    vmrs r3, FPSCR_nzcvqc ;; [1]
    bic r3, r3, #536870912
    vmsr FPSCR_nzcvqc, r3
    vadc.i32 q3, q0, q0
    vmrs r3, FPSCR_nzcvqc ;; [2]
    vmrs r3, FPSCR_nzcvqc
    orr r3, r3, #536870912
    vmsr FPSCR_nzcvqc, r3
    vadc.i32 q0, q0, q0
    vmrs r3, FPSCR_nzcvqc
    ldr r0, .L8
    ubfx r3, r3, #29, #1
    str r3, [sp, #4]
    bl print_uint32x4_t
    add sp, sp, #12
    @ sp needed
    pop {pc}
.L9:
    .align 2
.L8:
    .word .LC1

With unspec, we generate:

test_2:
    @ args = 0, pretend = 0, frame = 8
    @ frame_needed = 0, uses_anonymous_args = 0
    vmrs r3, FPSCR_nzcvqc ;; [1]
    bic r3, r3, #536870912 ;; [3]
    vmov.i32 q0, #0x1 @ v4si
    vmsr FPSCR_nzcvqc, r3
    vadc.i32 q3, q0, q0
    vmrs r3, FPSCR_nzcvqc
    orr r3, r3, #536870912
    vmsr FPSCR_nzcvqc, r3
    vadc.i32 q0, q0, q0
    vmrs r3, FPSCR_nzcvqc
    push {lr}
    ubfx r3, r3, #29, #1
    sub sp, sp, #12
    ldr r0, .L8
    str r3, [sp, #4]
    bl print_uint32x4_t
    add sp, sp, #12
    @ sp needed
    pop {pc}
.L9:
    .align 2
.L8:
    .word .LC1

That is, unspec in get_fpscr_nzcvqc makes it possible to:
- move [1] earlier
- delete redundant [2]
and unspec in set_fpscr_nzcvqc makes it possible to move push {lr} and the stack manipulation later.

gcc/ChangeLog:

    PR target/122189
    * config/arm/iterators.md (VxCIQ_carry, VxCIQ_M_carry, VxCQ_carry)
    (VxCQ_M_carry): New int attributes.
    * config/arm/mve.md (get_fpscr_nzcvqc, set_fpscr_nzcvqc): Use unspec instead of unspec_volatile.
    (vadciq, vadciq_m, vadcq, vadcq_m): Use vfpcc in operation. Use a different unspec code for carry calculation.
    * config/arm/unspecs.md (VADCQ_U_carry, VADCQ_M_U_carry)
    (VADCQ_S_carry, VADCQ_M_S_carry, VSBCIQ_U_carry, VSBCIQ_S_carry, VSBCIQ_M_U_carry, VSBCIQ_M_S_carry, VSBCQ_U_carry, VSBCQ_S_carry, VSBCQ_M_U_carry, VSBCQ_M_S_carry, VADCIQ_U_carry, VADCIQ_M_U_carry, VADCIQ_S_carry, VADCIQ_M_S_carry): New unspec codes.

gcc/testsuite/ChangeLog:

    PR target/122189
    * gcc.target/arm/mve/intrinsics/vadcq-check-carry.c: New test.
    * gcc.target/arm/mve/intrinsics/vadcq_m_s32.c: Adjust instructions order.
    * gcc.target/arm/mve/intrinsics/vadcq_m_u32.c: Likewise.
    * gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c: Likewise.
    * gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c: Likewise.

(cherry picked from commits 0272058 and 697ccad)
1 parent f243074 commit a48912e

File tree

8 files changed

+109
-16
lines changed

8 files changed

+109
-16
lines changed

gcc/config/arm/iterators.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3022,3 +3022,20 @@
30223022
;; Define iterators for VCMLA operations as MUL
30233023
(define_int_iterator VCMUL_OP [UNSPEC_VCMUL
30243024
UNSPEC_VCMUL_CONJ])
3025+
3026+
(define_int_attr VxCIQ_carry [(VADCIQ_U "VADCIQ_U_carry")
3027+
(VADCIQ_S "VADCIQ_S_carry")
3028+
(VSBCIQ_U "VSBCIQ_U_carry")
3029+
(VSBCIQ_S "VSBCIQ_S_carry")])
3030+
(define_int_attr VxCIQ_M_carry [(VADCIQ_M_U "VADCIQ_M_U_carry")
3031+
(VADCIQ_M_S "VADCIQ_M_S_carry")
3032+
(VSBCIQ_M_U "VSBCIQ_M_U_carry")
3033+
(VSBCIQ_M_S "VSBCIQ_M_S_carry")])
3034+
(define_int_attr VxCQ_carry [(VADCQ_U "VADCQ_U_carry")
3035+
(VADCQ_S "VADCQ_S_carry")
3036+
(VSBCQ_U "VSBCQ_U_carry")
3037+
(VSBCQ_S "VSBCQ_S_carry")])
3038+
(define_int_attr VxCQ_M_carry [(VADCQ_M_U "VADCQ_M_U_carry")
3039+
(VADCQ_M_S "VADCQ_M_S_carry")
3040+
(VSBCQ_M_U "VSBCQ_M_U_carry")
3041+
(VSBCQ_M_S "VSBCQ_M_S_carry")])

gcc/config/arm/mve.md

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3965,14 +3965,14 @@
39653965

39663966
(define_insn "get_fpscr_nzcvqc"
39673967
[(set (match_operand:SI 0 "register_operand" "=r")
3968-
(unspec_volatile:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))]
3968+
(unspec:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))]
39693969
"TARGET_HAVE_MVE"
39703970
"vmrs\\t%0, FPSCR_nzcvqc"
39713971
[(set_attr "type" "mve_move")])
39723972

39733973
(define_insn "set_fpscr_nzcvqc"
39743974
[(set (reg:SI VFPCC_REGNUM)
3975-
(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
3975+
(unspec:SI [(match_operand:SI 0 "register_operand" "r")]
39763976
VUNSPEC_SET_FPSCR_NZCVQC))]
39773977
"TARGET_HAVE_MVE"
39783978
"vmsr\\tFPSCR_nzcvqc, %0"
@@ -3988,8 +3988,9 @@
39883988
(match_operand:V4SI 2 "s_register_operand" "w")]
39893989
VxCIQ))
39903990
(set (reg:SI VFPCC_REGNUM)
3991-
(unspec:SI [(const_int 0)]
3992-
VxCIQ))
3991+
(unspec:SI [(match_dup 1)
3992+
(match_dup 2)]
3993+
<VxCIQ_carry>))
39933994
]
39943995
"TARGET_HAVE_MVE"
39953996
"<mve_insn>.i32\t%q0, %q1, %q2"
@@ -4009,8 +4010,11 @@
40094010
(match_operand:V4BI 4 "vpr_register_operand" "Up")]
40104011
VxCIQ_M))
40114012
(set (reg:SI VFPCC_REGNUM)
4012-
(unspec:SI [(const_int 0)]
4013-
VxCIQ_M))
4013+
(unspec:SI [(match_dup 1)
4014+
(match_dup 2)
4015+
(match_dup 3)
4016+
(match_dup 4)]
4017+
<VxCIQ_M_carry>))
40144018
]
40154019
"TARGET_HAVE_MVE"
40164020
"vpst\;<mve_insn>t.i32\t%q0, %q2, %q3"
@@ -4025,11 +4029,14 @@
40254029
(define_insn "@mve_<mve_insn>q_<supf>v4si"
40264030
[(set (match_operand:V4SI 0 "s_register_operand" "=w")
40274031
(unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")
4028-
(match_operand:V4SI 2 "s_register_operand" "w")]
4032+
(match_operand:V4SI 2 "s_register_operand" "w")
4033+
(reg:SI VFPCC_REGNUM)]
40294034
VxCQ))
40304035
(set (reg:SI VFPCC_REGNUM)
4031-
(unspec:SI [(reg:SI VFPCC_REGNUM)]
4032-
VxCQ))
4036+
(unspec:SI [(match_dup 1)
4037+
(match_dup 2)
4038+
(reg:SI VFPCC_REGNUM)]
4039+
<VxCQ_carry>))
40334040
]
40344041
"TARGET_HAVE_MVE"
40354042
"<mve_insn>.i32\t%q0, %q1, %q2"
@@ -4047,11 +4054,16 @@
40474054
(unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "0")
40484055
(match_operand:V4SI 2 "s_register_operand" "w")
40494056
(match_operand:V4SI 3 "s_register_operand" "w")
4050-
(match_operand:V4BI 4 "vpr_register_operand" "Up")]
4057+
(match_operand:V4BI 4 "vpr_register_operand" "Up")
4058+
(reg:SI VFPCC_REGNUM)]
40514059
VxCQ_M))
40524060
(set (reg:SI VFPCC_REGNUM)
4053-
(unspec:SI [(reg:SI VFPCC_REGNUM)]
4054-
VxCQ_M))
4061+
(unspec:SI [(match_dup 1)
4062+
(match_dup 2)
4063+
(match_dup 3)
4064+
(match_dup 4)
4065+
(reg:SI VFPCC_REGNUM)]
4066+
<VxCQ_M_carry>))
40554067
]
40564068
"TARGET_HAVE_MVE"
40574069
"vpst\;<mve_insn>t.i32\t%q0, %q2, %q3"

gcc/config/arm/unspecs.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1189,21 +1189,37 @@
11891189
VLDRGBWBQ
11901190
VLDRGBWBQ_Z
11911191
VADCQ_U
1192+
VADCQ_U_carry
11921193
VADCQ_M_U
1194+
VADCQ_M_U_carry
11931195
VADCQ_S
1196+
VADCQ_S_carry
11941197
VADCQ_M_S
1198+
VADCQ_M_S_carry
11951199
VSBCIQ_U
1200+
VSBCIQ_U_carry
11961201
VSBCIQ_S
1202+
VSBCIQ_S_carry
11971203
VSBCIQ_M_U
1204+
VSBCIQ_M_U_carry
11981205
VSBCIQ_M_S
1206+
VSBCIQ_M_S_carry
11991207
VSBCQ_U
1208+
VSBCQ_U_carry
12001209
VSBCQ_S
1210+
VSBCQ_S_carry
12011211
VSBCQ_M_U
1212+
VSBCQ_M_U_carry
12021213
VSBCQ_M_S
1214+
VSBCQ_M_S_carry
12031215
VADCIQ_U
1216+
VADCIQ_U_carry
12041217
VADCIQ_M_U
1218+
VADCIQ_M_U_carry
12051219
VADCIQ_S
1220+
VADCIQ_S_carry
12061221
VADCIQ_M_S
1222+
VADCIQ_M_S_carry
12071223
VLD2Q
12081224
VLD4Q
12091225
VST2Q
gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/* { dg-do run } */
2+
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
3+
/* { dg-require-effective-target arm_mve_hw } */
4+
/* { dg-options "-O2" } */
5+
/* { dg-add-options arm_v8_1m_mve } */
6+
7+
#include "arm_mve.h"
8+
9+
#ifdef __cplusplus
10+
extern "C" {
11+
#endif
12+
13+
#include <inttypes.h>
14+
#include <stdio.h>
15+
16+
__attribute((noinline)) void print_uint32x4_t(const char *name, uint32x4_t val)
17+
{
18+
printf("%s: %u, %u, %u, %u\n",
19+
name,
20+
vgetq_lane_u32(val, 0),
21+
vgetq_lane_u32(val, 1),
22+
vgetq_lane_u32(val, 2),
23+
vgetq_lane_u32(val, 3));
24+
}
25+
26+
void __attribute__ ((noinline)) test_2(void)
27+
{
28+
uint32x4_t v12, v18, v108;
29+
unsigned v17 = 0;
30+
v12 = vdupq_n_u32(1);
31+
v18 = vadcq_u32(v12, v12, &v17);
32+
v17 = 1;
33+
v108 = vadcq_u32(v12, v12, &v17);
34+
print_uint32x4_t("v108", v108);
35+
}
36+
37+
int main()
38+
{
39+
test_2();
40+
return 0;
41+
}
42+
43+
#ifdef __cplusplus
44+
}
45+
#endif
46+
47+
/* { dg-output "v108: 3, 2, 2, 2" } */
48+
/* { dg-final { scan-assembler-times {\tvmrs\t(?:ip|fp|r[0-9]+), FPSCR_nzcvqc} 3 } } */

gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
22
/* { dg-add-options arm_v8_1m_mve } */
3-
/* { dg-additional-options "-O2" } */
3+
/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
44
/* { dg-final { check-function-bodies "**" "" } } */
55

66
#include "arm_mve.h"

gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
22
/* { dg-add-options arm_v8_1m_mve } */
3-
/* { dg-additional-options "-O2" } */
3+
/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
44
/* { dg-final { check-function-bodies "**" "" } } */
55

66
#include "arm_mve.h"

gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
22
/* { dg-add-options arm_v8_1m_mve } */
3-
/* { dg-additional-options "-O2" } */
3+
/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
44
/* { dg-final { check-function-bodies "**" "" } } */
55

66
#include "arm_mve.h"

gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
22
/* { dg-add-options arm_v8_1m_mve } */
3-
/* { dg-additional-options "-O2" } */
3+
/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
44
/* { dg-final { check-function-bodies "**" "" } } */
55

66
#include "arm_mve.h"

0 commit comments

Comments
 (0)