Skip to content

Commit 3775358

Browse files
committed
arm: Fix wrong code generated for mve scatter store with writeback intrinsics with -O2 (PR97271).
This patch fixes (PR97271) the wrong code-gen for mve scatter store with writeback intrinsics with -O2. $cat bug.c void foo (uint32x4_t * addr, const int offset, int32x4_t value) { vstrwq_scatter_base_wb_s32 (addr, 8, value); } $ arm-none-eabi-gcc bug.c -S -O2 -march=armv8.1-m.main+mve -mfloat-abi=hard -o - Without this patch: ... foo: vldrw.32 q3, [r0] vstrw.u32 q0, [q3, #8]! ---> (A) vldr.64 d4, .L3 vldr.64 d5, .L3+8 vldrw.32 q3, [r0] vstrw.u32 q2, [q3, #8]! ---> (B) bx lr ... With this patch: ... foo: vldrw.32 q3, [r0] vstrw.u32 q0, [q3, #8]! --> (C) vstrw.32 q3, [r0] bx lr ... Without this patch, two vstrw assembly instructions (A and B) are generated for the vstrwq_scatter_base_wb_s32 intrinsic, whereas the fix generates only one vstrw assembly instruction (C). gcc/ChangeLog: 2020-10-06 Srinath Parvathaneni <srinath.parvathaneni@arm.com> PR target/97291 * config/arm/arm-builtins.c (arm_strsbwbs_qualifiers): Modify array. (arm_strsbwbu_qualifiers): Likewise. (arm_strsbwbs_p_qualifiers): Likewise. (arm_strsbwbu_p_qualifiers): Likewise. * config/arm/arm_mve.h (__arm_vstrdq_scatter_base_wb_s64): Modify function definition. (__arm_vstrdq_scatter_base_wb_u64): Likewise. (__arm_vstrdq_scatter_base_wb_p_s64): Likewise. (__arm_vstrdq_scatter_base_wb_p_u64): Likewise. (__arm_vstrwq_scatter_base_wb_p_s32): Likewise. (__arm_vstrwq_scatter_base_wb_p_u32): Likewise. (__arm_vstrwq_scatter_base_wb_s32): Likewise. (__arm_vstrwq_scatter_base_wb_u32): Likewise. (__arm_vstrwq_scatter_base_wb_f32): Likewise. (__arm_vstrwq_scatter_base_wb_p_f32): Likewise. * config/arm/arm_mve_builtins.def (vstrwq_scatter_base_wb_add_u): Remove expansion for the builtin. (vstrwq_scatter_base_wb_add_s): Likewise. (vstrwq_scatter_base_wb_add_f): Likewise. (vstrdq_scatter_base_wb_add_u): Likewise. (vstrdq_scatter_base_wb_add_s): Likewise. (vstrwq_scatter_base_wb_p_add_u): Likewise. (vstrwq_scatter_base_wb_p_add_s): Likewise. (vstrwq_scatter_base_wb_p_add_f): Likewise. 
(vstrdq_scatter_base_wb_p_add_u): Likewise. (vstrdq_scatter_base_wb_p_add_s): Likewise. * config/arm/mve.md (mve_vstrwq_scatter_base_wb_<supf>v4si): Remove expand. (mve_vstrwq_scatter_base_wb_add_<supf>v4si): Likewise. (mve_vstrwq_scatter_base_wb_<supf>v4si_insn): Rename pattern to ... (mve_vstrwq_scatter_base_wb_<supf>v4si): This. (mve_vstrwq_scatter_base_wb_p_<supf>v4si): Remove expand. (mve_vstrwq_scatter_base_wb_p_add_<supf>v4si): Likewise. (mve_vstrwq_scatter_base_wb_p_<supf>v4si_insn): Rename pattern to ... (mve_vstrwq_scatter_base_wb_p_<supf>v4si): This. (mve_vstrwq_scatter_base_wb_fv4sf): Remove expand. (mve_vstrwq_scatter_base_wb_add_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_fv4sf_insn): Rename pattern to ... (mve_vstrwq_scatter_base_wb_fv4sf): This. (mve_vstrwq_scatter_base_wb_p_fv4sf): Remove expand. (mve_vstrwq_scatter_base_wb_p_add_fv4sf): Likewise. (mve_vstrwq_scatter_base_wb_p_fv4sf_insn): Rename pattern to ... (mve_vstrwq_scatter_base_wb_p_fv4sf): This. (mve_vstrdq_scatter_base_wb_<supf>v2di): Remove expand. (mve_vstrdq_scatter_base_wb_add_<supf>v2di): Likewise. (mve_vstrdq_scatter_base_wb_<supf>v2di_insn): Rename pattern to ... (mve_vstrdq_scatter_base_wb_<supf>v2di): This. (mve_vstrdq_scatter_base_wb_p_<supf>v2di): Remove expand. (mve_vstrdq_scatter_base_wb_p_add_<supf>v2di): Likewise. (mve_vstrdq_scatter_base_wb_p_<supf>v2di_insn): Rename pattern to ... (mve_vstrdq_scatter_base_wb_p_<supf>v2di): This. gcc/testsuite/ChangeLog: PR target/97291 * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_s64.c: Modify. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_p_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_s64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrdq_scatter_base_wb_u64.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_f32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_s32.c: Likewise. 
* gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_p_u32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_s32.c: Likewise. * gcc.target/arm/mve/intrinsics/vstrwq_scatter_base_wb_u32.c: Likewise.
1 parent e27c8cc commit 3775358

14 files changed

+32
-252
lines changed

gcc/config/arm/arm-builtins.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -811,23 +811,23 @@ arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
811811

812812
static enum arm_type_qualifiers
813813
arm_strsbwbs_qualifiers[SIMD_MAX_BUILTIN_ARGS]
814-
= { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_none};
814+
= { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_none};
815815
#define STRSBWBS_QUALIFIERS (arm_strsbwbs_qualifiers)
816816

817817
static enum arm_type_qualifiers
818818
arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
819-
= { qualifier_void, qualifier_unsigned, qualifier_const, qualifier_unsigned};
819+
= { qualifier_unsigned, qualifier_unsigned, qualifier_const, qualifier_unsigned};
820820
#define STRSBWBU_QUALIFIERS (arm_strsbwbu_qualifiers)
821821

822822
static enum arm_type_qualifiers
823823
arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
824-
= { qualifier_void, qualifier_unsigned, qualifier_const,
824+
= { qualifier_unsigned, qualifier_unsigned, qualifier_const,
825825
qualifier_none, qualifier_unsigned};
826826
#define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers)
827827

828828
static enum arm_type_qualifiers
829829
arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
830-
= { qualifier_void, qualifier_unsigned, qualifier_const,
830+
= { qualifier_unsigned, qualifier_unsigned, qualifier_const,
831831
qualifier_unsigned, qualifier_unsigned};
832832
#define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers)
833833

gcc/config/arm/arm_mve.h

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13956,64 +13956,56 @@ __extension__ extern __inline void
1395613956
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1395713957
__arm_vstrdq_scatter_base_wb_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value)
1395813958
{
13959-
__builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value);
13960-
__builtin_mve_vstrdq_scatter_base_wb_add_sv2di (*__addr, __offset, *__addr);
13959+
*__addr = __builtin_mve_vstrdq_scatter_base_wb_sv2di (*__addr, __offset, __value);
1396113960
}
1396213961

1396313962
__extension__ extern __inline void
1396413963
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1396513964
__arm_vstrdq_scatter_base_wb_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value)
1396613965
{
13967-
__builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value);
13968-
__builtin_mve_vstrdq_scatter_base_wb_add_uv2di (*__addr, __offset, *__addr);
13966+
*__addr = __builtin_mve_vstrdq_scatter_base_wb_uv2di (*__addr, __offset, __value);
1396913967
}
1397013968

1397113969
__extension__ extern __inline void
1397213970
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1397313971
__arm_vstrdq_scatter_base_wb_p_s64 (uint64x2_t * __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
1397413972
{
13975-
__builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p);
13976-
__builtin_mve_vstrdq_scatter_base_wb_p_add_sv2di (*__addr, __offset, *__addr, __p);
13973+
*__addr = __builtin_mve_vstrdq_scatter_base_wb_p_sv2di (*__addr, __offset, __value, __p);
1397713974
}
1397813975

1397913976
__extension__ extern __inline void
1398013977
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1398113978
__arm_vstrdq_scatter_base_wb_p_u64 (uint64x2_t * __addr, const int __offset, uint64x2_t __value, mve_pred16_t __p)
1398213979
{
13983-
__builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p);
13984-
__builtin_mve_vstrdq_scatter_base_wb_p_add_uv2di (*__addr, __offset, *__addr, __p);
13980+
*__addr = __builtin_mve_vstrdq_scatter_base_wb_p_uv2di (*__addr, __offset, __value, __p);
1398513981
}
1398613982

1398713983
__extension__ extern __inline void
1398813984
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1398913985
__arm_vstrwq_scatter_base_wb_p_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value, mve_pred16_t __p)
1399013986
{
13991-
__builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p);
13992-
__builtin_mve_vstrwq_scatter_base_wb_p_add_sv4si (*__addr, __offset, *__addr, __p);
13987+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_p_sv4si (*__addr, __offset, __value, __p);
1399313988
}
1399413989

1399513990
__extension__ extern __inline void
1399613991
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1399713992
__arm_vstrwq_scatter_base_wb_p_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value, mve_pred16_t __p)
1399813993
{
13999-
__builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p);
14000-
__builtin_mve_vstrwq_scatter_base_wb_p_add_uv4si (*__addr, __offset, *__addr, __p);
13994+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_p_uv4si (*__addr, __offset, __value, __p);
1400113995
}
1400213996

1400313997
__extension__ extern __inline void
1400413998
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1400513999
__arm_vstrwq_scatter_base_wb_s32 (uint32x4_t * __addr, const int __offset, int32x4_t __value)
1400614000
{
14007-
__builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value);
14008-
__builtin_mve_vstrwq_scatter_base_wb_add_sv4si (*__addr, __offset, *__addr);
14001+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_sv4si (*__addr, __offset, __value);
1400914002
}
1401014003

1401114004
__extension__ extern __inline void
1401214005
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1401314006
__arm_vstrwq_scatter_base_wb_u32 (uint32x4_t * __addr, const int __offset, uint32x4_t __value)
1401414007
{
14015-
__builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value);
14016-
__builtin_mve_vstrwq_scatter_base_wb_add_uv4si (*__addr, __offset, *__addr);
14008+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value);
1401714009
}
1401814010

1401914011
__extension__ extern __inline uint8x16_t
@@ -19128,16 +19120,14 @@ __extension__ extern __inline void
1912819120
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1912919121
__arm_vstrwq_scatter_base_wb_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value)
1913019122
{
19131-
__builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value);
19132-
__builtin_mve_vstrwq_scatter_base_wb_add_fv4sf (*__addr, __offset, *__addr);
19123+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_fv4sf (*__addr, __offset, __value);
1913319124
}
1913419125

1913519126
__extension__ extern __inline void
1913619127
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
1913719128
__arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, float32x4_t __value, mve_pred16_t __p)
1913819129
{
19139-
__builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p);
19140-
__builtin_mve_vstrwq_scatter_base_wb_p_add_fv4sf (*__addr, __offset, *__addr, __p);
19130+
*__addr = __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p);
1914119131
}
1914219132

1914319133
__extension__ extern __inline float16x8_t

gcc/config/arm/arm_mve_builtins.def

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -826,19 +826,9 @@ VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vidupq_m_n_u, v16qi, v8hi, v4si)
826826
VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi)
827827
VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi)
828828
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si)
829-
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_u, v4si)
830-
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_s, v4si)
831-
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_add_f, v4sf)
832829
VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di)
833-
VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_u, v2di)
834-
VAR1 (STRSBWBU, vstrdq_scatter_base_wb_add_s, v2di)
835830
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si)
836-
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_u, v4si)
837-
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_s, v4si)
838-
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_add_f, v4sf)
839831
VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_u, v2di)
840-
VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_u, v2di)
841-
VAR1 (STRSBWBU_P, vstrdq_scatter_base_wb_p_add_s, v2di)
842832
VAR1 (STRSBWBS, vstrwq_scatter_base_wb_s, v4si)
843833
VAR1 (STRSBWBS, vstrwq_scatter_base_wb_f, v4sf)
844834
VAR1 (STRSBWBS, vstrdq_scatter_base_wb_s, v2di)

0 commit comments

Comments
 (0)