Skip to content

Commit da19e37

Browse files
committed
s390: Add LEN_LOAD/LEN_STORE support.
This patch adds LEN_LOAD/LEN_STORE support for z13 and newer. It defines a bias value of -1 and implements the LEN_LOAD and LEN_STORE optabs. Add vll/vstl testcases adapted from Power. Also change expectations for SLP testcases with more than one rgroup. gcc/ChangeLog: * config/s390/predicates.md (vll_bias_operand): Add -1 bias. * config/s390/s390.cc (s390_option_override_internal): Make partial vector usage the default from z13 on. * config/s390/vector.md (len_load_v16qi): Add. (len_store_v16qi): Add. gcc/testsuite/ChangeLog: * gcc.target/s390/s390.exp: Add partial subdirectory. * gcc.target/s390/vector/vec-nopeel-2.c: Change test expectation. * lib/target-supports.exp: Add s390. * gcc.target/s390/vector/partial/s390-vec-length-1.h: New test. * gcc.target/s390/vector/partial/s390-vec-length-2.h: New test. * gcc.target/s390/vector/partial/s390-vec-length-3.h: New test. * gcc.target/s390/vector/partial/s390-vec-length-7.h: New test. * gcc.target/s390/vector/partial/s390-vec-length-epil-1.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-epil-2.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-epil-3.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-epil-7.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-epil-run-2.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-epil-run-7.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-full-1.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-full-2.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-full-3.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-full-7.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-full-run-1.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-full-run-2.c: New test. 
* gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-full-run-7.c: New test. * gcc.target/s390/vector/partial/s390-vec-length-run-1.h: New test. * gcc.target/s390/vector/partial/s390-vec-length-run-2.h: New test. * gcc.target/s390/vector/partial/s390-vec-length-run-3.h: New test. * gcc.target/s390/vector/partial/s390-vec-length-run-7.h: New test. * gcc.target/s390/vector/partial/s390-vec-length-small.c: New test. * gcc.target/s390/vector/partial/s390-vec-length.h: New test.
1 parent 61407e0 commit da19e37

32 files changed

+450
-3
lines changed

gcc/config/s390/predicates.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -606,3 +606,11 @@
606606
(and (match_code "reg")
607607
(match_test "reload_completed || reload_in_progress")
608608
(match_test "register_operand (op, GET_MODE (op))"))))
609+
610+
; Bias value for LEN_LOAD and LEN_STORE. The bias will be added to the
611+
; length (in bytes for s390) to be loaded. vll/vstl expect the index of the highest byte
612+
; to load while LEN_LOAD/LEN_STORE use the actual length in bytes. This implies
613+
; that we cannot load a length of 0.
614+
; Matches a const_int operand that is identical to CONSTM1_RTX (QImode),
; i.e. the only bias value accepted here is -1.
(define_predicate "vll_bias_operand"
615+
(and (match_code "const_int")
616+
(match_test "op == CONSTM1_RTX (QImode)")))

gcc/config/s390/s390.cc

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15857,6 +15857,14 @@ s390_option_override_internal (struct gcc_options *opts,
1585715857

1585815858
/* Use the alternative scheduling-pressure algorithm by default. */
1585915859
SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
15860+
15861+
/* Allow simple vector masking using vll/vstl for epilogues. */
15862+
if (TARGET_Z13)
15863+
SET_OPTION_IF_UNSET (opts, opts_set, param_vect_partial_vector_usage, 1);
15864+
else
15865+
SET_OPTION_IF_UNSET (opts, opts_set, param_vect_partial_vector_usage, 0);
15866+
15867+
/* Do not vectorize loops with a low trip count for now. */
1586015868
SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
1586115869

1586215870
/* Set the default alignment. */
@@ -17825,7 +17833,6 @@ s390_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
1782517833
#undef TARGET_VECTORIZE_VEC_PERM_CONST
1782617834
#define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const
1782717835

17828-
1782917836
struct gcc_target targetm = TARGET_INITIALIZER;
1783017837

1783117838
#include "gt-s390.h"

gcc/config/s390/vector.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2947,6 +2947,41 @@
29472947
""
29482948
[(set_attr "op_type" "*,VRX,VRX")])
29492949

2950+
;
2951+
; Implement len_load/len_store optabs with vll/vstl.
2952+
; Expander for the len_load optab on V16QI, implemented with vll.
; Operand 0: destination vector register.  Operand 1: source memory.
; Operand 2: QImode length register.  Operand 3: bias constant, restricted
; by vll_bias_operand; it is not referenced in the expansion below.
; Only enabled when TARGET_VX && TARGET_64BIT holds.
(define_expand "len_load_v16qi"
2953+
[(match_operand:V16QI 0 "register_operand")
2954+
(match_operand:V16QI 1 "memory_operand")
2955+
(match_operand:QI 2 "register_operand")
2956+
(match_operand:QI 3 "vll_bias_operand")
2957+
]
2958+
"TARGET_VX && TARGET_64BIT"
2959+
{
2960+
/* Re-type the memory operand as BLKmode (offset 0) before passing it to
   the vll pattern.  */
rtx mem = adjust_address (operands[1], BLKmode, 0);
2961+
2962+
/* Zero-extend the QImode length into a fresh SImode pseudo, which is what
   gets passed to gen_vllv16qi.  */
rtx len = gen_reg_rtx (SImode);
2963+
emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[2]));
2964+
emit_insn (gen_vllv16qi (operands[0], len, mem));
2965+
DONE;
2966+
})
2967+
2968+
; Expander for the len_store optab on V16QI, implemented with vstl.
; Operand 0: destination memory.  Operand 1: source vector register.
; Operand 2: QImode length register.  Operand 3: bias constant, restricted
; by vll_bias_operand; it is not referenced in the expansion below.
; Only enabled when TARGET_VX && TARGET_64BIT holds.
(define_expand "len_store_v16qi"
2969+
[(match_operand:V16QI 0 "memory_operand")
2970+
(match_operand:V16QI 1 "register_operand")
2971+
(match_operand:QI 2 "register_operand")
2972+
(match_operand:QI 3 "vll_bias_operand")
2973+
]
2974+
"TARGET_VX && TARGET_64BIT"
2975+
{
2976+
/* Re-type the memory operand as BLKmode (offset 0) before passing it to
   the vstl pattern.  */
rtx mem = adjust_address (operands[0], BLKmode, 0);
2977+
2978+
/* Zero-extend the QImode length into a fresh SImode pseudo, which is what
   gets passed to gen_vstlv16qi.  */
rtx len = gen_reg_rtx (SImode);
2979+
emit_move_insn (len, gen_rtx_ZERO_EXTEND (SImode, operands[2]));
2980+
emit_insn (gen_vstlv16qi (operands[1], len, mem));
2981+
DONE;
2982+
});;
2983+
2984+
29502985
; reduc_smin
29512986
; reduc_smax
29522987
; reduc_umin

gcc/testsuite/gcc.target/s390/s390.exp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,9 @@ dg-runtest [lsort [prune [glob -nocomplain $srcdir/$subdir/*.{c,S}] \
236236
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.{c,S}]] \
237237
"" $DEFAULT_CFLAGS
238238

239+
# Run every .c/.S test found in a partial/ subdirectory of the *vector*
# directories, with no extra flags and $DEFAULT_CFLAGS as the default flags.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/partial/*.{c,S}]] \
240+
"" $DEFAULT_CFLAGS
241+
239242
gfortran-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.F90]] \
240243
"" $DEFAULT_FFLAGS
241244

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#include "s390-vec-length.h"
2+
3+
/* Test the case where the loop iteration count is known. */
4+
5+
#define N 127
6+
7+
/* For a given TYPE, declare the external arrays a_TYPE, b_TYPE and c_TYPE
   (N elements each) and define testTYPE (), a noinline/noclone function
   that stores a_TYPE[i] + b_TYPE[i] into c_TYPE[i] for every i in [0, N).
   The trip count N is a compile-time constant.  */
#define test(TYPE) \
8+
extern TYPE a_##TYPE[N]; \
9+
extern TYPE b_##TYPE[N]; \
10+
extern TYPE c_##TYPE[N]; \
11+
void __attribute__ ((noinline, noclone)) test##TYPE () \
12+
{ \
13+
unsigned int i = 0; \
14+
for (i = 0; i < N; i++) \
15+
c_##TYPE[i] = a_##TYPE[i] + b_##TYPE[i]; \
16+
}
17+
18+
TEST_ALL (test)
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#include "s390-vec-length.h"
2+
3+
/* Test the case where the loop iteration count is unknown. */
4+
5+
#define N 255
6+
7+
/* For a given TYPE, declare the external arrays a_TYPE, b_TYPE and c_TYPE
   (N elements each) and define testTYPE (n), a noinline/noclone function
   that stores a_TYPE[i] + b_TYPE[i] into c_TYPE[i] for every i in [0, n).
   The trip count n is a run-time argument; nothing here checks it against
   N, so callers are presumably expected to pass n <= N.  */
#define test(TYPE) \
8+
extern TYPE a_##TYPE[N]; \
9+
extern TYPE b_##TYPE[N]; \
10+
extern TYPE c_##TYPE[N]; \
11+
void __attribute__ ((noinline, noclone)) test##TYPE (unsigned int n) \
12+
{ \
13+
unsigned int i = 0; \
14+
for (i = 0; i < n; i++) \
15+
c_##TYPE[i] = a_##TYPE[i] + b_##TYPE[i]; \
16+
}
17+
18+
TEST_ALL (test)
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "s390-vec-length.h"
2+
3+
/* Test the case where the loop iteration count is less than VF. */
4+
5+
/* For char. */
6+
#define N_uint8_t 15
7+
#define N_int8_t 15
8+
/* For short. */
9+
#define N_uint16_t 6
10+
#define N_int16_t 6
11+
/* For int/float. */
12+
#define N_uint32_t 3
13+
#define N_int32_t 3
14+
#define N_float 3
15+
/* For long/double. */
16+
#define N_uint64_t 1
17+
#define N_int64_t 1
18+
#define N_double 1
19+
20+
/* For a given TYPE, declare the external arrays a_TYPE, b_TYPE and c_TYPE
   (N_TYPE elements each) and define testTYPE (), a noinline/noclone
   function that stores a_TYPE[i] + b_TYPE[i] into c_TYPE[i] for every i in
   [0, N_TYPE).  The per-type trip count comes from the N_<type> macros
   defined above.  */
#define test(TYPE) \
21+
extern TYPE a_##TYPE[N_##TYPE]; \
22+
extern TYPE b_##TYPE[N_##TYPE]; \
23+
extern TYPE c_##TYPE[N_##TYPE]; \
24+
void __attribute__ ((noinline, noclone)) test##TYPE () \
25+
{ \
26+
unsigned int i = 0; \
27+
for (i = 0; i < N_##TYPE; i++) \
28+
c_##TYPE[i] = a_##TYPE[i] + b_##TYPE[i]; \
29+
}
30+
31+
TEST_ALL (test)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#include "s390-vec-length.h"
2+
3+
#define N 64
4+
#define START 1
5+
#define END 59
6+
7+
/* For a given TYPE, define the 16-byte-aligned array x_TYPE (N elements) and
   test_npeel_TYPE (), a noinline/noclone function that writes the values
   0, 1, 2, ... into x_TYPE[START] through x_TYPE[END - 1].  */
#define test(TYPE) \
8+
TYPE x_##TYPE[N] __attribute__((aligned(16))); \
9+
void __attribute__((noinline, noclone)) test_npeel_##TYPE() { \
10+
TYPE v = 0; \
11+
for (unsigned int i = START; i < END; i++) { \
12+
x_##TYPE[i] = v; \
13+
v += 1; \
14+
} \
15+
}
16+
17+
TEST_ALL (test)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/* { dg-do compile { target { lp64 && s390_vx } } } */
2+
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
3+
4+
/* { dg-additional-options "--param=vect-partial-vector-usage=1 --param=min-vect-loop-bound=0" } */
5+
6+
/* Test that only the epilogue is vectorized using vector load/store with length;
7+
the main body still uses normal vector load/store. */
8+
9+
#include "s390-vec-length-1.h"
10+
11+
/* { dg-final { scan-assembler-times {\mvll\M} 14 } } */
12+
/* { dg-final { scan-assembler-times {\mvstl\M} 7 } } */
13+
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/* { dg-do compile { target { lp64 && s390_vx } } } */
2+
/* { dg-options "-march=native -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fno-trapping-math" } */
3+
4+
/* { dg-additional-options "--param=vect-partial-vector-usage=1 --param=min-vect-loop-bound=0" } */
5+
6+
/* Test that only the epilogue is vectorized using vector load/store with length;
7+
the main body still uses normal vector load/store. */
8+
9+
#include "s390-vec-length-2.h"
10+
11+
/* { dg-final { scan-assembler-times {\mvll\M} 20 } } */
12+
/* { dg-final { scan-assembler-times {\mvstl\M} 10 } } */
13+

0 commit comments

Comments
 (0)