Skip to content

Commit 9ee3a61

Browse files
rth7680 authored and pm215 committed
target/arm: Implement SVE Predicate Count Group
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20180613015641.5667-15-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
1 parent 35da316 commit 9ee3a61

File tree

4 files changed

+176
-0
lines changed

4 files changed

+176
-0
lines changed

target/arm/helper-sve.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,3 +676,5 @@ DEF_HELPER_FLAGS_4(sve_brkbs_m, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
676676

677677
DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
678678
DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
679+
680+
/* sve_cntp: returns an i64; takes two pointers and an i32 descriptor. */
DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)

target/arm/sve.decode

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@
6767
&ptrue rd esz pat s
6868
&incdec_cnt rd pat esz imm d u
6969
&incdec2_cnt rd rn pat esz imm d u
70+
&incdec_pred rd pg esz d u
71+
&incdec2_pred rd rn pg esz d u
7072

7173
###########################################################################
7274
# Named instruction formats. These are generally used to
@@ -113,6 +115,7 @@
113115

114116
# One register operand, with governing predicate, vector element size
115117
@rd_pg_rn ........ esz:2 ... ... ... pg:3 rn:5 rd:5 &rpr_esz
118+
@rd_pg4_pn ........ esz:2 ... ... .. pg:4 . rn:4 rd:5 &rpr_esz
116119

117120
# Two register operands with a 6-bit signed immediate.
118121
@rd_rn_i6 ........ ... rn:5 ..... imm:s6 rd:5 &rri
@@ -153,6 +156,12 @@
153156
@incdec2_cnt ........ esz:2 .. .... ...... pat:5 rd:5 \
154157
&incdec2_cnt imm=%imm4_16_p1 rn=%reg_movprfx
155158

159+
# One register, predicate.
160+
# User must fill in U and D.
161+
@incdec_pred ........ esz:2 .... .. ..... .. pg:4 rd:5 &incdec_pred
162+
@incdec2_pred ........ esz:2 .... .. ..... .. pg:4 rd:5 \
163+
&incdec2_pred rn=%reg_movprfx
164+
156165
###########################################################################
157166
# Instruction patterns. Grouped according to the SVE encodingindex.xhtml.
158167

@@ -579,6 +588,24 @@ BRKB_m 00100101 1. 01000001 .... 0 .... 1 .... @pd_pg_pn_s
579588
# SVE propagate break to next partition
580589
BRKN 00100101 0. 01100001 .... 0 .... 0 .... @pd_pg_pn_s
581590

591+
### SVE Predicate Count Group
592+
593+
# SVE predicate count
594+
CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn
595+
596+
# SVE inc/dec register by predicate count
597+
INCDECP_r 00100101 .. 10110 d:1 10001 00 .... ..... @incdec_pred u=1
598+
599+
# SVE inc/dec vector by predicate count
600+
INCDECP_z 00100101 .. 10110 d:1 10000 00 .... ..... @incdec2_pred u=1
601+
602+
# SVE saturating inc/dec register by predicate count
603+
SINCDECP_r_32 00100101 .. 1010 d:1 u:1 10001 00 .... ..... @incdec_pred
604+
SINCDECP_r_64 00100101 .. 1010 d:1 u:1 10001 10 .... ..... @incdec_pred
605+
606+
# SVE saturating inc/dec vector by predicate count
607+
SINCDECP_z 00100101 .. 1010 d:1 u:1 10000 00 .... ..... @incdec2_pred
608+
582609
### SVE Memory - 32-bit Gather and Unsized Contiguous Group
583610

584611
# SVE load predicate register

target/arm/sve_helper.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2724,3 +2724,17 @@ uint32_t HELPER(sve_brkns)(void *vd, void *vn, void *vg, uint32_t pred_desc)
27242724
return do_zero(vd, oprsz);
27252725
}
27262726
}
2727+
2728+
uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
2729+
{
2730+
intptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
2731+
intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
2732+
uint64_t *n = vn, *g = vg, sum = 0, mask = pred_esz_masks[esz];
2733+
intptr_t i;
2734+
2735+
for (i = 0; i < DIV_ROUND_UP(oprsz, 8); ++i) {
2736+
uint64_t t = n[i] & g[i] & mask;
2737+
sum += ctpop64(t);
2738+
}
2739+
return sum;
2740+
}

target/arm/translate-sve.c

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
#include "translate-a64.h"
3535

3636

37+
typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
38+
TCGv_i64, uint32_t, uint32_t);
39+
3740
typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
3841
TCGv_ptr, TCGv_i32);
3942
typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
@@ -2959,6 +2962,136 @@ static bool trans_BRKN(DisasContext *s, arg_rpr_s *a, uint32_t insn)
29592962
return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
29602963
}
29612964

2965+
/*
2966+
*** SVE Predicate Count Group
2967+
*/
2968+
2969+
/*
 * Emit code that leaves in VAL the number of bits set in both predicate
 * register PN and predicate register PG, restricted to the bit positions
 * selected by pred_esz_masks[ESZ].
 *
 * When the whole predicate fits in 64 bits the count is generated inline;
 * otherwise the sve_cntp helper is called with a descriptor carrying the
 * predicate size (minus 2) and the element size.
 */
static void do_cntp(DisasContext *s, TCGv_i64 val, int esz, int pn, int pg)
{
    unsigned pred_bytes = pred_full_reg_size(s);

    if (pred_bytes > 8) {
        /* Out-of-line: hand both predicate addresses to the helper. */
        unsigned desc_bits = deposit32(pred_bytes - 2, SIMD_DATA_SHIFT, 2, esz);
        TCGv_ptr ptr_n = tcg_temp_new_ptr();
        TCGv_ptr ptr_g = tcg_temp_new_ptr();
        TCGv_i32 desc;

        tcg_gen_addi_ptr(ptr_n, cpu_env, pred_full_reg_offset(s, pn));
        tcg_gen_addi_ptr(ptr_g, cpu_env, pred_full_reg_offset(s, pg));
        desc = tcg_const_i32(desc_bits);

        gen_helper_sve_cntp(val, ptr_n, ptr_g, desc);

        tcg_temp_free_ptr(ptr_n);
        tcg_temp_free_ptr(ptr_g);
        tcg_temp_free_i32(desc);
        return;
    }

    /* Inline: a single 64-bit load covers the whole predicate. */
    tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
    if (pn != pg) {
        /* Only AND with the governing predicate when it is a different one. */
        TCGv_i64 gov = tcg_temp_new_i64();

        tcg_gen_ld_i64(gov, cpu_env, pred_full_reg_offset(s, pg));
        tcg_gen_and_i64(val, val, gov);
        tcg_temp_free_i64(gov);
    }

    /*
     * Trim the element-size mask to the live predicate width purely to
     * keep the generated immediate (and hence the code) small.
     */
    tcg_gen_andi_i64(val, val,
                     pred_esz_masks[esz] & MAKE_64BIT_MASK(0, pred_bytes * 8));

    tcg_gen_ctpop_i64(val, val);
}
3010+
3011+
/*
 * SVE predicate count: write to general register a->rd the count of
 * bits set in both predicate a->rn and governing predicate a->pg.
 * Nothing is emitted when sve_access_check() fails.  INSN is unused.
 * Always returns true.
 */
static bool trans_CNTP(DisasContext *s, arg_CNTP *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
    return true;
}
3018+
3019+
/*
 * SVE inc/dec register by predicate count: add the count of bits set in
 * predicate a->pg to general register a->rd, or subtract it when a->d
 * is set.  Nothing is emitted when sve_access_check() fails.  INSN is
 * unused.  Always returns true.
 */
static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a,
                            uint32_t insn)
{
    TCGv_i64 dst, count;

    if (!sve_access_check(s)) {
        return true;
    }

    dst = cpu_reg(s, a->rd);
    count = tcg_temp_new_i64();

    /* The predicate acts as both source and governing operand. */
    do_cntp(s, count, a->esz, a->pg, a->pg);
    if (!a->d) {
        tcg_gen_add_i64(dst, dst, count);
    } else {
        tcg_gen_sub_i64(dst, dst, count);
    }
    tcg_temp_free_i64(count);
    return true;
}
3036+
3037+
/*
 * SVE inc/dec vector by predicate count: add the count of bits set in
 * predicate a->pg to every element of vector a->rn, or subtract it when
 * a->d is set, and write the result to vector a->rd.
 *
 * Returns false for a->esz == 0 (presumably so the caller rejects the
 * encoding -- confirm against the decoder); otherwise returns true,
 * emitting nothing when sve_access_check() fails.  INSN is unused.
 */
static bool trans_INCDECP_z(DisasContext *s, arg_incdec2_pred *a,
                            uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        unsigned vsz = vec_full_reg_size(s);
        TCGv_i64 val = tcg_temp_new_i64();
        GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;

        /* The predicate acts as both source and governing operand. */
        do_cntp(s, val, a->esz, a->pg, a->pg);
        gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
                vec_full_reg_offset(s, a->rn), val, vsz, vsz);

        /* Release the scratch count, as trans_INCDECP_r does. */
        tcg_temp_free_i64(val);
    }
    return true;
}
3054+
3055+
/*
 * SVE saturating inc/dec register by predicate count, 32-bit form:
 * count the bits set in predicate a->pg and hand the count, together
 * with general register a->rd and the a->u / a->d flags, to
 * do_sat_addsub_32.  Nothing is emitted when sve_access_check() fails.
 * INSN is unused.  Always returns true.
 */
static bool trans_SINCDECP_r_32(DisasContext *s, arg_incdec_pred *a,
                                uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        /* The predicate acts as both source and governing operand. */
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_32(reg, val, a->u, a->d);

        /*
         * Release the scratch count, as trans_INCDECP_r does.
         * NOTE(review): assumes do_sat_addsub_32 does not free val
         * itself -- confirm.
         */
        tcg_temp_free_i64(val);
    }
    return true;
}
3067+
3068+
/*
 * SVE saturating inc/dec register by predicate count, 64-bit form:
 * count the bits set in predicate a->pg and hand the count, together
 * with general register a->rd and the a->u / a->d flags, to
 * do_sat_addsub_64.  Nothing is emitted when sve_access_check() fails.
 * INSN is unused.  Always returns true.
 */
static bool trans_SINCDECP_r_64(DisasContext *s, arg_incdec_pred *a,
                                uint32_t insn)
{
    if (sve_access_check(s)) {
        TCGv_i64 reg = cpu_reg(s, a->rd);
        TCGv_i64 val = tcg_temp_new_i64();

        /* The predicate acts as both source and governing operand. */
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_64(reg, val, a->u, a->d);

        /*
         * Release the scratch count, as trans_INCDECP_r does.
         * NOTE(review): assumes do_sat_addsub_64 does not free val
         * itself -- confirm.
         */
        tcg_temp_free_i64(val);
    }
    return true;
}
3080+
3081+
/*
 * SVE saturating inc/dec vector by predicate count: count the bits set
 * in predicate a->pg and hand the count, together with vectors a->rd and
 * a->rn and the a->u / a->d flags, to do_sat_addsub_vec.
 *
 * Returns false for a->esz == 0 (presumably so the caller rejects the
 * encoding -- confirm against the decoder); otherwise returns true,
 * emitting nothing when sve_access_check() fails.  INSN is unused.
 */
static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
                             uint32_t insn)
{
    if (a->esz == 0) {
        return false;
    }
    if (sve_access_check(s)) {
        TCGv_i64 val = tcg_temp_new_i64();

        /* The predicate acts as both source and governing operand. */
        do_cntp(s, val, a->esz, a->pg, a->pg);
        do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);

        /*
         * Release the scratch count, as trans_INCDECP_r does.
         * NOTE(review): assumes do_sat_addsub_vec does not free val
         * itself -- confirm.
         */
        tcg_temp_free_i64(val);
    }
    return true;
}
3094+
29623095
/*
29633096
*** SVE Memory - 32-bit Gather and Unsized Contiguous Group
29643097
*/

0 commit comments

Comments
 (0)