Skip to content

Commit 30562ab

Browse files
rth7680 authored and pm215 committed
target/arm: Implement SVE Permute - Unpredicated Group
Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> Message-id: 20180613015641.5667-3-richard.henderson@linaro.org Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
1 parent 66f2dbd commit 30562ab

File tree

4 files changed

+297
-0
lines changed

4 files changed

+297
-0
lines changed

target/arm/helper-sve.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,29 @@ DEF_HELPER_FLAGS_4(sve_cpy_z_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
416416

417417
DEF_HELPER_FLAGS_4(sve_ext, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
418418

419+
/*
 * SVE INSR helpers, one per element size (b, h, s, d).
 * Arguments after the void return type: destination vector pointer,
 * source vector pointer, 64-bit value to insert, 32-bit descriptor
 * (vd, vn, val, desc in the sve_helper.c definitions).
 */
DEF_HELPER_FLAGS_4(sve_insr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
420+
DEF_HELPER_FLAGS_4(sve_insr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
421+
DEF_HELPER_FLAGS_4(sve_insr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
422+
DEF_HELPER_FLAGS_4(sve_insr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
423+
424+
/*
 * SVE REV (reverse vector elements) helpers, one per element size.
 * Arguments: destination vector pointer, source vector pointer,
 * 32-bit descriptor (vd, vn, desc in the sve_helper.c definitions).
 */
DEF_HELPER_FLAGS_3(sve_rev_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
425+
DEF_HELPER_FLAGS_3(sve_rev_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
426+
DEF_HELPER_FLAGS_3(sve_rev_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
427+
DEF_HELPER_FLAGS_3(sve_rev_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
428+
429+
/*
 * SVE TBL (vector table lookup) helpers, one per element size.
 * Arguments: destination vector pointer, table vector pointer,
 * index vector pointer, 32-bit descriptor (vd, vn, vm, desc in the
 * sve_helper.c definitions).
 */
DEF_HELPER_FLAGS_4(sve_tbl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
430+
DEF_HELPER_FLAGS_4(sve_tbl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
431+
DEF_HELPER_FLAGS_4(sve_tbl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
432+
DEF_HELPER_FLAGS_4(sve_tbl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
433+
434+
/*
 * SVE signed unpack helpers.  The suffix names the destination element
 * size (h, s, d); the source elements are half that width and use signed
 * types in the sve_helper.c definitions.
 * Arguments: destination vector pointer, source pointer, 32-bit descriptor.
 */
DEF_HELPER_FLAGS_3(sve_sunpk_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
435+
DEF_HELPER_FLAGS_3(sve_sunpk_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
436+
DEF_HELPER_FLAGS_3(sve_sunpk_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
437+
438+
/*
 * SVE unsigned unpack helpers.  The suffix names the destination element
 * size (h, s, d); the source elements are half that width and use unsigned
 * types in the sve_helper.c definitions.
 * Arguments: destination vector pointer, source pointer, 32-bit descriptor.
 */
DEF_HELPER_FLAGS_3(sve_uunpk_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
439+
DEF_HELPER_FLAGS_3(sve_uunpk_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
440+
DEF_HELPER_FLAGS_3(sve_uunpk_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
441+
419442
DEF_HELPER_FLAGS_5(sve_and_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
420443
DEF_HELPER_FLAGS_5(sve_bic_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
421444
DEF_HELPER_FLAGS_5(sve_eor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)

target/arm/sve.decode

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
%imm4_16_p1 16:4 !function=plus1
2626
%imm6_22_5 22:1 5:5
27+
# 7-bit immediate: the 2 bits starting at bit 22 followed by the 5 bits
# starting at bit 16 (the latter form the low five bits).  Used by DUP_x.
%imm7_22_16 22:2 16:5
2728
%imm8_16_10 16:5 10:3
2829
%imm9_16_10 16:s6 10:3
2930

@@ -85,6 +86,8 @@
8586

8687
# Three operand, vector element size
8788
@rd_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 &rrr_esz
89+
# Destructive form: rm comes from bits [9:5], rd from bits [4:0], and rn is
# filled in from %reg_movprfx (defined outside this hunk).  Used by INSR.
@rdn_rm ........ esz:2 ...... ...... rm:5 rd:5 \
90+
&rrr_esz rn=%reg_movprfx
8891

8992
# Three operand with "memory" size, aka immediate left shift
9093
@rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri
@@ -369,6 +372,30 @@ CPY_z_i 00000101 .. 01 .... 00 . ........ ..... @rdn_pg4 imm=%sh8_i8s
369372
EXT 00000101 001 ..... 000 ... rm:5 rd:5 \
370373
&rrri rn=%reg_movprfx imm=%imm8_16_10
371374

375+
### SVE Permute - Unpredicated Group
376+
377+
# SVE broadcast general register
# trans_DUP_s replicates the register read via cpu_reg_sp(rn) into every
# element of Zd, with the element size taken from esz.
378+
DUP_s 00000101 .. 1 00000 001110 ..... ..... @rd_rn
379+
380+
# SVE broadcast indexed element
# imm is the 7-bit %imm7_22_16 field.  trans_DUP_x rejects the encoding when
# the low five bits are zero; otherwise the position of the lowest set bit
# gives the element size and the bits above it give the element index.
381+
DUP_x 00000101 .. 1 ..... 001000 rn:5 rd:5 \
382+
&rri imm=%imm7_22_16
383+
384+
# SVE insert SIMD&FP scalar register
# trans_INSR_f loads the low 64 bits of vector register rm and passes them
# to the sve_insr_* helper selected by esz.
385+
INSR_f 00000101 .. 1 10100 001110 ..... ..... @rdn_rm
386+
387+
# SVE insert general register
# trans_INSR_r passes cpu_reg(rm) to the sve_insr_* helper selected by esz.
388+
INSR_r 00000101 .. 1 00100 001110 ..... ..... @rdn_rm
389+
390+
# SVE reverse vector elements
# trans_REV_v dispatches on esz to sve_rev_b/h/s/d.
391+
REV_v 00000101 .. 1 11000 001110 ..... ..... @rd_rn
392+
393+
# SVE vector table lookup
# trans_TBL dispatches on esz to sve_tbl_b/h/s/d; the helpers write zero for
# any index that is not below the number of elements in the vector.
394+
TBL 00000101 .. 1 ..... 001100 ..... ..... @rd_rn_rm
395+
396+
# SVE unpack vector elements
# In trans_UNPK: esz == 0 is rejected; u selects the unsigned (1) or signed
# (0) helper; h == 1 makes the source start half-way through Zn.
397+
UNPK 00000101 esz:2 1100 u:1 h:1 001110 rn:5 rd:5
398+
372399
### SVE Predicate Logical Operations Group
373400

374401
# SVE predicate logical operations

target/arm/sve_helper.c

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,3 +1560,117 @@ void HELPER(sve_ext)(void *vd, void *vn, void *vm, uint32_t desc)
15601560
memcpy(vd + n_siz, &tmp, n_ofs);
15611561
}
15621562
}
1563+
1564+
#define DO_INSR(NAME, TYPE, H) \
1565+
void HELPER(NAME)(void *vd, void *vn, uint64_t val, uint32_t desc) \
1566+
{ \
1567+
intptr_t opr_sz = simd_oprsz(desc); \
1568+
swap_memmove(vd + sizeof(TYPE), vn, opr_sz - sizeof(TYPE)); \
1569+
*(TYPE *)(vd + H(0)) = val; \
1570+
}
1571+
1572+
DO_INSR(sve_insr_b, uint8_t, H1)
1573+
DO_INSR(sve_insr_h, uint16_t, H1_2)
1574+
DO_INSR(sve_insr_s, uint32_t, H1_4)
1575+
DO_INSR(sve_insr_d, uint64_t, )
1576+
1577+
#undef DO_INSR
1578+
1579+
void HELPER(sve_rev_b)(void *vd, void *vn, uint32_t desc)
1580+
{
1581+
intptr_t i, j, opr_sz = simd_oprsz(desc);
1582+
for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
1583+
uint64_t f = *(uint64_t *)(vn + i);
1584+
uint64_t b = *(uint64_t *)(vn + j);
1585+
*(uint64_t *)(vd + i) = bswap64(b);
1586+
*(uint64_t *)(vd + j) = bswap64(f);
1587+
}
1588+
}
1589+
1590+
/*
 * Reverse the order of the four 16-bit fields of a 64-bit value:
 * first exchange the two 32-bit halves, then exchange the two 16-bit
 * fields inside each half.
 */
static inline uint64_t hswap64(uint64_t h)
{
    const uint64_t low_half_mask = 0x0000ffff0000ffffull;
    const uint64_t words_swapped = rol64(h, 32);
    const uint64_t moved_up = (words_swapped & low_half_mask) << 16;
    const uint64_t moved_down = (words_swapped >> 16) & low_half_mask;

    return moved_up | moved_down;
}
1596+
1597+
void HELPER(sve_rev_h)(void *vd, void *vn, uint32_t desc)
1598+
{
1599+
intptr_t i, j, opr_sz = simd_oprsz(desc);
1600+
for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
1601+
uint64_t f = *(uint64_t *)(vn + i);
1602+
uint64_t b = *(uint64_t *)(vn + j);
1603+
*(uint64_t *)(vd + i) = hswap64(b);
1604+
*(uint64_t *)(vd + j) = hswap64(f);
1605+
}
1606+
}
1607+
1608+
void HELPER(sve_rev_s)(void *vd, void *vn, uint32_t desc)
1609+
{
1610+
intptr_t i, j, opr_sz = simd_oprsz(desc);
1611+
for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
1612+
uint64_t f = *(uint64_t *)(vn + i);
1613+
uint64_t b = *(uint64_t *)(vn + j);
1614+
*(uint64_t *)(vd + i) = rol64(b, 32);
1615+
*(uint64_t *)(vd + j) = rol64(f, 32);
1616+
}
1617+
}
1618+
1619+
void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t desc)
1620+
{
1621+
intptr_t i, j, opr_sz = simd_oprsz(desc);
1622+
for (i = 0, j = opr_sz - 8; i < opr_sz / 2; i += 8, j -= 8) {
1623+
uint64_t f = *(uint64_t *)(vn + i);
1624+
uint64_t b = *(uint64_t *)(vn + j);
1625+
*(uint64_t *)(vd + i) = b;
1626+
*(uint64_t *)(vd + j) = f;
1627+
}
1628+
}
1629+
1630+
#define DO_TBL(NAME, TYPE, H) \
1631+
void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
1632+
{ \
1633+
intptr_t i, opr_sz = simd_oprsz(desc); \
1634+
uintptr_t elem = opr_sz / sizeof(TYPE); \
1635+
TYPE *d = vd, *n = vn, *m = vm; \
1636+
ARMVectorReg tmp; \
1637+
if (unlikely(vd == vn)) { \
1638+
n = memcpy(&tmp, vn, opr_sz); \
1639+
} \
1640+
for (i = 0; i < elem; i++) { \
1641+
TYPE j = m[H(i)]; \
1642+
d[H(i)] = j < elem ? n[H(j)] : 0; \
1643+
} \
1644+
}
1645+
1646+
DO_TBL(sve_tbl_b, uint8_t, H1)
1647+
DO_TBL(sve_tbl_h, uint16_t, H2)
1648+
DO_TBL(sve_tbl_s, uint32_t, H4)
1649+
DO_TBL(sve_tbl_d, uint64_t, )
1650+
1651+
#undef TBL
1652+
1653+
#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \
1654+
void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
1655+
{ \
1656+
intptr_t i, opr_sz = simd_oprsz(desc); \
1657+
TYPED *d = vd; \
1658+
TYPES *n = vn; \
1659+
ARMVectorReg tmp; \
1660+
if (unlikely(vn - vd < opr_sz)) { \
1661+
n = memcpy(&tmp, n, opr_sz / 2); \
1662+
} \
1663+
for (i = 0; i < opr_sz / sizeof(TYPED); i++) { \
1664+
d[HD(i)] = n[HS(i)]; \
1665+
} \
1666+
}
1667+
1668+
DO_UNPK(sve_sunpk_h, int16_t, int8_t, H2, H1)
1669+
DO_UNPK(sve_sunpk_s, int32_t, int16_t, H4, H2)
1670+
DO_UNPK(sve_sunpk_d, int64_t, int32_t, , H4)
1671+
1672+
DO_UNPK(sve_uunpk_h, uint16_t, uint8_t, H2, H1)
1673+
DO_UNPK(sve_uunpk_s, uint32_t, uint16_t, H4, H2)
1674+
DO_UNPK(sve_uunpk_d, uint64_t, uint32_t, , H4)
1675+
1676+
#undef DO_UNPK

target/arm/translate-sve.c

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1956,6 +1956,139 @@ static bool trans_EXT(DisasContext *s, arg_EXT *a, uint32_t insn)
19561956
return true;
19571957
}
19581958

1959+
/*
1960+
*** SVE Permute - Unpredicated Group
1961+
*/
1962+
1963+
/*
 * DUP (scalar): replicate the register returned by cpu_reg_sp(rn) into
 * every element of Zd, using element size a->esz.
 * Always returns true; nothing is emitted when the SVE access check fails.
 */
static bool trans_DUP_s(DisasContext *s, arg_DUP_s *a, uint32_t insn)
{
    unsigned vsz;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
                         vsz, vsz, cpu_reg_sp(s, a->rn));
    return true;
}
1972+
1973+
/*
 * DUP (indexed): replicate one element of Zn into every element of Zd.
 *
 * a->imm packs element size and index: the position of the lowest set bit
 * is the element size, and the bits above it are the index.  Returns false
 * when the low five bits of imm are all zero.  An index whose byte offset
 * is not below the vector size produces an all-zero destination.
 */
static bool trans_DUP_x(DisasContext *s, arg_DUP_x *a, uint32_t insn)
{
    unsigned vsz, dofs, esz, index;

    if ((a->imm & 0x1f) == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    dofs = vec_full_reg_offset(s, a->rd);
    esz = ctz32(a->imm);
    index = a->imm >> (esz + 1);

    if ((index << esz) >= vsz) {
        /* Selected element lies outside the vector: result is zero. */
        tcg_gen_gvec_dup64i(dofs, vsz, vsz, 0);
    } else {
        unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
        tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
    }
    return true;
}
1995+
1996+
/*
 * Emit a call to the sve_insr_* helper selected by a->esz.
 * The helper receives pointers to Zd and Zn inside cpu_env, the 64-bit
 * value VAL, and a descriptor built from the full vector size.
 * The caller keeps ownership of VAL; the temporaries made here are freed.
 */
static void do_insr_i64(DisasContext *s, arg_rrr_esz *a, TCGv_i64 val)
{
    typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
    static gen_insr * const insr_by_esz[4] = {
        gen_helper_sve_insr_b,
        gen_helper_sve_insr_h,
        gen_helper_sve_insr_s,
        gen_helper_sve_insr_d,
    };
    gen_insr *emit_insr = insr_by_esz[a->esz];
    unsigned vsz = vec_full_reg_size(s);
    TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, vsz, 0));
    TCGv_ptr dst_ptr = tcg_temp_new_ptr();
    TCGv_ptr src_ptr = tcg_temp_new_ptr();

    tcg_gen_addi_ptr(dst_ptr, cpu_env, vec_full_reg_offset(s, a->rd));
    tcg_gen_addi_ptr(src_ptr, cpu_env, vec_full_reg_offset(s, a->rn));

    emit_insr(dst_ptr, src_ptr, val, t_desc);

    tcg_temp_free_ptr(dst_ptr);
    tcg_temp_free_ptr(src_ptr);
    tcg_temp_free_i32(t_desc);
}
2017+
2018+
/*
 * INSR (SIMD&FP scalar): load the low 64 bits of vector register rm and
 * insert them via do_insr_i64().
 * Always returns true; nothing is emitted when the SVE access check fails.
 */
static bool trans_INSR_f(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    TCGv_i64 scalar;

    if (!sve_access_check(s)) {
        return true;
    }

    scalar = tcg_temp_new_i64();
    tcg_gen_ld_i64(scalar, cpu_env, vec_reg_offset(s, a->rm, 0, MO_64));
    do_insr_i64(s, a, scalar);
    tcg_temp_free_i64(scalar);
    return true;
}
2028+
2029+
/*
 * INSR (general register): insert the value of cpu_reg(rm) via
 * do_insr_i64().
 * Always returns true; nothing is emitted when the SVE access check fails.
 */
static bool trans_INSR_r(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    do_insr_i64(s, a, cpu_reg(s, a->rm));
    return true;
}
2036+
2037+
/*
 * REV (vector): emit an out-of-line call to the sve_rev_* helper chosen
 * by a->esz, operating on the full vector from Zn into Zd.
 * Always returns true; nothing is emitted when the SVE access check fails.
 */
static bool trans_REV_v(DisasContext *s, arg_rr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_2 * const rev_by_esz[4] = {
        gen_helper_sve_rev_b,
        gen_helper_sve_rev_h,
        gen_helper_sve_rev_s,
        gen_helper_sve_rev_d
    };
    unsigned vsz;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vsz, vsz, 0, rev_by_esz[a->esz]);
    return true;
}
2052+
2053+
/*
 * TBL: emit an out-of-line call to the sve_tbl_* helper chosen by a->esz,
 * with Zd as destination, Zn as the table and Zm as the index vector.
 * Always returns true; nothing is emitted when the SVE access check fails.
 */
static bool trans_TBL(DisasContext *s, arg_rrr_esz *a, uint32_t insn)
{
    static gen_helper_gvec_3 * const tbl_by_esz[4] = {
        gen_helper_sve_tbl_b,
        gen_helper_sve_tbl_h,
        gen_helper_sve_tbl_s,
        gen_helper_sve_tbl_d
    };
    unsigned vsz;

    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
                       vec_full_reg_offset(s, a->rn),
                       vec_full_reg_offset(s, a->rm),
                       vsz, vsz, 0, tbl_by_esz[a->esz]);
    return true;
}
2069+
2070+
/*
 * SUNPK/UUNPK: emit an out-of-line call to the unpack helper chosen by
 * element size and signedness.
 *
 * Returns false for a->esz == 0, which has no helper.  a->u selects the
 * unsigned helper; a->h moves the source start to the middle of Zn.
 * Otherwise returns true, emitting nothing if the SVE access check fails.
 */
static bool trans_UNPK(DisasContext *s, arg_UNPK *a, uint32_t insn)
{
    /* Indexed as [esz][u]; row 0 is unused because esz == 0 is rejected. */
    static gen_helper_gvec_2 * const unpk_helpers[4][2] = {
        { NULL, NULL },
        { gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
        { gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
        { gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
    };
    unsigned vsz, src_ofs;

    if (a->esz == 0) {
        return false;
    }
    if (!sve_access_check(s)) {
        return true;
    }

    vsz = vec_full_reg_size(s);
    src_ofs = vec_full_reg_offset(s, a->rn);
    if (a->h) {
        src_ofs += vsz / 2;
    }
    tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd), src_ofs,
                       vsz, vsz, 0, unpk_helpers[a->esz][a->u]);
    return true;
}
2091+
19592092
/*
19602093
*** SVE Memory - 32-bit Gather and Unsized Contiguous Group
19612094
*/

0 commit comments

Comments
 (0)