Skip to content

Commit a520199

Browse files
author
Eric Liu
committed
8282528: AArch64: Incorrect replicate2L_zero rule
This patch fixes the wrong matching rule of replicate2L_zero. It matched "ReplicateI" by mistake, so long immediates (not only zero) had to be moved to a register first and were then matched by replicate2L. To fix this trivial bug, this patch fixes the typo and extends the rule replicate2L_zero to replicate2L_imm, which now supports all possible long immediate values. The final code changes are shown below: replicate2L_imm: mov x13, #0xff; movk x13, #0xff, lsl #16; movk x13, #0xff, lsl #32; dup v16.2d, x13 => movi v16.2d, #0xff00ff00ff [Test] test/jdk/jdk/incubator/vector and test/hotspot/jtreg/compiler/vectorapi passed without failure. Change-Id: Ieac92820dea560239a968de3d7430003f01726bd
1 parent 6013d09 commit a520199

File tree

5 files changed

+405
-62
lines changed

5 files changed

+405
-62
lines changed

src/hotspot/cpu/aarch64/aarch64_neon.ad

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3255,16 +3255,14 @@ instruct replicate2L(vecX dst, iRegL src)
32553255
ins_pipe(vdup_reg_reg128);
32563256
%}
32573257

3258-
instruct replicate2L_zero(vecX dst, immI0 zero)
3258+
instruct replicate2L_imm(vecX dst, immL con)
32593259
%{
32603260
predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
3261-
match(Set dst (ReplicateI zero));
3261+
match(Set dst (ReplicateL con));
32623262
ins_cost(INSN_COST);
3263-
format %{ "movi $dst, $zero\t# vector (4I)" %}
3263+
format %{ "movi $dst, $con\t# vector (2L)" %}
32643264
ins_encode %{
3265-
__ eor(as_FloatRegister($dst$$reg), __ T16B,
3266-
as_FloatRegister($dst$$reg),
3267-
as_FloatRegister($dst$$reg));
3265+
__ mov(as_FloatRegister($dst$$reg), __ T2D, $con$$constant);
32683266
%}
32693267
ins_pipe(vmovi_reg_imm128);
32703268
%}

src/hotspot/cpu/aarch64/aarch64_neon_ad.m4

Lines changed: 40 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1553,55 +1553,50 @@ VFABD(fabd, fabd, 4, F, X, S, 128)
15531553
VFABD(fabd, fabd, 2, D, X, D, 128)
15541554
dnl
15551555
define(`VREPLICATE', `
1556-
instruct replicate$3$4$5`'(vec$6 dst, $7 ifelse($7, immI0, zero, $7, immI, con, src))
1557-
%{
1558-
predicate(UseSVE == 0 && ifelse($8, `',
1559-
n->as_Vector()->length() == $3,
1560-
(n->as_Vector()->length() == $3 ||`
1561-
'n->as_Vector()->length() == $8)));
1562-
match(Set dst (Replicate`'ifelse($7, immI0, I, $4) ifelse($7, immI0, zero, $7, immI, con, $7, zero, I, src)));
1563-
ins_cost(INSN_COST);
1564-
format %{ "$1 $dst, $ifelse($7, immI0, zero, $7, immI, con, src)`\t# vector ('ifelse($4$7, SimmI, $3H, $2, eor, 4I, $3$4)`)"' %}
1565-
ins_encode %{
1566-
__ $2(as_FloatRegister($dst$$reg), __ ifelse(
1567-
$2, eor, T16B, T$3`'$9),ifelse(
1568-
`$4 $7', `B immI', ` '$con$$constant & 0xff,
1569-
`$4 $7', `S immI', ` '$con$$constant & 0xffff,
1570-
`$4 $7', `I immI', ` '$con$$constant,
1571-
`$2', eor,`
1572-
as_FloatRegister($dst$$reg),
1573-
as_FloatRegister($dst$$reg)',
1574-
`$7', vRegF,`
1556+
instruct replicate$2$3$4`'(vec$5 dst, $6 ifelse($4, _imm, con, src))
1557+
%{
1558+
predicate(UseSVE == 0 && ifelse($7, `',
1559+
n->as_Vector()->length() == $2,
1560+
(n->as_Vector()->length() == $2 ||`
1561+
'n->as_Vector()->length() == $7)));
1562+
match(Set dst (Replicate$3 ifelse($4, _imm, con, src)));
1563+
ins_cost(INSN_COST);
1564+
format %{ "ifelse($1, mov, movi, $1) $dst, $ifelse($4, _imm, con, src)`\t# vector ('ifelse($3$4, S_imm, $2H, $2$3)`)"' %}
1565+
ins_encode %{
1566+
__ $1(as_FloatRegister($dst$$reg), __ T$2$8,ifelse(
1567+
`$3 $4', `B _imm', ` '$con$$constant & 0xff,
1568+
`$3 $4', `S _imm', ` '$con$$constant & 0xffff,
1569+
`$3 $4', `I _imm', ` '$con$$constant,
1570+
`$3 $4', `L _imm', ` '$con$$constant,
1571+
`$6', vRegF,`
15751572
as_FloatRegister($src$$reg)',
1576-
`$7', vRegD,`
1573+
`$6', vRegD,`
15771574
as_FloatRegister($src$$reg)',
15781575
` 'as_Register($src$$reg)));
15791576
%}
1580-
ins_pipe(ifelse($7, immI0, v$1_reg_imm,
1581-
$7, immI, v$1_reg_imm,
1582-
$7, iRegIorL2I, v$1_reg_reg,
1583-
$7, zero, vmovi_reg_imm,
1584-
$7, iRegL, vdup_reg_reg,
1585-
$4, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($6, X, 128, 64));
1586-
%}')dnl
1587-
dnl $1 $2 $3 $4 $5 $6 $7 $8 $9
1588-
VREPLICATE(dup, dup, 8, B, , D, iRegIorL2I, 4, B)
1589-
VREPLICATE(dup, dup, 16, B, , X, iRegIorL2I, , B)
1590-
VREPLICATE(movi, mov, 8, B, _imm, D, immI, 4, B)
1591-
VREPLICATE(movi, mov, 16, B, _imm, X, immI, , B)
1592-
VREPLICATE(dup, dup, 4, S, , D, iRegIorL2I, 2, H)
1593-
VREPLICATE(dup, dup, 8, S, , X, iRegIorL2I, , H)
1594-
VREPLICATE(movi, mov, 4, S, _imm, D, immI, 2, H)
1595-
VREPLICATE(movi, mov, 8, S, _imm, X, immI, , H)
1596-
VREPLICATE(dup, dup, 2, I, , D, iRegIorL2I, , S)
1597-
VREPLICATE(dup, dup, 4, I, , X, iRegIorL2I, , S)
1598-
VREPLICATE(movi, mov, 2, I, _imm, D, immI, , S)
1599-
VREPLICATE(movi, mov, 4, I, _imm, X, immI, , S)
1600-
VREPLICATE(dup, dup, 2, L, , X, iRegL, , D)
1601-
VREPLICATE(movi, eor, 2, L, _zero, X, immI0, , D)
1602-
VREPLICATE(dup, dup, 2, F, , D, vRegF, , S)
1603-
VREPLICATE(dup, dup, 4, F, , X, vRegF, , S)
1604-
VREPLICATE(dup, dup, 2, D, , X, vRegD, , D)
1577+
ins_pipe(ifelse($4, _imm, vmovi_reg_imm,
1578+
$6, iRegIorL2I, v$1_reg_reg,
1579+
$6, iRegL, vdup_reg_reg,
1580+
$3, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($5, X, 128, 64));
1581+
%}')dnl
1582+
dnl $1 $2 $3 $4 $5 $6 $7 $8
1583+
VREPLICATE(dup, 8, B, , D, iRegIorL2I, 4, B)
1584+
VREPLICATE(dup, 16, B, , X, iRegIorL2I, , B)
1585+
VREPLICATE(mov, 8, B, _imm, D, immI, 4, B)
1586+
VREPLICATE(mov, 16, B, _imm, X, immI, , B)
1587+
VREPLICATE(dup, 4, S, , D, iRegIorL2I, 2, H)
1588+
VREPLICATE(dup, 8, S, , X, iRegIorL2I, , H)
1589+
VREPLICATE(mov, 4, S, _imm, D, immI, 2, H)
1590+
VREPLICATE(mov, 8, S, _imm, X, immI, , H)
1591+
VREPLICATE(dup, 2, I, , D, iRegIorL2I, , S)
1592+
VREPLICATE(dup, 4, I, , X, iRegIorL2I, , S)
1593+
VREPLICATE(mov, 2, I, _imm, D, immI, , S)
1594+
VREPLICATE(mov, 4, I, _imm, X, immI, , S)
1595+
VREPLICATE(dup, 2, L, , X, iRegL, , D)
1596+
VREPLICATE(mov, 2, L, _imm, X, immL, , D)
1597+
VREPLICATE(dup, 2, F, , D, vRegF, , S)
1598+
VREPLICATE(dup, 4, F, , X, vRegF, , S)
1599+
VREPLICATE(dup, 2, D, , X, vRegD, , D)
16051600
dnl
16061601

16071602
// ====================REDUCTION ARITHMETIC====================================

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1332,16 +1332,51 @@ void MacroAssembler::movptr(Register r, uintptr_t imm64) {
13321332
}
13331333

13341334
// Macro to mov replicated immediate to vector register.
1335-
// Vd will get the following values for different arrangements in T
1336-
// imm32 == hex 000000gh T8B: Vd = ghghghghghghghgh
1337-
// imm32 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh
1338-
// imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh
1339-
// imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
1340-
// imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
1341-
// imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
1342-
// T1D/T2D: invalid
1343-
void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32) {
1344-
assert(T != T1D && T != T2D, "invalid arrangement");
1335+
// imm64: only the lower 8/16/32 bits are used for the B/H/S arrangements;
1336+
// the upper 56/48/32 bits must be zero for those arrangements.
1337+
// Vd will get the following values for different arrangements in T
1338+
// imm64 == hex 000000gh T8B: Vd = ghghghghghghghgh
1339+
// imm64 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh
1340+
// imm64 == hex 0000efgh T4H: Vd = efghefghefghefgh
1341+
// imm64 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
1342+
// imm64 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
1343+
// imm64 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
1344+
// imm64 == hex abcdefgh T1D: Vd = 00000000abcdefgh
1345+
// imm64 == hex abcdefgh T2D: Vd = 00000000abcdefgh00000000abcdefgh
1346+
// Clobbers rscratch1
1347+
void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64) {
1348+
if (T == T1D || T == T2D) {
1349+
// To encode into movi, the 64-bit imm must be in the form of
1350+
// 'aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh'
1351+
// and encoded in "a:b:c:d:e:f:g:h".
1352+
bool can_encode = true;
1353+
uint64_t tmp = imm64;
1354+
uint64_t one_byte = 0;
1355+
for (int i = 0; i < 8; i++) {
1356+
one_byte = tmp & 0xFFULL;
1357+
if (one_byte != 0xFFULL && one_byte != 0) {
1358+
can_encode = false;
1359+
break;
1360+
}
1361+
tmp = tmp >> 8;
1362+
}
1363+
1364+
if(can_encode) {
1365+
uint64_t imm = imm64;
1366+
imm &= 0x0101010101010101ULL;
1367+
imm |= (imm >> 7);
1368+
imm |= (imm >> 14);
1369+
imm |= (imm >> 28);
1370+
imm &= 0xFFULL;
1371+
movi(Vd, T, imm);
1372+
} else {
1373+
mov(rscratch1, imm64);
1374+
dup(Vd, T, rscratch1);
1375+
}
1376+
return;
1377+
}
1378+
1379+
uint32_t imm32 = imm64 & 0xFFFFFFFFULL;
13451380
if (T == T8B || T == T16B) {
13461381
assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
13471382
movi(Vd, T, imm32 & 0xff, 0);

src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -508,7 +508,7 @@ class MacroAssembler: public Assembler {
508508

509509
void movptr(Register r, uintptr_t imm64);
510510

511-
void mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32);
511+
void mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64);
512512

513513
void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
514514
orr(Vd, T, Vn, Vn);

0 commit comments

Comments
 (0)