Skip to content

Commit 9483069

Browse files
Yi-Fan Tsai authored and Paul Hohensee committed
8282528: AArch64: Incorrect replicate2L_zero rule
Reviewed-by: phh
Backport-of: c35590282d54d8388f2f7501a30365e0a912bfda
1 parent 67f31bb commit 9483069

File tree

7 files changed

+455
-103
lines changed

7 files changed

+455
-103
lines changed

src/hotspot/cpu/aarch64/aarch64_neon.ad

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3103,16 +3103,14 @@ instruct replicate2L(vecX dst, iRegL src)
31033103
ins_pipe(vdup_reg_reg128);
31043104
%}
31053105

3106-
instruct replicate2L_zero(vecX dst, immI0 zero)
3106+
instruct replicate2L_imm(vecX dst, immL con)
31073107
%{
31083108
predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
3109-
match(Set dst (ReplicateI zero));
3109+
match(Set dst (ReplicateL con));
31103110
ins_cost(INSN_COST);
3111-
format %{ "movi $dst, $zero\t# vector (4I)" %}
3111+
format %{ "movi $dst, $con\t# vector (2L)" %}
31123112
ins_encode %{
3113-
__ eor(as_FloatRegister($dst$$reg), __ T16B,
3114-
as_FloatRegister($dst$$reg),
3115-
as_FloatRegister($dst$$reg));
3113+
__ mov(as_FloatRegister($dst$$reg), __ T2D, $con$$constant);
31163114
%}
31173115
ins_pipe(vmovi_reg_imm128);
31183116
%}
@@ -3124,8 +3122,7 @@ instruct replicate2F(vecD dst, vRegF src)
31243122
ins_cost(INSN_COST);
31253123
format %{ "dup $dst, $src\t# vector (2F)" %}
31263124
ins_encode %{
3127-
__ dup(as_FloatRegister($dst$$reg), __ T2S,
3128-
as_FloatRegister($src$$reg));
3125+
__ dup(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
31293126
%}
31303127
ins_pipe(vdup_reg_freg64);
31313128
%}
@@ -3137,8 +3134,7 @@ instruct replicate4F(vecX dst, vRegF src)
31373134
ins_cost(INSN_COST);
31383135
format %{ "dup $dst, $src\t# vector (4F)" %}
31393136
ins_encode %{
3140-
__ dup(as_FloatRegister($dst$$reg), __ T4S,
3141-
as_FloatRegister($src$$reg));
3137+
__ dup(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
31423138
%}
31433139
ins_pipe(vdup_reg_freg128);
31443140
%}
@@ -3150,8 +3146,7 @@ instruct replicate2D(vecX dst, vRegD src)
31503146
ins_cost(INSN_COST);
31513147
format %{ "dup $dst, $src\t# vector (2D)" %}
31523148
ins_encode %{
3153-
__ dup(as_FloatRegister($dst$$reg), __ T2D,
3154-
as_FloatRegister($src$$reg));
3149+
__ dup(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
31553150
%}
31563151
ins_pipe(vdup_reg_dreg128);
31573152
%}

src/hotspot/cpu/aarch64/aarch64_neon_ad.m4

Lines changed: 48 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,55 +1463,54 @@ VFABD(fabd, fabd, 2, F, D, S, 64)
14631463
VFABD(fabd, fabd, 4, F, X, S, 128)
14641464
VFABD(fabd, fabd, 2, D, X, D, 128)
14651465
dnl
1466-
define(`VREPLICATE', `
1467-
instruct replicate$3$4$5`'(vec$6 dst, $7 ifelse($7, immI0, zero, $7, immI, con, src))
1468-
%{
1469-
predicate(ifelse($8, UseSVE == 0 && , $8,
1470-
$8, , , $8`
1471-
')n->as_Vector()->length() == $3);
1472-
match(Set dst (Replicate`'ifelse($7, immI0, I, $4) ifelse($7, immI0, zero, $7, immI, con, $7, zero, I, src)));
1473-
ins_cost(INSN_COST);
1474-
format %{ "$1 $dst, $ifelse($7, immI0, zero, $7, immI, con, src)`\t# vector ('ifelse($4$7, SimmI, $3H, $2, eor, 4I, $3$4)`)"' %}
1475-
ins_encode %{
1476-
__ $2(as_FloatRegister($dst$$reg), __ ifelse(
1477-
$2, eor, T16B, T$3`'$9),ifelse(
1478-
`$4 $7', `B immI', ` '$con$$constant & 0xff,
1479-
`$4 $7', `S immI', ` '$con$$constant & 0xffff,
1480-
`$4 $7', `I immI', ` '$con$$constant,
1481-
`$2', eor,`
1482-
as_FloatRegister($dst$$reg),
1483-
as_FloatRegister($dst$$reg)',
1484-
`$7', vRegF,`
1485-
as_FloatRegister($src$$reg)',
1486-
`$7', vRegD,`
1487-
as_FloatRegister($src$$reg)',
1488-
` 'as_Register($src$$reg)));
1489-
%}
1490-
ins_pipe(ifelse($7, immI0, v$1_reg_imm,
1491-
$7, immI, v$1_reg_imm,
1492-
$7, iRegIorL2I, v$1_reg_reg,
1493-
$7, zero, vmovi_reg_imm,
1494-
$7, iRegL, vdup_reg_reg,
1495-
$4, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($6, X, 128, 64));
1496-
%}')dnl
1497-
dnl $1 $2 $3 $4 $5 $6 $7 $8 $9
1498-
VREPLICATE(dup, dup, 8, B, , D, iRegIorL2I, n->as_Vector()->length() == 4 ||, B)
1499-
VREPLICATE(dup, dup, 16, B, , X, iRegIorL2I, UseSVE == 0 && , B)
1500-
VREPLICATE(movi, mov, 8, B, _imm, D, immI, n->as_Vector()->length() == 4 ||, B)
1501-
VREPLICATE(movi, mov, 16, B, _imm, X, immI, UseSVE == 0 && , B)
1502-
VREPLICATE(dup, dup, 4, S, , D, iRegIorL2I, n->as_Vector()->length() == 2 ||, H)
1503-
VREPLICATE(dup, dup, 8, S, , X, iRegIorL2I, UseSVE == 0 && , H)
1504-
VREPLICATE(movi, mov, 4, S, _imm, D, immI, n->as_Vector()->length() == 2 ||, H)
1505-
VREPLICATE(movi, mov, 8, S, _imm, X, immI, UseSVE == 0 && , H)
1506-
VREPLICATE(dup, dup, 2, I, , D, iRegIorL2I, , S)
1507-
VREPLICATE(dup, dup, 4, I, , X, iRegIorL2I, UseSVE == 0 && , S)
1508-
VREPLICATE(movi, mov, 2, I, _imm, D, immI, , S)
1509-
VREPLICATE(movi, mov, 4, I, _imm, X, immI, UseSVE == 0 && , S)
1510-
VREPLICATE(dup, dup, 2, L, , X, iRegL, UseSVE == 0 && , D)
1511-
VREPLICATE(movi, eor, 2, L, _zero, X, immI0, UseSVE == 0 && , D)
1512-
VREPLICATE(dup, dup, 2, F, , D, vRegF, , S)
1513-
VREPLICATE(dup, dup, 4, F, , X, vRegF, UseSVE == 0 && , S)
1514-
VREPLICATE(dup, dup, 2, D, , X, vRegD, UseSVE == 0 && , D)
1466+
define(`VREPLICATE_REG', `
1467+
instruct replicate$2$3`'(vec$4 dst, $5 src)
1468+
%{
1469+
predicate(ifelse($7, UseSVE == 0 && , $7,
1470+
$7, , , $7`
1471+
')n->as_Vector()->length() == $2);
1472+
match(Set dst (Replicate$3 src));
1473+
ins_cost(INSN_COST);
1474+
format %{ "dup $dst, $src\t# vector ($2$3)" %}
1475+
ins_encode %{
1476+
__ dup(as_FloatRegister($dst$$reg), __ T$2$1, $6($src$$reg));
1477+
%}
1478+
ins_pipe(ifelse($5, iRegIorL2I, vdup_reg_reg,
1479+
$5, iRegL, vdup_reg_reg,
1480+
$3, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($4, X, 128, 64));
1481+
%}')dnl
1482+
define(`VREPLICATE_IMM', `
1483+
instruct replicate$2$3_imm`'(vec$4 dst, $5 con)
1484+
%{
1485+
predicate(ifelse($7, UseSVE == 0 && , $7,
1486+
$7, , , $7`
1487+
')n->as_Vector()->length() == $2);
1488+
match(Set dst (Replicate$3 con));
1489+
ins_cost(INSN_COST);
1490+
format %{ "movi $dst, $con\t`#' vector ($2`'ifelse($3, S, H, $3))" %}
1491+
ins_encode %{
1492+
__ mov(as_FloatRegister($dst$$reg), __ T$2`'iTYPE2SIMD($3), $con$$constant`'$6);
1493+
%}
1494+
ins_pipe(vmovi_reg_imm`'ifelse($4, X, 128, 64));
1495+
%}')dnl
1496+
dnl $1 $2 $3 $4 $5 $6 $7
1497+
VREPLICATE_REG(B, 8, B, D, iRegIorL2I, as_Register, n->as_Vector()->length() == 4 ||)
1498+
VREPLICATE_REG(B, 16, B, X, iRegIorL2I, as_Register, UseSVE == 0 && )
1499+
VREPLICATE_IMM(B, 8, B, D, immI, ` & 0xff', n->as_Vector()->length() == 4 ||)
1500+
VREPLICATE_IMM(B, 16, B, X, immI, ` & 0xff', UseSVE == 0 && )
1501+
VREPLICATE_REG(H, 4, S, D, iRegIorL2I, as_Register, n->as_Vector()->length() == 2 ||)
1502+
VREPLICATE_REG(H, 8, S, X, iRegIorL2I, as_Register, UseSVE == 0 && )
1503+
VREPLICATE_IMM(H, 4, S, D, immI, ` & 0xffff', n->as_Vector()->length() == 2 ||)
1504+
VREPLICATE_IMM(H, 8, S, X, immI, ` & 0xffff', UseSVE == 0 && )
1505+
VREPLICATE_REG(S, 2, I, D, iRegIorL2I, as_Register, )
1506+
VREPLICATE_REG(S, 4, I, X, iRegIorL2I, as_Register, UseSVE == 0 && )
1507+
VREPLICATE_IMM(S, 2, I, D, immI, , )
1508+
VREPLICATE_IMM(S, 4, I, X, immI, , UseSVE == 0 && )
1509+
VREPLICATE_REG(D, 2, L, X, iRegL, as_Register, UseSVE == 0 && )
1510+
VREPLICATE_IMM(D, 2, L, X, immL, , UseSVE == 0 && )
1511+
VREPLICATE_REG(S, 2, F, D, vRegF, as_FloatRegister, )
1512+
VREPLICATE_REG(S, 4, F, X, vRegF, as_FloatRegister, UseSVE == 0 && )
1513+
VREPLICATE_REG(D, 2, D, X, vRegD, as_FloatRegister, UseSVE == 0 && )
15151514
dnl
15161515

15171516
// ====================REDUCTION ARITHMETIC====================================

src/hotspot/cpu/aarch64/assembler_aarch64.cpp

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
33
* Copyright (c) 2014, 2020 Red Hat Inc. All rights reserved.
44
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
55
*
@@ -313,6 +313,53 @@ bool Assembler::operand_valid_for_logical_immediate(bool is32, uint64_t imm) {
313313
return encode_logical_immediate(is32, imm) != 0xffffffff;
314314
}
315315

316+
// Check immediate encoding for movi.
317+
// Return the shift amount which can be {0, 8, 16, 24} for B/H/S types. As the D type
318+
// movi does not have shift variant, in this case the return value is the immediate
319+
// after encoding.
320+
// Return -1 if the input imm64 can not be encoded.
321+
int Assembler::operand_valid_for_movi_immediate(uint64_t imm64, SIMD_Arrangement T) {
322+
if (T == T1D || T == T2D) {
323+
// To encode into movi, the 64-bit imm must be in the form of
324+
// 'aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh'
325+
// and encoded in "a:b:c:d:e:f:g:h".
326+
uint64_t tmp = imm64;
327+
uint64_t one_byte = 0;
328+
for (int i = 0; i < 8; i++) {
329+
one_byte = tmp & 0xffULL;
330+
if (one_byte != 0xffULL && one_byte != 0) {
331+
return -1; // can not be encoded
332+
}
333+
tmp = tmp >> 8;
334+
}
335+
336+
imm64 &= 0x0101010101010101ULL;
337+
imm64 |= (imm64 >> 7);
338+
imm64 |= (imm64 >> 14);
339+
imm64 |= (imm64 >> 28);
340+
341+
return imm64 & 0xff;
342+
}
343+
344+
uint32_t imm32 = imm64 & 0xffffffffULL;
345+
if (T == T8B || T == T16B) { // 8-bit variant
346+
if (0 == (imm32 & ~0xff)) return 0;
347+
} else if(T == T4H || T == T8H) { // 16-bit variant
348+
if (0 == (imm32 & ~0xff)) return 0;
349+
if (0 == (imm32 & ~0xff00)) return 8;
350+
} else if (T == T2S || T == T4S) { // 32-bit variant
351+
if (0 == (imm32 & ~0xff)) return 0;
352+
if (0 == (imm32 & ~0xff00)) return 8;
353+
if (0 == (imm32 & ~0xff0000)) return 16;
354+
if (0 == (imm32 & ~0xff000000)) return 24;
355+
} else {
356+
assert(false, "unsupported");
357+
ShouldNotReachHere();
358+
}
359+
360+
return -1;
361+
}
362+
316363
static uint64_t doubleTo64Bits(jdouble d) {
317364
union {
318365
jdouble double_value;

src/hotspot/cpu/aarch64/assembler_aarch64.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3255,6 +3255,7 @@ void mvnw(Register Rd, Register Rm,
32553255
static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm);
32563256
static bool operand_valid_for_add_sub_immediate(int64_t imm);
32573257
static bool operand_valid_for_float_immediate(double imm);
3258+
static int operand_valid_for_movi_immediate(uint64_t imm64, SIMD_Arrangement T);
32583259

32593260
void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
32603261
void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);

src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp

Lines changed: 35 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1509,48 +1509,43 @@ void MacroAssembler::movptr(Register r, uintptr_t imm64) {
15091509
}
15101510

15111511
// Macro to mov replicated immediate to vector register.
1512-
// Vd will get the following values for different arrangements in T
1513-
// imm32 == hex 000000gh T8B: Vd = ghghghghghghghgh
1514-
// imm32 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh
1515-
// imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh
1516-
// imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
1517-
// imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
1518-
// imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
1519-
// T1D/T2D: invalid
1520-
void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32) {
1521-
assert(T != T1D && T != T2D, "invalid arrangement");
1522-
if (T == T8B || T == T16B) {
1523-
assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)");
1524-
movi(Vd, T, imm32 & 0xff, 0);
1512+
// imm64: only the lower 8/16/32 bits are considered for B/H/S type. That is,
1513+
// the upper 56/48/32 bits must be zeros for B/H/S type.
1514+
// Vd will get the following values for different arrangements in T
1515+
// imm64 == hex 000000gh T8B: Vd = ghghghghghghghgh
1516+
// imm64 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh
1517+
// imm64 == hex 0000efgh T4H: Vd = efghefghefghefgh
1518+
// imm64 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh
1519+
// imm64 == hex abcdefgh T2S: Vd = abcdefghabcdefgh
1520+
// imm64 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh
1521+
// imm64 == hex abcdefgh T1D: Vd = 00000000abcdefgh
1522+
// imm64 == hex abcdefgh T2D: Vd = 00000000abcdefgh00000000abcdefgh
1523+
// Clobbers rscratch1
1524+
void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64) {
1525+
assert(T != T1Q, "unsupported");
1526+
if (T == T1D || T == T2D) {
1527+
int imm = operand_valid_for_movi_immediate(imm64, T);
1528+
if (-1 != imm) {
1529+
movi(Vd, T, imm);
1530+
} else {
1531+
mov(rscratch1, imm64);
1532+
dup(Vd, T, rscratch1);
1533+
}
15251534
return;
15261535
}
1527-
uint32_t nimm32 = ~imm32;
1528-
if (T == T4H || T == T8H) {
1529-
assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)");
1530-
imm32 &= 0xffff;
1531-
nimm32 &= 0xffff;
1532-
}
1533-
uint32_t x = imm32;
1534-
int movi_cnt = 0;
1535-
int movn_cnt = 0;
1536-
while (x) { if (x & 0xff) movi_cnt++; x >>= 8; }
1537-
x = nimm32;
1538-
while (x) { if (x & 0xff) movn_cnt++; x >>= 8; }
1539-
if (movn_cnt < movi_cnt) imm32 = nimm32;
1540-
unsigned lsl = 0;
1541-
while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1542-
if (movn_cnt < movi_cnt)
1543-
mvni(Vd, T, imm32 & 0xff, lsl);
1544-
else
1545-
movi(Vd, T, imm32 & 0xff, lsl);
1546-
imm32 >>= 8; lsl += 8;
1547-
while (imm32) {
1548-
while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; }
1549-
if (movn_cnt < movi_cnt)
1550-
bici(Vd, T, imm32 & 0xff, lsl);
1551-
else
1552-
orri(Vd, T, imm32 & 0xff, lsl);
1553-
lsl += 8; imm32 >>= 8;
1536+
1537+
#ifdef ASSERT
1538+
if (T == T8B || T == T16B) assert((imm64 & ~0xff) == 0, "extraneous bits (T8B/T16B)");
1539+
if (T == T4H || T == T8H) assert((imm64 & ~0xffff) == 0, "extraneous bits (T4H/T8H)");
1540+
if (T == T2S || T == T4S) assert((imm64 & ~0xffffffff) == 0, "extraneous bits (T2S/T4S)");
1541+
#endif
1542+
int shift = operand_valid_for_movi_immediate(imm64, T);
1543+
uint32_t imm32 = imm64 & 0xffffffffULL;
1544+
if (shift >= 0) {
1545+
movi(Vd, T, (imm32 >> shift) & 0xff, shift);
1546+
} else {
1547+
movw(rscratch1, imm32);
1548+
dup(Vd, T, rscratch1);
15541549
}
15551550
}
15561551

src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -525,7 +525,7 @@ class MacroAssembler: public Assembler {
525525

526526
void movptr(Register r, uintptr_t imm64);
527527

528-
void mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32);
528+
void mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64);
529529

530530
void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
531531
orr(Vd, T, Vn, Vn);

0 commit comments

Comments (0)