Skip to content

Commit 9d3379b

Browse files
Ningsheng Jian, Xiaohong Gong, Wang Huang, Xuejin He, Ai Jiaming
committed
8267356: AArch64: Vector API SVE codegen support
Co-authored-by: Xiaohong Gong <xgong@openjdk.org>
Co-authored-by: Wang Huang <whuang@openjdk.org>
Co-authored-by: Ningsheng Jian <njian@openjdk.org>
Co-authored-by: Xuejin He <xhe@openjdk.org>
Co-authored-by: Ai Jiaming <aijiaming1@huawei.com>
Co-authored-by: Eric Liu <eliu@openjdk.org>
Reviewed-by: aph, ngasson
1 parent 6031388 commit 9d3379b

File tree

13 files changed

+5566
-1185
lines changed

13 files changed

+5566
-1185
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

Lines changed: 33 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1902,7 +1902,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
19021902
__ bind(L_skip_barrier);
19031903
}
19041904

1905-
if (C->max_vector_size() >= 16) {
1905+
if (C->max_vector_size() > 0) {
19061906
__ reinitialize_ptrue();
19071907
}
19081908

@@ -2388,15 +2388,15 @@ const bool Matcher::match_rule_supported(int opcode) {
23882388
// Identify extra cases that we might want to provide match rules for vector nodes and
23892389
// other intrinsics guarded with vector length (vlen) and element type (bt).
23902390
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
2391-
if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
2391+
if (!match_rule_supported(opcode)) {
23922392
return false;
23932393
}
23942394
int bit_size = vlen * type2aelembytes(bt) * 8;
23952395
if (UseSVE == 0 && bit_size > 128) {
23962396
return false;
23972397
}
23982398
if (UseSVE > 0) {
2399-
return op_sve_supported(opcode);
2399+
return op_sve_supported(opcode, vlen, bt);
24002400
} else { // NEON
24012401
// Special cases
24022402
switch (opcode) {
@@ -2438,11 +2438,14 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
24382438
return false;
24392439
}
24402440
break;
2441+
case Op_LoadVectorGather:
2442+
case Op_StoreVectorScatter:
2443+
return false;
24412444
default:
24422445
break;
24432446
}
24442447
}
2445-
return true; // Per default match rules are supported.
2448+
return vector_size_supported(bt, vlen);
24462449
}
24472450

24482451
const RegMask* Matcher::predicate_reg_mask(void) {
@@ -2488,24 +2491,20 @@ const int Matcher::vector_width_in_bytes(BasicType bt) {
24882491
const int Matcher::max_vector_size(const BasicType bt) {
24892492
return vector_width_in_bytes(bt)/type2aelembytes(bt);
24902493
}
2494+
24912495
const int Matcher::min_vector_size(const BasicType bt) {
24922496
int max_size = max_vector_size(bt);
2493-
if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
2494-
// Currently vector length less than SVE vector register size is not supported.
2495-
return max_size;
2496-
} else { // NEON
2497-
// Limit the vector size to 8 bytes
2498-
int size = 8 / type2aelembytes(bt);
2499-
if (bt == T_BYTE) {
2500-
// To support vector api shuffle/rearrange.
2501-
size = 4;
2502-
} else if (bt == T_BOOLEAN) {
2503-
// To support vector api load/store mask.
2504-
size = 2;
2505-
}
2506-
if (size < 2) size = 2;
2507-
return MIN2(size,max_size);
2497+
// Limit the min vector size to 8 bytes.
2498+
int size = 8 / type2aelembytes(bt);
2499+
if (bt == T_BYTE) {
2500+
// To support vector api shuffle/rearrange.
2501+
size = 4;
2502+
} else if (bt == T_BOOLEAN) {
2503+
// To support vector api load/store mask.
2504+
size = 2;
25082505
}
2506+
if (size < 2) size = 2;
2507+
return MIN2(size, max_size);
25092508
}
25102509

25112510
// Actual max scalable vector register length.
@@ -2515,7 +2514,7 @@ const int Matcher::scalable_vector_reg_size(const BasicType bt) {
25152514

25162515
// Vector ideal reg.
25172516
const uint Matcher::vector_ideal_reg(int len) {
2518-
if (UseSVE > 0 && 16 <= len && len <= 256) {
2517+
if (UseSVE > 0 && 2 <= len && len <= 256) {
25192518
return Op_VecA;
25202519
}
25212520
switch(len) {
@@ -3720,7 +3719,7 @@ encode %{
37203719
}
37213720

37223721
// Only non uncommon_trap calls need to reinitialize ptrue.
3723-
if (Compile::current()->max_vector_size() >= 16 && uncommon_trap_request() == 0) {
3722+
if (Compile::current()->max_vector_size() > 0 && uncommon_trap_request() == 0) {
37243723
__ reinitialize_ptrue();
37253724
}
37263725
%}
@@ -3732,7 +3731,7 @@ encode %{
37323731
if (call == NULL) {
37333732
ciEnv::current()->record_failure("CodeCache is full");
37343733
return;
3735-
} else if (Compile::current()->max_vector_size() >= 16) {
3734+
} else if (Compile::current()->max_vector_size() > 0) {
37363735
__ reinitialize_ptrue();
37373736
}
37383737
%}
@@ -3770,7 +3769,7 @@ encode %{
37703769
__ bind(retaddr);
37713770
__ add(sp, sp, 2 * wordSize);
37723771
}
3773-
if (Compile::current()->max_vector_size() >= 16) {
3772+
if (Compile::current()->max_vector_size() > 0) {
37743773
__ reinitialize_ptrue();
37753774
}
37763775
%}
@@ -3783,7 +3782,7 @@ encode %{
37833782
enc_class aarch64_enc_ret() %{
37843783
C2_MacroAssembler _masm(&cbuf);
37853784
#ifdef ASSERT
3786-
if (Compile::current()->max_vector_size() >= 16) {
3785+
if (Compile::current()->max_vector_size() > 0) {
37873786
__ verify_ptrue();
37883787
}
37893788
#endif
@@ -4156,6 +4155,16 @@ operand immIExt()
41564155
interface(CONST_INTER);
41574156
%}
41584157

4158+
operand immI_gt_1()
4159+
%{
4160+
predicate(n->get_int() > 1);
4161+
match(ConI);
4162+
4163+
op_cost(0);
4164+
format %{ %}
4165+
interface(CONST_INTER);
4166+
%}
4167+
41594168
operand immI_le_4()
41604169
%{
41614170
predicate(n->get_int() <= 4);

src/hotspot/cpu/aarch64/aarch64_neon.ad

Lines changed: 22 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
// Load Vector (16 bits)
3434
instruct loadV2(vecD dst, vmem2 mem)
3535
%{
36-
predicate(n->as_LoadVector()->memory_size() == 2);
36+
predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 2);
3737
match(Set dst (LoadVector mem));
3838
ins_cost(4 * INSN_COST);
3939
format %{ "ldrh $dst,$mem\t# vector (16 bits)" %}
@@ -44,7 +44,7 @@ instruct loadV2(vecD dst, vmem2 mem)
4444
// Load Vector (32 bits)
4545
instruct loadV4(vecD dst, vmem4 mem)
4646
%{
47-
predicate(n->as_LoadVector()->memory_size() == 4);
47+
predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 4);
4848
match(Set dst (LoadVector mem));
4949
ins_cost(4 * INSN_COST);
5050
format %{ "ldrs $dst,$mem\t# vector (32 bits)" %}
@@ -55,7 +55,7 @@ instruct loadV4(vecD dst, vmem4 mem)
5555
// Load Vector (64 bits)
5656
instruct loadV8(vecD dst, vmem8 mem)
5757
%{
58-
predicate(n->as_LoadVector()->memory_size() == 8);
58+
predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 8);
5959
match(Set dst (LoadVector mem));
6060
ins_cost(4 * INSN_COST);
6161
format %{ "ldrd $dst,$mem\t# vector (64 bits)" %}
@@ -2473,9 +2473,10 @@ instruct vmaskcastX(vecX dst)
24732473

24742474
instruct loadcon8B(vecD dst, immI0 src)
24752475
%{
2476-
predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
2477-
n->as_Vector()->length() == 8) &&
2478-
n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2476+
predicate(UseSVE == 0 &&
2477+
(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
2478+
n->as_Vector()->length() == 8) &&
2479+
n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
24792480
match(Set dst (VectorLoadConst src));
24802481
ins_cost(INSN_COST);
24812482
format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
@@ -2488,7 +2489,7 @@ instruct loadcon8B(vecD dst, immI0 src)
24882489

24892490
instruct loadcon16B(vecX dst, immI0 src)
24902491
%{
2491-
predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2492+
predicate(UseSVE == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
24922493
match(Set dst (VectorLoadConst src));
24932494
ins_cost(INSN_COST);
24942495
format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
@@ -2945,8 +2946,8 @@ instruct vabd2D(vecX dst, vecX src1, vecX src2)
29452946

29462947
instruct replicate8B(vecD dst, iRegIorL2I src)
29472948
%{
2948-
predicate(n->as_Vector()->length() == 4 ||
2949-
n->as_Vector()->length() == 8);
2949+
predicate(UseSVE == 0 && (n->as_Vector()->length() == 8 ||
2950+
n->as_Vector()->length() == 4));
29502951
match(Set dst (ReplicateB src));
29512952
ins_cost(INSN_COST);
29522953
format %{ "dup $dst, $src\t# vector (8B)" %}
@@ -2970,8 +2971,8 @@ instruct replicate16B(vecX dst, iRegIorL2I src)
29702971

29712972
instruct replicate8B_imm(vecD dst, immI con)
29722973
%{
2973-
predicate(n->as_Vector()->length() == 4 ||
2974-
n->as_Vector()->length() == 8);
2974+
predicate(UseSVE == 0 && (n->as_Vector()->length() == 8 ||
2975+
n->as_Vector()->length() == 4));
29752976
match(Set dst (ReplicateB con));
29762977
ins_cost(INSN_COST);
29772978
format %{ "movi $dst, $con\t# vector (8B)" %}
@@ -2995,8 +2996,8 @@ instruct replicate16B_imm(vecX dst, immI con)
29952996

29962997
instruct replicate4S(vecD dst, iRegIorL2I src)
29972998
%{
2998-
predicate(n->as_Vector()->length() == 2 ||
2999-
n->as_Vector()->length() == 4);
2999+
predicate(UseSVE == 0 && (n->as_Vector()->length() == 4 ||
3000+
n->as_Vector()->length() == 2));
30003001
match(Set dst (ReplicateS src));
30013002
ins_cost(INSN_COST);
30023003
format %{ "dup $dst, $src\t# vector (4S)" %}
@@ -3020,8 +3021,8 @@ instruct replicate8S(vecX dst, iRegIorL2I src)
30203021

30213022
instruct replicate4S_imm(vecD dst, immI con)
30223023
%{
3023-
predicate(n->as_Vector()->length() == 2 ||
3024-
n->as_Vector()->length() == 4);
3024+
predicate(UseSVE == 0 && (n->as_Vector()->length() == 4 ||
3025+
n->as_Vector()->length() == 2));
30253026
match(Set dst (ReplicateS con));
30263027
ins_cost(INSN_COST);
30273028
format %{ "movi $dst, $con\t# vector (4H)" %}
@@ -3045,7 +3046,7 @@ instruct replicate8S_imm(vecX dst, immI con)
30453046

30463047
instruct replicate2I(vecD dst, iRegIorL2I src)
30473048
%{
3048-
predicate(n->as_Vector()->length() == 2);
3049+
predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
30493050
match(Set dst (ReplicateI src));
30503051
ins_cost(INSN_COST);
30513052
format %{ "dup $dst, $src\t# vector (2I)" %}
@@ -3069,7 +3070,7 @@ instruct replicate4I(vecX dst, iRegIorL2I src)
30693070

30703071
instruct replicate2I_imm(vecD dst, immI con)
30713072
%{
3072-
predicate(n->as_Vector()->length() == 2);
3073+
predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
30733074
match(Set dst (ReplicateI con));
30743075
ins_cost(INSN_COST);
30753076
format %{ "movi $dst, $con\t# vector (2I)" %}
@@ -3119,7 +3120,7 @@ instruct replicate2L_zero(vecX dst, immI0 zero)
31193120

31203121
instruct replicate2F(vecD dst, vRegF src)
31213122
%{
3122-
predicate(n->as_Vector()->length() == 2);
3123+
predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
31233124
match(Set dst (ReplicateF src));
31243125
ins_cost(INSN_COST);
31253126
format %{ "dup $dst, $src\t# vector (2F)" %}
@@ -4249,8 +4250,8 @@ instruct vxor16B(vecX dst, vecX src1, vecX src2)
42494250
// ------------------------------ Shift ---------------------------------------
42504251

42514252
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
4252-
predicate(n->as_Vector()->length_in_bytes() == 4 ||
4253-
n->as_Vector()->length_in_bytes() == 8);
4253+
predicate(UseSVE == 0 && (n->as_Vector()->length_in_bytes() == 4 ||
4254+
n->as_Vector()->length_in_bytes() == 8));
42544255
match(Set dst (LShiftCntV cnt));
42554256
match(Set dst (RShiftCntV cnt));
42564257
format %{ "dup $dst, $cnt\t# shift count vector (8B)" %}
@@ -4261,7 +4262,7 @@ instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
42614262
%}
42624263

42634264
instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
4264-
predicate(n->as_Vector()->length_in_bytes() == 16);
4265+
predicate(UseSVE == 0 && (n->as_Vector()->length_in_bytes() == 16));
42654266
match(Set dst (LShiftCntV cnt));
42664267
match(Set dst (RShiftCntV cnt));
42674268
format %{ "dup $dst, $cnt\t# shift count vector (16B)" %}

src/hotspot/cpu/aarch64/aarch64_neon_ad.m4

Lines changed: 32 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,9 @@ instruct $3V$4`'(vec$5 $7, vmem$4 mem)
6969
ins_pipe(v$3`_reg_mem'ifelse(eval($4 * 8), 128, 128, 64));
7070
%}')dnl
7171
dnl $1 $2 $3 $4 $5 $6 $7 $8
72-
VLoadStore(ldrh, H, load, 2, D, 16, dst, )
73-
VLoadStore(ldrs, S, load, 4, D, 32, dst, )
74-
VLoadStore(ldrd, D, load, 8, D, 64, dst, )
72+
VLoadStore(ldrh, H, load, 2, D, 16, dst, UseSVE == 0 && )
73+
VLoadStore(ldrs, S, load, 4, D, 32, dst, UseSVE == 0 && )
74+
VLoadStore(ldrd, D, load, 8, D, 64, dst, UseSVE == 0 && )
7575
VLoadStore(ldrq, Q, load, 16, X, 128, dst, UseSVE == 0 && )
7676
VLoadStore(strh, H, store, 2, D, 16, src, )
7777
VLoadStore(strs, S, store, 4, D, 32, src, )
@@ -1196,10 +1196,11 @@ dnl
11961196
//-------------------------------- LOAD_IOTA_INDICES----------------------------------
11971197
dnl
11981198
define(`PREDICATE', `ifelse($1, 8,
1199-
`predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
1200-
n->as_Vector()->length() == 8) &&
1201-
n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
1202-
`predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);')')dnl
1199+
`predicate(UseSVE == 0 &&
1200+
(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
1201+
n->as_Vector()->length() == 8) &&
1202+
n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
1203+
`predicate(UseSVE == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);')')dnl
12031204
dnl
12041205
define(`VECTOR_LOAD_CON', `
12051206
instruct loadcon$1B`'(vec$2 dst, immI0 src)
@@ -1466,9 +1467,10 @@ dnl
14661467
define(`VREPLICATE', `
14671468
instruct replicate$3$4$5`'(vec$6 dst, $7 ifelse($7, immI0, zero, $7, immI, con, src))
14681469
%{
1469-
predicate(ifelse($8, UseSVE == 0 && , $8,
1470-
$8, , , $8`
1471-
')n->as_Vector()->length() == $3);
1470+
predicate(UseSVE == 0 && ifelse($8, `',
1471+
n->as_Vector()->length() == $3,
1472+
(n->as_Vector()->length() == $3 ||`
1473+
'n->as_Vector()->length() == $8)));
14721474
match(Set dst (Replicate`'ifelse($7, immI0, I, $4) ifelse($7, immI0, zero, $7, immI, con, $7, zero, I, src)));
14731475
ins_cost(INSN_COST);
14741476
format %{ "$1 $dst, $ifelse($7, immI0, zero, $7, immI, con, src)`\t# vector ('ifelse($4$7, SimmI, $3H, $2, eor, 4I, $3$4)`)"' %}
@@ -1494,24 +1496,24 @@ instruct replicate$3$4$5`'(vec$6 dst, $7 ifelse($7, immI0, zero, $7, immI, con,
14941496
$7, iRegL, vdup_reg_reg,
14951497
$4, F, vdup_reg_freg, vdup_reg_dreg)`'ifelse($6, X, 128, 64));
14961498
%}')dnl
1497-
dnl $1 $2 $3 $4 $5 $6 $7 $8 $9
1498-
VREPLICATE(dup, dup, 8, B, , D, iRegIorL2I, n->as_Vector()->length() == 4 ||, B)
1499-
VREPLICATE(dup, dup, 16, B, , X, iRegIorL2I, UseSVE == 0 && , B)
1500-
VREPLICATE(movi, mov, 8, B, _imm, D, immI, n->as_Vector()->length() == 4 ||, B)
1501-
VREPLICATE(movi, mov, 16, B, _imm, X, immI, UseSVE == 0 && , B)
1502-
VREPLICATE(dup, dup, 4, S, , D, iRegIorL2I, n->as_Vector()->length() == 2 ||, H)
1503-
VREPLICATE(dup, dup, 8, S, , X, iRegIorL2I, UseSVE == 0 && , H)
1504-
VREPLICATE(movi, mov, 4, S, _imm, D, immI, n->as_Vector()->length() == 2 ||, H)
1505-
VREPLICATE(movi, mov, 8, S, _imm, X, immI, UseSVE == 0 && , H)
1506-
VREPLICATE(dup, dup, 2, I, , D, iRegIorL2I, , S)
1507-
VREPLICATE(dup, dup, 4, I, , X, iRegIorL2I, UseSVE == 0 && , S)
1508-
VREPLICATE(movi, mov, 2, I, _imm, D, immI, , S)
1509-
VREPLICATE(movi, mov, 4, I, _imm, X, immI, UseSVE == 0 && , S)
1510-
VREPLICATE(dup, dup, 2, L, , X, iRegL, UseSVE == 0 && , D)
1511-
VREPLICATE(movi, eor, 2, L, _zero, X, immI0, UseSVE == 0 && , D)
1512-
VREPLICATE(dup, dup, 2, F, , D, vRegF, , S)
1513-
VREPLICATE(dup, dup, 4, F, , X, vRegF, UseSVE == 0 && , S)
1514-
VREPLICATE(dup, dup, 2, D, , X, vRegD, UseSVE == 0 && , D)
1499+
dnl $1 $2 $3 $4 $5 $6 $7 $8 $9
1500+
VREPLICATE(dup, dup, 8, B, , D, iRegIorL2I, 4, B)
1501+
VREPLICATE(dup, dup, 16, B, , X, iRegIorL2I, , B)
1502+
VREPLICATE(movi, mov, 8, B, _imm, D, immI, 4, B)
1503+
VREPLICATE(movi, mov, 16, B, _imm, X, immI, , B)
1504+
VREPLICATE(dup, dup, 4, S, , D, iRegIorL2I, 2, H)
1505+
VREPLICATE(dup, dup, 8, S, , X, iRegIorL2I, , H)
1506+
VREPLICATE(movi, mov, 4, S, _imm, D, immI, 2, H)
1507+
VREPLICATE(movi, mov, 8, S, _imm, X, immI, , H)
1508+
VREPLICATE(dup, dup, 2, I, , D, iRegIorL2I, , S)
1509+
VREPLICATE(dup, dup, 4, I, , X, iRegIorL2I, , S)
1510+
VREPLICATE(movi, mov, 2, I, _imm, D, immI, , S)
1511+
VREPLICATE(movi, mov, 4, I, _imm, X, immI, , S)
1512+
VREPLICATE(dup, dup, 2, L, , X, iRegL, , D)
1513+
VREPLICATE(movi, eor, 2, L, _zero, X, immI0, , D)
1514+
VREPLICATE(dup, dup, 2, F, , D, vRegF, , S)
1515+
VREPLICATE(dup, dup, 4, F, , X, vRegF, , S)
1516+
VREPLICATE(dup, dup, 2, D, , X, vRegD, , D)
15151517
dnl
15161518

15171519
// ====================REDUCTION ARITHMETIC====================================
@@ -1884,8 +1886,8 @@ VLOGICAL(xor, eor, xor, Xor, 16, B, X)
18841886
dnl
18851887
define(`VSHIFTCNT', `
18861888
instruct vshiftcnt$3$4`'(vec$5 dst, iRegIorL2I cnt) %{
1887-
predicate(ifelse($3, 8, n->as_Vector()->length_in_bytes() == 4 ||`
1888-
')n->as_Vector()->length_in_bytes() == $3);
1889+
predicate(UseSVE == 0 && (ifelse($3, 8, n->as_Vector()->length_in_bytes() == 4 ||`
1890+
')n->as_Vector()->length_in_bytes() == $3));
18891891
match(Set dst (LShiftCntV cnt));
18901892
match(Set dst (RShiftCntV cnt));
18911893
format %{ "$1 $dst, $cnt\t# shift count vector ($3$4)" %}

0 commit comments

Comments
 (0)