Skip to content

Commit d10afa2

Browse files
Fei Gao and Andrew Haley committed
8336245: AArch64: remove extra register copy when converting from long to pointer
Co-authored-by: Andrew Haley <aph@openjdk.org>
Reviewed-by: aph, adinn
1 parent 7f11935 commit d10afa2

File tree

5 files changed

+317
-23
lines changed

5 files changed

+317
-23
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2745,10 +2745,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
27452745
}
27462746

27472747
if (index == -1) {
2748-
/* If we get an out-of-range offset it is a bug in the compiler,
2749-
so we assert here. */
2750-
assert(Address::offset_ok_for_immed(disp, exact_log2(size_in_memory)), "c2 compiler bug");
2751-
/* Fix up any out-of-range offsets. */
2748+
// Fix up any out-of-range offsets.
27522749
assert_different_registers(rscratch1, base);
27532750
Address addr = Address(base, disp);
27542751
addr = __ legitimize_address(addr, size_in_memory, rscratch1);
@@ -3348,7 +3345,11 @@ encode %{
33483345
int scale = $mem$$scale;
33493346
int disp = $mem$$disp;
33503347
if (index == -1) {
3351-
__ prfm(Address(base, disp), PSTL1KEEP);
3348+
// Fix up any out-of-range offsets.
3349+
assert_different_registers(rscratch1, base);
3350+
Address addr = Address(base, disp);
3351+
addr = __ legitimize_address(addr, 8, rscratch1);
3352+
__ prfm(addr, PSTL1KEEP);
33523353
} else {
33533354
Register index_reg = as_Register(index);
33543355
if (disp == 0) {
@@ -4229,9 +4230,9 @@ operand immIOffset16()
42294230
interface(CONST_INTER);
42304231
%}
42314232

4232-
operand immLoffset()
4233+
operand immLOffset()
42334234
%{
4234-
predicate(Address::offset_ok_for_immed(n->get_long(), 0));
4235+
predicate(n->get_long() >= -256 && n->get_long() <= 65520);
42354236
match(ConL);
42364237

42374238
op_cost(0);
@@ -5341,6 +5342,34 @@ operand indOffL16(iRegP reg, immLoffset16 off)
53415342
%}
53425343
%}
53435344

5345+
operand indirectX2P(iRegL reg)
5346+
%{
5347+
constraint(ALLOC_IN_RC(ptr_reg));
5348+
match(CastX2P reg);
5349+
op_cost(0);
5350+
format %{ "[$reg]\t# long -> ptr" %}
5351+
interface(MEMORY_INTER) %{
5352+
base($reg);
5353+
index(0xffffffff);
5354+
scale(0x0);
5355+
disp(0x0);
5356+
%}
5357+
%}
5358+
5359+
operand indOffX2P(iRegL reg, immLOffset off)
5360+
%{
5361+
constraint(ALLOC_IN_RC(ptr_reg));
5362+
match(AddP (CastX2P reg) off);
5363+
op_cost(0);
5364+
format %{ "[$reg, $off]\t# long -> ptr" %}
5365+
interface(MEMORY_INTER) %{
5366+
base($reg);
5367+
index(0xffffffff);
5368+
scale(0x0);
5369+
disp($off);
5370+
%}
5371+
%}
5372+
53445373
operand indirectN(iRegN reg)
53455374
%{
53465375
predicate(CompressedOops::shift() == 0);
@@ -5431,7 +5460,7 @@ operand indOffIN(iRegN reg, immIOffset off)
54315460
%}
54325461
%}
54335462

5434-
operand indOffLN(iRegN reg, immLoffset off)
5463+
operand indOffLN(iRegN reg, immLOffset off)
54355464
%{
54365465
predicate(CompressedOops::shift() == 0);
54375466
constraint(ALLOC_IN_RC(ptr_reg));
@@ -5664,6 +5693,17 @@ operand iRegL2I(iRegL reg) %{
56645693
interface(REG_INTER)
56655694
%}
56665695

5696+
operand iRegL2P(iRegL reg) %{
5697+
5698+
op_cost(0);
5699+
5700+
match(CastX2P reg);
5701+
5702+
format %{ "l2p($reg)" %}
5703+
5704+
interface(REG_INTER)
5705+
%}
5706+
56675707
opclass vmem2(indirect, indIndex, indOffI2, indOffL2);
56685708
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
56695709
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
@@ -5680,21 +5720,21 @@ opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
56805720
// instruction defs. we can turn a memory op into an Address
56815721

56825722
opclass memory1(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1,
5683-
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
5723+
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indirectX2P, indOffX2P);
56845724

56855725
opclass memory2(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI2, indOffL2,
5686-
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
5726+
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indirectX2P, indOffX2P);
56875727

56885728
opclass memory4(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI4, indOffL4,
5689-
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
5729+
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P);
56905730

56915731
opclass memory8(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI8, indOffL8,
5692-
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
5732+
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P);
56935733

56945734
// All of the memory operands. For the pipeline description.
56955735
opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex,
56965736
indOffI1, indOffL1, indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8,
5697-
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
5737+
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P);
56985738

56995739

57005740
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
@@ -5711,6 +5751,7 @@ opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indInde
57115751
// movw is actually redundant but its not too costly.
57125752

57135753
opclass iRegIorL2I(iRegI, iRegL2I);
5754+
opclass iRegPorL2P(iRegP, iRegL2P);
57145755

57155756
//----------PIPELINE-----------------------------------------------------------
57165757
// Rules which define the behavior of the target architectures pipeline.
@@ -9811,7 +9852,7 @@ instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
98119852
%}
98129853

98139854
// Pointer Addition
9814-
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
9855+
instruct addP_reg_reg(iRegPNoSp dst, iRegPorL2P src1, iRegL src2) %{
98159856
match(Set dst (AddP src1 src2));
98169857

98179858
ins_cost(INSN_COST);
@@ -9826,7 +9867,7 @@ instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
98269867
ins_pipe(ialu_reg_reg);
98279868
%}
98289869

9829-
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
9870+
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegPorL2P src1, iRegIorL2I src2) %{
98309871
match(Set dst (AddP src1 (ConvI2L src2)));
98319872

98329873
ins_cost(1.9 * INSN_COST);
@@ -9841,7 +9882,7 @@ instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
98419882
ins_pipe(ialu_reg_reg);
98429883
%}
98439884

9844-
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
9885+
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegPorL2P src1, iRegL src2, immIScale scale) %{
98459886
match(Set dst (AddP src1 (LShiftL src2 scale)));
98469887

98479888
ins_cost(1.9 * INSN_COST);
@@ -9856,7 +9897,7 @@ instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale
98569897
ins_pipe(ialu_reg_reg_shift);
98579898
%}
98589899

9859-
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
9900+
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegPorL2P src1, iRegIorL2I src2, immIScale scale) %{
98609901
match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));
98619902

98629903
ins_cost(1.9 * INSN_COST);
@@ -9889,7 +9930,7 @@ instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
98899930
// Pointer Immediate Addition
98909931
// n.b. this needs to be more expensive than using an indirect memory
98919932
// operand
9892-
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
9933+
instruct addP_reg_imm(iRegPNoSp dst, iRegPorL2P src1, immLAddSub src2) %{
98939934
match(Set dst (AddP src1 src2));
98949935

98959936
ins_cost(INSN_COST);

src/hotspot/cpu/aarch64/gc/x/x_aarch64.ad

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//
2-
// Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
2+
// Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
33
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
//
55
// This code is free software; you can redistribute it and/or modify it
@@ -62,7 +62,13 @@ instruct xLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
6262
format %{ "ldr $dst, $mem" %}
6363

6464
ins_encode %{
65-
const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
65+
Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
66+
if (ref_addr.getMode() == Address::base_plus_offset) {
67+
// Fix up any out-of-range offsets.
68+
assert_different_registers(rscratch1, as_Register($mem$$base));
69+
assert_different_registers(rscratch1, $dst$$Register);
70+
ref_addr = __ legitimize_address(ref_addr, 8, rscratch1);
71+
}
6672
__ ldr($dst$$Register, ref_addr);
6773
x_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, barrier_data());
6874
%}

src/hotspot/cpu/aarch64/gc/z/z_aarch64.ad

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//
2-
// Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
2+
// Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
33
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
//
55
// This code is free software; you can redistribute it and/or modify it
@@ -111,7 +111,13 @@ instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
111111
format %{ "ldr $dst, $mem" %}
112112

113113
ins_encode %{
114-
const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
114+
Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
115+
if (ref_addr.getMode() == Address::base_plus_offset) {
116+
// Fix up any out-of-range offsets.
117+
assert_different_registers(rscratch2, as_Register($mem$$base));
118+
assert_different_registers(rscratch2, $dst$$Register);
119+
ref_addr = __ legitimize_address(ref_addr, 8, rscratch2);
120+
}
115121
__ ldr($dst$$Register, ref_addr);
116122
z_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch1);
117123
%}

src/hotspot/share/opto/machnode.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,14 @@ const class TypePtr *MachNode::adr_type() const {
395395
// 32-bit unscaled narrow oop can be the base of any address expression
396396
t = t->make_ptr();
397397
}
398-
if (t->isa_intptr_t() && offset != 0 && offset != Type::OffsetBot) {
398+
399+
if (t->isa_intptr_t() &&
400+
#if !defined(AARCH64)
401+
// AArch64 supports the addressing mode:
402+
// [base, 0], in which [base] is converted from a long value
403+
offset != 0 &&
404+
#endif
405+
offset != Type::OffsetBot) {
399406
// We cannot assert that the offset does not look oop-ish here.
400407
// Depending on the heap layout the cardmark base could land
401408
// inside some oopish region. It definitely does for Win2K.

0 commit comments

Comments
 (0)