Skip to content

Feat: get mmtk-openjdk running on Armv8 architecture #249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Oct 16, 2023
13 changes: 9 additions & 4 deletions openjdk/CompileThirdPartyHeap.gmk
Original file line number Diff line number Diff line change
Expand Up @@ -74,20 +74,25 @@ CARGO_TARGET_FLAG =

# When cross compiling the target JDK (but not the build JDK), build the MMTk
# Rust library with `cross` and pass an explicit --target triple.
ifeq ($(COMPILE_TYPE), cross)
  ifneq ($(CREATING_BUILDJDK), true)
    # Set the CARGO_TARGET based on the architecture.
    # The triple must NOT be reassigned after this selection, otherwise the
    # architecture-specific choice (e.g. aarch64) is silently overridden.
    ifeq ($(HOTSPOT_TARGET_CPU_ARCH), riscv64)
      CARGO_TARGET = riscv64gc-unknown-linux-gnu
    endif
    ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
      CARGO_TARGET = aarch64-unknown-linux-gnu
    endif
    CARGO_EXECUTABLE = cross
    CARGO_TARGET_FLAG = --target $(CARGO_TARGET)
  endif
endif

$(LIB_MMTK): FORCE
if [[ "$(OPENJDK_VERSION)" != "$(OPENJDK_LOCAL_VERSION)" ]]; then \
echo -e $(YELLOW)WARNING: Local OpenJDK version does not match version specified in mmtk/Cargo.toml$(NC); \
echo -e $(YELLOW)Local OpenJDK version $(OPENJDK_LOCAL_VERSION)$(NC); \
echo -e $(YELLOW)mmtk/Cargo.toml OpenJDK version $(OPENJDK_VERSION)$(NC); \
fi
if [[ "$(OPENJDK_TARGET_CPU)" != "riscv64" ]] && [[ $(CARGO_EXECUTABLE) == "cross" ]]; then \
echo -e "Only cross compiling to riscv64 is supported"; \
if [[ "$(OPENJDK_TARGET_CPU)" != "riscv64" ]] && [[ "$(OPENJDK_TARGET_CPU)" != "aarch64" ]] && [[ $(CARGO_EXECUTABLE) == "cross" ]]; then \
echo -e "Only cross compiling to riscv64/aarch64 are supported"; \
exit 1; \
fi
echo "cd $(MMTK_RUST_ROOT) && $(CARGO_EXECUTABLE) build $(CARGO_TARGET_FLAG) $(CARGO_PROFILE_FLAG) $(GC_FEATURES)"
Expand Down
197 changes: 197 additions & 0 deletions openjdk/cpu/aarch64/mmtkBarrierSetAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "interpreter/interp_masm.hpp"
#include "mmtkBarrierSet.hpp"
#include "mmtkBarrierSetAssembler_aarch64.hpp"
#include "mmtkBarrierSetC1.hpp"
#include "mmtkMutator.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"

#define __ masm->

/// Emit the inline allocation fast path for MMTk's default allocator.
///
/// The emitted code loads the thread-local allocation cursor into `obj`,
/// computes the new cursor, and stores it back when it does not exceed the
/// thread-local limit. In every other situation it branches to `slow_case`.
///
/// @param masm               assembler to emit into
/// @param obj                result register: receives the old cursor, i.e. the start of the new object
/// @param var_size_in_bytes  register holding the object size, or noreg when the size is the constant below
/// @param con_size_in_bytes  compile-time object size, used only when var_size_in_bytes == noreg
/// @param tmp1               scratch register; receives the allocator limit
/// @param tmp2               scratch register; receives the new cursor. Must be valid and differ from obj
/// @param slow_case          label taken whenever the fast path cannot satisfy the request
void MMTkBarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register tmp1, Register tmp2, Label& slow_case) {
  // NOTE(review): an earlier note here said "tmp1 seems to be -1", yet tmp1 is
  // written below as the limit register and is not asserted valid; confirm
  // that every caller passes a usable tmp1.
  assert_different_registers(obj, tmp2);
  assert_different_registers(obj, var_size_in_bytes);
  assert(tmp2->is_valid(), "need temp reg");

  if (!MMTK_ENABLE_ALLOCATION_FASTPATH) {
    __ b(slow_case);
  } else {
    // MMTk size check. If the alloc size is larger than the allowed max size for non-LOS,
    // we jump to the slow path and allocate with LOS there.
    // Note that OpenJDK has a slow path check. Search for layout_helper_needs_slow_path and FastAllocateSizeLimit.
    // I tried to set FastAllocateSizeLimit in MMTkHeap::initialize(). But there are still large objects allocated into the
    // default space.
    assert(MMTkMutatorContext::max_non_los_default_alloc_bytes != 0, "max_non_los_default_alloc_bytes hasn't been initialized");
    size_t max_non_los_bytes = MMTkMutatorContext::max_non_los_default_alloc_bytes;
    size_t extra_header = 0;
    // fastpath, we only use default allocator
    Allocator allocator = AllocatorDefault;
    // We need to figure out which allocator we are using by querying MMTk.
    AllocatorSelector selector = get_allocator_mapping(allocator);

    // Mark compact (and the global alloc bit, see the end of this function) are not supported for now.
    assert(selector.tag != TAG_MARK_COMPACT, "mark compact not supported for now");

    if (var_size_in_bytes == noreg) {
      // Constant alloc size. If it is larger than max_non_los_bytes, we directly go to slowpath.
      if ((size_t)con_size_in_bytes > max_non_los_bytes - extra_header) {
        __ b(slow_case);
        return;
      }
    } else {
      // Variable alloc size. Compare with max_non_los_bytes and conditionally jump to slowpath.
      // Sizes are unsigned quantities, so use the unsigned condition (LO) to
      // agree with the size_t comparison used for the constant case above.
      __ movi(rscratch1, max_non_los_bytes - extra_header);
      __ cmp(rscratch1, var_size_in_bytes);
      __ br(Assembler::LO, slow_case);
    }

    if (selector.tag == TAG_MALLOC || selector.tag == TAG_LARGE_OBJECT) {
      __ b(slow_case);
      return;
    }

    // Calculate offsets of TLAB top and end
    Address cursor, limit;
    MMTkAllocatorOffsets alloc_offsets = get_tlab_top_and_end_offsets(selector);

    cursor = Address(rthread, alloc_offsets.tlab_top_offset);
    limit = Address(rthread, alloc_offsets.tlab_end_offset);

    // Reference disassembly of the intended sequence (RISC-V mnemonics; note
    // that both bounds checks are unsigned):
    //   ld   a0,688(s7)
    //   add  a1,a0,a3
    //   bltu a1,a0,<slow>
    //   ld   t0,696(s7)
    //   bltu t0,a1,<slow>
    //   sd   a1,688(s7)

    __ ldr(obj, cursor);
    Register end = tmp2;
    if (var_size_in_bytes == noreg) {
      // end = obj + con_size_in_bytes. This must be lea (base + offset
      // arithmetic); adr is PC-relative and ignores the base register.
      __ lea(end, Address(obj, con_size_in_bytes));
    } else {
      __ add(end, obj, var_size_in_bytes);
    }
    // slowpath if end < obj (the addition wrapped around); addresses compare unsigned
    __ cmp(end, obj);
    __ br(Assembler::LO, slow_case);
    // slowpath if end > lab.limit; addresses compare unsigned
    __ ldr(tmp1, limit);
    __ cmp(end, tmp1);
    __ br(Assembler::HI, slow_case);

    // lab.cursor = end
    __ str(end, cursor);

    // Recover var_size_in_bytes if necessary.
    // If this is removed and the register holding the object size is
    // clobbered, operations that rely on the size, such as array copy, will
    // crash.
    if (var_size_in_bytes == end) {
      __ sub(var_size_in_bytes, var_size_in_bytes, obj);
    }

#ifdef MMTK_ENABLE_GLOBAL_ALLOC_BIT
    assert(false, "global alloc bit not supported");
#endif
  }
}

#undef __

#define __ sasm->

// Emit the shared C1 runtime stub "mmtk_write_barrier".
// The stub loads three parameters (src, slot, new_val) from parameter slots
// 0..2, saves the call-clobbered registers, calls the MMTk write-barrier
// runtime entry as a leaf call, restores the registers and returns through
// the stub epilogue.
void MMTkBarrierSetAssembler::generate_c1_write_barrier_runtime_stub(StubAssembler* sasm) const {
// printf("xxx MMTkBarrierSetAssembler::generate_c1_write_barrier_runtime_stub\n");
// See also void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm)
__ prologue("mmtk_write_barrier", false);

// Both labels are bound below, but nothing in this function branches to them.
Label done, runtime;

// Parameters are fetched with C1_MacroAssembler::load_parameter(int offset_in_words, Register reg).
// lr is used as the third temporary.
// NOTE(review): the original comment states the return-address register is
// free here because the stub prologue/epilogue handle it - confirm for aarch64.
// Zheyuan: the code works with rscratch2 and rscratch1 assigned in this
// (swapped) order; the reason is not known.
const Register src = rscratch2;
const Register slot = rscratch1;
const Register new_val = lr;
__ load_parameter(0, src);
__ load_parameter(1, slot);
__ load_parameter(2, new_val);

__ bind(runtime);

// Save the call-clobbered registers around the leaf call; the exact register
// set is whatever push_call_clobbered_registers() defines for this platform.
__ push_call_clobbered_registers();

// With the barrier fast path enabled the stub calls the slow-call entry;
// otherwise it calls the post-call entry.
#if MMTK_ENABLE_BARRIER_FASTPATH
__ call_VM_leaf(FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_slow_call), src, slot, new_val);
#else
__ call_VM_leaf(FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_post_call), src, slot, new_val);
#endif

__ pop_call_clobbered_registers();

__ bind(done);

__ epilogue();
}

#undef __

#define __ ce->masm()->

/// Emit the out-of-line C1 code for one write-barrier stub: bind the stub
/// entry, pass src/slot/new_val in parameter slots 0..2, far-call the shared
/// write-barrier runtime code blob, then branch back to the continuation.
///
/// @param ce    LIR assembler whose MacroAssembler receives the code
/// @param stub  the barrier stub providing entry/continuation labels and operands
void MMTkBarrierSetAssembler::generate_c1_write_barrier_stub_call(LIR_Assembler* ce, MMTkC1BarrierStub* stub) {
  // See also void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub)
  __ bind(*stub->entry());

  // All three operands are required to be in registers.
  assert(stub->src->is_register(), "Precondition");
  assert(stub->slot->is_register(), "Precondition");
  assert(stub->new_val->is_register(), "Precondition");

  // Slot numbering matches the load_parameter(0..2) calls in the runtime stub.
  ce->store_parameter(stub->src->as_pointer_register(), 0);
  ce->store_parameter(stub->slot->as_pointer_register(), 1);
  ce->store_parameter(stub->new_val->as_pointer_register(), 2);

  // The runtime stub's code blob is owned by the C1 barrier set.
  MMTkBarrierSetC1* c1_barrier_set = (MMTkBarrierSetC1*) BarrierSet::barrier_set()->barrier_set_c1();
  __ far_call(RuntimeAddress(c1_barrier_set->_write_barrier_c1_runtime_code_blob->code_begin()));

  __ b(*stub->continuation());
}

#undef __
50 changes: 50 additions & 0 deletions openjdk/cpu/aarch64/mmtkBarrierSetAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#ifndef MMTK_OPENJDK_MMTK_BARRIER_SET_ASSEMBLER_AARCH64_HPP
#define MMTK_OPENJDK_MMTK_BARRIER_SET_ASSEMBLER_AARCH64_HPP

#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"

class MMTkBarrierSetC1;
class MMTkC1BarrierStub;
class LIR_Assembler;
class StubAssembler;

/// AArch64 barrier-set assembler base class for MMTk.
///
/// Provides empty pre/post reference-write barrier hooks that subclasses may
/// override, wraps BarrierSetAssembler::store_at with those hooks for
/// reference stores, and declares the allocation fast path and the C1
/// write-barrier stub generators.
class MMTkBarrierSetAssembler: public BarrierSetAssembler {
  friend class MMTkBarrierSetC1;

protected:
  /// Full pre-barrier. Emits nothing by default.
  virtual void object_reference_write_pre(MacroAssembler* masm, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2) const {}
  /// Full post-barrier. Emits nothing by default.
  virtual void object_reference_write_post(MacroAssembler* masm, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2) const {}

  /// Barrier elision test.
  ///
  /// @param decorators       access decorators of the store
  /// @param val              register holding the stored value, or noreg
  /// @param skip_const_null  when true, a store with val == noreg needs no barrier
  /// @return true when the store is not IN_HEAP, or when skip_const_null is
  ///         set and val is noreg
  virtual bool can_remove_barrier(DecoratorSet decorators, Register val, bool skip_const_null) const {
    bool in_heap = (decorators & IN_HEAP) != 0;
    assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported");
    return !in_heap || (skip_const_null && val == noreg);
  }

  /// Generate C1 write barrier slow-call assembly code
  virtual void generate_c1_write_barrier_runtime_stub(StubAssembler* sasm) const;

public:
  /// Emit the allocation fast path; branches to slow_case when it cannot allocate.
  virtual void eden_allocate(MacroAssembler* masm,
                             Register obj,               // result: pointer to object after successful allocation
                             Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
                             int con_size_in_bytes,      // object size in bytes if known at compile time
                             Register tmp1,              // temp register
                             Register tmp2,              // temp register
                             Label& slow_case            // continuation point if fast allocation fails
                             );

  /// Emit a store; for T_OBJECT / T_ARRAY stores the pre- and post-barrier
  /// hooks are emitted around the underlying BarrierSetAssembler store.
  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2, Register tmp3) {
    if (type == T_OBJECT || type == T_ARRAY) object_reference_write_pre(masm, decorators, dst, val, tmp1, tmp2);
    BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3);
    if (type == T_OBJECT || type == T_ARRAY) object_reference_write_post(masm, decorators, dst, val, tmp1, tmp2);
  }

  /// Generate C1 write barrier slow-call stub
  static void generate_c1_write_barrier_stub_call(LIR_Assembler* ce, MMTkC1BarrierStub* stub);
};
#endif // MMTK_OPENJDK_MMTK_BARRIER_SET_ASSEMBLER_AARCH64_HPP
5 changes: 5 additions & 0 deletions openjdk/cpu/aarch64/mmtkNoBarrierSetAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#ifndef MMTK_OPENJDK_MMTK_NO_BARRIER_SET_ASSEMBLER_AARCH64_HPP
#define MMTK_OPENJDK_MMTK_NO_BARRIER_SET_ASSEMBLER_AARCH64_HPP

/// Barrier-set assembler that adds no overrides of its own: all behaviour is
/// inherited unchanged from MMTkBarrierSetAssembler.
class MMTkNoBarrierSetAssembler : public MMTkBarrierSetAssembler {
};
#endif // MMTK_OPENJDK_MMTK_NO_BARRIER_SET_ASSEMBLER_AARCH64_HPP
69 changes: 69 additions & 0 deletions openjdk/cpu/aarch64/mmtkObjectBarrierSetAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#include "precompiled.hpp"
#include "mmtkObjectBarrier.hpp"
#include "runtime/interfaceSupport.inline.hpp"

#define __ masm->

/// Emit the post-write barrier for a reference store to `dst`.
///
/// Emits nothing when can_remove_barrier() allows elision. Otherwise, with
/// MMTK_ENABLE_BARRIER_FASTPATH, it emits an inline test of one side-metadata
/// bit for the holder object and calls object_reference_write_slow_call only
/// when that bit is set; without the fast path it always calls
/// object_reference_write_post_call. The runtime entry receives
/// (holder object, slot address, stored value or zero).
///
/// @param masm        assembler to emit into
/// @param decorators  access decorators of the store
/// @param dst         address of the slot being written; its base is the holder object
/// @param val         register holding the stored value, or noreg
/// @param tmp1,tmp2   scratch registers (supplied by MacroAssembler::access_store_at)
void MMTkObjectBarrierSetAssembler::object_reference_write_post(MacroAssembler* masm, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2) const {
  if (can_remove_barrier(decorators, val, /* skip_const_null */ true)) {
    return;
  }
  Register holder = dst.base();

#if MMTK_ENABLE_BARRIER_FASTPATH
  Label skip_barrier;

  assert_different_registers(holder, tmp1, tmp2);
  assert_different_registers(val, tmp1, tmp2);
  assert(tmp1->is_valid(), "need temp reg");
  assert(tmp2->is_valid(), "need temp reg");

  // Fetch the metadata byte: tmp1 = load-byte(SIDE_METADATA_BASE_ADDRESS + (holder >> 6))
  __ mov(tmp1, holder);
  __ lsr(tmp1, tmp1, 6);
  __ mov(tmp2, SIDE_METADATA_BASE_ADDRESS);
  __ add(tmp1, tmp1, tmp2);
  __ ldrb(tmp1, Address(tmp1, 0));

  // Bit position within that byte: tmp2 = (holder >> 3) & 7
  __ mov(tmp2, holder);
  __ lsr(tmp2, tmp2, 3);
  __ andr(tmp2, tmp2, 7);

  // Isolate the bit: tmp1 = (tmp1 >> tmp2) & 1
  __ lsrv(tmp1, tmp1, tmp2);
  __ andr(tmp1, tmp1, 1);

  // Bit clear => no barrier work; bit set => fall through to the runtime call.
  __ cbz(tmp1, skip_barrier);
#endif

  // Set up the C calling convention for the runtime entry.
  // NOTE(review): c_rarg0 is written before dst and val are read; confirm no
  // caller passes val (or a dst component) in c_rarg0/c_rarg1.
  __ mov(c_rarg0, holder);
  __ lea(c_rarg1, dst);
  __ mov(c_rarg2, val == noreg ? zr : val);

#if MMTK_ENABLE_BARRIER_FASTPATH
  __ call_VM_leaf(FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_slow_call), 3);
  __ bind(skip_barrier);
#else
  __ call_VM_leaf(FN_ADDR(MMTkBarrierSetRuntime::object_reference_write_post_call), 3);
#endif
}

/// Emit the barrier that runs after an array copy.
///
/// For oop arrays, saves `saved_regs`, loads dst and count into c_rarg1 and
/// c_rarg2, calls object_reference_array_copy_post_call as a leaf call and
/// restores `saved_regs`. Emits nothing for non-oop copies.
/// IS_DEST_UNINITIALIZED is not consulted (a variant that skipped
/// uninitialized destinations is disabled in the original code).
///
/// NOTE(review): the leaf call is declared with 3 arguments but c_rarg0 is
/// never loaded from `src` here - confirm the runtime entry does not depend
/// on its first argument, or that src is already in c_rarg0.
void MMTkObjectBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                                       Register src, Register dst, Register count, Register tmp, RegSet saved_regs) {
  // see also void G1BarrierSetAssembler::gen_write_ref_array_post_barrier
  assert_different_registers(src, dst, count);

  if (!is_oop) {
    return;
  }

  __ push(saved_regs, sp);
  __ mov(c_rarg1, dst);
  __ mov(c_rarg2, count);
  __ call_VM_leaf(FN_ADDR(MMTkBarrierSetRuntime::object_reference_array_copy_post_call), 3);
  __ pop(saved_regs, sp);
}


#undef __
11 changes: 11 additions & 0 deletions openjdk/cpu/aarch64/mmtkObjectBarrierSetAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef MMTK_OPENJDK_MMTK_OBJECT_BARRIER_SET_ASSEMBLER_AARCH64_HPP
#define MMTK_OPENJDK_MMTK_OBJECT_BARRIER_SET_ASSEMBLER_AARCH64_HPP

class MMTkObjectBarrierSetAssembler: public MMTkBarrierSetAssembler {
protected:
virtual void object_reference_write_post(MacroAssembler* masm, DecoratorSet decorators, Address dst, Register val, Register tmp1, Register tmp2) const override;
public:
virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
Register src, Register dst, Register count, Register tmp, RegSet saved_regs) override;
};
#endif // MMTK_OPENJDK_MMTK_OBJECT_BARRIER_SET_ASSEMBLER_AARCH64_HPP