Skip to content

[LoongArch64] Part-2:Add runtime assembly code (*.S) files in nativeaot. #104084

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
273 changes: 273 additions & 0 deletions src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

#include <unixasmmacros.inc>
#include "AsmOffsets.inc"

// GC type flags
// Allocation flag value. RhpNewFinalizable loads it into a1 before branching to RhpNewObject,
// which forwards a1 to RhpGcAlloc as its uFlags argument.
GC_ALLOC_FINALIZE = 1

//
// Rename fields of nested structs
//
// Each symbol below is the offset of Thread::m_rgbAllocContextBuffer plus the offset of one
// gc_alloc_context field, so the allocation helpers can address alloc_ptr / alloc_limit
// directly relative to a Thread pointer.
OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr
OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit



// RhpNewFast: bump-pointer allocation of a non-array object without a finalizer.
//
//  In:   $a0 = MethodTable of the object to allocate
//  Out:  $a0 = address of the new object (fast path)
//  Uses: $a1 (Thread), $a2 (size, then end address), $t3 (alloc_ptr), $t4 (alloc_limit)
//
// When the object does not fit between alloc_ptr and alloc_limit of the current thread's
// allocation context, control is transferred to RhpNewObject with a1 (alloc flags) = 0.
    LEAF_ENTRY RhpNewFast, _TEXT

        // $a1 <- current Thread
#ifdef FEATURE_EMULATED_TLS
        GETTHREAD_ETLS_1
#else
        INLINE_GETTHREAD $a1
#endif

        // Snapshot the thread's allocation window:
        //   $t3 = alloc_ptr   (candidate address of the new object)
        //   $t4 = alloc_limit (end of the window)
        ld.d        $t3, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr
        ld.d        $t4, $a1, OFFSETOF__Thread__m_alloc_context__alloc_limit

        // $a2 = base size from the MethodTable, then $a2 = candidate address + base size,
        // i.e. the address one past the end of the would-be object.
        ld.w        $a2, $a0, OFFSETOF__MethodTable__m_uBaseSize
        add.d       $a2, $t3, $a2

        // If the end address is beyond alloc_limit, give up on the fast path.
        bltu        $t4, $a2, RhpNewFast_RarePath

        // Commit: advance alloc_ptr past the object and write the MethodTable pointer
        // into the object's header slot.
        st.d        $a2, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr
        st.d        $a0, $t3, OFFSETOF__Object__m_pEEType

        // Hand the object back in $a0.
        or          $a0, $t3, $zero
        jirl        $zero, $ra, 0

RhpNewFast_RarePath:
        // Slow path: a0 still holds the MethodTable; pass alloc flags = 0 in a1.
        or          $a1, $zero, $zero
        b           RhpNewObject
    LEAF_END RhpNewFast, _TEXT

// Allocate non-array object with finalizer.
// a0 == MethodTable
//
// This helper has no fast path: it sets the alloc flags argument (a1) to GC_ALLOC_FINALIZE and
// branches (without linking, so $ra still points at the original caller) to RhpNewObject, which
// takes a0 == MethodTable and a1 == alloc flags.
LEAF_ENTRY RhpNewFinalizable, _TEXT
ori $a1, $zero, GC_ALLOC_FINALIZE
b RhpNewObject
LEAF_END RhpNewFinalizable, _TEXT

// Allocate non-array object.
// a0 == MethodTable
// a1 == alloc flags
//
// Slow-path allocator: builds a transition frame, calls RhpGcAlloc(a0, a1, 0, frame) and returns
// its result in a0. If RhpGcAlloc returns null, control is transferred to
// RhExceptionHandling_FailedAllocation with a0 == MethodTable and a1 == 0.
NESTED_ENTRY RhpNewObject, _TEXT, NoHandler

PUSH_COOP_PINVOKE_FRAME $a3

// a3: transition frame

// Preserve the MethodTable in s0
// (a0 is overwritten by the return value of the call below; the OOM path needs the MethodTable back.)
ori $s0, $a0, 0

ori $a2, $zero, 0 // numElements

// Call the rest of the allocation helper.
// void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
bl C_FUNC(RhpGcAlloc)

// A null result from RhpGcAlloc means the allocation failed: take the OOM path.
// Otherwise a0 already holds the value to return.
beq $a0, $zero, NewOutOfMemory

// Remember the CFI state while the frame is still set up so it can be reinstated for the
// OOM path below, which is laid out after this epilogue but still runs with the frame in place.
.cfi_remember_state
POP_COOP_PINVOKE_FRAME
EPILOG_RETURN

.cfi_restore_state
NewOutOfMemory:
// This is the OOM failure path. We are going to tail-call to a managed helper that will throw
// an out of memory exception that the caller of this allocator understands.

// The arguments are set up before the frame is popped, while s0 still holds the MethodTable
// saved above.
ori $a0, $s0, 0 // MethodTable pointer
ori $a1, $zero, 0 // Indicate that we should throw OOM.

POP_COOP_PINVOKE_FRAME
b C_FUNC(RhExceptionHandling_FailedAllocation)

NESTED_END RhpNewObject, _TEXT

// RhNewString: allocate a string.
//
//  In:   $a0 = MethodTable
//        $a1 = element/character count
//  Out:  $a0 = address of the new string object (fast path)
//  Uses: $a2 (size, then end address), $a3 (Thread), $t3 (alloc_ptr), $t4 (alloc_limit)
//
// Exits other than the fast-path return:
//  - count > MAX_STRING_LENGTH: tail-branch to RhExceptionHandling_FailedAllocation with a1 = 1
//  - object does not fit in the thread's allocation context: tail-branch to RhpNewArrayRare with
//    a0/a1 unchanged, a2 = size + alloc_ptr and a3 = Thread
    LEAF_ENTRY RhNewString, _TEXT
        // Make sure computing the overall allocation size won't overflow:
        // a2 = MAX_STRING_LENGTH, built from its upper 20 and lower 12 bits.
        lu12i.w     $a2, ((MAX_STRING_LENGTH >> 12) & 0xFFFFF)
        ori         $a2, $a2, (MAX_STRING_LENGTH & 0xFFF)
        bltu        $a2, $a1, StringSizeOverflow    // unsigned 64-bit compare: also rejects negative counts

        // Compute overall allocation size (align(base size + (element size * elements), 8)).
        ori         $a2, $zero, STRING_COMPONENT_SIZE
        mulw.d.w    $a2, $a1, $a2                   // a2 = (a1[31:0] * a2[31:0])[63:0]
        addi.d      $a2, $a2, STRING_BASE_SIZE + 7  // a2 = a2 + STRING_BASE_SIZE + 7
        bstrins.d   $a2, $r0, 2, 0                  // clear bits [2:0] of a2 (round down to a multiple of 8)

        // a0 == MethodTable
        // a1 == element count
        // a2 == string size

        // a3 = GetThread()
#ifdef FEATURE_EMULATED_TLS
        GETTHREAD_ETLS_3
#else
        INLINE_GETTHREAD $a3
#endif

        // Load potential new object address into t3.
        ld.d        $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr

        // Determine whether the end of the object would lie outside of the current allocation context. If so,
        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
        // The limit is kept in t4 (as RhpNewFast does) so that t3 keeps the object address and no
        // second load of alloc_ptr is needed.
        add.d       $a2, $a2, $t3
        ld.d        $t4, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit
        bltu        $t4, $a2, RhNewString_Rare

        // Update the alloc pointer to account for the allocation.
        st.d        $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr

        // Set the new object's MethodTable pointer and element count.
        st.d        $a0, $t3, OFFSETOF__Object__m_pEEType
        st.d        $a1, $t3, OFFSETOF__Array__m_Length

        // Return the object allocated in a0.
        ori         $a0, $t3, 0

        jirl        $r0, $ra, 0

StringSizeOverflow:
        // We get here if the requested character count exceeds MAX_STRING_LENGTH. We are going to
        // tail-call to a managed helper that will throw an overflow exception that the caller of
        // this allocator understands.

        // a0 holds MethodTable pointer already
        ori         $a1, $zero, 1                   // Indicate that we should throw OverflowException
        b           C_FUNC(RhExceptionHandling_FailedAllocation)

RhNewString_Rare:
        // a0 == MethodTable, a1 == element count, a2 == size + alloc_ptr, a3 == Thread
        b           C_FUNC(RhpNewArrayRare)
    LEAF_END RhNewString, _TEXT

// RhpNewArray: allocate one dimensional, zero based array (SZARRAY).
//
//  In:   $a0 = MethodTable
//        $a1 = element count
//  Out:  $a0 = address of the new array object (fast path)
//  Uses: $a2 (size, then end address), $a3 (base size, then Thread), $t3 (alloc_ptr), $t4 (alloc_limit)
//
// Exits other than the fast-path return:
//  - count > 0x7FFFFFFF: tail-branch to RhExceptionHandling_FailedAllocation with a1 = 1
//  - object does not fit in the thread's allocation context: tail-branch to RhpNewArrayRare with
//    a0/a1 unchanged, a2 = size + alloc_ptr and a3 = Thread
    LEAF_ENTRY RhpNewArray, _TEXT

        // We want to limit the element count to the non-negative 32-bit int range.
        // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component
        // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst
        // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits.
        lu12i.w     $a2, 0x7ffff
        ori         $a2, $a2, 0xfff                 // a2 = 0x7FFFFFFF
        bltu        $a2, $a1, ArraySizeOverflow     // unsigned 64-bit compare: also rejects negative counts

        // The component size is an unsigned 16-bit value, so it must be loaded with zero extension
        // (ld.hu). A sign-extending ld.h would turn any component size >= 0x8000 into a negative
        // multiplier and produce a negative byte count that could pass the limit check below.
        ld.hu       $a2, $a0, OFFSETOF__MethodTable__m_usComponentSize
        mulw.d.w    $a2, $a1, $a2                   // a2 = (a1[31:0] * a2[31:0])[63:0], both operands non-negative
        // The base size is loaded zero-extended as well, for the same reason.
        ld.wu       $a3, $a0, OFFSETOF__MethodTable__m_uBaseSize
        add.d       $a2, $a2, $a3
        addi.d      $a2, $a2, 7
        bstrins.d   $a2, $r0, 2, 0                  // clear bits [2:0] of a2 (round down to a multiple of 8)
        // a0 == MethodTable
        // a1 == element count
        // a2 == array size

        // a3 = GetThread(); guarded the same way as in RhpNewFast and RhNewString.
#ifdef FEATURE_EMULATED_TLS
        GETTHREAD_ETLS_3
#else
        INLINE_GETTHREAD $a3
#endif

        // Load potential new object address into t3.
        ld.d        $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr

        // Determine whether the end of the object would lie outside of the current allocation context. If so,
        // we abandon the attempt to allocate the object directly and fall back to the slow helper.
        // The limit is kept in t4 (as RhpNewFast does) so that t3 keeps the object address and no
        // second load of alloc_ptr is needed.
        add.d       $a2, $a2, $t3
        ld.d        $t4, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit
        bltu        $t4, $a2, RhpNewArray_Rare

        // Update the alloc pointer to account for the allocation.
        st.d        $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr

        // Set the new object's MethodTable pointer and element count.
        st.d        $a0, $t3, OFFSETOF__Object__m_pEEType
        st.d        $a1, $t3, OFFSETOF__Array__m_Length

        // Return the object allocated in a0.
        ori         $a0, $t3, 0

        jirl        $r0, $ra, 0

ArraySizeOverflow:
        // We get here if the element count is outside the non-negative 32-bit int range.
        // We are going to tail-call to a managed helper that will throw
        // an overflow exception that the caller of this allocator understands.

        // $a0 holds MethodTable pointer already
        ori         $a1, $zero, 1                   // Indicate that we should throw OverflowException
        b           C_FUNC(RhExceptionHandling_FailedAllocation)

RhpNewArray_Rare:
        // a0 == MethodTable, a1 == element count, a2 == size + alloc_ptr, a3 == Thread
        b           C_FUNC(RhpNewArrayRare)
    LEAF_END RhpNewArray, _TEXT

// RhpNewArrayRare: allocate one dimensional, zero based array (SZARRAY) using the slow path that
// calls a runtime helper.
//
//  In:   $a0 = MethodTable
//        $a1 = element count
//        $a2 = array size + Thread::m_alloc_context::alloc_ptr (set by the fast paths; not used here)
//        $a3 = Thread (set by the fast paths; not used here)
//  Out:  $a0 = value returned by RhpGcAlloc when it is non-null
//
// If RhpGcAlloc returns null, control is transferred to RhExceptionHandling_FailedAllocation with
// a0 == MethodTable and a1 == 0.
    NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler

        // RhpGcAlloc takes the element count, not a byte size, and a2 is overwritten with that
        // count below, so the byte size the fast path folded into a2 is not recovered here.

        PUSH_COOP_PINVOKE_FRAME $a3

        // a3: transition frame

        // Preserve data we will need later into the callee saved registers
        ori         $s0, $a0, 0                     // Preserve MethodTable

        ori         $a2, $a1, 0                     // numElements
        ori         $a1, $zero, 0                   // uFlags

        // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame)
        bl          C_FUNC(RhpGcAlloc)

        // A null result from RhpGcAlloc means the allocation failed: take the OOM path.
        // Otherwise a0 already holds the value to return.
        beq         $a0, $zero, ArrayOutOfMemory

        .cfi_remember_state
        POP_COOP_PINVOKE_FRAME
        EPILOG_RETURN

        .cfi_restore_state
ArrayOutOfMemory:
        // This is the OOM failure path. We are going to tail-call to a managed helper that will throw
        // an out of memory exception that the caller of this allocator understands.

        // Arguments are set up before the frame is popped, while s0 still holds the MethodTable.
        ori         $a0, $s0, 0                     // MethodTable Pointer
        ori         $a1, $zero, 0                   // Indicate that we should throw OOM.

        POP_COOP_PINVOKE_FRAME
        b           C_FUNC(RhExceptionHandling_FailedAllocation)

    NESTED_END RhpNewArrayRare, _TEXT
67 changes: 67 additions & 0 deletions src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

//
// This file is used by AsmOffsets.h to validate that our
// assembly-code offsets always match their C++ counterparts.
//
// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix

// All numeric arguments below are hexadecimal without the 0x prefix (e.g. "10" means 0x10 = 16).
// PLAT_ASM_SIZEOF(size, type) records the size of a type; PLAT_ASM_OFFSET(offset, type, field)
// records the offset of a field within a type.

// ExInfo: total size 0x280.
PLAT_ASM_SIZEOF(280, ExInfo)
PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo)
PLAT_ASM_OFFSET(8, ExInfo, m_pExContext)
PLAT_ASM_OFFSET(10, ExInfo, m_exception)
PLAT_ASM_OFFSET(18, ExInfo, m_kind)
PLAT_ASM_OFFSET(19, ExInfo, m_passNumber)
PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause)
// m_frameIter starts at 0x20 and the next listed field is at 0x278, a span of 0x258, which equals
// the StackFrameIterator size recorded below.
PLAT_ASM_OFFSET(20, ExInfo, m_frameIter)
PLAT_ASM_OFFSET(278, ExInfo, m_notifyDebuggerSP)

// PInvokeTransitionFrame (no size recorded here).
PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer)
PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP)
PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread)
PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags)
PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs)

// StackFrameIterator: total size 0x258.
PLAT_ASM_SIZEOF(258, StackFrameIterator)
PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer)
PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC)
// m_RegDisplay starts at 0x20 and the next listed field is at 0x248, a span of 0x228.
// NOTE(review): the REGDISPLAY size recorded below is 0x148, so other members presumably sit
// in between - confirm against the C++ definition.
PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay)
PLAT_ASM_OFFSET(248, StackFrameIterator, m_OriginalControlPC)
PLAT_ASM_OFFSET(250, StackFrameIterator, m_pPreviousTransitionFrame)

// PAL_LIMITED_CONTEXT: total size 0xC0. The fields listed here occupy consecutive 8-byte slots
// from 0x00 through 0x78; the remaining 0x40 bytes up to 0xC0 are not described in this file.
PLAT_ASM_SIZEOF(C0, PAL_LIMITED_CONTEXT)

PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, FP)
PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, RA)
PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, R4)
PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, R5)
PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, R23)
PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, R24)
PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, R25)
PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R26)
PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R27)
PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R28)
PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, R29)
PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, R30)
PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, R31)
PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, R2)
PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, SP)
PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, IP)

// REGDISPLAY: total size 0x148. Only the fields listed here are validated; they are not in
// ascending offset order (pR2, pFP and pRA follow the pR23..pR31 run).
PLAT_ASM_SIZEOF(148, REGDISPLAY)
PLAT_ASM_OFFSET(18, REGDISPLAY, SP)

PLAT_ASM_OFFSET(b8, REGDISPLAY, pR23)
PLAT_ASM_OFFSET(c0, REGDISPLAY, pR24)
PLAT_ASM_OFFSET(c8, REGDISPLAY, pR25)
PLAT_ASM_OFFSET(d0, REGDISPLAY, pR26)
PLAT_ASM_OFFSET(d8, REGDISPLAY, pR27)
PLAT_ASM_OFFSET(e0, REGDISPLAY, pR28)
PLAT_ASM_OFFSET(e8, REGDISPLAY, pR29)
PLAT_ASM_OFFSET(f0, REGDISPLAY, pR30)
PLAT_ASM_OFFSET(f8, REGDISPLAY, pR31)
PLAT_ASM_OFFSET(10, REGDISPLAY, pR2)
PLAT_ASM_OFFSET(b0, REGDISPLAY, pFP)
PLAT_ASM_OFFSET(8, REGDISPLAY, pRA)
// F starts at 0x108 and the structure ends at 0x148, leaving 0x40 bytes for it.
PLAT_ASM_OFFSET(108, REGDISPLAY, F)
Loading