[SuperPMI] Support spmi-asmdiffs on Arm64 #48683

Merged: 28 commits, Feb 26, 2021

Changes from all commits (28 commits)

35f7bf1
Implement IMAGE_REL_ARM64_BRANCH26 in compileresult.cpp
echesakov Sep 30, 2020
80be600
Introduce GetSpmiTargetArchitecture/SetSpmiTargetArchitecture helpers…
echesakov Feb 24, 2021
bbd037f
Do platform specific relocations (IMAGE_REL_BASED_HIGHLOW) first in C…
echesakov Feb 24, 2021
f5ec6d2
Do Arm64 specific relocations first (IMAGE_REL_ARM64_*) in src/corecl…
echesakov Feb 24, 2021
226acee
Rename 'relocWasHandled' -> 'wasRelocHandled' in src/coreclr/ToolBox/…
echesakov Feb 24, 2021
a4cce85
Do 64-bit target specific relocations first (IMAGE_REL_BASED_DIR64) i…
echesakov Feb 24, 2021
bd38deb
Remove remaining 'if defined(TARGET_*)' in src/coreclr/ToolBox/superpm…
echesakov Feb 24, 2021
1e31f44
Call repCompileMethod() on Arm64 to adjust hotCodeSize in src/coreclr…
echesakov Feb 23, 2021
72c97ba
Add PutArm64Rel28 helper in src/coreclr/ToolBox/superpmi/superpmi-sha…
echesakov Feb 24, 2021
48aff18
Add PutArm64Rel21 and PutArm64Rel12 helpers in src/coreclr/ToolBox/su…
echesakov Feb 25, 2021
ae3fa9d
Re-implement IMAGE_REL_ARM64_BRANCH26 and implement IMAGE_REL_ARM64_P…
echesakov Feb 25, 2021
a0b8cb4
Add PutThumb2Mov32 and PutThumb2BlRel24 helpers in src/coreclr/ToolBo…
echesakov Feb 25, 2021
0a763b4
Enable IMAGE_REL_BASED_HIGHLOW on Arm in compileresult.cpp
echesakov Feb 25, 2021
30023dd
Implement IMAGE_REL_BASED_THUMB_MOV32 IMAGE_REL_BASED_REL_THUMB_MOV32…
echesakov Feb 25, 2021
6fe2d46
Add comment about functions copied from utilcode in spmiutil.cpp
echesakov Feb 26, 2021
df78f9c
Generalize checks for altjits in jitinstance.cpp
echesakov Feb 26, 2021
b9c6214
Add comment and add logic for figuring out roDataBlock/roDataSize on …
echesakov Feb 26, 2021
b88159d
Remove redundant cast in compileresult.cpp
echesakov Feb 26, 2021
13d16ea
Use one error message for both 32-bit and 64-bit targets in compilere…
echesakov Feb 26, 2021
227d6f6
Add comment before all platform relocations in src/coreclr/ToolBox/su…
echesakov Feb 26, 2021
ac3ce2e
Remove another redundant cast in compileresult.cpp
echesakov Feb 26, 2021
365b24a
Remove unused CallUtils::HasRetBuffArg in callutils.cpp callutils.h
echesakov Feb 26, 2021
f805314
Add IsSpmiTarget64Bit() and SpmiTargetPointerSize() helpers in spmiut…
echesakov Feb 26, 2021
b4e70da
Update TypeUtils in typeutils.cpp
echesakov Feb 26, 2021
74681c6
Use IsSpmiTarget64Bit() in compileresult.cpp
echesakov Feb 26, 2021
3ad3238
Replace sizeof(void*) -> SpmiTargetPointerSize() in methodcontext.cpp
echesakov Feb 26, 2021
d280c5b
Fix warning in methodcontext.cpp
echesakov Feb 26, 2021
5f9e9fd
Nit: "theor" -> "their" in neardiffer.cpp
echesakov Feb 26, 2021
19 changes: 0 additions & 19 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/callutils.cpp
@@ -215,25 +215,6 @@ CallType CallUtils::GetDirectCallSiteInfo(MethodContext* mc,
// SuperPMI's method context replaying instead of directly making calls into the JIT/EE interface.
//-------------------------------------------------------------------------------------------------

// Stolen from Compiler::impMethodInfo_hasRetBuffArg (in the importer)
bool CallUtils::HasRetBuffArg(MethodContext* mc, CORINFO_SIG_INFO args)
{
if (args.retType != CORINFO_TYPE_VALUECLASS && args.retType != CORINFO_TYPE_REFANY)
{
return false;
}

#if defined(TARGET_AMD64)
// We don't need a return buffer if:
// i) TYP_STRUCT argument that can fit into a single register and
// ii) Power of two sized TYP_STRUCT on AMD64.
unsigned size = mc->repGetClassSize(args.retTypeClass);
return (size > sizeof(void*)) || ((size & (size - 1)) != 0);
#else
return true;
#endif
}
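
For context, the deleted predicate returns true exactly when AMD64 needs a hidden return buffer: any struct larger than a register, or any non-power-of-two size. A minimal stand-alone illustration (hypothetical NeedsRetBuff wrapper, pointer size hard-coded to 8 bytes), not part of this PR:

#include <cstdio>

// True when a struct return needs a hidden return buffer under the AMD64 rule
// above: larger than a register, or not a power-of-two size.
static bool NeedsRetBuff(unsigned size, unsigned pointerSize = 8)
{
    return (size > pointerSize) || ((size & (size - 1)) != 0);
}

int main()
{
    // 1, 2, 4 and 8 byte structs return in a register; 3, 5, 6 and 7 need a
    // buffer despite fitting in 8 bytes, because they are not powers of two.
    for (unsigned size = 1; size <= 16; size++)
        printf("size %2u -> retbuff: %s\n", size, NeedsRetBuff(size) ? "yes" : "no");
    return 0;
}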

// Originally from src/jit/ee_il_dll.cpp
const char* CallUtils::GetMethodName(MethodContext* mc, CORINFO_METHOD_HANDLE method, const char** classNamePtr)
{
1 change: 0 additions & 1 deletion src/coreclr/ToolBox/superpmi/superpmi-shared/callutils.h
@@ -28,7 +28,6 @@ class CallUtils
void* callTarget,
/*out*/ CORINFO_SIG_INFO* outSigInfo,
/*out*/ char** outCallTargetSymbol);
static bool HasRetBuffArg(MethodContext* mc, CORINFO_SIG_INFO args);
static CorInfoHelpFunc GetHelperNum(CORINFO_METHOD_HANDLE method);
static bool IsNativeMethod(CORINFO_METHOD_HANDLE method);
static CORINFO_METHOD_HANDLE GetMethodHandleForNative(CORINFO_METHOD_HANDLE method);
217 changes: 158 additions & 59 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/compileresult.cpp
@@ -742,81 +742,128 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o
printf("\n");
}

switch (tmp.fRelocType)
const SPMI_TARGET_ARCHITECTURE targetArch = GetSpmiTargetArchitecture();

const DWORD relocType = tmp.fRelocType;
bool wasRelocHandled = false;

// Do platform specific relocations first.

if ((targetArch == SPMI_TARGET_ARCHITECTURE_X86) || (targetArch == SPMI_TARGET_ARCHITECTURE_ARM))
{
#if defined(TARGET_X86)
case IMAGE_REL_BASED_HIGHLOW:
if (relocType == IMAGE_REL_BASED_HIGHLOW)
{
DWORDLONG fixupLocation = tmp.location;

size_t address = section_begin + (size_t)fixupLocation - (size_t)originalAddr;
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address,
(DWORD)tmp.target);
*(DWORD*)address = (DWORD)tmp.target;
}
wasRelocHandled = true;
}
break;
#endif // TARGET_X86
}

#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM)
case IMAGE_REL_BASED_REL32:
if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM)
{
DWORDLONG fixupLocation = tmp.location;
DWORDLONG address = section_begin + (size_t)fixupLocation - (size_t)originalAddr;

switch (relocType)
{
DWORDLONG target = tmp.target + tmp.addlDelta;
DWORDLONG fixupLocation = tmp.location + tmp.slotNum;
DWORDLONG baseAddr = fixupLocation + sizeof(INT32);
INT64 delta = (INT64)((BYTE*)target - baseAddr);
case IMAGE_REL_BASED_THUMB_MOV32:
case IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL:
{
INT32 delta = (INT32)(tmp.target - fixupLocation);
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
PutThumb2Mov32((UINT16*)address, (UINT32)delta);
}
wasRelocHandled = true;
}
break;

#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
if (delta != (INT64)(int)delta)
case IMAGE_REL_BASED_THUMB_BRANCH24:
{
// This isn't going to fit in a signed 32-bit address. Use something that will fit,
// since we assume that original compilation fit fine. This is only an issue for
// 32-bit offsets on 64-bit targets.
target = (DWORDLONG)originalAddr + (DWORDLONG)blocksize1;
INT64 newdelta = (INT64)((BYTE*)target - baseAddr);
INT32 delta = (INT32)(tmp.target - fixupLocation);
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
if (!FitsInThumb2BlRel24(delta))
{
echesakov (Contributor, author) commented:

I was not sure what to do here. IIRC, if such an overflow occurs in the real product during crossgen, the JIT would be required to re-compile the method without using this type of relocation.

echesakov (Contributor, author) commented:

@BruceForstall Any opinion on whether this is the desired behavior? What would/should happen during spmi collect when such a relocation overflow occurs? Would the corresponding method context be included in the final collection?

Contributor commented:

I'm not sure what would happen during spmi collection if it failed. It's supposed to throw and retry, but the retry logic for relocs in crossgen1 seems to depend on m_fNGenLastRetry, and I can't find how that ever gets set to true (it depends on fNgenLastRetry, which is never set to true).

It looks like crossgen2 always returns IMAGE_REL_BASED_THUMB_BRANCH24 with no provision for overflow.

I would guess spmi would only record the retry compilation, but it's not clear that ever happens.
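
// Overflow fallback (per the thread above): pretend a jump stub sits at the
// end of the method and branch to that instead, mirroring the Arm64 BRANCH26
// handling below, so replay still produces an in-range, decodable branch.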

DWORDLONG target = (DWORDLONG)originalAddr + (DWORDLONG)blocksize1;
delta = (INT32)(target - fixupLocation);
}
PutThumb2BlRel24((UINT16*)address, delta);
}
wasRelocHandled = true;
}
break;

LogDebug(" REL32 overflow. Mapping target to %016llX. Mapping delta: %016llX => %016llX", target,
delta, newdelta);
default:
break;
}
}

delta = newdelta;
if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64)
{
DWORDLONG fixupLocation = tmp.location;
DWORDLONG address = section_begin + (size_t)fixupLocation - (size_t)originalAddr;

switch (relocType)
{
case IMAGE_REL_ARM64_BRANCH26: // 26 bit offset << 2 & sign ext, for B and BL
{
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
INT64 delta = (INT64)(tmp.target - fixupLocation);
if (!FitsInRel28(delta))
{
// Assume here that we would need a jump stub for this relocation and pretend
// that the jump stub is located right at the end of the method.
DWORDLONG target = (DWORDLONG)originalAddr + (DWORDLONG)blocksize1;
delta = (INT64)(target - fixupLocation);
}
PutArm64Rel28((UINT32*)address, (INT32)delta);
}
wasRelocHandled = true;
}
#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64)
break;

if (delta != (INT64)(int)delta)
case IMAGE_REL_ARM64_PAGEBASE_REL21: // ADRP 21 bit PC-relative page address
{
#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
LogError("REL32 relocation overflows field! delta=0x%016llX", delta);
#else
LogError("REL32 relocation overflows field! delta=0x%08X", delta);
#endif
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
INT64 targetPage = (INT64)tmp.target & 0xFFFFFFFFFFFFF000LL;
INT64 fixupLocationPage = (INT64)fixupLocation & 0xFFFFFFFFFFFFF000LL;
INT64 pageDelta = (INT64)(targetPage - fixupLocationPage);
INT32 imm21 = (INT32)(pageDelta >> 12) & 0x1FFFFF;
PutArm64Rel21((UINT32*)address, imm21);
}
wasRelocHandled = true;
}
break;

// Write 32-bits into location
size_t address = section_begin + (size_t)fixupLocation - (size_t)originalAddr;
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
case IMAGE_REL_ARM64_PAGEOFFSET_12A: // ADD 12 bit page offset
{
#if defined(TARGET_AMD64)
// During an actual compile, recordRelocation() will be called before the compile
// is actually finished, and it will write the relative offset into the fixupLocation.
// Then, emitEndCodeGen() will patch forward jumps by subtracting any adjustment due
// to overestimation of instruction sizes. Because we're applying the relocs after the
// compile has finished, we need to reverse that: i.e. add in the (negative) adjustment
// that's now in the fixupLocation.
INT32 adjustment = *(INT32*)address;
delta += adjustment;
#endif
LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address,
delta);
*(DWORD*)address = (DWORD)delta;
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
INT32 imm12 = (INT32)(SIZE_T)tmp.target & 0xFFFLL;
PutArm64Rel12((UINT32*)address, imm12);
}
wasRelocHandled = true;
}
break;

default:
break;
}
break;
#endif // defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_ARM)
}

#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
case IMAGE_REL_BASED_DIR64:
if (IsSpmiTarget64Bit())
{
if (relocType == IMAGE_REL_BASED_DIR64)
{
DWORDLONG fixupLocation = tmp.location + tmp.slotNum;

@@ -825,20 +872,72 @@ void CompileResult::applyRelocs(unsigned char* block1, ULONG blocksize1, void* o
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
LogDebug(" fixupLoc-%016llX (@%p) %016llX => %016llX", fixupLocation, address,
*(DWORDLONG*)address, tmp.target);
*(DWORDLONG*)address = tmp.target;
}

wasRelocHandled = true;
}
break;
#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64)
}

#ifdef TARGET_ARM64
case IMAGE_REL_ARM64_BRANCH26: // 26 bit offset << 2 & sign ext, for B and BL
case IMAGE_REL_ARM64_PAGEBASE_REL21:
case IMAGE_REL_ARM64_PAGEOFFSET_12A:
LogError("Unimplemented reloc type %u", tmp.fRelocType);
break;
#endif // TARGET_ARM64
if (wasRelocHandled)
continue;

// Now do all-platform relocations.

switch (tmp.fRelocType)
{
case IMAGE_REL_BASED_REL32:
{
DWORDLONG target = tmp.target + tmp.addlDelta;
DWORDLONG fixupLocation = tmp.location + tmp.slotNum;
DWORDLONG baseAddr = fixupLocation + sizeof(INT32);
INT64 delta = (INT64)(target - baseAddr);

if (IsSpmiTarget64Bit())
{
if (delta != (INT64)(int)delta)
{
// This isn't going to fit in a signed 32-bit address. Use something that will fit,
// since we assume that original compilation fit fine. This is only an issue for
// 32-bit offsets on 64-bit targets.
target = (DWORDLONG)originalAddr + (DWORDLONG)blocksize1;
INT64 newdelta = (INT64)(target - baseAddr);

LogDebug(" REL32 overflow. Mapping target to %016llX. Mapping delta: %016llX => %016llX", target,
delta, newdelta);

delta = newdelta;
}
}

if (delta != (INT64)(int)delta)
{
LogError("REL32 relocation overflows field! delta=0x%016llX", delta);
}

// Write 32-bits into location
size_t address = section_begin + (size_t)fixupLocation - (size_t)originalAddr;
if ((section_begin <= address) && (address < section_end)) // A reloc for our section?
{
if (targetArch == SPMI_TARGET_ARCHITECTURE_AMD64)
{
// During an actual compile, recordRelocation() will be called before the compile
// is actually finished, and it will write the relative offset into the fixupLocation.
// Then, emitEndCodeGen() will patch forward jumps by subtracting any adjustment due
// to overestimation of instruction sizes. Because we're applying the relocs after the
// compile has finished, we need to reverse that: i.e. add in the (negative) adjustment
// that's now in the fixupLocation.
INT32 adjustment = *(INT32*)address;
delta += adjustment;
}

LogDebug(" fixupLoc-%016llX (@%p) : %08X => %08X", fixupLocation, address, *(DWORD*)address,
delta);
*(DWORD*)address = (DWORD)delta;
}
}
break;

default:
LogError("Unknown reloc type %u", tmp.fRelocType);
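
The PAGEBASE_REL21 and PAGEOFFSET_12A cases above split an absolute address into a 21-bit ADRP page delta plus a 12-bit ADD page offset. A stand-alone sketch of that arithmetic, using hypothetical addresses that are not part of the diff:

#include <cstdint>
#include <cstdio>

int main()
{
    // Hypothetical addresses, chosen only to exercise the math.
    int64_t target        = 0x0000007F12345678LL; // address the ADRP/ADD pair must form
    int64_t fixupLocation = 0x0000007F10002000LL; // address of the ADRP instruction

    // IMAGE_REL_ARM64_PAGEBASE_REL21: signed delta between 4 KB pages, 21 bits.
    int64_t targetPage        = target & 0xFFFFFFFFFFFFF000LL;
    int64_t fixupLocationPage = fixupLocation & 0xFFFFFFFFFFFFF000LL;
    int32_t imm21             = (int32_t)((targetPage - fixupLocationPage) >> 12) & 0x1FFFFF;

    // IMAGE_REL_ARM64_PAGEOFFSET_12A: low 12 bits of the target, for the ADD.
    int32_t imm12 = (int32_t)(target & 0xFFF);

    // ADRP materializes targetPage (hardware sign-extends the 21-bit field);
    // the ADD then supplies the page offset. With this positive delta:
    int64_t reconstructed = fixupLocationPage + ((int64_t)imm21 << 12) + imm12;
    printf("imm21=0x%X imm12=0x%X reconstructed=0x%llX\n",
           (unsigned)imm21, (unsigned)imm12, (unsigned long long)reconstructed);
    return 0; // prints reconstructed=0x7F12345678, i.e. the original target
}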
src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp
@@ -632,7 +632,7 @@ unsigned int toCorInfoSize(CorInfoType cit)
case CORINFO_TYPE_PTR:
case CORINFO_TYPE_BYREF:
case CORINFO_TYPE_CLASS:
return sizeof(void*);
return (int)SpmiTargetPointerSize();

case CORINFO_TYPE_STRING:
case CORINFO_TYPE_VALUECLASS:
91 changes: 91 additions & 0 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/spmiutil.cpp
@@ -227,3 +227,94 @@ WCHAR* GetResultFileName(const WCHAR* folderPath, const WCHAR* fileName, const W

return fullPath;
}

#ifdef TARGET_AMD64
static SPMI_TARGET_ARCHITECTURE SpmiTargetArchitecture = SPMI_TARGET_ARCHITECTURE_AMD64;
#elif defined(TARGET_X86)
static SPMI_TARGET_ARCHITECTURE SpmiTargetArchitecture = SPMI_TARGET_ARCHITECTURE_X86;
#elif defined(TARGET_ARM)
static SPMI_TARGET_ARCHITECTURE SpmiTargetArchitecture = SPMI_TARGET_ARCHITECTURE_ARM;
#elif defined(TARGET_ARM64)
static SPMI_TARGET_ARCHITECTURE SpmiTargetArchitecture = SPMI_TARGET_ARCHITECTURE_ARM64;
#else
#error Unsupported architecture
#endif

SPMI_TARGET_ARCHITECTURE GetSpmiTargetArchitecture()
{
return SpmiTargetArchitecture;
}

void SetSpmiTargetArchitecture(SPMI_TARGET_ARCHITECTURE spmiTargetArchitecture)
{
SpmiTargetArchitecture = spmiTargetArchitecture;
}

// The following functions are used for arm64/arm32 relocation processing.
// They are copies of the code in src\coreclr\utilcode\util.cpp.
// We decided to copy them instead of linking with utilcode library
// to avoid introducing additional runtime dependencies.

void PutArm64Rel28(UINT32* pCode, INT32 imm28)
{
UINT32 branchInstr = *pCode;
branchInstr &= 0xFC000000;
branchInstr |= ((imm28 >> 2) & 0x03FFFFFF);
*pCode = branchInstr;
}

void PutArm64Rel21(UINT32* pCode, INT32 imm21)
{
UINT32 adrpInstr = *pCode;
adrpInstr &= 0x9F00001F;
INT32 immlo = imm21 & 0x03;
INT32 immhi = (imm21 & 0x1FFFFC) >> 2;
adrpInstr |= ((immlo << 29) | (immhi << 5));
*pCode = adrpInstr;
}

void PutArm64Rel12(UINT32* pCode, INT32 imm12)
{
UINT32 addInstr = *pCode;
addInstr &= 0xFFC003FF;
addInstr |= (imm12 << 10);
*pCode = addInstr;
}

void PutThumb2Imm16(UINT16* p, UINT16 imm16)
{
USHORT Opcode0 = p[0];
USHORT Opcode1 = p[1];
Opcode0 &= ~((0xf000 >> 12) | (0x0800 >> 1));
Opcode1 &= ~((0x0700 << 4) | (0x00ff << 0));
Opcode0 |= (imm16 & 0xf000) >> 12;
Opcode0 |= (imm16 & 0x0800) >> 1;
Opcode1 |= (imm16 & 0x0700) << 4;
Opcode1 |= (imm16 & 0x00ff) << 0;
p[0] = Opcode0;
p[1] = Opcode1;
}

void PutThumb2Mov32(UINT16* p, UINT32 imm32)
{
PutThumb2Imm16(p, (UINT16)imm32);
PutThumb2Imm16(p + 2, (UINT16)(imm32 >> 16));
}

void PutThumb2BlRel24(UINT16* p, INT32 imm24)
{
USHORT Opcode0 = p[0];
USHORT Opcode1 = p[1];
Opcode0 &= 0xF800;
Opcode1 &= 0xD000;

UINT32 S = (imm24 & 0x1000000) >> 24;
UINT32 J1 = ((imm24 & 0x0800000) >> 23) ^ S ^ 1;
UINT32 J2 = ((imm24 & 0x0400000) >> 22) ^ S ^ 1;

Opcode0 |= ((imm24 & 0x03FF000) >> 12) | (S << 10);
Opcode1 |= ((imm24 & 0x0000FFE) >> 1) | (J1 << 13) | (J2 << 11);

p[0] = Opcode0;
p[1] = Opcode1;
}
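
The compileresult.cpp hunks above also call FitsInRel28 and FitsInThumb2BlRel24, whose definitions this diff does not show. A plausible sketch of those range checks, assuming they mirror the utilcode originals (Arm64 B/BL reaches +/-128 MB, Thumb-2 BL reaches +/-16 MB):

#include <cstdint>

// Assumed check: the byte offset fits the signed 28-bit range of Arm64 B/BL
// (a 26-bit immediate shifted left by 2), i.e. +/-128 MB.
bool FitsInRel28(int64_t imm28)
{
    return (imm28 >= -0x08000000LL) && (imm28 < 0x08000000LL);
}

// Assumed check: the byte offset fits the signed 25-bit range of Thumb-2 BL
// (a 24-bit immediate shifted left by 1), i.e. +/-16 MB.
bool FitsInThumb2BlRel24(int32_t imm24)
{
    return (imm24 >= -0x01000000) && (imm24 < 0x01000000);
}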