Skip to content

Commit 3902454

Browse files
committed
Support Arm64 "constructed" constants in SuperPMI asm diffs
SuperPMI asm diffs tries to ignore constants that can change between multiple replays, such as addresses that the replay engine must generate and not simply hand back from the collected data. Often, addresses have associated relocations generated during replay. SuperPMI can use these relocations to adjust the constants to allow two replays to match.

However, there are cases on Arm64 where an address both doesn't report a relocation and is "constructed" using multiple `mov`/`movk` instructions. One case is the `allocPgoInstrumentationBySchema()` API which returns a pointer to a PGO data buffer. An address within this buffer is constructed via a sequence such as:

```
mov     x0, #63408
movk    x0, #23602, lsl #16
movk    x0, #606, lsl #32
```

When SuperPMI replays this API, it constructs a new buffer and returns that pointer, which is used to construct various actual addresses that are generated as "constructed" constants, shown above. This change "de-constructs" the constants and looks them up in the replay address map. If base and diff match the mapped constants, there is no asm diff.
1 parent fb5f07f commit 3902454

File tree

3 files changed

+160
-16
lines changed

3 files changed

+160
-16
lines changed

src/coreclr/tools/superpmi/superpmi-shared/spmiutil.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,3 +335,46 @@ void PutThumb2BlRel24(UINT16* p, INT32 imm24)
335335
p[0] = Opcode0;
336336
p[1] = Opcode1;
337337
}
338+
339+
// GetArm64MovConstant / GetArm64MovkConstant: Decode arm64 mov / movk instructions, e.g.:
340+
// d29ff600 mov x0, #65456
341+
// f2ab8640 movk x0, #23602, lsl #16
342+
// f2c04bc0 movk x0, #606, lsl #32
343+
//
344+
// This is used in the NearDiffer to determine if a sequence of mov/movk is actually an address.
345+
//
346+
// Return `true` if the instruction pointed to by `p` is a mov/movk, `false` otherwise.
347+
// If true, fill out the target register in `*pReg`, constant in `*pCon`, and (for movk) shift value in `*pShift`.
348+
349+
bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon)
350+
{
351+
UINT32 instr = *p;
352+
if ((instr & 0xffe00000) == 0xd2800000)
353+
{
354+
*pReg = instr & 0x1f;
355+
*pCon = (instr >> 5) & 0xffff;
356+
return true;
357+
}
358+
359+
return false;
360+
}
361+
362+
// GetArm64MovkConstant: match a 64-bit `movk Xd, #imm16, lsl #shift` at `p`.
//
// Returns `true` when bits [31:23] of the instruction word equal the 64-bit MOVK pattern; in that case
// `*pReg` receives the destination register number, `*pCon` the 16-bit immediate, and `*pShift` the shift
// in bits (0, 16, 32 or 48). Returns `false`, leaving the outputs untouched, for any other instruction.
bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift)
{
    const UINT32 encoding = *p;

    if ((encoding & 0xff800000) != 0xf2800000)
    {
        return false;
    }

    // The two-bit `hw` field (bits [22:21]) selects which 16-bit lane of the register is written.
    const UINT32 hw = (encoding >> 21) & 0x3;

    *pReg   = encoding & 0x1f;          // Rd:    bits [4:0]
    *pCon   = (encoding >> 5) & 0xffff; // imm16: bits [20:5]
    *pShift = hw * 16;
    return true;
}
375+
376+
// PutArm64MovkConstant: set the constant field in an Arm64 `movk` instruction
377+
void PutArm64MovkConstant(UINT32* p, unsigned con)
378+
{
379+
*p = (*p & ~(0xffff << 5)) | ((con & 0xffff) << 5);
380+
}

src/coreclr/tools/superpmi/superpmi-shared/spmiutil.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ void PutArm64Rel12(UINT32* pCode, INT32 imm12);
8282
void PutThumb2Mov32(UINT16* p, UINT32 imm32);
8383
void PutThumb2BlRel24(UINT16* p, INT32 imm24);
8484

85+
bool GetArm64MovConstant(UINT32* p, unsigned* pReg, unsigned* pCon);
86+
bool GetArm64MovkConstant(UINT32* p, unsigned* pReg, unsigned* pCon, unsigned* pShift);
87+
88+
void PutArm64MovkConstant(UINT32* p, unsigned con);
89+
8590
template <typename T, int size>
8691
inline constexpr unsigned ArrLen(T (&)[size])
8792
{

src/coreclr/tools/superpmi/superpmi/neardiffer.cpp

Lines changed: 112 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -298,22 +298,24 @@ struct DiffData
298298
CompileResult* cr2;
299299

300300
// Details of the first block
301-
size_t blocksize1;
302-
size_t datablock1;
303-
size_t datablockSize1;
304-
size_t originalBlock1;
305-
size_t originalDataBlock1;
306-
size_t otherCodeBlock1;
307-
size_t otherCodeBlockSize1;
301+
unsigned char* block1;
302+
size_t blocksize1;
303+
unsigned char* datablock1;
304+
size_t datablockSize1;
305+
size_t originalBlock1;
306+
size_t originalDataBlock1;
307+
size_t otherCodeBlock1;
308+
size_t otherCodeBlockSize1;
308309

309310
// Details of the second block
310-
size_t blocksize2;
311-
size_t datablock2;
312-
size_t datablockSize2;
313-
size_t originalBlock2;
314-
size_t originalDataBlock2;
315-
size_t otherCodeBlock2;
316-
size_t otherCodeBlockSize2;
311+
unsigned char* block2;
312+
size_t blocksize2;
313+
unsigned char* datablock2;
314+
size_t datablockSize2;
315+
size_t originalBlock2;
316+
size_t originalDataBlock2;
317+
size_t otherCodeBlock2;
318+
size_t otherCodeBlockSize2;
317319
};
318320

319321
//
@@ -330,6 +332,7 @@ bool NearDiffer::compareOffsets(
330332
return true;
331333
}
332334

335+
const SPMI_TARGET_ARCHITECTURE targetArch = GetSpmiTargetArchitecture();
333336
const DiffData* data = (const DiffData*)payload;
334337
size_t ip1 = data->originalBlock1 + blockOffset;
335338
size_t ip2 = data->originalBlock2 + blockOffset;
@@ -435,6 +438,99 @@ bool NearDiffer::compareOffsets(
435438
if ((mapped1 == mapped2) && (mapped1 != (size_t)-1))
436439
return true;
437440

441+
// There are some cases on arm64 where we generate multiple instruction register construction of addresses
442+
// but we don't have a relocation for them (so they aren't handled by `applyRelocs`). One case is
443+
// allocPgoInstrumentationBySchema(), which returns an address into which the JIT writes PGO probe data.
444+
// The instruction sequence is something like this:
445+
// mov x0, #63408
446+
// movk x0, #23602, lsl #16
447+
// movk x0, #606, lsl #32
448+
//
449+
// Here, we try to match this sequence and look it up in the address map.
450+
//
451+
// Some version of this logic might apply to ARM as well.
452+
//
453+
if (targetArch == SPMI_TARGET_ARCHITECTURE_ARM64)
454+
{
455+
bool movk2 = false, movk3 = false;
456+
unsigned reg1_1, reg1_2, reg2_1, reg2_2, reg3_1, reg3_2, reg4_1, reg4_2;
457+
unsigned con1_1, con1_2, con2_1, con2_2, con3_1, con3_2, con4_1, con4_2;
458+
unsigned shift2_1, shift2_2, shift3_1, shift3_2, shift4_1, shift4_2;
459+
UINT32* iaddr1 = (UINT32*)(data->block1 + blockOffset);
460+
UINT32* iaddr2 = (UINT32*)(data->block2 + blockOffset);
461+
UINT32* iaddr1end = (UINT32*)(data->block1 + data->blocksize1);
462+
UINT32* iaddr2end = (UINT32*)(data->block2 + data->blocksize2);
463+
464+
// We're assuming that a mov/movk isn't the last instruction in the instruction buffer.
465+
if ((iaddr1 < iaddr1end) &&
466+
(iaddr2 < iaddr2end) &&
467+
GetArm64MovConstant(iaddr1, &reg1_1, &con1_1) &&
468+
GetArm64MovConstant(iaddr2, &reg1_2, &con1_2) &&
469+
(reg1_1 == reg1_2))
470+
{
471+
if ((iaddr1 + 1 < iaddr1end) &&
472+
(iaddr2 + 1 < iaddr2end) &&
473+
GetArm64MovkConstant(iaddr1 + 1, &reg2_1, &con2_1, &shift2_1) &&
474+
GetArm64MovkConstant(iaddr2 + 1, &reg2_2, &con2_2, &shift2_2) &&
475+
(reg2_1 == reg2_2) &&
476+
(shift2_1 == shift2_2) &&
477+
(shift2_1 == 16))
478+
{
479+
// We currently assume the address requires at least 2 'movk' instructions, thus, is >4GB.
480+
// The 3rd 'movk' is optional.
481+
if ((iaddr1 + 2 < iaddr1end) &&
482+
(iaddr2 + 2 < iaddr2end) &&
483+
GetArm64MovkConstant(iaddr1 + 2, &reg3_1, &con3_1, &shift3_1) &&
484+
GetArm64MovkConstant(iaddr2 + 2, &reg3_2, &con3_2, &shift3_2) &&
485+
(reg3_1 == reg3_2) &&
486+
(shift3_1 == shift3_2) &&
487+
(shift3_1 == 32))
488+
{
489+
movk2 = true;
490+
// Note: this only works if size_t is 64-bit.
491+
size_t addr1 = (size_t)con1_1 + ((size_t)con2_1 << 16) + ((size_t)con3_1 << 32);
492+
size_t addr2 = (size_t)con1_2 + ((size_t)con2_2 << 16) + ((size_t)con3_2 << 32);
493+
if ((iaddr1 + 3 < iaddr1end) &&
494+
(iaddr2 + 3 < iaddr2end) &&
495+
GetArm64MovkConstant(iaddr1 + 3, &reg4_1, &con4_1, &shift4_1) &&
496+
GetArm64MovkConstant(iaddr2 + 3, &reg4_2, &con4_2, &shift4_2) &&
497+
(reg4_1 == reg4_2) &&
498+
(shift4_1 == shift4_2) &&
499+
(shift4_1 == 48))
500+
{
501+
movk3 = true;
502+
addr1 += (size_t)con4_1 << 48;
503+
addr2 += (size_t)con4_2 << 48;
504+
}
505+
506+
// Check the constants! We don't need to check 'addr1 == addr2' because if that were
507+
// true we wouldn't have gotten here.
508+
509+
size_t mapped1 = (size_t)data->cr1->searchAddressMap((void*)addr1);
510+
size_t mapped2 = (size_t)data->cr2->searchAddressMap((void*)addr2);
511+
if ((mapped1 == mapped2) && (mapped1 != (size_t)-1))
512+
{
513+
// Now, zero out the constants in the `movk` instructions so when the disassembler
514+
// gets to them, they compare equal.
515+
PutArm64MovkConstant(iaddr1 + 1, 0);
516+
PutArm64MovkConstant(iaddr2 + 1, 0);
517+
if (movk2)
518+
{
519+
PutArm64MovkConstant(iaddr1 + 2, 0);
520+
PutArm64MovkConstant(iaddr2 + 2, 0);
521+
}
522+
if (movk3)
523+
{
524+
PutArm64MovkConstant(iaddr1 + 3, 0);
525+
PutArm64MovkConstant(iaddr2 + 3, 0);
526+
}
527+
return true;
528+
}
529+
}
530+
}
531+
}
532+
}
533+
438534
return false;
439535
}
440536

@@ -513,11 +609,11 @@ bool NearDiffer::compareCodeSection(MethodContext* mc,
513609
cr2,
514610

515611
// Details of the first block
516-
(size_t)blocksize1, (size_t)datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
612+
block1, (size_t)blocksize1, datablock1, (size_t)datablockSize1, (size_t)originalBlock1,
517613
(size_t)originalDataBlock1, (size_t)otherCodeBlock1, (size_t)otherCodeBlockSize1,
518614

519615
// Details of the second block
520-
(size_t)blocksize2, (size_t)datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
616+
block2, (size_t)blocksize2, datablock2, (size_t)datablockSize2, (size_t)originalBlock2,
521617
(size_t)originalDataBlock2, (size_t)otherCodeBlock2, (size_t)otherCodeBlockSize2};
522618

523619
#ifdef USE_COREDISTOOLS

0 commit comments

Comments
 (0)