Skip to content

Commit e9915df

Browse files
EgorBo and am11 authored
[arm64] Add RCPC ISA (8.3+) and use ldap for volatile reads (#67384)
Co-authored-by: Adeel Mujahid <3840695+am11@users.noreply.github.com>
1 parent 3635e0f commit e9915df

File tree

16 files changed

+69
-15
lines changed

16 files changed

+69
-15
lines changed

src/coreclr/inc/clrconfigvalues.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -775,6 +775,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Dp, W("EnableArm64Dp"), 1
775775
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rdm, W("EnableArm64Rdm"), 1, "Allows Arm64 Rdm+ hardware intrinsics to be disabled")
776776
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha1, W("EnableArm64Sha1"), 1, "Allows Arm64 Sha1+ hardware intrinsics to be disabled")
777777
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Sha256, W("EnableArm64Sha256"), 1, "Allows Arm64 Sha256+ hardware intrinsics to be disabled")
778+
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableArm64Rcpc, W("EnableArm64Rcpc"), 1, "Allows Arm64 Rcpc+ hardware intrinsics to be disabled")
778779
#endif
779780

780781
///

src/coreclr/inc/corinfoinstructionset.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ enum CORINFO_InstructionSet
3636
InstructionSet_Rdm_Arm64=18,
3737
InstructionSet_Sha1_Arm64=19,
3838
InstructionSet_Sha256_Arm64=20,
39+
InstructionSet_Rcpc=21,
3940
#endif // TARGET_ARM64
4041
#ifdef TARGET_AMD64
4142
InstructionSet_X86Base=1,
@@ -486,6 +487,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
486487
return "Vector128";
487488
case InstructionSet_Dczva :
488489
return "Dczva";
490+
case InstructionSet_Rcpc :
491+
return "Rcpc";
489492
#endif // TARGET_ARM64
490493
#ifdef TARGET_AMD64
491494
case InstructionSet_X86Base :

src/coreclr/inc/jiteeversionguid.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
4343
#define GUID_DEFINED
4444
#endif // !GUID_DEFINED
4545

46-
constexpr GUID JITEEVersionIdentifier = { /* b2d3c86f-87fd-4724-9e5d-4c44905eba91 */
47-
0xb2d3c86f,
48-
0x87fd,
49-
0x4724,
50-
{0x9e, 0x5d, 0x4c, 0x44, 0x90, 0x5e, 0xba, 0x91}
46+
constexpr GUID JITEEVersionIdentifier = { /* 206a7aa6-9f5c-47c1-b63b-54f4cb169ee3 */
47+
0x206a7aa6,
48+
0x9f5c,
49+
0x47c1,
50+
{0xb6, 0x3b, 0x54, 0xf4, 0xcb, 0x16, 0x9e, 0xe3}
5151
};
5252

5353
//////////////////////////////////////////////////////////////////////////////////////////////////////////

src/coreclr/jit/codegenarm64.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5233,6 +5233,12 @@ void CodeGen::genArm64EmitterUnitTests()
52335233
theEmitter->emitIns_R_R(INS_stlrb, EA_4BYTE, REG_R5, REG_R14);
52345234
theEmitter->emitIns_R_R(INS_stlrh, EA_4BYTE, REG_R3, REG_R15);
52355235

5236+
// ldapr Rt, [reg]
5237+
theEmitter->emitIns_R_R(INS_ldapr, EA_8BYTE, REG_R9, REG_R8);
5238+
theEmitter->emitIns_R_R(INS_ldapr, EA_4BYTE, REG_R7, REG_R10);
5239+
theEmitter->emitIns_R_R(INS_ldaprb, EA_4BYTE, REG_R5, REG_R11);
5240+
theEmitter->emitIns_R_R(INS_ldaprh, EA_4BYTE, REG_R5, REG_R12);
5241+
52365242
// ldaxr Rt, [reg]
52375243
theEmitter->emitIns_R_R(INS_ldaxr, EA_8BYTE, REG_R9, REG_R8);
52385244
theEmitter->emitIns_R_R(INS_ldaxr, EA_4BYTE, REG_R7, REG_R10);

src/coreclr/jit/codegenarmarch.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1884,17 +1884,21 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
18841884
bool addrIsInReg = tree->Addr()->isUsedFromReg();
18851885
bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);
18861886

1887+
// on arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid
1888+
// full memory barriers if mixed with STLR
1889+
bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc);
1890+
18871891
if ((ins == INS_ldrb) && addrIsInReg)
18881892
{
1889-
ins = INS_ldarb;
1893+
ins = hasRcpc ? INS_ldaprb : INS_ldarb;
18901894
}
18911895
else if ((ins == INS_ldrh) && addrIsInReg && addrIsAligned)
18921896
{
1893-
ins = INS_ldarh;
1897+
ins = hasRcpc ? INS_ldaprh : INS_ldarh;
18941898
}
18951899
else if ((ins == INS_ldr) && addrIsInReg && addrIsAligned && genIsValidIntReg(targetReg))
18961900
{
1897-
ins = INS_ldar;
1901+
ins = hasRcpc ? INS_ldapr : INS_ldar;
18981902
}
18991903
else
19001904
#endif // TARGET_ARM64

src/coreclr/jit/emitarm64.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1132,6 +1132,7 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id)
11321132
{
11331133
case INS_ldxrb:
11341134
case INS_ldarb:
1135+
case INS_ldaprb:
11351136
case INS_ldaxrb:
11361137
case INS_stxrb:
11371138
case INS_stlrb:
@@ -1145,6 +1146,7 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id)
11451146

11461147
case INS_ldxrh:
11471148
case INS_ldarh:
1149+
case INS_ldaprh:
11481150
case INS_ldaxrh:
11491151
case INS_stxrh:
11501152
case INS_stlrh:
@@ -1181,6 +1183,7 @@ emitAttr emitter::emitInsTargetRegSize(instrDesc* id)
11811183

11821184
case INS_ldxr:
11831185
case INS_ldar:
1186+
case INS_ldapr:
11841187
case INS_ldaxr:
11851188
case INS_stxr:
11861189
case INS_stlr:
@@ -1212,6 +1215,7 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
12121215
switch (ins)
12131216
{
12141217
case INS_ldarb:
1218+
case INS_ldaprb:
12151219
case INS_stlrb:
12161220
case INS_ldrb:
12171221
case INS_strb:
@@ -1223,6 +1227,7 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
12231227
break;
12241228

12251229
case INS_ldarh:
1230+
case INS_ldaprh:
12261231
case INS_stlrh:
12271232
case INS_ldrh:
12281233
case INS_strh:
@@ -1247,6 +1252,7 @@ emitAttr emitter::emitInsLoadStoreSize(instrDesc* id)
12471252
break;
12481253

12491254
case INS_ldar:
1255+
case INS_ldapr:
12501256
case INS_stlr:
12511257
case INS_ldr:
12521258
case INS_str:
@@ -4460,6 +4466,7 @@ void emitter::emitIns_R_R(
44604466
break;
44614467

44624468
case INS_ldar:
4469+
case INS_ldapr:
44634470
case INS_ldaxr:
44644471
case INS_ldxr:
44654472
case INS_stlr:
@@ -4468,9 +4475,11 @@ void emitter::emitIns_R_R(
44684475
FALLTHROUGH;
44694476

44704477
case INS_ldarb:
4478+
case INS_ldaprb:
44714479
case INS_ldaxrb:
44724480
case INS_ldxrb:
44734481
case INS_ldarh:
4482+
case INS_ldaprh:
44744483
case INS_ldaxrh:
44754484
case INS_ldxrh:
44764485
case INS_stlrb:
@@ -14206,7 +14215,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
1420614215
break;
1420714216

1420814217
case IF_LS_2A: // ldr, ldrsw, ldrb, ldrh, ldrsb, ldrsh, str, strb, strh (no immediate)
14209-
// ldar, ldarb, ldarh, ldxr, ldxrb, ldxrh,
14218+
// ldar, ldarb, ldarh, ldapr, ldaprb, ldaprh, ldxr, ldxrb, ldxrh,
1421014219
// ldaxr, ldaxrb, ldaxrh, stlr, stlrb, stlrh
1421114220

1421214221
result.insThroughput = PERFSCORE_THROUGHPUT_1C;

src/coreclr/jit/instrsarm64.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1053,6 +1053,17 @@ INST1(ldarb, "ldarb", LD, IF_LS_2A, 0x08DFFC00)
10531053
INST1(ldarh, "ldarh", LD, IF_LS_2A, 0x48DFFC00)
10541054
// ldarh Rt,[Xn] LS_2A 0100100011011111 111111nnnnnttttt 48DF FC00
10551055

1056+
1057+
INST1(ldapr, "ldapr", LD, IF_LS_2A, 0xB8BFC000)
1058+
// ldapr Rt,[Xn] LS_2A 1X11100010111111 110000nnnnnttttt B8BF C000 Rm Rt Rn ARMv8.3 LRCPC
1059+
1060+
INST1(ldaprb, "ldaprb", LD, IF_LS_2A, 0x38BFC000)
1061+
// ldaprb Rt,[Xn] LS_2A 0011100010111111 110000nnnnnttttt 38BF C000 Rm Rt Rn ARMv8.3 LRCPC
1062+
1063+
INST1(ldaprh, "ldaprh", LD, IF_LS_2A, 0x78BFC000)
1064+
// ldaprh Rt,[Xn] LS_2A 0111100010111111 110000nnnnnttttt 78BF C000 Rm Rt Rn ARMv8.3 LRCPC
1065+
1066+
10561067
INST1(ldxr, "ldxr", LD, IF_LS_2A, 0x885F7C00)
10571068
// ldxr Rt,[Xn] LS_2A 1X00100001011111 011111nnnnnttttt 885F 7C00
10581069

src/coreclr/nativeaot/Runtime/IntrinsicConstants.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ enum ARM64IntrinsicConstants
4040
ARM64IntrinsicConstants_Sha256 = 0x0100,
4141
ARM64IntrinsicConstants_Atomics = 0x0200,
4242
ARM64IntrinsicConstants_Vector64 = 0x0400,
43-
ARM64IntrinsicConstants_Vector128 = 0x0800
43+
ARM64IntrinsicConstants_Vector128 = 0x0800,
44+
ARM64IntrinsicConstants_Rcpc = 0x1000
4445
};
4546
#endif //HOST_ARM64
4647

src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1219,8 +1219,8 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags)
12191219
// *flags |= ARM64IntrinsicConstants_???;
12201220
#endif
12211221
#ifdef HWCAP_LRCPC
1222-
// if (hwCap & HWCAP_LRCPC)
1223-
// *flags |= ARM64IntrinsicConstants_???;
1222+
if (hwCap & HWCAP_LRCPC)
1223+
*flags |= ARM64IntrinsicConstants_Rcpc;
12241224
#endif
12251225
#ifdef HWCAP_PMULL
12261226
// if (hwCap & HWCAP_PMULL)

src/coreclr/pal/src/misc/jitsupport.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,9 @@ static const CpuCapability CpuCapabilities[] = {
5454
#endif
5555
//{ "jscvt", HWCAP_JSCVT },
5656
//{ "fcma", HWCAP_FCMA },
57-
//{ "lrcpc", HWCAP_LRCPC },
57+
#ifdef HWCAP_LRCPC
58+
{ "lrcpc", HWCAP_LRCPC },
59+
#endif
5860
//{ "dcpop", HWCAP_DCPOP },
5961
//{ "sha3", HWCAP_SHA3 },
6062
//{ "sm3", HWCAP_SM3 },
@@ -208,8 +210,8 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags)
208210
// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_JSCVT);
209211
#endif
210212
#ifdef HWCAP_LRCPC
211-
// if (hwCap & HWCAP_LRCPC)
212-
// flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_LRCPC);
213+
if (hwCap & HWCAP_LRCPC)
214+
flags->Set(InstructionSet_Rcpc);
213215
#endif
214216
#ifdef HWCAP_PMULL
215217
// if (hwCap & HWCAP_PMULL)
@@ -280,6 +282,9 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags)
280282

281283
if ((sysctlbyname("hw.optional.armv8_1_atomics", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
282284
flags->Set(InstructionSet_Atomics);
285+
286+
if ((sysctlbyname("hw.optional.arm.FEAT_LRCPC", &valueFromSysctl, &sz, nullptr, 0) == 0) && (valueFromSysctl != 0))
287+
flags->Set(InstructionSet_Rcpc);
283288
#endif // HAVE_SYSCTLBYNAME
284289
// CoreCLR SIMD and FP support is included in ARM64 baseline
285290
// On exceptional basis platforms may leave out support, but CoreCLR does not

src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public static class ReadyToRunInstructionSetHelper
4343
case InstructionSet.ARM64_Vector64: return null;
4444
case InstructionSet.ARM64_Vector128: return null;
4545
case InstructionSet.ARM64_Dczva: return null;
46+
case InstructionSet.ARM64_Rcpc: return null;
4647

4748
default: throw new Exception("Unknown instruction set");
4849
}

src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public enum InstructionSet
3838
ARM64_Rdm_Arm64 = InstructionSet_ARM64.Rdm_Arm64,
3939
ARM64_Sha1_Arm64 = InstructionSet_ARM64.Sha1_Arm64,
4040
ARM64_Sha256_Arm64 = InstructionSet_ARM64.Sha256_Arm64,
41+
ARM64_Rcpc = InstructionSet_ARM64.Rcpc,
4142
X64_X86Base = InstructionSet_X64.X86Base,
4243
X64_SSE = InstructionSet_X64.SSE,
4344
X64_SSE2 = InstructionSet_X64.SSE2,
@@ -136,6 +137,7 @@ public enum InstructionSet_ARM64
136137
Rdm_Arm64 = 18,
137138
Sha1_Arm64 = 19,
138139
Sha256_Arm64 = 20,
140+
Rcpc = 21,
139141
}
140142

141143
public enum InstructionSet_X64
@@ -740,6 +742,7 @@ public static IEnumerable<InstructionSetInfo> ArchitectureToValidInstructionSets
740742
yield return new InstructionSetInfo("Vector64", "", InstructionSet.ARM64_Vector64, false);
741743
yield return new InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false);
742744
yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false);
745+
yield return new InstructionSetInfo("Rcpc", "", InstructionSet.ARM64_Rcpc, false);
743746
break;
744747

745748
case TargetArchitecture.X64:

src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ instructionset64bit,ARM64 ,Dp
114114
instructionset64bit,ARM64 ,Rdm
115115
instructionset64bit,ARM64 ,Sha1
116116
instructionset64bit,ARM64 ,Sha256
117+
instructionset ,ARM64 , , , ,Rcpc ,
117118

118119
vectorinstructionset,ARM64,Vector64
119120
vectorinstructionset,ARM64,Vector128

src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,7 @@ private static class Arm64IntrinsicConstants
190190
public const int Atomics = 0x0200;
191191
public const int Vector64 = 0x0400;
192192
public const int Vector128 = 0x0800;
193+
public const int Rcpc = 0x1000;
193194

194195
public static int FromHardwareIntrinsicId(string id)
195196
{
@@ -207,6 +208,7 @@ public static int FromHardwareIntrinsicId(string id)
207208
"Atomics" => Atomics,
208209
"Vector64" => Vector64,
209210
"Vector128" => Vector128,
211+
"Rcpc" => Rcpc,
210212
_ => throw new NotSupportedException(),
211213
};
212214
}
@@ -231,6 +233,7 @@ public static int FromInstructionSetFlags(InstructionSetFlags instructionSets)
231233
InstructionSet.ARM64_Atomics => Atomics,
232234
InstructionSet.ARM64_Vector64 => Vector64,
233235
InstructionSet.ARM64_Vector128 => Vector128,
236+
InstructionSet.ARM64_Rcpc => Rcpc,
234237
_ => throw new NotSupportedException()
235238
};
236239
}

src/coreclr/tools/aot/ILCompiler/Program.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,7 @@ private int Run(string[] args)
444444
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha1");
445445
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("sha2");
446446
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("lse");
447+
optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc");
447448
}
448449

449450
optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(out var optimisticInstructionSet, out _,

src/coreclr/vm/codeman.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1592,6 +1592,11 @@ void EEJitManager::SetCpuInfo()
15921592
CPUCompileFlags.Clear(InstructionSet_Atomics);
15931593
}
15941594

1595+
if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Rcpc))
1596+
{
1597+
CPUCompileFlags.Clear(InstructionSet_Rcpc);
1598+
}
1599+
15951600
if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableArm64Crc32))
15961601
{
15971602
CPUCompileFlags.Clear(InstructionSet_Crc32);

0 commit comments

Comments
 (0)