Skip to content

Fix genPutArgStkFieldList for SIMD12 #88920

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jul 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5313,14 +5313,11 @@ void CodeGen::genStoreLclTypeSimd12(GenTreeLclVarCommon* treeNode)
{
// Simply use mov if we move a SIMD12 reg to another SIMD12 reg
assert(GetEmitter()->isVectorRegister(tgtReg));

inst_Mov(treeNode->TypeGet(), tgtReg, dataReg, /* canSkip */ true);
}
else
{
// Need an additional integer register to extract upper 4 bytes from data.
regNumber tmpReg = treeNode->GetSingleTempReg();
GetEmitter()->emitStoreSimd12ToLclOffset(varNum, offs, dataReg, tmpReg);
GetEmitter()->emitStoreSimd12ToLclOffset(varNum, offs, dataReg, treeNode);
}
genUpdateLifeStore(treeNode, tgtReg, varDsc);
}
Expand Down
5 changes: 2 additions & 3 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -815,10 +815,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* treeNode)
assert(compMacOsArm64Abi() || treeNode->GetStackByteSize() % TARGET_POINTER_SIZE == 0);

#ifdef TARGET_ARM64
if (compMacOsArm64Abi() && (treeNode->GetStackByteSize() == 12))
if (treeNode->GetStackByteSize() == 12)
{
regNumber tmpReg = treeNode->GetSingleTempReg();
GetEmitter()->emitStoreSimd12ToLclOffset(varNumOut, argOffsetOut, srcReg, tmpReg);
GetEmitter()->emitStoreSimd12ToLclOffset(varNumOut, argOffsetOut, srcReg, treeNode);
argOffsetOut += 12;
}
else
Expand Down
9 changes: 3 additions & 6 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1868,13 +1868,10 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk, unsigned outArg
// Emit store instructions to store the registers produced by the GT_FIELD_LIST into the outgoing
// argument area.

#if defined(FEATURE_SIMD) && defined(TARGET_ARM64)
// storing of TYP_SIMD12 (i.e. Vector3) argument.
if (compMacOsArm64Abi() && (type == TYP_SIMD12))
#if defined(FEATURE_SIMD)
if (type == TYP_SIMD12)
{
// Need an additional integer register to extract upper 4 bytes from data.
regNumber tmpReg = nextArgNode->GetSingleTempReg();
GetEmitter()->emitStoreSimd12ToLclOffset(outArgVarNum, thisFieldOffset, reg, tmpReg);
GetEmitter()->emitStoreSimd12ToLclOffset(outArgVarNum, thisFieldOffset, reg, nextArgNode);
}
else
#endif // FEATURE_SIMD
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -2714,6 +2714,10 @@ class emitter
void emitMarkStackLvl(unsigned stackLevel);
#endif

#if defined(FEATURE_SIMD)
void emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider);
#endif // FEATURE_SIMD

int emitNextRandomNop();

//
Expand Down
28 changes: 28 additions & 0 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16767,4 +16767,32 @@ bool emitter::IsOptimizableLdrToMov(

return true;
}

#if defined(FEATURE_SIMD)
//-----------------------------------------------------------------------------------
// emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset.
//
// Arguments:
// varNum - the variable on the stack to use as a base;
// offset - the offset from the varNum;
// dataReg - the src reg with SIMD12 value;
// tmpRegProvider - a tree to grab a tmp reg from if needed.
//
void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider)
{
assert(varNum != BAD_VAR_NUM);
assert(isVectorRegister(dataReg));

// store lower 8 bytes
emitIns_S_R(INS_str, EA_8BYTE, dataReg, varNum, offset);

// Extract upper 4-bytes from data
regNumber tmpReg = tmpRegProvider->GetSingleTempReg();
emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);

// 4-byte write
emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8);
}
#endif // FEATURE_SIMD

#endif // defined(TARGET_ARM64)
26 changes: 0 additions & 26 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -1003,30 +1003,4 @@ inline bool emitIsLoadConstant(instrDesc* jmp)
(jmp->idInsFmt() == IF_LARGELDC));
}

#if defined(FEATURE_SIMD)
//-----------------------------------------------------------------------------------
// emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset.
//
// Arguments:
// varNum - the variable on the stack to use as a base;
// offset - the offset from the varNum;
// dataReg - the src reg with SIMD12 value;
// tmpReg - a tmp reg to use for the write, can be general or float.
//
void emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, regNumber tmpReg)
{
assert(varNum != BAD_VAR_NUM);
assert(isVectorRegister(dataReg));

// store lower 8 bytes
emitIns_S_R(INS_str, EA_8BYTE, dataReg, varNum, offset);

// Extract upper 4-bytes from data
emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2);

// 4-byte write
emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8);
}
#endif // FEATURE_SIMD

#endif // TARGET_ARM64
37 changes: 37 additions & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5648,6 +5648,43 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg)
emitAdjustStackDepthPushPop(ins);
}

#if defined(FEATURE_SIMD)
//-----------------------------------------------------------------------------------
// emitStoreSimd12ToLclOffset: store SIMD12 value from dataReg to varNum+offset.
//
// Arguments:
// varNum - the variable on the stack to use as a base;
// offset - the offset from the varNum;
// dataReg - the src reg with SIMD12 value;
// tmpRegProvider - a tree to grab a tmp reg from if needed.
//
void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider)
{
assert(varNum != BAD_VAR_NUM);
assert(isFloatReg(dataReg));

// Store lower 8 bytes
emitIns_S_R(INS_movsd_simd, EA_8BYTE, dataReg, varNum, offset);

if (emitComp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
// Extract and store upper 4 bytes
emitIns_S_R_I(INS_extractps, EA_16BYTE, varNum, offset + 8, dataReg, 2);
}
else
{
regNumber tmpReg = tmpRegProvider->GetSingleTempReg();
assert(isFloatReg(tmpReg));

// Extract upper 4 bytes from data
emitIns_R_R(INS_movhlps, EA_16BYTE, tmpReg, dataReg);

// Store upper 4 bytes
emitIns_S_R(INS_movss, EA_4BYTE, tmpReg, varNum, offset + 8);
}
}
#endif // FEATURE_SIMD

/*****************************************************************************
*
* Add an instruction referencing a register and a constant.
Expand Down
12 changes: 4 additions & 8 deletions src/coreclr/jit/lsraarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,15 +425,11 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* argNode)
srcCount++;

#if defined(FEATURE_SIMD)
if (compMacOsArm64Abi())
if (use.GetType() == TYP_SIMD12)
{
if (use.GetType() == TYP_SIMD12)
{
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
// To assemble the vector properly we would need an additional int register.
// The other platforms can write it as 16-byte using 1 write.
buildInternalIntRegisterDefForNode(use.GetNode());
}
// Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
// To assemble the vector properly we would need an additional int register.
buildInternalIntRegisterDefForNode(use.GetNode());
}
#endif // FEATURE_SIMD
}
Expand Down
20 changes: 15 additions & 5 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1651,12 +1651,22 @@ int LinearScan::BuildPutArgStk(GenTreePutArgStk* putArgStk)
#endif // TARGET_X86

#if defined(FEATURE_SIMD)
// Note that we need to check the field type, not the type of the node. This is because the
// field type will be TYP_SIMD12 whereas the node type might be TYP_SIMD16 for lclVar, where
// we "round up" to 16.
if ((fieldType == TYP_SIMD12) && (simdTemp == nullptr))
if (fieldType == TYP_SIMD12)
{
simdTemp = buildInternalFloatRegisterDefForNode(putArgStk);
// Note that we need to check the field type, not the type of the node. This is because the
// field type will be TYP_SIMD12 whereas the node type might be TYP_SIMD16 for lclVar, where
// we "round up" to 16.
if (simdTemp == nullptr)
{
simdTemp = buildInternalFloatRegisterDefForNode(putArgStk);
}

if (!compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
// To store SIMD12 without SSE4.1 (extractps) we will need
// a temp xmm reg to do the shuffle.
buildInternalFloatRegisterDefForNode(use.GetNode());
}
}
#endif // defined(FEATURE_SIMD)

Expand Down
67 changes: 67 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_79118/Runtime_79118.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Numerics;
using System.Runtime.CompilerServices;
using Xunit;

public readonly struct Ray
{
public readonly Vector3 Origin;
public readonly Vector3 Direction;

public Ray(Vector3 origin, Vector3 direction)
{
Origin = origin;
Direction = direction;
}
}

public class RayTracer
{
private static IEnumerable<object> hittables;

static RayTracer()
{
var list = new List<object>();
list.Add(new object());
hittables = list;
}

[Fact]
public static int TestEntryPoint()
{
Ray r = GetRay();
Consume(r.Direction);
traceRay(r);
return 100;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static void Consume(object o)
{
var _ = o.ToString();
}

private static Ray GetRay()
{
return new Ray(Vector3.Zero, -Vector3.UnitY);
}

private static Vector3 traceRay(Ray r)
{
foreach (object h in hittables)
{
TestHit(r);
return Vector3.One;
}
return Vector3.Zero;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static void TestHit(Ray ray)
{
return;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>