-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Add AVX and FMA intrinsics in Factorization Machine #3785
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ | |
|
||
namespace Microsoft.ML.Internal.CpuMath | ||
{ | ||
[BestFriend] | ||
Review comment: Nice!
Review comment: Why is this change necessary?
Reply: Hi Eric! Access to CpuMathUtils from FactorizationMachineInterface gives an error when targeting netcoreapp3.0. See issue #3654. |
||
internal static partial class CpuMathUtils | ||
{ | ||
// The count of bytes in Vector128<T>, corresponding to _cbAlign in AlignedArray | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
using System.Runtime.CompilerServices; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing license. And why do we need this file? |
||
using System.Runtime.InteropServices; | ||
using System.Runtime.Intrinsics; | ||
using System.Runtime.Intrinsics.X86; | ||
using System.Security; | ||
using Microsoft.ML.Internal.CpuMath; | ||
using Microsoft.ML.Runtime; | ||
|
||
namespace Microsoft.ML.Trainers | ||
{ | ||
internal static unsafe class FieldAwareFactorizationMachineInterface | ||
{ | ||
internal const string NativePath = "FactorizationMachineNative"; | ||
public const int CbAlign = 16; | ||
|
||
private static bool Compat(AlignedArray a) | ||
{ | ||
Contracts.AssertValue(a); | ||
Contracts.Assert(a.Size > 0); | ||
return a.CbAlign == CbAlign; | ||
} | ||
|
||
private static unsafe float* Ptr(AlignedArray a, float* p) | ||
{ | ||
Contracts.AssertValue(a); | ||
float* q = p + a.GetBase((long)p); | ||
Contracts.Assert(((long)q & (CbAlign - 1)) == 0); | ||
return q; | ||
} | ||
|
||
[DllImport(NativePath), SuppressUnmanagedCodeSecurity] | ||
public static extern void CalculateIntermediateVariablesNativeSSE(int fieldCount, int latentDim, int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, | ||
float* /*const*/ featureValues, float* /*const*/ linearWeights, float* /*const*/ latentWeights, float* latentSum, float* response); | ||
|
||
[DllImport(NativePath), SuppressUnmanagedCodeSecurity] | ||
public static extern void CalculateIntermediateVariablesNativeAVX(int fieldCount, int latentDim, int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, | ||
float* /*const*/ featureValues, float* /*const*/ linearWeights, float* /*const*/ latentWeights, float* latentSum, float* response); | ||
|
||
[DllImport(NativePath), SuppressUnmanagedCodeSecurity] | ||
public static extern void CalculateIntermediateVariablesNativeFMA(int fieldCount, int latentDim, int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, | ||
float* /*const*/ featureValues, float* /*const*/ linearWeights, float* /*const*/ latentWeights, float* latentSum, float* response); | ||
|
||
[DllImport(NativePath), SuppressUnmanagedCodeSecurity] | ||
public static extern void CalculateGradientAndUpdateNativeSSE(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim, float weight, | ||
int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, float* /*const*/ featureValues, float* /*const*/ latentSum, float slope, | ||
float* linearWeights, float* latentWeights, float* linearAccumulatedSquaredGrads, float* latentAccumulatedSquaredGrads); | ||
|
||
[DllImport(NativePath), SuppressUnmanagedCodeSecurity] | ||
public static extern void CalculateGradientAndUpdateNativeAVX(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim, float weight, | ||
int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, float* /*const*/ featureValues, float* /*const*/ latentSum, float slope, | ||
float* linearWeights, float* latentWeights, float* linearAccumulatedSquaredGrads, float* latentAccumulatedSquaredGrads); | ||
|
||
[DllImport(NativePath), SuppressUnmanagedCodeSecurity] | ||
public static extern void CalculateGradientAndUpdateNativeFMA(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim, float weight, | ||
int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, float* /*const*/ featureValues, float* /*const*/ latentSum, float slope, | ||
float* linearWeights, float* latentWeights, float* linearAccumulatedSquaredGrads, float* latentAccumulatedSquaredGrads); | ||
|
||
public static void CalculateIntermediateVariables(int fieldCount, int latentDim, int count, int[] fieldIndices, int[] featureIndices, float[] featureValues, | ||
float[] linearWeights, AlignedArray latentWeights, AlignedArray latentSum, ref float response) | ||
{ | ||
Contracts.AssertNonEmpty(fieldIndices); | ||
Contracts.AssertNonEmpty(featureValues); | ||
Contracts.AssertNonEmpty(featureIndices); | ||
Contracts.AssertNonEmpty(linearWeights); | ||
Contracts.Assert(Compat(latentWeights)); | ||
Contracts.Assert(Compat(latentSum)); | ||
|
||
unsafe | ||
{ | ||
fixed (int* pf = &fieldIndices[0]) | ||
fixed (int* pi = &featureIndices[0]) | ||
fixed (float* px = &featureValues[0]) | ||
fixed (float* pw = &linearWeights[0]) | ||
fixed (float* pv = &latentWeights.Items[0]) | ||
fixed (float* pq = &latentSum.Items[0]) | ||
fixed (float* pr = &response) | ||
{ | ||
if (Fma.IsSupported) | ||
CalculateIntermediateVariablesNativeFMA(fieldCount, latentDim, count, pf, pi, px, pw, Ptr(latentWeights, pv), Ptr(latentSum, pq), pr); | ||
else if (Avx.IsSupported) | ||
CalculateIntermediateVariablesNativeAVX(fieldCount, latentDim, count, pf, pi, px, pw, Ptr(latentWeights, pv), Ptr(latentSum, pq), pr); | ||
else | ||
CalculateIntermediateVariablesNativeSSE(fieldCount, latentDim, count, pf, pi, px, pw, Ptr(latentWeights, pv), Ptr(latentSum, pq), pr); | ||
} | ||
} | ||
} | ||
|
||
public static void CalculateGradientAndUpdate(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim, | ||
float weight, int count, int[] fieldIndices, int[] featureIndices, float[] featureValues, AlignedArray latentSum, float slope, | ||
float[] linearWeights, AlignedArray latentWeights, float[] linearAccumulatedSquaredGrads, AlignedArray latentAccumulatedSquaredGrads) | ||
{ | ||
Contracts.AssertNonEmpty(fieldIndices); | ||
Contracts.AssertNonEmpty(featureIndices); | ||
Contracts.AssertNonEmpty(featureValues); | ||
Contracts.Assert(Compat(latentSum)); | ||
Contracts.AssertNonEmpty(linearWeights); | ||
Contracts.Assert(Compat(latentWeights)); | ||
Contracts.AssertNonEmpty(linearAccumulatedSquaredGrads); | ||
Contracts.Assert(Compat(latentAccumulatedSquaredGrads)); | ||
|
||
unsafe | ||
{ | ||
fixed (int* pf = &fieldIndices[0]) | ||
fixed (int* pi = &featureIndices[0]) | ||
fixed (float* px = &featureValues[0]) | ||
fixed (float* pq = &latentSum.Items[0]) | ||
fixed (float* pw = &linearWeights[0]) | ||
fixed (float* pv = &latentWeights.Items[0]) | ||
fixed (float* phw = &linearAccumulatedSquaredGrads[0]) | ||
fixed (float* phv = &latentAccumulatedSquaredGrads.Items[0]) | ||
{ | ||
if (Fma.IsSupported) | ||
CalculateGradientAndUpdateNativeFMA(lambdaLinear, lambdaLatent, learningRate, fieldCount, latentDim, weight, count, pf, pi, px, | ||
Ptr(latentSum, pq), slope, pw, Ptr(latentWeights, pv), phw, Ptr(latentAccumulatedSquaredGrads, phv)); | ||
else if (Avx.IsSupported) | ||
CalculateGradientAndUpdateNativeAVX(lambdaLinear, lambdaLatent, learningRate, fieldCount, latentDim, weight, count, pf, pi, px, | ||
Ptr(latentSum, pq), slope, pw, Ptr(latentWeights, pv), phw, Ptr(latentAccumulatedSquaredGrads, phv)); | ||
else | ||
CalculateGradientAndUpdateNativeSSE(lambdaLinear, lambdaLatent, learningRate, fieldCount, latentDim, weight, count, pf, pi, px, | ||
Ptr(latentSum, pq), slope, pw, Ptr(latentWeights, pv), phw, Ptr(latentAccumulatedSquaredGrads, phv)); | ||
} | ||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,11 +30,11 @@ private static bool Compat(AlignedArray a) | |
} | ||
|
||
[DllImport(NativePath), SuppressUnmanagedCodeSecurity] | ||
public static extern void CalculateIntermediateVariablesNative(int fieldCount, int latentDim, int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, | ||
public static extern void CalculateIntermediateVariablesNativeSSE(int fieldCount, int latentDim, int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CalculateIntermediateVariablesNativeSSE ---> CalculateIntermediateVariablesSse |
||
float* /*const*/ featureValues, float* /*const*/ linearWeights, float* /*const*/ latentWeights, float* latentSum, float* response); | ||
|
||
[DllImport(NativePath), SuppressUnmanagedCodeSecurity] | ||
public static extern void CalculateGradientAndUpdateNative(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim, float weight, | ||
public static extern void CalculateGradientAndUpdateNativeSSE(float lambdaLinear, float lambdaLatent, float learningRate, int fieldCount, int latentDim, float weight, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
CalculateGradientAndUpdateNativeSSE ---> CalculateGradientAndUpdateSse. The same comment applies to other similar places. |
||
int count, int* /*const*/ fieldIndices, int* /*const*/ featureIndices, float* /*const*/ featureValues, float* /*const*/ latentSum, float slope, | ||
float* linearWeights, float* latentWeights, float* linearAccumulatedSquaredGrads, float* latentAccumulatedSquaredGrads); | ||
|
||
|
@@ -57,7 +57,7 @@ public static void CalculateIntermediateVariables(int fieldCount, int latentDim, | |
fixed (float* pv = &latentWeights.Items[0]) | ||
fixed (float* pq = &latentSum.Items[0]) | ||
fixed (float* pr = &response) | ||
CalculateIntermediateVariablesNative(fieldCount, latentDim, count, pf, pi, px, pw, Ptr(latentWeights, pv), Ptr(latentSum, pq), pr); | ||
CalculateIntermediateVariablesNativeSSE(fieldCount, latentDim, count, pf, pi, px, pw, Ptr(latentWeights, pv), Ptr(latentSum, pq), pr); | ||
} | ||
} | ||
|
||
|
@@ -84,10 +84,10 @@ public static void CalculateGradientAndUpdate(float lambdaLinear, float lambdaLa | |
fixed (float* pv = &latentWeights.Items[0]) | ||
fixed (float* phw = &linearAccumulatedSquaredGrads[0]) | ||
fixed (float* phv = &latentAccumulatedSquaredGrads.Items[0]) | ||
CalculateGradientAndUpdateNative(lambdaLinear, lambdaLatent, learningRate, fieldCount, latentDim, weight, count, pf, pi, px, | ||
CalculateGradientAndUpdateNativeSSE(lambdaLinear, lambdaLatent, learningRate, fieldCount, latentDim, weight, count, pf, pi, px, | ||
Ptr(latentSum, pq), slope, pw, Ptr(latentWeights, pv), phw, Ptr(latentAccumulatedSquaredGrads, phv)); | ||
} | ||
|
||
} | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,8 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<TargetFramework>netstandard2.0</TargetFramework> | ||
<TargetFramework Condition="'$(UseIntrinsics)' != 'true'">netstandard2.0</TargetFramework> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @eerhardt, could you please take a look? I am not quite familiar with the build settings. Thank you! |
||
<TargetFrameworks Condition="'$(UseIntrinsics)' == 'true'">netstandard2.0;netcoreapp3.0</TargetFrameworks> | ||
Review comment: The NuGet package is not going to work the way this is currently written. See #534 for more information. Basically, once you introduce a single assembly in [inline text lost in extraction; presumably a target-framework-specific folder of the package, see #534] … It would probably be easiest if the new AVX/FMA were put in the CpuMath assembly/package instead. |
||
<IncludeInPackage>Microsoft.ML</IncludeInPackage> | ||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks> | ||
</PropertyGroup> | ||
|
@@ -10,6 +11,17 @@ | |
<ProjectReference Include="..\Microsoft.ML.Core\Microsoft.ML.Core.csproj" /> | ||
<ProjectReference Include="..\Microsoft.ML.CpuMath\Microsoft.ML.CpuMath.csproj" /> | ||
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" /> | ||
<!-- Workaround https://github.com/dotnet/project-system/issues/935 --> | ||
<None Include="**/*.cs" /> | ||
</ItemGroup> | ||
|
||
<ItemGroup Condition="'$(TargetFramework)' == 'netcoreapp3.0'"> | ||
<Compile Remove="FactorizationMachine/FactorizationMachineInterface.netstandard.cs" /> | ||
<PackageReference Include="System.Runtime.CompilerServices.Unsafe" Version="$(SystemRuntimeCompilerServices)" /> | ||
</ItemGroup> | ||
|
||
<ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'"> | ||
<Compile Remove="FactorizationMachine/FactorizationMachineInterface.netcoreapp.cs" /> | ||
</ItemGroup> | ||
|
||
</Project> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,16 @@ | ||
project (FactorizationMachineNative) | ||
|
||
set(SOURCES | ||
FactorizationMachineCore.cpp | ||
FactorizationMachineCoreSSE.cpp | ||
Review comment: Are we compiling all of the source files? Are all of them cross-platform?
Reply: Yes, we are compiling all the source files. Not all of them are cross-platform. Avx.IsSupported (and similar) dispatches the code as needed. |
||
FactorizationMachineCoreAVX.cpp | ||
FactorizationMachineCoreFMA.cpp | ||
) | ||
|
||
if(WIN32) | ||
else() | ||
list(APPEND SOURCES ${VERSION_FILE_PATH}) | ||
set_property(SOURCE FactorizationMachineCoreAVX.cpp APPEND_STRING PROPERTY COMPILE_FLAGS " -mavx") | ||
set_property(SOURCE FactorizationMachineCoreFMA.cpp APPEND_STRING PROPERTY COMPILE_FLAGS " -mfma") | ||
endif() | ||
|
||
add_library(FactorizationMachineNative SHARED ${SOURCES} ${RESOURCES}) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any reason we need this line? Could you add a comment to add some details?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi Wei-Sheng! Thanks for reviewing. It’s not needed for now and I will remove it.