Port SymSGD trainer #624

Merged: 7 commits, Aug 1, 2018
1 change: 1 addition & 0 deletions src/Microsoft.ML.Console/Microsoft.ML.Console.csproj
@@ -15,6 +15,7 @@
<ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
<ProjectReference Include="..\Microsoft.ML.Ensemble\Microsoft.ML.Ensemble.csproj" />
<ProjectReference Include="..\Microsoft.ML.FastTree\Microsoft.ML.FastTree.csproj" />
<ProjectReference Include="..\Microsoft.ML.HalLearners\Microsoft.ML.HalLearners.csproj" />
<ProjectReference Include="..\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
<ProjectReference Include="..\Microsoft.ML.LightGBM\Microsoft.ML.LightGBM.csproj" />
<ProjectReference Include="..\Microsoft.ML.Maml\Microsoft.ML.Maml.csproj" />
3 changes: 2 additions & 1 deletion src/Microsoft.ML.HalLearners/Microsoft.ML.HalLearners.csproj
@@ -1,8 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
<IncludeInPackage>Microsoft.ML.HalLearners</IncludeInPackage>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
[Review comment] @sfilipi (Member), Aug 1, 2018, on the "<AllowUnsafeBlocks>true</AllowUnsafeBlocks>" line:
Think I removed this. #Resolved

[Reply] Member Author:
I need it for SymSGD.

</PropertyGroup>

<ItemGroup>
850 changes: 850 additions & 0 deletions src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs

Large diffs are not rendered by default.

22 changes: 20 additions & 2 deletions src/Microsoft.ML.HalLearners/doc.xml
@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<doc>
<members>

@@ -22,6 +22,24 @@
</code>
</example>
</member>

<member name="SymSGD">
<summary>
Parallel Stochastic Gradient Descent trainer.
</summary>
<remarks>
<a href='https://en.wikipedia.org/wiki/Stochastic_gradient_descent'>Stochastic gradient descent (SGD)</a> is an iterative algorithm
that optimizes a differentiable objective function. <a href='https://arxiv.org/abs/1705.08030'>SYMSGD</a> parallelizes SGD using Sound Combiners.
</remarks>
<example>
<code language="csharp">
new SymSgdBinaryClassifier()
{
NumberOfIterations = 50,
L2Regularization = 0,
Shuffle = true
}
</code>
</example>
</member>
</members>
</doc>
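
The remarks above only gesture at what SymSGD parallelizes. For readers new to the algorithm, here is a minimal C# sketch of the sequential SGD loop for logistic loss that SymSGD distributes across threads. It is purely illustrative and is not the trainer's actual implementation, which lives in SymSgdClassificationTrainer.cs and the native SymSgdNative library.

// Illustrative only: the sequential SGD update that SymSGD parallelizes.
// SymSGD runs this loop on several threads over different parts of the data,
// then folds the thread-local models together with a "sound combiner" so the
// result matches the sequential computation (see arXiv:1705.08030).
static float[] TrainSgd(float[][] features, float[] labels, float learningRate, float l2, int numberOfIterations)
{
    int dim = features[0].Length;
    var weights = new float[dim];
    for (int pass = 0; pass < numberOfIterations; pass++)
    {
        for (int i = 0; i < features.Length; i++)
        {
            float score = 0;
            for (int j = 0; j < dim; j++)
                score += weights[j] * features[i][j];
            float prob = 1f / (1f + (float)System.Math.Exp(-score)); // sigmoid
            float err = prob - labels[i];                            // labels in {0, 1}
            for (int j = 0; j < dim; j++)
                weights[j] -= learningRate * (err * features[i][j] + l2 * weights[j]);
        }
    }
    return weights;
}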
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
Expand Down
134 changes: 134 additions & 0 deletions src/Microsoft.ML/CSharpApi.cs
@@ -838,6 +838,18 @@ public void Add(Microsoft.ML.Trainers.StochasticGradientDescentBinaryClassifier
_jsonNodes.Add(Serialize("Trainers.StochasticGradientDescentBinaryClassifier", input, output));
}

public Microsoft.ML.Trainers.SymSgdBinaryClassifier.Output Add(Microsoft.ML.Trainers.SymSgdBinaryClassifier input)
{
var output = new Microsoft.ML.Trainers.SymSgdBinaryClassifier.Output();
Add(input, output);
return output;
}

public void Add(Microsoft.ML.Trainers.SymSgdBinaryClassifier input, Microsoft.ML.Trainers.SymSgdBinaryClassifier.Output output)
{
_jsonNodes.Add(Serialize("Trainers.SymSgdBinaryClassifier", input, output));
}

public Microsoft.ML.Transforms.ApproximateBootstrapSampler.Output Add(Microsoft.ML.Transforms.ApproximateBootstrapSampler input)
{
var output = new Microsoft.ML.Transforms.ApproximateBootstrapSampler.Output();
@@ -9761,6 +9773,128 @@ public StochasticGradientDescentBinaryClassifierPipelineStep(Output output)
}
}

namespace Trainers
{

/// <summary>
/// Train a symbolic SGD model.
/// </summary>
public sealed partial class SymSgdBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
{


/// <summary>
/// Degree of lock-free parallelism. Determinism not guaranteed. Multi-threading is not supported currently.
/// </summary>
public int? NumberOfThreads { get; set; }

/// <summary>
/// Number of passes over the data.
/// </summary>
[TlcModule.SweepableDiscreteParamAttribute("NumberOfIterations", new object[]{1, 5, 10, 20, 30, 40, 50})]
public int NumberOfIterations { get; set; } = 50;

/// <summary>
/// Tolerance for difference in average loss in consecutive passes.
/// </summary>
public float Tolerance { get; set; } = 0.0001f;

/// <summary>
/// Learning rate
/// </summary>
[TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[]{"<Auto>", 10f, 1f, 0.1f, 0.01f, 0.001f})]
public float? LearningRate { get; set; }

/// <summary>
/// L2 regularization
/// </summary>
[TlcModule.SweepableDiscreteParamAttribute("L2Regularization", new object[]{0f, 1E-05f, 1E-05f, 1E-06f, 1E-07f})]
public float L2Regularization { get; set; }

/// <summary>
/// The number of iterations each thread learns a local model until combining it with the global model. Low value means more updated global model and high value means less cache traffic.
/// </summary>
[TlcModule.SweepableDiscreteParamAttribute("UpdateFrequency", new object[]{"<Auto>", 5, 20})]
public int? UpdateFrequency { get; set; }

/// <summary>
/// The acceleration memory budget in MB
/// </summary>
public long MemorySize { get; set; } = 1024;

/// <summary>
/// Shuffle data?
/// </summary>
public bool Shuffle { get; set; } = true;

/// <summary>
/// Apply weight to the positive class, for imbalanced data
/// </summary>
public float PositiveInstanceWeight { get; set; } = 1f;

/// <summary>
/// Column to use for labels
/// </summary>
public string LabelColumn { get; set; } = "Label";

/// <summary>
/// The data to be used for training
/// </summary>
public Var<Microsoft.ML.Runtime.Data.IDataView> TrainingData { get; set; } = new Var<Microsoft.ML.Runtime.Data.IDataView>();

/// <summary>
/// Column to use for features
/// </summary>
public string FeatureColumn { get; set; } = "Features";

/// <summary>
/// Normalize option for the feature column
/// </summary>
public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto;

/// <summary>
/// Whether learner should cache input training data
/// </summary>
public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto;


public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput
{
/// <summary>
/// The trained model
/// </summary>
public Var<Microsoft.ML.Runtime.EntryPoints.IPredictorModel> PredictorModel { get; set; } = new Var<Microsoft.ML.Runtime.EntryPoints.IPredictorModel>();

}
public Var<IDataView> GetInputData() => TrainingData;

public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
{
if (previousStep != null)
{
if (!(previousStep is ILearningPipelineDataStep dataStep))
{
throw new InvalidOperationException($"{ nameof(SymSgdBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input.");
}

TrainingData = dataStep.Data;
}
Output output = experiment.Add(this);
return new SymSgdBinaryClassifierPipelineStep(output);
}

private class SymSgdBinaryClassifierPipelineStep : ILearningPipelinePredictorStep
{
public SymSgdBinaryClassifierPipelineStep(Output output)
{
Model = output.PredictorModel;
}

public Var<IPredictorModel> Model { get; }
}
}
}

namespace Transforms
{

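Putting the generated class to work: SymSgdBinaryClassifier implements ILearningPipelineItem, so it drops into the v0.x LearningPipeline like the other trainers. A hedged usage sketch follows; the loader call and the SentimentData/SentimentPrediction row types are hypothetical stand-ins, and only SymSgdBinaryClassifier and its properties come from this diff.

// Hypothetical pipeline around the new trainer; only SymSgdBinaryClassifier
// and its properties are taken from this change.
var pipeline = new LearningPipeline();
pipeline.Add(new TextLoader("train.tsv").CreateFrom<SentimentData>());  // hypothetical loader/type
pipeline.Add(new SymSgdBinaryClassifier
{
    NumberOfIterations = 50,  // passes over the data
    LearningRate = 0.01f,     // leave null to let the trainer choose
    L2Regularization = 0f,
    Shuffle = true
});
var model = pipeline.Train<SentimentData, SentimentPrediction>();       // hypothetical row types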
1 change: 1 addition & 0 deletions src/Native/CMakeLists.txt
@@ -182,3 +182,4 @@ add_subdirectory(CpuMathNative)
add_subdirectory(FastTreeNative)
add_subdirectory(LdaNative)
add_subdirectory(FactorizationMachineNative)
add_subdirectory(SymSgdNative)
32 changes: 32 additions & 0 deletions src/Native/SymSgdNative/CMakeLists.txt
@@ -0,0 +1,32 @@
project (SymSgdNative)

set(SOURCES
SymSgdNative.cpp
)

if(WIN32)
find_library(MKL_LIBRARY MklImports HINTS ${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes/win-x64/native)
else()
list(APPEND SOURCES ${VERSION_FILE_PATH})
if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
message("Linking SymSgdNative with MKL on macOS.")
find_library(MKL_LIBRARY libMklImports.dylib HINTS "${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes/osx-x64/native")
else()
message("Linking SymSgdNative with MKL on linux.")
find_library(MKL_LIBRARY libMklImports.so HINTS ${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes/linux-x64/native)
SET(CMAKE_SKIP_BUILD_RPATH FALSE)
SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
SET(CMAKE_INSTALL_RPATH "${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes")
[Review comment] @eerhardt (Member), Aug 1, 2018:
I'm concerned that this may work for unit tests, but it may not work on an end-user's machine. We will have to test this as an end user to verify. #Resolved

[Reply] Member Author:
Yep, I will make sure to do this as part of the bug bash.

SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
SET(CMAKE_INSTALL_RPATH "${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes")
endif()
endif()

add_library(SymSgdNative SHARED ${SOURCES} ${RESOURCES})
target_link_libraries(SymSgdNative PUBLIC ${MKL_LIBRARY})

if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
set_target_properties(SymSgdNative PROPERTIES INSTALL_RPATH "${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes/osx-x64/native")
endif()

install_library_and_symbols (SymSgdNative)
9 changes: 9 additions & 0 deletions src/Native/SymSgdNative/Macros.h
@@ -0,0 +1,9 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#pragma once
#define MIN(__X__, __Y__) (((__X__) > (__Y__)) ? (__Y__) : (__X__))

// This is a very large prime number used for permutation
#define VERYLARGEPRIME 961748941
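
The diff doesn't show how SymSgdNative.cpp consumes VERYLARGEPRIME, but the comment points at a standard trick: for any array length n smaller than the prime, gcd(prime, n) = 1, so multiplying indices by the prime modulo n is a bijection and yields a cheap fixed pseudo-random permutation. Below is a hedged C# sketch of that idea, an assumption about the intended use rather than the native code's actual logic.

// Assumption: illustrates the usual large-prime permutation trick, not the
// native implementation. Since 961748941 is prime, it is coprime with any
// smaller n, so i -> (i * prime) % n visits each index in [0, n) exactly once.
const long VeryLargePrime = 961748941;

static int[] PrimePermutation(int n)
{
    var permutation = new int[n];
    for (int i = 0; i < n; i++)
        permutation[i] = (int)((i * VeryLargePrime) % n);
    return permutation;
}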
31 changes: 31 additions & 0 deletions src/Native/SymSgdNative/SparseBLAS.h
@@ -0,0 +1,31 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#pragma once
#include "../Stdafx.h"

extern "C" float cblas_sdot(const int vecSize, const float* denseVecX, const int incX, const float* denseVecY, const int incY);
extern "C" float cblas_sdoti(const int sparseVecSize, const float* sparseVecValues, const int* sparseVecIndices, float* denseVec);
extern "C" void cblas_saxpy(const int vecSize, const float coef, const float* denseVecX, const int incX, float* denseVecY, const int incY);
extern "C" void cblas_saxpyi(const int sparseVecSize, const float coef, const float* sparseVecValues, const int* sparseVecIndices, float* denseVec);

float SDOT(const int vecSize, const float* denseVecX, const float* denseVecY)
{
return cblas_sdot(vecSize, denseVecX, 1, denseVecY, 1);
}

float SDOTI(const int sparseVecSize, const int* sparseVecIndices, const float* sparseVecValues, float* denseVec)
{
return cblas_sdoti(sparseVecSize, sparseVecValues, sparseVecIndices, denseVec);
}

void SAXPY(const int vecSize, const float* denseVecX, float* denseVecY, float coef)
{
cblas_saxpy(vecSize, coef, denseVecX, 1, denseVecY, 1);
}

void SAXPYI(const int sparseVecSize, const int* sparseVecIndices, const float* sparseVecValues, float* denseVec, float coef)
{
cblas_saxpyi(sparseVecSize, coef, sparseVecValues, sparseVecIndices, denseVec);
}
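
For reference, the two sparse entry points wrapped above have simple semantics: cblas_sdoti is a sparse-dense dot product and cblas_saxpyi is a scaled sparse accumulate into a dense vector. A managed sketch of what they compute (illustrative; the actual work is done inside MKL):

// Reference semantics for the sparse MKL calls wrapped above (illustrative).
// SDOTI: dot product of a sparse vector (values + indices) with a dense vector.
static float Sdoti(float[] values, int[] indices, float[] dense)
{
    float result = 0;
    for (int k = 0; k < values.Length; k++)
        result += values[k] * dense[indices[k]];
    return result;
}

// SAXPYI: dense[indices[k]] += coef * values[k], a scaled sparse accumulate.
static void Saxpyi(float coef, float[] values, int[] indices, float[] dense)
{
    for (int k = 0; k < values.Length; k++)
        dense[indices[k]] += coef * values[k];
}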