dotnet · codemzs · Aug 1, 2018 · Aug 1, 2018 · Aug 1, 2018 · Aug 1, 2018
diff --git a/src/Microsoft.ML.Console/Microsoft.ML.Console.csproj b/src/Microsoft.ML.Console/Microsoft.ML.Console.csproj
@@ -15,6 +15,7 @@
     <ProjectReference Include="..\Microsoft.ML.Data\Microsoft.ML.Data.csproj" />
     <ProjectReference Include="..\Microsoft.ML.Ensemble\Microsoft.ML.Ensemble.csproj" />
     <ProjectReference Include="..\Microsoft.ML.FastTree\Microsoft.ML.FastTree.csproj" />
+    <ProjectReference Include="..\Microsoft.ML.HalLearners\Microsoft.ML.HalLearners.csproj" />
     <ProjectReference Include="..\Microsoft.ML.KMeansClustering\Microsoft.ML.KMeansClustering.csproj" />
     <ProjectReference Include="..\Microsoft.ML.LightGBM\Microsoft.ML.LightGBM.csproj" />
     <ProjectReference Include="..\Microsoft.ML.Maml\Microsoft.ML.Maml.csproj" />

diff --git a/src/Microsoft.ML.HalLearners/Microsoft.ML.HalLearners.csproj b/src/Microsoft.ML.HalLearners/Microsoft.ML.HalLearners.csproj
@@ -1,8 +1,9 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
     <TargetFramework>netstandard2.0</TargetFramework>
     <IncludeInPackage>Microsoft.ML.HalLearners</IncludeInPackage>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
 
   <ItemGroup>

diff --git a/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs b/src/Microsoft.ML.HalLearners/SymSgdClassificationTrainer.cs
diff --git a/src/Microsoft.ML.HalLearners/doc.xml b/src/Microsoft.ML.HalLearners/doc.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
 <doc>
   <members>
 
@@ -22,6 +22,24 @@
         </code>
       </example>
     </member>
-
+    <member name="SymSGD">
+      <summary>
+        Parallel Stochastic Gradient Descent trainer.
+      </summary>
+      <remarks>
+        <a href='https://en.wikipedia.org/wiki/Stochastic_gradient_descent'>Stochastic gradient descent (SGD)</a> is an iterative algorithm
+        that optimizes a differentiable objective function. <a href='https://arxiv.org/abs/1705.08030'>SYMSGD</a> parallelizes SGD using Sound Combiners.
+      </remarks>
+      <example>
+        <code language="csharp">
+          new SymSgdBinaryClassifier()
+          {
+            NumberOfIterations = 50,
+            L2Regularization = 0,
+            Shuffle = true
+          }
+        </code>
+      </example>
+    </member>
   </members>
 </doc>
diff --git a/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj b/src/Microsoft.ML.StandardLearners/Microsoft.ML.StandardLearners.csproj
@@ -1,4 +1,4 @@
-<Project Sdk="Microsoft.NET.Sdk">
+<Project Sdk="Microsoft.NET.Sdk">
 
   <PropertyGroup>
     <TargetFramework>netstandard2.0</TargetFramework>

diff --git a/src/Microsoft.ML/CSharpApi.cs b/src/Microsoft.ML/CSharpApi.cs
@@ -838,6 +838,18 @@ public void Add(Microsoft.ML.Trainers.StochasticGradientDescentBinaryClassifier
                 _jsonNodes.Add(Serialize("Trainers.StochasticGradientDescentBinaryClassifier", input, output));
             }
 
+            public Microsoft.ML.Trainers.SymSgdBinaryClassifier.Output Add(Microsoft.ML.Trainers.SymSgdBinaryClassifier input)
+            {
+                var output = new Microsoft.ML.Trainers.SymSgdBinaryClassifier.Output();
+                Add(input, output);
+                return output;
+            }
+
+            public void Add(Microsoft.ML.Trainers.SymSgdBinaryClassifier input, Microsoft.ML.Trainers.SymSgdBinaryClassifier.Output output)
+            {
+                _jsonNodes.Add(Serialize("Trainers.SymSgdBinaryClassifier", input, output));
+            }
+
             public Microsoft.ML.Transforms.ApproximateBootstrapSampler.Output Add(Microsoft.ML.Transforms.ApproximateBootstrapSampler input)
             {
                 var output = new Microsoft.ML.Transforms.ApproximateBootstrapSampler.Output();
@@ -9761,6 +9773,128 @@ public StochasticGradientDescentBinaryClassifierPipelineStep(Output output)
         }
     }
 
+    namespace Trainers
+    {
+
+        /// <summary>
+        /// Train a symbolic SGD.
+        /// </summary>
+        public sealed partial class SymSgdBinaryClassifier : Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInputWithLabel, Microsoft.ML.Runtime.EntryPoints.CommonInputs.ITrainerInput, Microsoft.ML.ILearningPipelineItem
+        {
+
+
+            /// <summary>
+            /// Degree of lock-free parallelism. Determinism not guaranteed. Multi-threading is not supported currently.
+            /// </summary>
+            public int? NumberOfThreads { get; set; }
+
+            /// <summary>
+            /// Number of passes over the data.
+            /// </summary>
+            [TlcModule.SweepableDiscreteParamAttribute("NumberOfIterations", new object[]{1, 5, 10, 20, 30, 40, 50})]
+            public int NumberOfIterations { get; set; } = 50;
+
+            /// <summary>
+            /// Tolerance for difference in average loss in consecutive passes.
+            /// </summary>
+            public float Tol { get; set; } = 0.0001f;
+
+            /// <summary>
+            /// Learning rate
+            /// </summary>
+            [TlcModule.SweepableDiscreteParamAttribute("LearningRate", new object[]{"<Auto>", 10f, 1f, 0.1f, 0.01f, 0.001f})]
+            public float? LearningRate { get; set; }
+
+            /// <summary>
+            /// L2 regularization
+            /// </summary>
+            [TlcModule.SweepableDiscreteParamAttribute("L2Regularization", new object[]{0f, 1E-05f, 1E-05f, 1E-06f, 1E-07f})]
+            public float L2Regularization { get; set; }
+
+            /// <summary>
+            /// The number of iterations each thread learns a local model until combining it with the global model. Low value means more updated global model and high value means less cache traffic.
+            /// </summary>
+            [TlcModule.SweepableDiscreteParamAttribute("UpdateFrequency", new object[]{"<Auto>", 5, 20})]
+            public int? UpdateFrequency { get; set; }
+
+            /// <summary>
+            /// The acceleration memory budget in MB
+            /// </summary>
+            public long MemorySize { get; set; } = 1024;
+
+            /// <summary>
+            /// Shuffle data?
+            /// </summary>
+            public bool Shuffle { get; set; } = true;
+
+            /// <summary>
+            /// Apply weight to the positive class, for imbalanced data
+            /// </summary>
+            public float PositiveInstanceWeight { get; set; } = 1f;
+
+            /// <summary>
+            /// Column to use for labels
+            /// </summary>
+            public string LabelColumn { get; set; } = "Label";
+
+            /// <summary>
+            /// The data to be used for training
+            /// </summary>
+            public Var<Microsoft.ML.Runtime.Data.IDataView> TrainingData { get; set; } = new Var<Microsoft.ML.Runtime.Data.IDataView>();
+
+            /// <summary>
+            /// Column to use for features
+            /// </summary>
+            public string FeatureColumn { get; set; } = "Features";
+
+            /// <summary>
+            /// Normalize option for the feature column
+            /// </summary>
+            public Microsoft.ML.Models.NormalizeOption NormalizeFeatures { get; set; } = Microsoft.ML.Models.NormalizeOption.Auto;
+
+            /// <summary>
+            /// Whether learner should cache input training data
+            /// </summary>
+            public Microsoft.ML.Models.CachingOptions Caching { get; set; } = Microsoft.ML.Models.CachingOptions.Auto;
+
+
+            public sealed class Output : Microsoft.ML.Runtime.EntryPoints.CommonOutputs.IBinaryClassificationOutput, Microsoft.ML.Runtime.EntryPoints.CommonOutputs.ITrainerOutput
+            {
+                /// <summary>
+                /// The trained model
+                /// </summary>
+                public Var<Microsoft.ML.Runtime.EntryPoints.IPredictorModel> PredictorModel { get; set; } = new Var<Microsoft.ML.Runtime.EntryPoints.IPredictorModel>();
+
+            }
+            public Var<IDataView> GetInputData() => TrainingData;
+
+            public ILearningPipelineStep ApplyStep(ILearningPipelineStep previousStep, Experiment experiment)
+            {
+                if (previousStep != null)
+                {
+                    if (!(previousStep is ILearningPipelineDataStep dataStep))
+                    {
+                        throw new InvalidOperationException($"{ nameof(SymSgdBinaryClassifier)} only supports an { nameof(ILearningPipelineDataStep)} as an input.");
+                    }
+
+                    TrainingData = dataStep.Data;
+                }
+                Output output = experiment.Add(this);
+                return new SymSgdBinaryClassifierPipelineStep(output);
+            }
+
+            private class SymSgdBinaryClassifierPipelineStep : ILearningPipelinePredictorStep
+            {
+                public SymSgdBinaryClassifierPipelineStep(Output output)
+                {
+                    Model = output.PredictorModel;
+                }
+
+                public Var<IPredictorModel> Model { get; }
+            }
+        }
+    }
+
     namespace Transforms
     {
 

diff --git a/src/Native/CMakeLists.txt b/src/Native/CMakeLists.txt
@@ -182,3 +182,4 @@ add_subdirectory(CpuMathNative)
 add_subdirectory(FastTreeNative)
 add_subdirectory(LdaNative)
 add_subdirectory(FactorizationMachineNative)
+add_subdirectory(SymSgdNative)
diff --git a/src/Native/SymSgdNative/CMakeLists.txt b/src/Native/SymSgdNative/CMakeLists.txt
@@ -0,0 +1,34 @@
+project (SymSgdNative)
+
+set(SOURCES
+    SymSgdNative.cpp
+)
+
+# Root of the restored mlnetmkldeps package; the per-platform MklImports binaries sit under <rid>/native.
+set(MKL_DEPS_RUNTIMES_DIR "${CMAKE_SOURCE_DIR}/../../packages/mlnetmkldeps/0.0.0.5/runtimes")
+
+if(WIN32)
+    find_library(MKL_LIBRARY MklImports HINTS "${MKL_DEPS_RUNTIMES_DIR}/win-x64/native")
+else()
+    list(APPEND SOURCES ${VERSION_FILE_PATH})
+    if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
+        message("Linking SymSgdNative with MKL on macOS.")
+        find_library(MKL_LIBRARY libMklImports.dylib HINTS "${MKL_DEPS_RUNTIMES_DIR}/osx-x64/native")
+    else()
+        message("Linking SymSgdNative with MKL on linux.")
+        find_library(MKL_LIBRARY libMklImports.so HINTS "${MKL_DEPS_RUNTIMES_DIR}/linux-x64/native")
+        SET(CMAKE_SKIP_BUILD_RPATH  FALSE)
+        SET(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
+        SET(CMAKE_INSTALL_RPATH "${MKL_DEPS_RUNTIMES_DIR}")
+        SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
+    endif()
+endif()
+
+add_library(SymSgdNative SHARED ${SOURCES} ${RESOURCES})
+target_link_libraries(SymSgdNative PUBLIC ${MKL_LIBRARY})
+
+if(CMAKE_SYSTEM_NAME STREQUAL Darwin)
+    set_target_properties(SymSgdNative PROPERTIES INSTALL_RPATH "${MKL_DEPS_RUNTIMES_DIR}/osx-x64/native")
+endif()
+
+install_library_and_symbols (SymSgdNative)
diff --git a/src/Native/SymSgdNative/Macros.h b/src/Native/SymSgdNative/Macros.h
@@ -0,0 +1,12 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#pragma once
+
+// Yields the smaller of two values (the first one when they compare equal). This is a function-like
+// macro, so an argument may be evaluated twice: do not pass expressions with side effects.
+#define MIN(x, y) (((x) > (y)) ? (y) : (x))
+
+// This is a very large prime number used for permutation
+#define VERYLARGEPRIME 961748941
diff --git a/src/Native/SymSgdNative/SparseBLAS.h b/src/Native/SymSgdNative/SparseBLAS.h
@@ -0,0 +1,36 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#pragma once
+#include "../Stdafx.h"
+
+// CBLAS level-1 routines. They are only declared here, so the BLAS library linked into this module must supply them.
+extern "C" float cblas_sdot(const int vecSize, const float* denseVecX, const int incX, const float* denseVecY, const int incY);
+extern "C" float cblas_sdoti(const int sparseVecSize, const float* sparseVecValues, const int* sparseVecIndices, float* denseVec);
+extern "C" void cblas_saxpy(const int vecSize, const float coef, const float* denseVecX, const int incX, float* denseVecY, const int incY);
+extern "C" void cblas_saxpyi(const int sparseVecSize, const float coef, const float* sparseVecValues, const int* sparseVecIndices, float* denseVec);
+
+// The wrappers below pin every stride to 1, take sparse indices before sparse values, and take the
+// coefficient last. They are defined in this header, so they are marked inline: otherwise including
+// the header from more than one translation unit yields duplicate-symbol link errors.
+
+// Dot product of two contiguous dense vectors of length vecSize.
+inline float SDOT(const int vecSize, const float* denseVecX, const float* denseVecY) {
+    return cblas_sdot(vecSize, denseVecX, 1, denseVecY, 1);
+}
+
+// Dot product of a sparse vector (sparseVecSize index/value pairs) with the dense vector denseVec.
+inline float SDOTI(const int sparseVecSize, const int* sparseVecIndices, const float* sparseVecValues, float* denseVec) {
+    return cblas_sdoti(sparseVecSize, sparseVecValues, sparseVecIndices, denseVec);
+}
+
+// denseVecY += coef * denseVecX over vecSize contiguous elements.
+inline void SAXPY(const int vecSize, const float* denseVecX, float* denseVecY, float coef) {
+    cblas_saxpy(vecSize, coef, denseVecX, 1, denseVecY, 1);
+}
+
+// denseVec[sparseVecIndices[i]] += coef * sparseVecValues[i] for each of the sparseVecSize entries.
+inline void SAXPYI(const int sparseVecSize, const int* sparseVecIndices, const float* sparseVecValues, float* denseVec, float coef) {
+    cblas_saxpyi(sparseVecSize, coef, sparseVecValues, sparseVecIndices, denseVec);
+}