From 90e3ee2c8f3a6ae8538e9081f465a8e5bfac4524 Mon Sep 17 00:00:00 2001
From: Scott Inglis
Date: Fri, 1 Mar 2019 17:48:52 -0800
Subject: [PATCH] - Minor updates

---
 .../LightGbmArguments.cs                   | 23 ++++----
 .../LightGbmBinaryTrainer.cs               |  6 +--
 .../LightGbmMulticlassTrainer.cs           | 12 ++---
 .../LightGbmRegressionTrainer.cs           |  6 +--
 .../Common/EntryPoints/core_manifest.json  | 52 +++----------
 .../UnitTests/TestEntryPoints.cs           |  3 +-
 .../TrainerEstimators/TreeEstimators.cs    |  2 +-
 7 files changed, 34 insertions(+), 70 deletions(-)

diff --git a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs
index eb0e70ac02..abedc67b42 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmArguments.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmArguments.cs
@@ -97,9 +97,9 @@ private static string GetOptionName(string name)
             return strBuf.ToString();
         }
 
-        // Static name map that maps friendly names to lightGBM arguments.
-        // There is a conversion that will convert the field name to a lightGBM name
-        // (but lowercasing and adding an underscore between words). In
+        // Static override name map that maps friendly names to lightGBM arguments.
+        // If an argument is not here, then its name is identical to a lightGBM argument
+        // and does not require a mapping, for example, Subsample.
        private static Dictionary<string, string> _nameMapping = new Dictionary<string, string>()
        {
            {nameof(TreeBooster.Options.MinimumSplitGain), "min_split_gain" },
@@ -110,7 +110,7 @@ private static string GetOptionName(string name)
            {nameof(TreeBooster.Options.L2Regularization), "reg_lambda"},
            {nameof(TreeBooster.Options.WeightOfPositiveExamples), "scale_pos_weight"},
            {nameof(DartBooster.Options.TreeDropFraction), "drop_rate" },
-           {nameof(DartBooster.Options.MaximumDroppedTreesPerRound), "max_drop" },
+           {nameof(DartBooster.Options.MaximumDroppedTreeCountPerRound), "max_drop" },
            {nameof(DartBooster.Options.SkipDropFraction), "skip_drop" },
            {nameof(MinimumExampleCountPerLeaf), "min_data_per_leaf"},
            {nameof(NumberOfLeaves), "num_leaves"},
@@ -159,7 +159,8 @@ public class Options : ISupportBoosterParameterFactory
 
            [Argument(ArgumentType.AtMostOnce,
                HelpText = "Subsample frequency for bagging. 0 means no subsample. "
-                   + "If subsampleFreq > 0, it will use a subset to train and the subset will be updated on every Subsample iteration.")]
+                   + "Specifies the frequency at which the bagging occurs, where if this is set to N, the subsampling will happen every N iterations. "
+                   + "This must be set together with Subsample, which specifies the amount to subsample.")]
            [TlcModule.Range(Min = 0, Max = int.MaxValue)]
            public int SubsampleFrequency = 0;
 
@@ -177,7 +178,7 @@ public class Options : ISupportBoosterParameterFactory
 
            [Argument(ArgumentType.AtMostOnce,
                HelpText = "L2 regularization term on weights, increasing this value will make model more conservative.",
-               ShortName = "l2,RegLambda")]
+               ShortName = "l2")]
            [TlcModule.Range(Min = 0.0)]
            [TGUI(Label = "Lambda(L2)", SuggestedSweeps = "0,0.5,1")]
            [TlcModule.SweepableDiscreteParam("RegLambda", new object[] { 0f, 0.5f, 1f })]
@@ -185,7 +186,7 @@ public class Options : ISupportBoosterParameterFactory
 
            [Argument(ArgumentType.AtMostOnce,
                HelpText = "L1 regularization term on weights, increase this value will make model more conservative.",
-               ShortName = "l1,RegAlpha")]
+               ShortName = "l1")]
            [TlcModule.Range(Min = 0.0)]
            [TGUI(Label = "Alpha(L1)", SuggestedSweeps = "0,0.5,1")]
            [TlcModule.SweepableDiscreteParam("RegAlpha", new object[] { 0f, 0.5f, 1f })]
@@ -235,7 +236,7 @@ public sealed class Options : TreeBooster.Options
 
            [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of dropped tree in a boosting round.")]
            [TlcModule.Range(Inf = 0, Max = int.MaxValue)]
-           public int MaximumDroppedTreesPerRound = 1;
+           public int MaximumDroppedTreeCountPerRound = 1;
 
            [Argument(ArgumentType.AtMostOnce, HelpText = "Probability for not dropping in a boosting round.")]
            [TlcModule.Range(Inf = 0.0, Max = 1.0)]
@@ -358,7 +359,7 @@ public enum EvalMetricType
 
        [Argument(ArgumentType.AtMostOnce, HelpText = "Use softmax loss for the multi classification.")]
        [TlcModule.SweepableDiscreteParam("UseSoftmax", new object[] { true, false })]
-       public bool? UseSoftMax;
+       public bool? UseSoftmax;
 
        [Argument(ArgumentType.AtMostOnce, HelpText = "Rounds of early stopping, 0 will disable it.",
            ShortName = "es")]
@@ -382,7 +383,7 @@ public enum EvalMetricType
 
        [Argument(ArgumentType.AtMostOnce, HelpText = "Enable special handling of missing value or not.")]
        [TlcModule.SweepableDiscreteParam("UseMissing", new object[] { true, false })]
-       public bool UseMissing = false;
+       public bool HandleMissingValue = false;
 
        [Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of instances per categorical group.", ShortName = "mdpg")]
        [TlcModule.Range(Inf = 0, Max = int.MaxValue)]
@@ -459,7 +460,7 @@ internal Dictionary<string, object> ToDictionary(IHost host)
            res[GetOptionName(nameof(metric))] = metric;
            res[GetOptionName(nameof(Sigmoid))] = Sigmoid;
            res[GetOptionName(nameof(CustomGains))] = CustomGains;
-           res[GetOptionName(nameof(UseMissing))] = UseMissing;
+           res[GetOptionName(nameof(HandleMissingValue))] = HandleMissingValue;
            res[GetOptionName(nameof(MinimumExampleCountPerGroup))] = MinimumExampleCountPerGroup;
            res[GetOptionName(nameof(MaximumCategoricalSplitPointCount))] = MaximumCategoricalSplitPointCount;
            res[GetOptionName(nameof(CategoricalSmoothing))] = CategoricalSmoothing;
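
Context for reviewers: the new comment above says the override map only covers names the default conversion cannot produce. The snippet below is a minimal, self-contained sketch of that convention, assuming the behavior the comment describes; the helper class, override entries, and names here are illustrative stand-ins, not the actual ML.NET implementation.

    using System.Collections.Generic;
    using System.Text;

    internal static class OptionNameSketch
    {
        // Override map, mirroring _nameMapping: only names whose default
        // conversion would not match the LightGBM argument need an entry.
        private static readonly Dictionary<string, string> NameOverrides = new Dictionary<string, string>
        {
            { "NumberOfLeaves", "num_leaves" },                   // default would give "number_of_leaves"
            { "MinimumExampleCountPerLeaf", "min_data_per_leaf" },
        };

        public static string GetOptionName(string name)
        {
            if (NameOverrides.TryGetValue(name, out var lightGbmName))
                return lightGbmName;

            // Default conversion: lowercase and insert an underscore between words.
            var strBuf = new StringBuilder();
            for (int i = 0; i < name.Length; i++)
            {
                if (char.IsUpper(name[i]) && i > 0)
                    strBuf.Append('_');
                strBuf.Append(char.ToLowerInvariant(name[i]));
            }
            return strBuf.ToString();
        }
    }

Under this sketch, GetOptionName("Subsample") yields "subsample" with no mapping entry, while GetOptionName("NumberOfLeaves") hits the override and yields "num_leaves".
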
+ + "This must be set with Subsample as this specifies the amount to subsample.")] [TlcModule.Range(Min = 0, Max = int.MaxValue)] public int SubsampleFrequency = 0; @@ -177,7 +178,7 @@ public class Options : ISupportBoosterParameterFactory [Argument(ArgumentType.AtMostOnce, HelpText = "L2 regularization term on weights, increasing this value will make model more conservative.", - ShortName = "l2,RegLambda")] + ShortName = "l2")] [TlcModule.Range(Min = 0.0)] [TGUI(Label = "Lambda(L2)", SuggestedSweeps = "0,0.5,1")] [TlcModule.SweepableDiscreteParam("RegLambda", new object[] { 0f, 0.5f, 1f })] @@ -185,7 +186,7 @@ public class Options : ISupportBoosterParameterFactory [Argument(ArgumentType.AtMostOnce, HelpText = "L1 regularization term on weights, increase this value will make model more conservative.", - ShortName = "l1,RegAlpha")] + ShortName = "l1")] [TlcModule.Range(Min = 0.0)] [TGUI(Label = "Alpha(L1)", SuggestedSweeps = "0,0.5,1")] [TlcModule.SweepableDiscreteParam("RegAlpha", new object[] { 0f, 0.5f, 1f })] @@ -235,7 +236,7 @@ public sealed class Options : TreeBooster.Options [Argument(ArgumentType.AtMostOnce, HelpText = "Maximum number of dropped tree in a boosting round.")] [TlcModule.Range(Inf = 0, Max = int.MaxValue)] - public int MaximumDroppedTreesPerRound = 1; + public int MaximumDroppedTreeCountPerRound = 1; [Argument(ArgumentType.AtMostOnce, HelpText = "Probability for not dropping in a boosting round.")] [TlcModule.Range(Inf = 0.0, Max = 1.0)] @@ -358,7 +359,7 @@ public enum EvalMetricType [Argument(ArgumentType.AtMostOnce, HelpText = "Use softmax loss for the multi classification.")] [TlcModule.SweepableDiscreteParam("UseSoftmax", new object[] { true, false })] - public bool? UseSoftMax; + public bool? UseSoftmax; [Argument(ArgumentType.AtMostOnce, HelpText = "Rounds of early stopping, 0 will disable it.", ShortName = "es")] @@ -382,7 +383,7 @@ public enum EvalMetricType [Argument(ArgumentType.AtMostOnce, HelpText = "Enable special handling of missing value or not.")] [TlcModule.SweepableDiscreteParam("UseMissing", new object[] { true, false })] - public bool UseMissing = false; + public bool HandleMissingValue = false; [Argument(ArgumentType.AtMostOnce, HelpText = "Minimum number of instances per categorical group.", ShortName = "mdpg")] [TlcModule.Range(Inf = 0, Max = int.MaxValue)] @@ -459,7 +460,7 @@ internal Dictionary ToDictionary(IHost host) res[GetOptionName(nameof(metric))] = metric; res[GetOptionName(nameof(Sigmoid))] = Sigmoid; res[GetOptionName(nameof(CustomGains))] = CustomGains; - res[GetOptionName(nameof(UseMissing))] = UseMissing; + res[GetOptionName(nameof(HandleMissingValue))] = HandleMissingValue; res[GetOptionName(nameof(MinimumExampleCountPerGroup))] = MinimumExampleCountPerGroup; res[GetOptionName(nameof(MaximumCategoricalSplitPointCount))] = MaximumCategoricalSplitPointCount; res[GetOptionName(nameof(CategoricalSmoothing))] = CategoricalSmoothing; diff --git a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs index 33bb6a57e8..ab85635cb1 100644 --- a/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs +++ b/src/Microsoft.ML.LightGBM/LightGbmBinaryTrainer.cs @@ -109,7 +109,7 @@ internal LightGbmBinaryTrainer(IHostEnvironment env, Options options) /// The private instance of . /// The name of The label column. /// The name of the feature column. - /// The name for the column containing the initial weight. + /// The name for the column containing the initial weight. /// The number of leaves to use. 
diff --git a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs
index c6029123cf..a35b59572f 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmMulticlassTrainer.cs
@@ -43,7 +43,7 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, Options options)
        /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
        /// <param name="labelColumnName">The name of The label column.</param>
        /// <param name="featureColumnName">The name of the feature column.</param>
-       /// <param name="weights">The name for the column containing the initial weight.</param>
+       /// <param name="exampleWeightColumnName">The name for the column containing the initial weight.</param>
        /// <param name="numberOfLeaves">The number of leaves to use.</param>
        /// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
        /// <param name="learningRate">The learning rate.</param>
@@ -51,12 +51,12 @@ internal LightGbmMulticlassTrainer(IHostEnvironment env, Options options)
        internal LightGbmMulticlassTrainer(IHostEnvironment env,
            string labelColumnName = DefaultColumnNames.Label,
            string featureColumnName = DefaultColumnNames.Features,
-           string weights = null,
+           string exampleWeightColumnName = null,
            int? numberOfLeaves = null,
            int? minimumExampleCountPerLeaf = null,
            double? learningRate = null,
            int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations)
-           : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumnName), featureColumnName, weights, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
+           : base(env, LoadNameValue, TrainerUtils.MakeU4ScalarColumn(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
        {
            _numClass = -1;
        }
@@ -182,14 +182,14 @@ private protected override void CheckAndUpdateParametersBeforeTraining(IChannel
            Options["num_class"] = _numClass;
            bool useSoftmax = false;
 
-           if (LightGbmTrainerOptions.UseSoftMax.HasValue)
-               useSoftmax = LightGbmTrainerOptions.UseSoftMax.Value;
+           if (LightGbmTrainerOptions.UseSoftmax.HasValue)
+               useSoftmax = LightGbmTrainerOptions.UseSoftmax.Value;
            else
            {
                if (labels.Length >= _minDataToUseSoftmax)
                    useSoftmax = true;
 
-               ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseSoftMax) + " = " + useSoftmax);
+               ch.Info("Auto-tuning parameters: " + nameof(LightGbmTrainerOptions.UseSoftmax) + " = " + useSoftmax);
            }
 
            if (useSoftmax)
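
The hunk above only renames UseSoftMax to UseSoftmax, but the surrounding logic is worth spelling out: when the option is left unset, the trainer picks softmax automatically based on dataset size. A self-contained sketch of that decision rule, with a hypothetical class name and an illustrative threshold standing in for the trainer's private _minDataToUseSoftmax constant:

    public static class SoftmaxAutoTuneSketch
    {
        // Illustrative threshold; the real trainer keeps its own constant.
        private const int MinDataToUseSoftmax = 50_000;

        // Mirrors the diff: an explicit user setting always wins; otherwise
        // softmax is enabled only when enough labeled examples are available,
        // and the trainer falls back to one-vs-all for smaller datasets.
        public static bool ResolveUseSoftmax(bool? userSetting, int labelCount)
            => userSetting ?? (labelCount >= MinDataToUseSoftmax);
    }

For example, ResolveUseSoftmax(null, 100_000) auto-tunes to true, while ResolveUseSoftmax(false, 100_000) stays false because the explicit choice takes precedence.
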
diff --git a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs
index 9453cf6a2e..26f670dfe2 100644
--- a/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs
+++ b/src/Microsoft.ML.LightGBM/LightGbmRegressionTrainer.cs
@@ -87,7 +87,7 @@ public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase<float, RegressionPredictionTransformer<LightGbmRegressionModelParameters>, LightGbmRegressionModelParameters>
        /// <param name="env">The private instance of <see cref="IHostEnvironment"/>.</param>
        /// <param name="labelColumnName">The name of the label column.</param>
        /// <param name="featureColumnName">The name of the feature column.</param>
-       /// <param name="weights">The name for the column containing the initial weight.</param>
+       /// <param name="exampleWeightColumnName">The name for the column containing the initial weight.</param>
        /// <param name="numberOfLeaves">The number of leaves to use.</param>
        /// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
        /// <param name="learningRate">The learning rate.</param>
@@ -95,12 +95,12 @@ public sealed class LightGbmRegressorTrainer : LightGbmTrainerBase<float, RegressionPredictionTransformer<LightGbmRegressionModelParameters>, LightGbmRegressionModelParameters>
        internal LightGbmRegressorTrainer(IHostEnvironment env,
            string labelColumnName = DefaultColumnNames.Label,
            string featureColumnName = DefaultColumnNames.Features,
-           string weights = null,
+           string exampleWeightColumnName = null,
            int? numberOfLeaves = null,
            int? minimumExampleCountPerLeaf = null,
            double? learningRate = null,
            int numberOfIterations = LightGBM.Options.Defaults.NumberOfIterations)
-           : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, weights, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
+           : base(env, LoadNameValue, TrainerUtils.MakeR4ScalarColumn(labelColumnName), featureColumnName, exampleWeightColumnName, null, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations)
        {
        }
 
diff --git a/test/BaselineOutput/Common/EntryPoints/core_manifest.json b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
--- a/test/BaselineOutput/Common/EntryPoints/core_manifest.json
+++ b/test/BaselineOutput/Common/EntryPoints/core_manifest.json
@@ -23628,9 +23619,6 @@
          "Name": "SubsampleFrequency",
          "Type": "Int",
          "Desc": "Subsample frequency for bagging. 0 means no subsample. If subsampleFreq > 0, it will use a subset to train and the subset will be updated on every Subsample iteration.",
-          "Aliases": [
-            "SubsampleFreq"
-          ],
          "Required": false,
          "SortOrder": 150.0,
          "IsNullable": false,
@@ -23778,9 +23766,6 @@
          "Name": "MinimumSplitGain",
          "Type": "Float",
          "Desc": "Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.",
-          "Aliases": [
-            "MinSplitGain"
-          ],
          "Required": false,
          "SortOrder": 150.0,
          "IsNullable": false,
@@ -23793,9 +23778,6 @@
          "Name": "MaximumTreeDepth",
          "Type": "Int",
          "Desc": "Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first.",
-          "Aliases": [
-            "MaxDepth"
-          ],
          "Required": false,
          "SortOrder": 150.0,
          "IsNullable": false,
@@ -23809,9 +23791,6 @@
          "Name": "MinimumChildWeight",
          "Type": "Float",
          "Desc": "Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be.",
-          "Aliases": [
-            "MinChildWeight"
-          ],
          "Required": false,
          "SortOrder": 150.0,
          "IsNullable": false,
@@ -23824,9 +23803,6 @@
          "Name": "SubsampleFrequency",
          "Type": "Int",
          "Desc": "Subsample frequency for bagging. 0 means no subsample. If subsampleFreq > 0, it will use a subset to train and the subset will be updated on every Subsample iteration.",
-          "Aliases": [
-            "SubsampleFreq"
-          ],
          "Required": false,
          "SortOrder": 150.0,
          "IsNullable": false,
@@ -23974,9 +23950,6 @@
          "Name": "MinimumSplitGain",
          "Type": "Float",
          "Desc": "Minimum loss reduction required to make a further partition on a leaf node of the tree. the larger, the more conservative the algorithm will be.",
-          "Aliases": [
-            "MinSplitGain"
-          ],
          "Required": false,
          "SortOrder": 150.0,
          "IsNullable": false,
@@ -23989,9 +23962,6 @@
          "Name": "MaximumTreeDepth",
          "Type": "Int",
          "Desc": "Maximum depth of a tree. 0 means no limit. However, tree still grows by best-first.",
-          "Aliases": [
-            "MaxDepth"
-          ],
          "Required": false,
          "SortOrder": 150.0,
          "IsNullable": false,
@@ -24005,9 +23975,6 @@
          "Name": "MinimumChildWeight",
          "Type": "Float",
          "Desc": "Minimum sum of instance weight(hessian) needed in a child. If the tree partition step results in a leaf node with the sum of instance weight less than min_child_weight, then the building process will give up further partitioning. In linear regression mode, this simply corresponds to minimum number of instances needed to be in each node. The larger, the more conservative the algorithm will be.",
-          "Aliases": [
-            "MinChildWeight"
-          ],
          "Required": false,
          "SortOrder": 150.0,
          "IsNullable": false,
@@ -24020,9 +23987,6 @@
          "Name": "SubsampleFrequency",
          "Type": "Int",
          "Desc": "Subsample frequency for bagging. 0 means no subsample. If subsampleFreq > 0, it will use a subset to train and the subset will be updated on every Subsample iteration.",
-          "Aliases": [
-            "SubsampleFreq"
-          ],
          "Required": false,
          "SortOrder": 150.0,
          "IsNullable": false,
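
To see how the renamed fields fit together from the options side, here is a hedged sketch of constructing the trainer options after this patch. It assumes the Options and DartBooster.Options types in the Microsoft.ML.LightGBM namespace expose the fields exactly as renamed in the hunks above; the values are illustrative only, not recommended settings.

    using Microsoft.ML.LightGBM;

    public static class LightGbmOptionsSketch
    {
        public static Options BuildOptions()
        {
            return new Options
            {
                HandleMissingValue = true,   // formerly UseMissing
                UseSoftmax = true,           // formerly UseSoftMax (multiclass only)
                Booster = new DartBooster.Options
                {
                    // Formerly MaximumDroppedTreesPerRound; maps to LightGBM's "max_drop".
                    MaximumDroppedTreeCountPerRound = 2,
                    // Maps to LightGBM's "drop_rate".
                    TreeDropFraction = 0.1,
                },
            };
        }
    }

Note that ToDictionary later converts each field through GetOptionName, so these friendly names end up as the native LightGBM keys ("use_missing" via the override map, "max_drop", "drop_rate", and so on).
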
diff --git a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
index d255f44d7e..746e0fd16d 100644
--- a/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
+++ b/test/Microsoft.ML.Core.Tests/UnitTests/TestEntryPoints.cs
@@ -226,8 +226,7 @@ private string GetBuildPrefix()
 #endif
        }
 
-        //[Fact(Skip = "Execute this test if you want to regenerate the core_manifest and core_ep_list files")]
-        [Fact]
+        [Fact(Skip = "Execute this test if you want to regenerate the core_manifest and core_ep_list files")]
        public void RegenerateEntryPointCatalog()
        {
            var (epListContents, jObj) = BuildManifests();
diff --git a/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs b/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs
index e875f2a90b..88c8e46e64 100644
--- a/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs
+++ b/test/Microsoft.ML.Tests/TrainerEstimators/TreeEstimators.cs
@@ -298,7 +298,7 @@ private void LightGbmHelper(bool useSoftmax, out string modelString, out List
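
The TestEntryPoints.cs hunk restores the skip on the catalog-regeneration test, which had been temporarily enabled to rebuild core_manifest.json after the option renames above. For context, a minimal xUnit sketch of that pattern, with hypothetical names: the regeneration utility stays in the suite but is skipped by default, and a developer removes the Skip argument locally when the baseline files need to be rebuilt.

    using Xunit;

    public class CatalogRegenerationSketch
    {
        [Fact(Skip = "Execute this test if you want to regenerate baseline files")]
        public void RegenerateBaselines()
        {
            // Rebuild the generated artifacts and write them to the baseline
            // folder here; the assertion-free body only runs when un-skipped.
        }
    }
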