Scrub changes for LightGBM (#2808)
API cleanup for LightGBM. The cleanup includes:
- Renaming all abbreviated parameters to full names (e.g. numThreads -> NumberOfThreads).
- Changing protected members to private protected.
- Updating baseline files to reflect these changes, which are naming-only and should not cause any computational difference.

Part of the fix for #2618
singlis committed Mar 5, 2019
1 parent a4bfd93 commit d28886f
Showing 39 changed files with 458 additions and 492 deletions.
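
For a quick sense of what the rename means for callers, here is a minimal before/after sketch assembled from the diffs below. It is not code from the commit itself; the column name and parameter values are illustrative, and it assumes an MLContext and a project that references the Microsoft.ML.LightGBM package.

    using Microsoft.ML;

    var mlContext = new MLContext();

    // Before this commit: abbreviated parameter names.
    // var pipeline = mlContext.Regression.Trainers.LightGbm(
    //     labelColumnName: "Label", numLeaves: 4, minDataPerLeaf: 6, learningRate: 0.001);

    // After this commit: full parameter names.
    var pipeline = mlContext.Regression.Trainers.LightGbm(
        labelColumnName: "Label",
        numberOfLeaves: 4,
        minimumExampleCountPerLeaf: 6,
        learningRate: 0.001);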
@@ -39,7 +39,7 @@ public static void Example()
         FeatureColumnName = "Features",
         Booster = new DartBooster.Options
         {
-            DropRate = 0.15,
+            TreeDropFraction = 0.15,
             XgboostDartMode = false
         }
     }))
@@ -20,10 +20,10 @@ public static void Example()

     // Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
     var pipeline = mlContext.Ranking.Trainers.LightGbm(
-        numLeaves: 4,
-        minDataPerLeaf: 10,
+        numberOfLeaves: 4,
+        minimumExampleCountPerLeaf: 10,
         learningRate: 0.1,
-        numBoostRound: 2);
+        numberOfIterations: 2);

     // Fit this Pipeline to the Training Data.
     var model = pipeline.Fit(split.TrainSet);
@@ -23,10 +23,10 @@ public static void Example()
     var pipeline = mlContext.Ranking.Trainers.LightGbm(
         new Options
         {
-            NumLeaves = 4,
-            MinDataPerLeaf = 10,
+            NumberOfLeaves = 4,
+            MinimumExampleCountPerGroup = 10,
             LearningRate = 0.1,
-            NumBoostRound = 2,
+            NumberOfIterations = 2,
             Booster = new TreeBooster.Options
             {
                 FeatureFraction = 0.9
@@ -35,8 +35,8 @@ public static void Example()
     var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
         .Append(mlContext.Regression.Trainers.LightGbm(
             labelColumnName: labelName,
-            numLeaves: 4,
-            minDataPerLeaf: 6,
+            numberOfLeaves: 4,
+            minimumExampleCountPerLeaf: 6,
             learningRate: 0.001));

     // Fit this pipeline to the training data.
@@ -39,8 +39,8 @@ public static void Example()
         .Append(mlContext.Regression.Trainers.LightGbm(new Options
         {
             LabelColumnName = labelName,
-            NumLeaves = 4,
-            MinDataPerLeaf = 6,
+            NumberOfLeaves = 4,
+            MinimumExampleCountPerLeaf = 6,
             LearningRate = 0.001,
             Booster = new GossBooster.Options
             {
@@ -78,8 +78,8 @@ public static void LightGbmBinaryClassification()
             Score: mlContext.BinaryClassification.Trainers.LightGbm(
                 row.Label,
                 row.Features,
-                numLeaves: 4,
-                minDataPerLeaf: 6,
+                numberOfLeaves: 4,
+                minimumExampleCountPerLeaf: 6,
                 learningRate: 0.001)))
         .Append(row => (
             Label: row.Label,
@@ -38,8 +38,8 @@ public static void LightGbmRegression()
         .Append(r => (r.label, score: mlContext.Regression.Trainers.LightGbm(
             r.label,
             r.features,
-            numLeaves: 4,
-            minDataPerLeaf: 6,
+            numberOfLeaves: 4,
+            minimumExampleCountPerLeaf: 6,
             learningRate: 0.001,
             onFit: p => pred = p)
         )
89 changes: 47 additions & 42 deletions src/Microsoft.ML.LightGBM.StaticPipe/LightGbmStaticExtensions.cs
@@ -21,10 +21,10 @@ public static class LightGbmStaticExtensions
         /// <param name="label">The label column.</param>
         /// <param name="features">The features column.</param>
         /// <param name="weights">The weights column.</param>
-        /// <param name="numLeaves">The number of leaves to use.</param>
-        /// <param name="numBoostRound">Number of iterations.</param>
-        /// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
+        /// <param name="numberOfLeaves">The number of leaves to use.</param>
+        /// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
         /// <param name="learningRate">The learning rate.</param>
+        /// <param name="numberOfIterations">Number of iterations.</param>
         /// <param name="onFit">A delegate that is called every time the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
@@ -39,19 +39,19 @@ public static class LightGbmStaticExtensions
         /// </example>
         public static Scalar<float> LightGbm(this RegressionCatalog.RegressionTrainers catalog,
             Scalar<float> label, Vector<float> features, Scalar<float> weights = null,
-            int? numLeaves = null,
-            int? minDataPerLeaf = null,
+            int? numberOfLeaves = null,
+            int? minimumExampleCountPerLeaf = null,
             double? learningRate = null,
-            int numBoostRound = Options.Defaults.NumBoostRound,
+            int numberOfIterations = Options.Defaults.NumberOfIterations,
             Action<LightGbmRegressionModelParameters> onFit = null)
         {
-            CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
+            CheckUserValues(label, features, weights, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations, onFit);

             var rec = new TrainerEstimatorReconciler.Regression(
                 (env, labelName, featuresName, weightsName) =>
                 {
-                    var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numLeaves,
-                        minDataPerLeaf, learningRate, numBoostRound);
+                    var trainer = new LightGbmRegressorTrainer(env, labelName, featuresName, weightsName, numberOfLeaves,
+                        minimumExampleCountPerLeaf, learningRate, numberOfIterations);
                     if (onFit != null)
                         return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
                     return trainer;
@@ -104,10 +104,10 @@ public static Scalar<float> LightGbm(this RegressionCatalog.RegressionTrainers c
         /// <param name="label">The label column.</param>
         /// <param name="features">The features column.</param>
         /// <param name="weights">The weights column.</param>
-        /// <param name="numLeaves">The number of leaves to use.</param>
-        /// <param name="numBoostRound">Number of iterations.</param>
-        /// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
+        /// <param name="numberOfLeaves">The number of leaves to use.</param>
+        /// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
         /// <param name="learningRate">The learning rate.</param>
+        /// <param name="numberOfIterations">Number of iterations.</param>
         /// <param name="onFit">A delegate that is called every time the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
@@ -122,20 +122,22 @@ public static Scalar<float> LightGbm(this RegressionCatalog.RegressionTrainers c
         /// ]]></format>
         /// </example>
         public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> predictedLabel) LightGbm(this BinaryClassificationCatalog.BinaryClassificationTrainers catalog,
-            Scalar<bool> label, Vector<float> features, Scalar<float> weights = null,
-            int? numLeaves = null,
-            int? minDataPerLeaf = null,
+            Scalar<bool> label,
+            Vector<float> features,
+            Scalar<float> weights = null,
+            int? numberOfLeaves = null,
+            int? minimumExampleCountPerLeaf = null,
             double? learningRate = null,
-            int numBoostRound = Options.Defaults.NumBoostRound,
+            int numberOfIterations = Options.Defaults.NumberOfIterations,
             Action<CalibratedModelParametersBase<LightGbmBinaryModelParameters, PlattCalibrator>> onFit = null)
         {
-            CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
+            CheckUserValues(label, features, weights, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations, onFit);

             var rec = new TrainerEstimatorReconciler.BinaryClassifier(
                 (env, labelName, featuresName, weightsName) =>
                 {
-                    var trainer = new LightGbmBinaryTrainer(env, labelName, featuresName, weightsName, numLeaves,
-                        minDataPerLeaf, learningRate, numBoostRound);
+                    var trainer = new LightGbmBinaryTrainer(env, labelName, featuresName, weightsName, numberOfLeaves,
+                        minimumExampleCountPerLeaf, learningRate, numberOfIterations);
                     if (onFit != null)
                         return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
@@ -194,10 +196,10 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
         /// <param name="features">The features column.</param>
         /// <param name="groupId">The groupId column.</param>
         /// <param name="weights">The weights column.</param>
-        /// <param name="numLeaves">The number of leaves to use.</param>
-        /// <param name="numBoostRound">Number of iterations.</param>
-        /// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
+        /// <param name="numberOfLeaves">The number of leaves to use.</param>
+        /// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
         /// <param name="learningRate">The learning rate.</param>
+        /// <param name="numberOfIterations">Number of iterations.</param>
         /// <param name="onFit">A delegate that is called every time the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
@@ -206,21 +208,24 @@ public static (Scalar<float> score, Scalar<float> probability, Scalar<bool> pred
         /// <returns>The set of output columns including in order the predicted binary classification score (which will range
         /// from negative to positive infinity), the calibrated prediction (from 0 to 1), and the predicted label.</returns>
         public static Scalar<float> LightGbm<TVal>(this RankingCatalog.RankingTrainers catalog,
-            Scalar<float> label, Vector<float> features, Key<uint, TVal> groupId, Scalar<float> weights = null,
-            int? numLeaves = null,
-            int? minDataPerLeaf = null,
+            Scalar<float> label,
+            Vector<float> features,
+            Key<uint, TVal> groupId,
+            Scalar<float> weights = null,
+            int? numberOfLeaves = null,
+            int? minimumExampleCountPerLeaf = null,
             double? learningRate = null,
-            int numBoostRound = Options.Defaults.NumBoostRound,
+            int numberOfIterations = Options.Defaults.NumberOfIterations,
             Action<LightGbmRankingModelParameters> onFit = null)
         {
-            CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
+            CheckUserValues(label, features, weights, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations, onFit);
             Contracts.CheckValue(groupId, nameof(groupId));

             var rec = new TrainerEstimatorReconciler.Ranker<TVal>(
                 (env, labelName, featuresName, groupIdName, weightsName) =>
                 {
-                    var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numLeaves,
-                        minDataPerLeaf, learningRate, numBoostRound);
+                    var trainer = new LightGbmRankingTrainer(env, labelName, featuresName, groupIdName, weightsName, numberOfLeaves,
+                        minimumExampleCountPerLeaf, learningRate, numberOfIterations);
                     if (onFit != null)
                         return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
@@ -279,10 +284,10 @@ public static Scalar<float> LightGbm<TVal>(this RankingCatalog.RankingTrainers c
         /// <param name="label">The label, or dependent variable.</param>
         /// <param name="features">The features, or independent variables.</param>
         /// <param name="weights">The weights column.</param>
-        /// <param name="numLeaves">The number of leaves to use.</param>
-        /// <param name="numBoostRound">Number of iterations.</param>
-        /// <param name="minDataPerLeaf">The minimal number of documents allowed in a leaf of the tree, out of the subsampled data.</param>
+        /// <param name="numberOfLeaves">The number of leaves to use.</param>
+        /// <param name="minimumExampleCountPerLeaf">The minimal number of data points allowed in a leaf of the tree, out of the subsampled data.</param>
         /// <param name="learningRate">The learning rate.</param>
+        /// <param name="numberOfIterations">Number of iterations.</param>
         /// <param name="onFit">A delegate that is called every time the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}.Fit(DataView{TInShape})"/> method is called on the
         /// <see cref="Estimator{TInShape, TOutShape, TTransformer}"/> instance created out of this. This delegate will receive
@@ -301,19 +306,19 @@ public static (Vector<float> score, Key<uint, TVal> predictedLabel)
             Key<uint, TVal> label,
             Vector<float> features,
             Scalar<float> weights = null,
-            int? numLeaves = null,
-            int? minDataPerLeaf = null,
+            int? numberOfLeaves = null,
+            int? minimumExampleCountPerLeaf = null,
             double? learningRate = null,
-            int numBoostRound = Options.Defaults.NumBoostRound,
+            int numberOfIterations = Options.Defaults.NumberOfIterations,
             Action<OneVersusAllModelParameters> onFit = null)
         {
-            CheckUserValues(label, features, weights, numLeaves, minDataPerLeaf, learningRate, numBoostRound, onFit);
+            CheckUserValues(label, features, weights, numberOfLeaves, minimumExampleCountPerLeaf, learningRate, numberOfIterations, onFit);

             var rec = new TrainerEstimatorReconciler.MulticlassClassifier<TVal>(
                 (env, labelName, featuresName, weightsName) =>
                 {
-                    var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numLeaves,
-                        minDataPerLeaf, learningRate, numBoostRound);
+                    var trainer = new LightGbmMulticlassTrainer(env, labelName, featuresName, weightsName, numberOfLeaves,
+                        minimumExampleCountPerLeaf, learningRate, numberOfIterations);
                     if (onFit != null)
                         return trainer.WithOnFitDelegate(trans => onFit(trans.Model));
@@ -365,17 +370,17 @@ public static (Vector<float> score, Key<uint, TVal> predictedLabel)
         }

         private static void CheckUserValues(PipelineColumn label, Vector<float> features, Scalar<float> weights,
-            int? numLeaves,
-            int? minDataPerLeaf,
+            int? numberOfLeaves,
+            int? minimumExampleCountPerLeaf,
             double? learningRate,
             int numBoostRound,
             Delegate onFit)
         {
             Contracts.CheckValue(label, nameof(label));
             Contracts.CheckValue(features, nameof(features));
             Contracts.CheckValueOrNull(weights);
-            Contracts.CheckParam(!(numLeaves < 2), nameof(numLeaves), "Must be at least 2.");
-            Contracts.CheckParam(!(minDataPerLeaf <= 0), nameof(minDataPerLeaf), "Must be positive");
+            Contracts.CheckParam(!(numberOfLeaves < 2), nameof(numberOfLeaves), "Must be at least 2.");
+            Contracts.CheckParam(!(minimumExampleCountPerLeaf <= 0), nameof(minimumExampleCountPerLeaf), "Must be positive");
             Contracts.CheckParam(!(learningRate <= 0), nameof(learningRate), "Must be positive");
             Contracts.CheckParam(numBoostRound > 0, nameof(numBoostRound), "Must be positive");
             Contracts.CheckValueOrNull(onFit);
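
A side note on the validation pattern above (a standalone sketch, not part of the commit; the class and variable names are made up): the checks are written as !(value < 2) rather than value >= 2 because these parameters are nullable, and lifted comparisons on int? return false when the value is null, so an unspecified parameter (meaning "use the trainer default") still passes the check.

    using System;

    class NullableCheckSketch
    {
        static void Main()
        {
            int? unset = null;      // caller left numberOfLeaves unspecified
            int? tooSmall = 1;
            int? valid = 4;

            Console.WriteLine(!(unset < 2));    // True  -> null is allowed through
            Console.WriteLine(!(tooSmall < 2)); // False -> 1 is rejected
            Console.WriteLine(!(valid < 2));    // True  -> 4 is accepted
            Console.WriteLine(unset >= 2);      // False -> this form would wrongly reject null
        }
    }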
