Skip to content

Commit ff6d16d

Browse files
authored
Adding sample for LightGbm ranking (#2729)
* Adding a sample for LightGbm Ranking
* PR feedback + cleaning up namespaces in Microsoft.ML.Samples project
* nit
* Changing dataset to small sample and other feedback
* Renaming LightGbm sample filenames
* Feedback
1 parent c90fa51 commit ff6d16d

19 files changed: +228 additions, −84 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
using Microsoft.ML.Transforms.Categorical;
22

3-
namespace Microsoft.ML.Samples.Dynamic
3+
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
44
{
5-
public class LightGbmBinaryClassification
5+
public class LightGbm
66
{
77
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
88
public static void Example()
@@ -17,25 +17,25 @@ public static void Example()
1717
var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1);
1818

1919
// Create the Estimator.
20-
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm("IsOver50K", "Features");
20+
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm();
2121

2222
// Fit this Pipeline to the Training Data.
2323
var model = pipeline.Fit(split.TrainSet);
2424

2525
// Evaluate how the model is doing on the test data.
2626
var dataWithPredictions = model.Transform(split.TestSet);
2727

28-
var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K");
28+
var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions);
2929
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
3030

31-
// Output:
32-
// Accuracy: 0.88
33-
// AUC: 0.93
34-
// F1 Score: 0.71
35-
// Negative Precision: 0.90
36-
// Negative Recall: 0.94
37-
// Positive Precision: 0.76
38-
// Positive Recall: 0.66
31+
// Expected output:
32+
// Accuracy: 0.88
33+
// AUC: 0.93
34+
// F1 Score: 0.71
35+
// Negative Precision: 0.90
36+
// Negative Recall: 0.94
37+
// Positive Precision: 0.76
38+
// Positive Recall: 0.66
3939
}
4040
}
4141
}
Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
using Microsoft.ML.LightGBM;
2-
using Microsoft.ML.Transforms.Categorical;
32
using static Microsoft.ML.LightGBM.Options;
43

5-
namespace Microsoft.ML.Samples.Dynamic
4+
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
65
{
7-
class LightGbmBinaryClassificationWithOptions
6+
class LightGbmWithOptions
87
{
98
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
109
public static void Example()
@@ -22,8 +21,6 @@ public static void Example()
2221
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(
2322
new Options
2423
{
25-
LabelColumn = "IsOver50K",
26-
FeatureColumn = "Features",
2724
Booster = new GossBooster.Options
2825
{
2926
TopRate = 0.3,
@@ -37,17 +34,17 @@ public static void Example()
3734
// Evaluate how the model is doing on the test data.
3835
var dataWithPredictions = model.Transform(split.TestSet);
3936

40-
var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K");
37+
var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions);
4138
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
4239

43-
// Output:
44-
// Accuracy: 0.88
45-
// AUC: 0.93
46-
// F1 Score: 0.71
47-
// Negative Precision: 0.90
48-
// Negative Recall: 0.94
49-
// Positive Precision: 0.76
50-
// Positive Recall: 0.67
40+
// Expected output:
41+
// Accuracy: 0.88
42+
// AUC: 0.93
43+
// F1 Score: 0.71
44+
// Negative Precision: 0.90
45+
// Negative Recall: 0.94
46+
// Positive Precision: 0.76
47+
// Positive Recall: 0.67
5148
}
5249
}
5350
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCALogisticRegression.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
using Microsoft.ML.Data;
44
using Microsoft.ML.Trainers;
55

6-
namespace Microsoft.ML.Samples.Dynamic
6+
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
77
{
88
public static class SDCALogisticRegression
99
{

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SDCASupportVectorMachine.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
using System.Linq;
33
using Microsoft.ML.Data;
44

5-
namespace Microsoft.ML.Samples.Dynamic
5+
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
66
{
77
public static class SDCASupportVectorMachine
88
{

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescent.cs

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
namespace Microsoft.ML.Samples.Dynamic
1+
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
22
{
33
public static class SymbolicStochasticGradientDescent
44
{
@@ -24,15 +24,17 @@ public static void Example()
2424

2525
// Evaluate how the model is doing on the test data.
2626
var dataWithPredictions = model.Transform(split.TestSet);
27-
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
27+
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions);
2828
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
29-
// Accuracy: 0.85
30-
// AUC: 0.90
31-
// F1 Score: 0.64
32-
// Negative Precision: 0.88
33-
// Negative Recall: 0.93
34-
// Positive Precision: 0.72
35-
// Positive Recall: 0.58
29+
30+
// Expected output:
31+
// Accuracy: 0.85
32+
// AUC: 0.90
33+
// F1 Score: 0.64
34+
// Negative Precision: 0.88
35+
// Negative Recall: 0.93
36+
// Positive Precision: 0.72
37+
// Positive Recall: 0.58
3638
}
3739
}
3840
}

docs/samples/Microsoft.ML.Samples/Dynamic/Trainers/BinaryClassification/SymbolicStochasticGradientDescentWithOptions.cs

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
namespace Microsoft.ML.Samples.Dynamic
1+
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
22
{
33
public static class SymbolicStochasticGradientDescentWithOptions
44
{
@@ -22,7 +22,6 @@ public static void Example()
2222
var pipeline = mlContext.BinaryClassification.Trainers.SymbolicStochasticGradientDescent(
2323
new ML.Trainers.HalLearners.SymSgdClassificationTrainer.Options()
2424
{
25-
LabelColumn = "IsOver50K",
2625
LearningRate = 0.2f,
2726
NumberOfIterations = 10,
2827
NumberOfThreads = 1,
@@ -33,15 +32,17 @@ public static void Example()
3332

3433
// Evaluate how the model is doing on the test data.
3534
var dataWithPredictions = model.Transform(split.TestSet);
36-
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions, "IsOver50K");
35+
var metrics = mlContext.BinaryClassification.EvaluateNonCalibrated(dataWithPredictions);
3736
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
38-
// Accuracy: 0.84
39-
// AUC: 0.88
40-
// F1 Score: 0.60
41-
// Negative Precision: 0.87
42-
// Negative Recall: 0.93
43-
// Positive Precision: 0.69
44-
// Positive Recall: 0.53
37+
38+
// Expected output:
39+
// Accuracy: 0.84
40+
// AUC: 0.88
41+
// F1 Score: 0.60
42+
// Negative Precision: 0.87
43+
// Negative Recall: 0.93
44+
// Positive Precision: 0.69
45+
// Positive Recall: 0.53
4546
}
4647
}
4748
}
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
using Microsoft.ML.Data;
44
using Microsoft.ML.SamplesUtils;
55

6-
namespace Microsoft.ML.Samples.Dynamic
6+
namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification
77
{
8-
class LightGbmMulticlassClassification
8+
class LightGbm
99
{
1010
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
1111
public static void Example()
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
using Microsoft.ML.SamplesUtils;
66
using static Microsoft.ML.LightGBM.Options;
77

8-
namespace Microsoft.ML.Samples.Dynamic
8+
namespace Microsoft.ML.Samples.Dynamic.Trainers.MulticlassClassification
99
{
10-
class LightGbmMulticlassClassificationWithOptions
10+
class LightGbmWithOptions
1111
{
1212
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
1313
public static void Example()
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
using Microsoft.ML;
2+
3+
namespace Microsoft.ML.Samples.Dynamic.Trainers.Ranking
4+
{
5+
public class LightGbm
6+
{
7+
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
8+
public static void Example()
9+
{
10+
// Creating the ML.Net IHostEnvironment object, needed for the pipeline.
11+
var mlContext = new MLContext();
12+
13+
// Download and featurize the dataset.
14+
var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedMslrWeb10kDataset(mlContext);
15+
16+
// Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split
17+
// respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in
18+
// the test split. The samplingKeyColumn parameter in Ranking.TrainTestSplit is used for this purpose.
19+
var split = mlContext.Ranking.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");
20+
21+
// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
22+
var pipeline = mlContext.Ranking.Trainers.LightGbm(
23+
numLeaves: 4,
24+
minDataPerLeaf: 10,
25+
learningRate: 0.1,
26+
numBoostRound: 2);
27+
28+
// Fit this Pipeline to the Training Data.
29+
var model = pipeline.Fit(split.TrainSet);
30+
31+
// Evaluate how the model is doing on the test data.
32+
var dataWithPredictions = model.Transform(split.TestSet);
33+
34+
var metrics = mlContext.Ranking.Evaluate(dataWithPredictions);
35+
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
36+
37+
// Expected output:
38+
// DCG: @1:1.71, @2:3.88, @3:7.93
39+
// NDCG: @1:7.98, @2:12.14, @3:16.62
40+
}
41+
}
42+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
using Microsoft.ML.LightGBM;
2+
using static Microsoft.ML.LightGBM.Options;
3+
4+
namespace Microsoft.ML.Samples.Dynamic.Trainers.Ranking
5+
{
6+
public class LightGbmWithOptions
7+
{
8+
// This example requires installation of additional nuget package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
9+
public static void Example()
10+
{
11+
// Creating the ML.Net IHostEnvironment object, needed for the pipeline.
12+
var mlContext = new MLContext();
13+
14+
// Download and featurize the train and validation datasets.
15+
var dataview = SamplesUtils.DatasetUtils.LoadFeaturizedMslrWeb10kDataset(mlContext);
16+
17+
// Leave out 10% of the dataset for testing. Since this is a ranking problem, we must ensure that the split
18+
// respects the GroupId column, i.e. rows with the same GroupId are either all in the train split or all in
19+
// the test split. The samplingKeyColumn parameter in Ranking.TrainTestSplit is used for this purpose.
20+
var split = mlContext.Ranking.TrainTestSplit(dataview, testFraction: 0.1, samplingKeyColumn: "GroupId");
21+
22+
// Create the Estimator pipeline. For simplicity, we will train a small tree with 4 leaves and 2 boosting iterations.
23+
var pipeline = mlContext.Ranking.Trainers.LightGbm(
24+
new Options
25+
{
26+
NumLeaves = 4,
27+
MinDataPerLeaf = 10,
28+
LearningRate = 0.1,
29+
NumBoostRound = 2,
30+
Booster = new TreeBooster.Options
31+
{
32+
FeatureFraction = 0.9
33+
}
34+
});
35+
36+
// Fit this pipeline to the training Data.
37+
var model = pipeline.Fit(split.TrainSet);
38+
39+
// Evaluate how the model is doing on the test data.
40+
var dataWithPredictions = model.Transform(split.TestSet);
41+
42+
var metrics = mlContext.Ranking.Evaluate(dataWithPredictions);
43+
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);
44+
45+
// Expected output:
46+
// DCG: @1:1.71, @2:3.88, @3:7.93
47+
// NDCG: @1:7.98, @2:12.14, @3:16.62
48+
}
49+
}
50+
}

0 commit comments

Comments (0)