Skip to content

Adding a sample for LightGbm ranking #2704

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 26 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
b572614
Adding a sample for LightGbm Ranking
najeeb-kazmi Feb 20, 2019
f3d5d82
PR feedback + cleaning up namespaces in Microsoft.ML.Samples project
najeeb-kazmi Feb 23, 2019
ba14a9d
Adding a sample for LightGbm Ranking
najeeb-kazmi Feb 20, 2019
f20d7bf
PR feedback + cleaning up namespaces in Microsoft.ML.Samples project
najeeb-kazmi Feb 23, 2019
d862c3b
nit
najeeb-kazmi Feb 23, 2019
269619f
merge conflicts
najeeb-kazmi Feb 23, 2019
9fe8233
- Fixes the project reference path for OnnxTransformer. (#2705)
singlis Feb 23, 2019
160eade
- Removes ResultProcessor, Maml and Sweeper from Microsoft.ML nuget.…
singlis Feb 23, 2019
eecf272
Remove MD5Hasher. (#2706)
codemzs Feb 23, 2019
f063510
Hide delegates, model parameters classes, move onFit to staticPIpe, g…
Ivanidzo4ka Feb 24, 2019
22844f6
Move the builder classes in DataViewSchema (#2703)
eerhardt Feb 24, 2019
8001ccc
Adding functional tests for all training and evaluation tasks (#2646)
rogancarr Feb 24, 2019
850559f
Introduce order for pixel extraction (#2602)
Ivanidzo4ka Feb 25, 2019
7cc208c
Fixing parameters in ML.NET Public API (#2665)
abgoswam Feb 25, 2019
4acf5aa
Explicit implementation for IsRowToRowMapper and GetRowToRowMapper (#…
artidoro Feb 25, 2019
2ef0614
Internalize DataKind (#2661)
wschin Feb 25, 2019
a16eb30
Added samples & docs for BinaryClassification.StochasticGradientDesce…
Feb 25, 2019
f6d55f3
Make DataViewRowId not act like a number. (#2707)
eerhardt Feb 25, 2019
4420cc7
Changed Ranker to Ranking in evaluation related files. (#2675)
zeahmed Feb 25, 2019
18801ab
Adding a sample for LightGbm Ranking
najeeb-kazmi Feb 20, 2019
1e1a803
PR feedback + cleaning up namespaces in Microsoft.ML.Samples project
najeeb-kazmi Feb 23, 2019
345cf60
Adding a sample for LightGbm Ranking
najeeb-kazmi Feb 20, 2019
c25a3c3
PR feedback + cleaning up namespaces in Microsoft.ML.Samples project
najeeb-kazmi Feb 23, 2019
34ecd4a
nit
najeeb-kazmi Feb 23, 2019
b8bbf21
merge conflicts
najeeb-kazmi Feb 26, 2019
1c99a4f
Changing dataset to small sample and other feedback
najeeb-kazmi Feb 26, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,7 @@ public static ITransformer TrainModel(MLContext mlContext, IDataView trainData)
// Construct the learning pipeline.
var estimator = mlContext.Transforms.CustomMapping(mapping, null)
.AppendCacheCheckpoint(mlContext)
.Append(mlContext.BinaryClassification.Trainers.FastTree(label: "Label"));
.Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label"));

return estimator.Fit(trainData);
}
Expand Down Expand Up @@ -998,7 +998,7 @@ public class CustomMappings : CustomMappingFactory<InputRow, OutputRow>
// Construct the learning pipeline. Note that we are now providing a contract name for the custom mapping:
// otherwise we will not be able to save the model.
var estimator = mlContext.Transforms.CustomMapping<InputRow, OutputRow>(CustomMappings.IncomeMapping, nameof(CustomMappings.IncomeMapping))
.Append(mlContext.BinaryClassification.Trainers.FastTree(label: "Label"));
.Append(mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label"));

// If there is enough memory, we can cache the data in memory to avoid re-reading it from the file
// each time it is accessed.
Expand Down
8 changes: 4 additions & 4 deletions docs/samples/Microsoft.ML.Samples/Dynamic/Calibrator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ public static void Example()
HasHeader = true,
Columns = new[]
{
new TextLoader.Column("Sentiment", DataKind.BL, 0),
new TextLoader.Column("SentimentText", DataKind.Text, 1)
new TextLoader.Column("Sentiment", DataKind.Boolean, 0),
new TextLoader.Column("SentimentText", DataKind.String, 1)
}
});

Expand All @@ -50,8 +50,8 @@ public static void Example()
// the "Features" column produced by FeaturizeText as the features column.
var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
labelColumn: "Sentiment",
featureColumn: "Features",
labelColumnName: "Sentiment",
featureColumnName: "Features",
l2Const: 0.001f,
loss: new HingeLoss())); // By specifying loss: new HingeLoss(), StochasticDualCoordinateAscent will train a support vector machine (SVM).

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public static void Example()
// We will train a FastTreeRegression model with 1 tree on these two columns to predict Age.
string outputColumnName = "Features";
var pipeline = ml.Transforms.Concatenate(outputColumnName, new[] { "Parity", "Induced" })
.Append(ml.Regression.Trainers.FastTree(labelColumn: "Age", featureColumn: outputColumnName, numTrees: 1, numLeaves: 2, minDatapointsInLeaves: 1));
.Append(ml.Regression.Trainers.FastTree(labelColumnName: "Age", featureColumnName: outputColumnName, numTrees: 1, numLeaves: 2, minDatapointsInLeaves: 1));

var model = pipeline.Fit(trainData);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ public static void Example()
var reader = ml.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("Label", DataKind.BL, 0),
new TextLoader.Column("Features", DataKind.Num, new [] { new TextLoader.Range(1, 9) })
new TextLoader.Column("Label", DataKind.Boolean, 0),
new TextLoader.Column("Features", DataKind.Single, new [] { new TextLoader.Range(1, 9) })
},
hasHeader: true
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ public static void Example()
var reader = mlContext.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("Sentiment", DataKind.BL, 0),
new TextLoader.Column("SentimentText", DataKind.Text, 1)
new TextLoader.Column("Sentiment", DataKind.Boolean, 0),
new TextLoader.Column("SentimentText", DataKind.String, 1)
},
hasHeader: true
);
Expand All @@ -46,7 +46,7 @@ public static void Example()
// the "Features" column produced by FeaturizeText as the features column.
var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
.AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline.
.Append(mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(labelColumn: "Sentiment", featureColumns: new[] { "Features" }));
.Append(mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(labelColumnName: "Sentiment", featureColumnNames: new[] { "Features" }));

// Fit the model.
var model = pipeline.Fit(data);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public static void Example()
.ToArray();
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Regression.Trainers.GeneralizedAdditiveModels(
labelColumn: labelName, featureColumn: "Features", maxBins: 16));
labelColumnName: labelName, featureColumnName: "Features", maxBins: 16));
var fitPipeline = pipeline.Fit(data);

// Extract the model from the pipeline
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ public static void Example()
{
Columns = new[]
{
new TextLoader.Column("ImagePath", DataKind.TX, 0),
new TextLoader.Column("Name", DataKind.TX, 1),
new TextLoader.Column("ImagePath", DataKind.String, 0),
new TextLoader.Column("Name", DataKind.String, 1),
}
}).Read(imagesDataFile);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ public static void Example()
{
Columns = new[]
{
new TextLoader.Column("ImagePath", DataKind.TX, 0),
new TextLoader.Column("Name", DataKind.TX, 1),
new TextLoader.Column("ImagePath", DataKind.String, 0),
new TextLoader.Column("Name", DataKind.String, 1),
}
}).Read(imagesDataFile);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ public static void Example()
{
Columns = new[]
{
new TextLoader.Column("ImagePath", DataKind.TX, 0),
new TextLoader.Column("Name", DataKind.TX, 1),
new TextLoader.Column("ImagePath", DataKind.String, 0),
new TextLoader.Column("Name", DataKind.String, 1),
}
}).Read(imagesDataFile);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ public static void Example()
{
Columns = new[]
{
new TextLoader.Column("ImagePath", DataKind.TX, 0),
new TextLoader.Column("Name", DataKind.TX, 1),
new TextLoader.Column("ImagePath", DataKind.String, 0),
new TextLoader.Column("Name", DataKind.String, 1),
}
}).Read(imagesDataFile);

Expand Down
30 changes: 15 additions & 15 deletions docs/samples/Microsoft.ML.Samples/Dynamic/LogisticRegression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,21 @@ public static void Example()
HasHeader = true,
Columns = new[]
{
new TextLoader.Column("age", DataKind.R4, 0),
new TextLoader.Column("workclass", DataKind.Text, 1),
new TextLoader.Column("fnlwgt", DataKind.R4, 2),
new TextLoader.Column("education", DataKind.Text, 3),
new TextLoader.Column("education-num", DataKind.R4, 4),
new TextLoader.Column("marital-status", DataKind.Text, 5),
new TextLoader.Column("occupation", DataKind.Text, 6),
new TextLoader.Column("relationship", DataKind.Text, 7),
new TextLoader.Column("ethnicity", DataKind.Text, 8),
new TextLoader.Column("sex", DataKind.Text, 9),
new TextLoader.Column("capital-gain", DataKind.R4, 10),
new TextLoader.Column("capital-loss", DataKind.R4, 11),
new TextLoader.Column("hours-per-week", DataKind.R4, 12),
new TextLoader.Column("native-country", DataKind.Text, 13),
new TextLoader.Column("Label", DataKind.Bool, 14)
new TextLoader.Column("age", DataKind.Single, 0),
new TextLoader.Column("workclass", DataKind.String, 1),
new TextLoader.Column("fnlwgt", DataKind.Single, 2),
new TextLoader.Column("education", DataKind.String, 3),
new TextLoader.Column("education-num", DataKind.Single, 4),
new TextLoader.Column("marital-status", DataKind.String, 5),
new TextLoader.Column("occupation", DataKind.String, 6),
new TextLoader.Column("relationship", DataKind.String, 7),
new TextLoader.Column("ethnicity", DataKind.String, 8),
new TextLoader.Column("sex", DataKind.String, 9),
new TextLoader.Column("capital-gain", DataKind.Single, 10),
new TextLoader.Column("capital-loss", DataKind.Single, 11),
new TextLoader.Column("hours-per-week", DataKind.Single, 12),
new TextLoader.Column("native-country", DataKind.String, 13),
new TextLoader.Column("Label", DataKind.Boolean, 14)
}
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public static void Example()
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.BinaryClassification.Trainers.LogisticRegression(
labelColumn: labelName, featureColumn: "Features"));
labelColumnName: labelName, featureColumnName: "Features"));
var model = pipeline.Fit(data);

// Extract the model from the pipeline
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ public static void Example()
var lookupMap = mlContext.Data.ReadFromTextFile(Path.Combine(modelLocation, "imdb_word_index.csv"),
columns: new[]
{
new TextLoader.Column("Words", DataKind.TX, 0),
new TextLoader.Column("Ids", DataKind.I4, 1),
new TextLoader.Column("Words", DataKind.String, 0),
new TextLoader.Column("Ids", DataKind.Int32, 1),
},
separatorChar: ','
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
public static class AveragedPerceptron
{
// In this example we will use the adult income dataset. The goal is to predict
// if a person's income is above $50K or not, based on different pieces of information about that person.
// if a person's income is above $50K or not, based on demographic information about that person.
// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
public static void Example()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
public static class AveragedPerceptronWithOptions
{
// In this example we will use the adult income dataset. The goal is to predict
// if a person's income is above $50K or not, based on different pieces of information about that person.
// if a person's income is above $50K or not, based on demographic information about that person.
// For more details about this dataset, please see https://archive.ics.uci.edu/ml/datasets/adult.
public static void Example()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
using Microsoft.ML.Transforms.Categorical;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
public class LightGbmBinaryClassification
public class LightGbm
{
// This example requires installation of the additional NuGet package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
public static void Example()
Expand All @@ -17,25 +17,25 @@ public static void Example()
var split = mlContext.BinaryClassification.TrainTestSplit(dataview, testFraction: 0.1);

// Create the Estimator.
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm("IsOver50K", "Features");
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm();

// Fit this Pipeline to the Training Data.
var model = pipeline.Fit(split.TrainSet);

// Evaluate how the model is doing on the test data.
var dataWithPredictions = model.Transform(split.TestSet);

var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K");
var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions);
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

// Output:
// Accuracy: 0.88
// AUC: 0.93
// F1 Score: 0.71
// Negative Precision: 0.90
// Negative Recall: 0.94
// Positive Precision: 0.76
// Positive Recall: 0.66
// Expected output:
// Accuracy: 0.88
// AUC: 0.93
// F1 Score: 0.71
// Negative Precision: 0.90
// Negative Recall: 0.94
// Positive Precision: 0.76
// Positive Recall: 0.66
}
}
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
using Microsoft.ML.LightGBM;
using Microsoft.ML.Transforms.Categorical;
using static Microsoft.ML.LightGBM.Options;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
class LightGbmBinaryClassificationWithOptions
class LightGbmWithOptions
{
// This example requires installation of the additional NuGet package <a href="https://www.nuget.org/packages/Microsoft.ML.LightGBM/">Microsoft.ML.LightGBM</a>.
public static void Example()
Expand All @@ -22,8 +21,6 @@ public static void Example()
var pipeline = mlContext.BinaryClassification.Trainers.LightGbm(
new Options
{
LabelColumn = "IsOver50K",
FeatureColumn = "Features",
Booster = new GossBooster.Options
{
TopRate = 0.3,
Expand All @@ -37,17 +34,17 @@ public static void Example()
// Evaluate how the model is doing on the test data.
var dataWithPredictions = model.Transform(split.TestSet);

var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions, "IsOver50K");
var metrics = mlContext.BinaryClassification.Evaluate(dataWithPredictions);
SamplesUtils.ConsoleUtils.PrintMetrics(metrics);

// Output:
// Accuracy: 0.88
// AUC: 0.93
// F1 Score: 0.71
// Negative Precision: 0.90
// Negative Recall: 0.94
// Positive Precision: 0.76
// Positive Recall: 0.67
// Expected output:
// Accuracy: 0.88
// AUC: 0.93
// F1 Score: 0.71
// Negative Precision: 0.90
// Negative Recall: 0.94
// Positive Precision: 0.76
// Positive Recall: 0.67
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
using Microsoft.ML.Data;
using Microsoft.ML.Trainers;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
public static class SDCALogisticRegression
{
Expand All @@ -28,8 +28,8 @@ public static void Example()
var reader = mlContext.Data.CreateTextLoader(
columns: new[]
{
new TextLoader.Column("Sentiment", DataKind.BL, 0),
new TextLoader.Column("SentimentText", DataKind.Text, 1)
new TextLoader.Column("Sentiment", DataKind.Boolean, 0),
new TextLoader.Column("SentimentText", DataKind.String, 1)
},
hasHeader: true
);
Expand All @@ -49,7 +49,7 @@ public static void Example()
// the "Features" column produced by FeaturizeText as the features column.
var pipeline = mlContext.Transforms.Text.FeaturizeText("SentimentText", "Features")
.AppendCacheCheckpoint(mlContext) // Add a data-cache step within a pipeline.
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumn: "Sentiment", featureColumn: "Features", l2Const: 0.001f));
.Append(mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscent(labelColumnName: "Sentiment", featureColumnName: "Features", l2Const: 0.001f));

// Step 3: Run Cross-Validation on this pipeline.
var cvResults = mlContext.BinaryClassification.CrossValidate(data, pipeline, labelColumn: "Sentiment");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
using System.Linq;
using Microsoft.ML.Data;

namespace Microsoft.ML.Samples.Dynamic
namespace Microsoft.ML.Samples.Dynamic.Trainers.BinaryClassification
{
public static class SDCASupportVectorMachine
{
Expand Down Expand Up @@ -41,7 +41,7 @@ public static void Example()
// Step 2: Create a binary classifier. Because the hinge loss is specified below, this trainer produces a support vector machine (SVM) model.
// We set the "Label" column as the label of the dataset, and the "Features" column as the features column.
var pipeline = mlContext.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(
labelColumn: "Label", featureColumn: "Features", loss: new HingeLoss(), l2Const: 0.001f);
labelColumnName: "Label", featureColumnName: "Features", loss: new HingeLoss(), l2Const: 0.001f);

// Step 3: Train the pipeline created.
var model = pipeline.Fit(data);
Expand Down
Loading