-
Notifications
You must be signed in to change notification settings - Fork 1.9k
[AutoML] Add AutoML example code #3458
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
17 commits
Select commit
Hold shift + click to select a range
6b4f4da
add rev-ed / working AutoML samples
daholste e12f741
Link samples in C# XML
daholste 9565ce3
inline XML example names fixes
daholste b8dc2c8
rev example paths
daholste 59da285
change example paths
daholste b9a0c1c
link datasets from test\data folder
daholste e85a782
download optdigits dataset from ML.NET samples Git repo; print MicroA…
daholste c5a8d9e
remove deleted files from csproj
daholste b322082
best --> bestRun variable name change; print multiclass metrics
daholste 4b5bfd5
avoid printing some metrics that don't display well
daholste 86d1732
iterate on metrics printing
daholste 23241f9
print statement correction
daholste 0187416
downgrade to .NET Core 2.1
daholste e3eb1ed
correct 'hasHeader' for multiclass samples
daholste 3efe412
remove potentially unpopulated 'Probability' from Sentiment schema
daholste 4fe5295
remove unneeded comment
daholste 16939d6
Remove actual dataset paths for clarity for user
daholste File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
docs/samples/Microsoft.ML.AutoML.Samples/BinaryClassificationExperiment.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
using System; | ||
using System.IO; | ||
using System.Linq; | ||
using Microsoft.ML.Auto; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.AutoML.Samples | ||
{ | ||
/// <summary> | ||
/// Sample: runs an AutoML binary classification experiment on sentiment data, prints the | ||
/// best run's validation and test metrics, saves the best model, and predicts one example. | ||
/// </summary> | ||
public static class BinaryClassificationExperiment | ||
{ | ||
private static readonly string TrainDataPath = "<Path to your train dataset goes here>"; | ||
private static readonly string TestDataPath = "<Path to your test dataset goes here>"; | ||
// Built with Path.Combine so the directory separator matches the current OS; | ||
// a literal backslash is only a separator on Windows. | ||
private static readonly string ModelPath = Path.Combine("<Desired model output directory goes here>", "SentimentModel.zip"); | ||
// Time budget for the experiment, in seconds. | ||
private static readonly uint ExperimentTime = 60; | ||
|
||
/// <summary> | ||
/// Executes the sample end to end and waits for a key press before returning. | ||
/// </summary> | ||
public static void Run() | ||
{ | ||
MLContext mlContext = new MLContext(); | ||
|
||
// STEP 1: Load data | ||
IDataView trainDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TrainDataPath, hasHeader: true); | ||
IDataView testDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TestDataPath, hasHeader: true); | ||
|
||
// STEP 2: Run AutoML experiment | ||
// No label column name is passed to Execute here; presumably the default "Label" | ||
// column (declared on SentimentIssue) is used -- confirm against the AutoML API. | ||
Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds..."); | ||
ExperimentResult<BinaryClassificationMetrics> experimentResult = mlContext.Auto() | ||
.CreateBinaryClassificationExperiment(ExperimentTime) | ||
.Execute(trainDataView); | ||
|
||
// STEP 3: Print metric from the best model | ||
RunDetail<BinaryClassificationMetrics> bestRun = experimentResult.BestRun; | ||
Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}"); | ||
Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}"); | ||
Console.WriteLine($"Metrics of best model from validation data --"); | ||
PrintMetrics(bestRun.ValidationMetrics); | ||
|
||
// STEP 4: Evaluate test data | ||
IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView); | ||
BinaryClassificationMetrics testMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(testDataViewWithBestScore); | ||
Console.WriteLine($"Metrics of best model on test data --"); | ||
PrintMetrics(testMetrics); | ||
|
||
// STEP 5: Save the best model for later deployment and inferencing | ||
// The training data's schema is stored alongside the model as its input schema. | ||
using (FileStream fs = File.Create(ModelPath)) | ||
mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs); | ||
|
||
// STEP 6: Create prediction engine from the best trained model | ||
var predictionEngine = mlContext.Model.CreatePredictionEngine<SentimentIssue, SentimentPrediction>(bestRun.Model); | ||
|
||
// STEP 7: Initialize a new sentiment issue, and get the predicted sentiment | ||
var testSentimentIssue = new SentimentIssue | ||
{ | ||
Text = "I hope this helps." | ||
}; | ||
var prediction = predictionEngine.Predict(testSentimentIssue); | ||
Console.WriteLine($"Predicted sentiment for test issue: {prediction.Prediction}"); | ||
|
||
Console.WriteLine("Press any key to continue..."); | ||
Console.ReadKey(); | ||
} | ||
|
||
/// <summary> | ||
/// Writes each binary classification metric to the console, one per line. | ||
/// </summary> | ||
/// <param name="metrics">Metrics to print.</param> | ||
private static void PrintMetrics(BinaryClassificationMetrics metrics) | ||
{ | ||
Console.WriteLine($"Accuracy: {metrics.Accuracy}"); | ||
Console.WriteLine($"AreaUnderPrecisionRecallCurve: {metrics.AreaUnderPrecisionRecallCurve}"); | ||
Console.WriteLine($"AreaUnderRocCurve: {metrics.AreaUnderRocCurve}"); | ||
Console.WriteLine($"F1Score: {metrics.F1Score}"); | ||
Console.WriteLine($"NegativePrecision: {metrics.NegativePrecision}"); | ||
Console.WriteLine($"NegativeRecall: {metrics.NegativeRecall}"); | ||
Console.WriteLine($"PositivePrecision: {metrics.PositivePrecision}"); | ||
Console.WriteLine($"PositiveRecall: {metrics.PositiveRecall}"); | ||
} | ||
} | ||
} |
14 changes: 14 additions & 0 deletions
14
docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/PixelData.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.AutoML.Samples | ||
{ | ||
/// <summary> | ||
/// Input row for the multiclass sample: a 64-element pixel vector plus the number to predict. | ||
/// </summary> | ||
public class PixelData | ||
{ | ||
// Text columns 0 through 63 are loaded together into one 64-element vector. | ||
[LoadColumn(0, 63)] | ||
[VectorType(64)] | ||
public float[] PixelValues; | ||
|
||
// Text column 64; the multiclass sample uses "Number" as its label column. | ||
[LoadColumn(64)] | ||
public float Number; | ||
} | ||
} |
10 changes: 10 additions & 0 deletions
10
docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/PixelPrediction.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.AutoML.Samples | ||
{ | ||
/// <summary> | ||
/// Prediction output for the multiclass sample. | ||
/// </summary> | ||
public class PixelPrediction | ||
{ | ||
// Bound to the "PredictedLabel" column instead of a column named after the field. | ||
[ColumnName("PredictedLabel")] | ||
public float Prediction; | ||
} | ||
} |
13 changes: 13 additions & 0 deletions
13
docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/SentimentIssue.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.AutoML.Samples | ||
{ | ||
/// <summary> | ||
/// Input row for the binary classification sample: a boolean label and the text it applies to. | ||
/// </summary> | ||
public class SentimentIssue | ||
{ | ||
// Text column 0. | ||
[LoadColumn(0)] | ||
public bool Label { get; set; } | ||
|
||
// Text column 1. | ||
[LoadColumn(1)] | ||
public string Text { get; set; } | ||
} | ||
} |
14 changes: 14 additions & 0 deletions
14
docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/SentimentPrediction.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.AutoML.Samples | ||
{ | ||
/// <summary> | ||
/// Prediction output for the binary classification sample. | ||
/// </summary> | ||
public class SentimentPrediction | ||
{ | ||
// ColumnName attribute is used to change the column name from | ||
// its default value, which is the name of the field. | ||
[ColumnName("PredictedLabel")] | ||
public bool Prediction { get; set; } | ||
|
||
// No ColumnName attribute here, so the default column name "Score" applies. | ||
public float Score { get; set; } | ||
} | ||
} |
28 changes: 28 additions & 0 deletions
28
docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/TaxiTrip.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.AutoML.Samples | ||
{ | ||
/// <summary> | ||
/// Input row for the regression sample; each field is loaded from the text column | ||
/// index given in its LoadColumn attribute (0 through 6). | ||
/// </summary> | ||
public class TaxiTrip | ||
{ | ||
[LoadColumn(0)] | ||
public string VendorId; | ||
|
||
[LoadColumn(1)] | ||
public float RateCode; | ||
|
||
[LoadColumn(2)] | ||
public float PassengerCount; | ||
|
||
[LoadColumn(3)] | ||
public float TripTimeInSeconds; | ||
|
||
[LoadColumn(4)] | ||
public float TripDistance; | ||
|
||
[LoadColumn(5)] | ||
public string PaymentType; | ||
|
||
// The regression sample uses "FareAmount" as its label column. | ||
[LoadColumn(6)] | ||
public float FareAmount; | ||
} | ||
} |
10 changes: 10 additions & 0 deletions
10
docs/samples/Microsoft.ML.AutoML.Samples/DataStructures/TaxiTripFarePrediction.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.AutoML.Samples | ||
{ | ||
/// <summary> | ||
/// Prediction output for the regression sample. | ||
/// </summary> | ||
public class TaxiTripFarePrediction | ||
{ | ||
// Bound to the "Score" column instead of a column named after the field. | ||
[ColumnName("Score")] | ||
public float FareAmount; | ||
} | ||
} |
12 changes: 12 additions & 0 deletions
12
docs/samples/Microsoft.ML.AutoML.Samples/Microsoft.ML.AutoML.Samples.csproj
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>netcoreapp2.1</TargetFramework> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<!-- The samples build against the in-repo AutoML project rather than a package. --> | ||
<ProjectReference Include="..\..\..\src\Microsoft.ML.Auto\Microsoft.ML.Auto.csproj" /> | ||
</ItemGroup> | ||
|
||
</Project> |
71 changes: 71 additions & 0 deletions
71
docs/samples/Microsoft.ML.AutoML.Samples/MulticlassClassificationExperiment.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
using System; | ||
using System.IO; | ||
using System.Linq; | ||
using Microsoft.ML.Auto; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.AutoML.Samples | ||
{ | ||
/// <summary> | ||
/// Sample: runs an AutoML multiclass classification experiment on pixel data, prints the | ||
/// best run's validation and test metrics, saves the best model, and predicts one example. | ||
/// </summary> | ||
public static class MulticlassClassificationExperiment | ||
{ | ||
private static readonly string TrainDataPath = "<Path to your train dataset goes here>"; | ||
private static readonly string TestDataPath = "<Path to your test dataset goes here>"; | ||
// Built with Path.Combine so the directory separator matches the current OS; | ||
// a literal backslash is only a separator on Windows. | ||
private static readonly string ModelPath = Path.Combine("<Desired model output directory goes here>", "OptDigitsModel.zip"); | ||
// Name of the PixelData field that holds the value to predict. | ||
private static readonly string LabelColumnName = "Number"; | ||
// Time budget for the experiment, in seconds. | ||
private static readonly uint ExperimentTime = 60; | ||
|
||
/// <summary> | ||
/// Executes the sample end to end and waits for a key press before returning. | ||
/// </summary> | ||
public static void Run() | ||
{ | ||
MLContext mlContext = new MLContext(); | ||
|
||
// STEP 1: Load data | ||
// Comma-separated input; hasHeader is not passed, so its default applies. | ||
IDataView trainDataView = mlContext.Data.LoadFromTextFile<PixelData>(TrainDataPath, separatorChar: ','); | ||
IDataView testDataView = mlContext.Data.LoadFromTextFile<PixelData>(TestDataPath, separatorChar: ','); | ||
|
||
// STEP 2: Run AutoML experiment | ||
Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds..."); | ||
ExperimentResult<MulticlassClassificationMetrics> experimentResult = mlContext.Auto() | ||
.CreateMulticlassClassificationExperiment(ExperimentTime) | ||
.Execute(trainDataView, LabelColumnName); | ||
|
||
// STEP 3: Print metric from the best model | ||
RunDetail<MulticlassClassificationMetrics> bestRun = experimentResult.BestRun; | ||
Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}"); | ||
Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}"); | ||
Console.WriteLine($"Metrics of best model from validation data --"); | ||
PrintMetrics(bestRun.ValidationMetrics); | ||
|
||
// STEP 4: Evaluate test data | ||
IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView); | ||
MulticlassClassificationMetrics testMetrics = mlContext.MulticlassClassification.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName); | ||
Console.WriteLine($"Metrics of best model on test data --"); | ||
PrintMetrics(testMetrics); | ||
|
||
// STEP 5: Save the best model for later deployment and inferencing | ||
// The training data's schema is stored alongside the model as its input schema. | ||
using (FileStream fs = File.Create(ModelPath)) | ||
mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs); | ||
|
||
// STEP 6: Create prediction engine from the best trained model | ||
var predictionEngine = mlContext.Model.CreatePredictionEngine<PixelData, PixelPrediction>(bestRun.Model); | ||
|
||
// STEP 7: Initialize new pixel data, and get the predicted number | ||
// 64 values, matching the vector size declared on PixelData.PixelValues. | ||
var testPixelData = new PixelData | ||
{ | ||
PixelValues = new float[] { 0, 0, 1, 8, 15, 10, 0, 0, 0, 3, 13, 15, 14, 14, 0, 0, 0, 5, 10, 0, 10, 12, 0, 0, 0, 0, 3, 5, 15, 10, 2, 0, 0, 0, 16, 16, 16, 16, 12, 0, 0, 1, 8, 12, 14, 8, 3, 0, 0, 0, 0, 10, 13, 0, 0, 0, 0, 0, 0, 11, 9, 0, 0, 0 } | ||
}; | ||
var prediction = predictionEngine.Predict(testPixelData); | ||
Console.WriteLine($"Predicted number for test pixels: {prediction.Prediction}"); | ||
|
||
Console.WriteLine("Press any key to continue..."); | ||
Console.ReadKey(); | ||
} | ||
|
||
/// <summary> | ||
/// Writes each multiclass classification metric to the console, one per line. | ||
/// </summary> | ||
/// <param name="metrics">Metrics to print.</param> | ||
private static void PrintMetrics(MulticlassClassificationMetrics metrics) | ||
{ | ||
Console.WriteLine($"LogLoss: {metrics.LogLoss}"); | ||
Console.WriteLine($"LogLossReduction: {metrics.LogLossReduction}"); | ||
Console.WriteLine($"MacroAccuracy: {metrics.MacroAccuracy}"); | ||
Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy}"); | ||
} | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
using System; | ||
|
||
namespace Microsoft.ML.AutoML.Samples | ||
{ | ||
/// <summary> | ||
/// Entry point that runs the regression, binary classification, and multiclass | ||
/// classification samples one after another. | ||
/// </summary> | ||
public class Program | ||
{ | ||
/// <summary> | ||
/// Runs each sample in turn, clearing the console after each one. Any exception is | ||
/// printed rather than rethrown, so later samples are skipped once one fails. | ||
/// </summary> | ||
/// <param name="args">Command-line arguments; not used.</param> | ||
public static void Main(string[] args) | ||
{ | ||
try | ||
{ | ||
RegressionExperiment.Run(); | ||
Console.Clear(); | ||
|
||
BinaryClassificationExperiment.Run(); | ||
Console.Clear(); | ||
|
||
MulticlassClassificationExperiment.Run(); | ||
Console.Clear(); | ||
|
||
Console.WriteLine("Done"); | ||
} | ||
catch (Exception ex) | ||
{ | ||
Console.WriteLine($"Exception {ex}"); | ||
} | ||
|
||
// Wait for Enter so the final output stays visible before the process exits. | ||
Console.ReadLine(); | ||
} | ||
} | ||
} |
76 changes: 76 additions & 0 deletions
76
docs/samples/Microsoft.ML.AutoML.Samples/RegressionExperiment.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
using System; | ||
using System.IO; | ||
using System.Linq; | ||
using Microsoft.ML.Auto; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.AutoML.Samples | ||
{ | ||
/// <summary> | ||
/// Sample: runs an AutoML regression experiment on taxi trip data, prints the best run's | ||
/// validation and test metrics, saves the best model, and predicts one example fare. | ||
/// </summary> | ||
public static class RegressionExperiment | ||
{ | ||
private static readonly string TrainDataPath = "<Path to your train dataset goes here>"; | ||
private static readonly string TestDataPath = "<Path to your test dataset goes here>"; | ||
// Built with Path.Combine so the directory separator matches the current OS; | ||
// a literal backslash is only a separator on Windows. | ||
private static readonly string ModelPath = Path.Combine("<Desired model output directory goes here>", "TaxiFareModel.zip"); | ||
// Name of the TaxiTrip field that holds the value to predict. | ||
private static readonly string LabelColumnName = "FareAmount"; | ||
// Time budget for the experiment, in seconds. | ||
private static readonly uint ExperimentTime = 60; | ||
|
||
/// <summary> | ||
/// Executes the sample end to end and waits for a key press before returning. | ||
/// </summary> | ||
public static void Run() | ||
{ | ||
MLContext mlContext = new MLContext(); | ||
|
||
// STEP 1: Load data | ||
IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ','); | ||
IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ','); | ||
|
||
// STEP 2: Run AutoML experiment | ||
Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds..."); | ||
ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto() | ||
.CreateRegressionExperiment(ExperimentTime) | ||
.Execute(trainDataView, LabelColumnName); | ||
|
||
// STEP 3: Print metric from best model | ||
RunDetail<RegressionMetrics> bestRun = experimentResult.BestRun; | ||
Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}"); | ||
Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}"); | ||
Console.WriteLine($"Metrics of best model from validation data --"); | ||
PrintMetrics(bestRun.ValidationMetrics); | ||
|
||
// STEP 4: Evaluate test data | ||
IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView); | ||
RegressionMetrics testMetrics = mlContext.Regression.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName); | ||
Console.WriteLine($"Metrics of best model on test data --"); | ||
PrintMetrics(testMetrics); | ||
|
||
// STEP 5: Save the best model for later deployment and inferencing | ||
// The training data's schema is stored alongside the model as its input schema. | ||
using (FileStream fs = File.Create(ModelPath)) | ||
mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs); | ||
|
||
// STEP 6: Create prediction engine from the best trained model | ||
var predictionEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(bestRun.Model); | ||
|
||
// STEP 7: Initialize a new test taxi trip, and get the predicted fare | ||
// FareAmount is left unset because it is the value being predicted. | ||
var testTaxiTrip = new TaxiTrip | ||
{ | ||
VendorId = "VTS", | ||
RateCode = 1, | ||
PassengerCount = 1, | ||
TripTimeInSeconds = 1140, | ||
TripDistance = 3.75f, | ||
PaymentType = "CRD" | ||
}; | ||
var prediction = predictionEngine.Predict(testTaxiTrip); | ||
Console.WriteLine($"Predicted fare for test taxi trip: {prediction.FareAmount}"); | ||
|
||
Console.WriteLine("Press any key to continue..."); | ||
Console.ReadKey(); | ||
} | ||
|
||
/// <summary> | ||
/// Writes each regression metric to the console, one per line. | ||
/// </summary> | ||
/// <param name="metrics">Metrics to print.</param> | ||
private static void PrintMetrics(RegressionMetrics metrics) | ||
{ | ||
Console.WriteLine($"MeanAbsoluteError: {metrics.MeanAbsoluteError}"); | ||
Console.WriteLine($"MeanSquaredError: {metrics.MeanSquaredError}"); | ||
Console.WriteLine($"RootMeanSquaredError: {metrics.RootMeanSquaredError}"); | ||
Console.WriteLine($"RSquared: {metrics.RSquared}"); | ||
} | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure how to best do it, but printing the featurization would be nice.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great point. +1, and I'm also not sure how to do it. Let's think this through together sometime, then circle back and add it.