Skip to content

[AutoML] Add AutoML example code #3458

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Apr 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Microsoft.ML.AutoML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet", "src\mlnet\mlnet.cs
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "mlnet.Tests", "test\mlnet.Tests\mlnet.Tests.csproj", "{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.AutoML.Samples", "docs\samples\Microsoft.ML.AutoML.Samples\Microsoft.ML.AutoML.Samples.csproj", "{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -68,6 +70,18 @@ Global
{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
{AAC3E4E6-C146-44BB-8873-A1E61D563F2A}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug-Intrinsics|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-Intrinsics|Any CPU.Build.0 = Debug-Intrinsics|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release|Any CPU.Build.0 = Release|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-Intrinsics|Any CPU.ActiveCfg = Release-Intrinsics|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-Intrinsics|Any CPU.Build.0 = Release-Intrinsics|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU
{6E84E7C5-FECE-45A9-AD4C-4B0F39F78904}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
using System;
using System.IO;
using System.Linq;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
public static class BinaryClassificationExperiment
{
private static string TrainDataPath = "<Path to your train dataset goes here>";
private static string TestDataPath = "<Path to your test dataset goes here>";
private static string ModelPath = @"<Desired model output directory goes here>\SentimentModel.zip";
private static uint ExperimentTime = 60;

/// <summary>
/// Runs the binary classification sample end to end: loads the train and test data,
/// runs an AutoML binary classification experiment for ExperimentTime seconds, prints
/// the validation and test metrics of the best run, saves the best model to ModelPath,
/// and predicts the sentiment of one hard-coded sample text.
/// </summary>
public static void Run()
{
MLContext mlContext = new MLContext();

// STEP 1: Load data
// Both files are read with the SentimentIssue column mapping; the first line is treated as a header.
IDataView trainDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TrainDataPath, hasHeader: true);
IDataView testDataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(TestDataPath, hasHeader: true);

// STEP 2: Run AutoML experiment
// No label column name is passed to Execute here.
// NOTE(review): presumably AutoML falls back to its default label column name,
// which would match SentimentIssue.Label -- confirm.
Console.WriteLine($"Running AutoML binary classification experiment for {ExperimentTime} seconds...");
ExperimentResult<BinaryClassificationMetrics> experimentResult = mlContext.Auto()
.CreateBinaryClassificationExperiment(ExperimentTime)
.Execute(trainDataView);

// STEP 3: Print metric from the best model
RunDetail<BinaryClassificationMetrics> bestRun = experimentResult.BestRun;
Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how to best do it, but printing the featurization would be nice.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great point. +1 that not sure how to do it. Let's think through this sometime together, and then circle back & add it

Console.WriteLine($"Metrics of best model from validation data --");
PrintMetrics(bestRun.ValidationMetrics);

// STEP 4: Evaluate test data
// The best model scores the held-out test set, and the scored rows are then evaluated.
// NOTE(review): the non-calibrated evaluator is used, presumably because the winning
// trainer is not guaranteed to emit calibrated probabilities -- confirm.
IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
BinaryClassificationMetrics testMetrics = mlContext.BinaryClassification.EvaluateNonCalibrated(testDataViewWithBestScore);
Console.WriteLine($"Metrics of best model on test data --");
PrintMetrics(testMetrics);

// STEP 5: Save the best model for later deployment and inferencing
// The training data schema is saved alongside the model as its input schema.
using (FileStream fs = File.Create(ModelPath))
mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);

// STEP 6: Create prediction engine from the best trained model
var predictionEngine = mlContext.Model.CreatePredictionEngine<SentimentIssue, SentimentPrediction>(bestRun.Model);

// STEP 7: Initialize a new sentiment issue, and get the predicted sentiment
// Only Text is set; Label is left at its default because it is the value being predicted.
var testSentimentIssue = new SentimentIssue
{
Text = "I hope this helps."
};
var prediction = predictionEngine.Predict(testSentimentIssue);
Console.WriteLine($"Predicted sentiment for test issue: {prediction.Prediction}");

// Wait for a key press so the output stays visible until the user continues.
Console.WriteLine("Press any key to continue...");
Console.ReadKey();
}

/// <summary>
/// Writes each binary classification metric to the console, one "Name: value" line per metric.
/// </summary>
/// <param name="metrics">Metrics to print.</param>
private static void PrintMetrics(BinaryClassificationMetrics metrics)
{
    // Collect the label/value pairs first so the output format lives in a single place.
    var rows = new (string Label, double Value)[]
    {
        ("Accuracy", metrics.Accuracy),
        ("AreaUnderPrecisionRecallCurve", metrics.AreaUnderPrecisionRecallCurve),
        ("AreaUnderRocCurve", metrics.AreaUnderRocCurve),
        ("F1Score", metrics.F1Score),
        ("NegativePrecision", metrics.NegativePrecision),
        ("NegativeRecall", metrics.NegativeRecall),
        ("PositivePrecision", metrics.PositivePrecision),
        ("PositiveRecall", metrics.PositiveRecall),
    };

    foreach (var (label, value) in rows)
    {
        Console.WriteLine($"{label}: {value}");
    }
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Input row used by the multiclass classification sample: a vector of pixel values
/// plus the <see cref="Number"/> column, which MulticlassClassificationExperiment
/// uses as the label column.
/// </summary>
public class PixelData
{
// Text-file columns 0 through 63 are loaded together into one vector of 64 floats.
[LoadColumn(0, 63)]
[VectorType(64)]
public float[] PixelValues;

// Text-file column 64, loaded as a single float.
[LoadColumn(64)]
public float Number;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Output row used by the multiclass classification sample's prediction engine.
/// </summary>
public class PixelPrediction
{
// Bound to the "PredictedLabel" column rather than a column named after the field.
[ColumnName("PredictedLabel")]
public float Prediction;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Input row used by the binary classification sample: a boolean label and the text it belongs to.
/// </summary>
public class SentimentIssue
{
// Text-file column 0, loaded as a boolean.
[LoadColumn(0)]
public bool Label { get; set; }

// Text-file column 1, loaded as a string.
[LoadColumn(1)]
public string Text { get; set; }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Output row used by the binary classification sample's prediction engine.
/// </summary>
public class SentimentPrediction
{
// The ColumnName attribute overrides the column name, which otherwise
// defaults to the name of the member (here, the property).
[ColumnName("PredictedLabel")]
public bool Prediction { get; set; }

// No ColumnName attribute, so this property binds to the column with its own name, "Score".
public float Score { get; set; }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Input row used by the regression sample. Each field is loaded from the text-file
/// column index given in its LoadColumn attribute; <see cref="FareAmount"/> is the
/// column RegressionExperiment uses as the label.
/// </summary>
public class TaxiTrip
{
// Column 0, loaded as a string.
[LoadColumn(0)]
public string VendorId;

// Column 1, loaded as a float.
[LoadColumn(1)]
public float RateCode;

// Column 2, loaded as a float.
[LoadColumn(2)]
public float PassengerCount;

// Column 3, loaded as a float.
[LoadColumn(3)]
public float TripTimeInSeconds;

// Column 4, loaded as a float.
[LoadColumn(4)]
public float TripDistance;

// Column 5, loaded as a string.
[LoadColumn(5)]
public string PaymentType;

// Column 6, loaded as a float.
[LoadColumn(6)]
public float FareAmount;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Output row used by the regression sample's prediction engine.
/// </summary>
public class TaxiTripFarePrediction
{
// Bound to the "Score" column rather than a column named after the field.
[ColumnName("Score")]
public float FareAmount;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Builds the samples as a console executable targeting .NET Core 2.1. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp2.1</TargetFramework>
</PropertyGroup>

<!-- The samples compile against the in-repo Microsoft.ML.Auto project. -->
<ItemGroup>
<ProjectReference Include="..\..\..\src\Microsoft.ML.Auto\Microsoft.ML.Auto.csproj" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
using System;
using System.IO;
using System.Linq;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Sample that runs an AutoML multiclass classification experiment over <see cref="PixelData"/>
/// rows, reports the metrics of the best run, saves the best model, and makes one prediction.
/// </summary>
public static class MulticlassClassificationExperiment
{
    // Placeholders: replace these with real locations before running the sample.
    private const string TrainDataPath = "<Path to your train dataset goes here>";
    private const string TestDataPath = "<Path to your test dataset goes here>";
    private const string ModelDirectory = "<Desired model output directory goes here>";
    private const string ModelFileName = "OptDigitsModel.zip";

    // Name of the column AutoML should learn to predict.
    private const string LabelColumnName = "Number";

    // Time budget, in seconds, handed to the AutoML experiment.
    private const uint ExperimentTime = 60;

    // Built with Path.Combine so the separator is correct on every OS; a hard-coded '\'
    // would become part of the file name on Linux and macOS.
    private static string ModelPath => Path.Combine(ModelDirectory, ModelFileName);

    /// <summary>
    /// Runs the sample end to end: loads the train and test data, runs the AutoML experiment,
    /// prints validation and test metrics for the best run, saves the best model to
    /// <see cref="ModelPath"/>, and predicts the number for one hard-coded set of pixels.
    /// </summary>
    public static void Run()
    {
        MLContext mlContext = new MLContext();

        // STEP 1: Load data
        IDataView trainDataView = mlContext.Data.LoadFromTextFile<PixelData>(TrainDataPath, separatorChar: ',');
        IDataView testDataView = mlContext.Data.LoadFromTextFile<PixelData>(TestDataPath, separatorChar: ',');

        // STEP 2: Run AutoML experiment
        Console.WriteLine($"Running AutoML multiclass classification experiment for {ExperimentTime} seconds...");
        ExperimentResult<MulticlassClassificationMetrics> experimentResult = mlContext.Auto()
            .CreateMulticlassClassificationExperiment(ExperimentTime)
            .Execute(trainDataView, LabelColumnName);

        // STEP 3: Print metric from the best model
        RunDetail<MulticlassClassificationMetrics> bestRun = experimentResult.BestRun;
        Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
        Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
        Console.WriteLine("Metrics of best model from validation data --");
        PrintMetrics(bestRun.ValidationMetrics);

        // STEP 4: Evaluate test data
        IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
        MulticlassClassificationMetrics testMetrics = mlContext.MulticlassClassification.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
        Console.WriteLine("Metrics of best model on test data --");
        PrintMetrics(testMetrics);

        // STEP 5: Save the best model for later deployment and inferencing
        using (FileStream fs = File.Create(ModelPath))
        {
            mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
        }

        // STEP 6: Create prediction engine from the best trained model
        var predictionEngine = mlContext.Model.CreatePredictionEngine<PixelData, PixelPrediction>(bestRun.Model);

        // STEP 7: Initialize new pixel data, and get the predicted number
        // 64 values, laid out here as 8 rows of 8 for readability.
        var testPixelData = new PixelData
        {
            PixelValues = new float[]
            {
                0, 0, 1, 8, 15, 10, 0, 0,
                0, 3, 13, 15, 14, 14, 0, 0,
                0, 5, 10, 0, 10, 12, 0, 0,
                0, 0, 3, 5, 15, 10, 2, 0,
                0, 0, 16, 16, 16, 16, 12, 0,
                0, 1, 8, 12, 14, 8, 3, 0,
                0, 0, 0, 10, 13, 0, 0, 0,
                0, 0, 0, 11, 9, 0, 0, 0
            }
        };
        var prediction = predictionEngine.Predict(testPixelData);
        Console.WriteLine($"Predicted number for test pixels: {prediction.Prediction}");

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes each multiclass classification metric to the console, one line per metric.
    /// </summary>
    /// <param name="metrics">Metrics to print.</param>
    private static void PrintMetrics(MulticlassClassificationMetrics metrics)
    {
        Console.WriteLine($"LogLoss: {metrics.LogLoss}");
        Console.WriteLine($"LogLossReduction: {metrics.LogLossReduction}");
        Console.WriteLine($"MacroAccuracy: {metrics.MacroAccuracy}");
        Console.WriteLine($"MicroAccuracy: {metrics.MicroAccuracy}");
    }
}
}
30 changes: 30 additions & 0 deletions docs/samples/Microsoft.ML.AutoML.Samples/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using System;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Console entry point that runs every AutoML sample experiment in sequence.
/// </summary>
public class Program
{
    /// <summary>
    /// Runs the regression, binary classification and multiclass classification samples in
    /// that order, clearing the console after each one. Any exception is printed instead of
    /// propagating, and the process then waits for a line of input before exiting.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(string[] args)
    {
        try
        {
            Action[] experiments =
            {
                RegressionExperiment.Run,
                BinaryClassificationExperiment.Run,
                MulticlassClassificationExperiment.Run,
            };

            foreach (Action runExperiment in experiments)
            {
                runExperiment();
                Console.Clear();
            }

            Console.WriteLine("Done");
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Exception {ex}");
        }

        Console.ReadLine();
    }
}
}
76 changes: 76 additions & 0 deletions docs/samples/Microsoft.ML.AutoML.Samples/RegressionExperiment.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
using System;
using System.IO;
using System.Linq;
using Microsoft.ML.Auto;
using Microsoft.ML.Data;

namespace Microsoft.ML.AutoML.Samples
{
/// <summary>
/// Sample that runs an AutoML regression experiment over <see cref="TaxiTrip"/> rows,
/// reports the metrics of the best run, saves the best model, and makes one prediction.
/// </summary>
public static class RegressionExperiment
{
    // Placeholders: replace these with real locations before running the sample.
    private const string TrainDataPath = "<Path to your train dataset goes here>";
    private const string TestDataPath = "<Path to your test dataset goes here>";
    private const string ModelDirectory = "<Desired model output directory goes here>";
    private const string ModelFileName = "TaxiFareModel.zip";

    // Name of the column AutoML should learn to predict.
    private const string LabelColumnName = "FareAmount";

    // Time budget, in seconds, handed to the AutoML experiment.
    private const uint ExperimentTime = 60;

    // Built with Path.Combine so the separator is correct on every OS; a hard-coded '\'
    // would become part of the file name on Linux and macOS.
    private static string ModelPath => Path.Combine(ModelDirectory, ModelFileName);

    /// <summary>
    /// Runs the sample end to end: loads the train and test data, runs the AutoML experiment,
    /// prints validation and test metrics for the best run, saves the best model to
    /// <see cref="ModelPath"/>, and predicts the fare for one hard-coded taxi trip.
    /// </summary>
    public static void Run()
    {
        MLContext mlContext = new MLContext();

        // STEP 1: Load data
        IDataView trainDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
        IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

        // STEP 2: Run AutoML experiment
        Console.WriteLine($"Running AutoML regression experiment for {ExperimentTime} seconds...");
        ExperimentResult<RegressionMetrics> experimentResult = mlContext.Auto()
            .CreateRegressionExperiment(ExperimentTime)
            .Execute(trainDataView, LabelColumnName);

        // STEP 3: Print metric from best model
        RunDetail<RegressionMetrics> bestRun = experimentResult.BestRun;
        Console.WriteLine($"Total models produced: {experimentResult.RunDetails.Count()}");
        Console.WriteLine($"Best model's trainer: {bestRun.TrainerName}");
        Console.WriteLine("Metrics of best model from validation data --");
        PrintMetrics(bestRun.ValidationMetrics);

        // STEP 4: Evaluate test data
        IDataView testDataViewWithBestScore = bestRun.Model.Transform(testDataView);
        RegressionMetrics testMetrics = mlContext.Regression.Evaluate(testDataViewWithBestScore, labelColumnName: LabelColumnName);
        Console.WriteLine("Metrics of best model on test data --");
        PrintMetrics(testMetrics);

        // STEP 5: Save the best model for later deployment and inferencing
        using (FileStream fs = File.Create(ModelPath))
        {
            mlContext.Model.Save(bestRun.Model, trainDataView.Schema, fs);
        }

        // STEP 6: Create prediction engine from the best trained model
        var predictionEngine = mlContext.Model.CreatePredictionEngine<TaxiTrip, TaxiTripFarePrediction>(bestRun.Model);

        // STEP 7: Initialize a new test taxi trip, and get the predicted fare
        // FareAmount is deliberately left unset: it is the value being predicted.
        var testTaxiTrip = new TaxiTrip
        {
            VendorId = "VTS",
            RateCode = 1,
            PassengerCount = 1,
            TripTimeInSeconds = 1140,
            TripDistance = 3.75f,
            PaymentType = "CRD"
        };
        var prediction = predictionEngine.Predict(testTaxiTrip);
        Console.WriteLine($"Predicted fare for test taxi trip: {prediction.FareAmount}");

        Console.WriteLine("Press any key to continue...");
        Console.ReadKey();
    }

    /// <summary>
    /// Writes each regression metric to the console, one line per metric.
    /// </summary>
    /// <param name="metrics">Metrics to print.</param>
    private static void PrintMetrics(RegressionMetrics metrics)
    {
        Console.WriteLine($"MeanAbsoluteError: {metrics.MeanAbsoluteError}");
        Console.WriteLine($"MeanSquaredError: {metrics.MeanSquaredError}");
        Console.WriteLine($"RootMeanSquaredError: {metrics.RootMeanSquaredError}");
        Console.WriteLine($"RSquared: {metrics.RSquared}");
    }
}
}
Loading