Skip to content

[AutoML] add task agnostic wrappers for autofit calls #3860

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions test/Microsoft.ML.AutoML.Tests/Utils/TaskAgnosticAutoFit.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Data;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Microsoft.ML.AutoML.Test
{
/// <summary>
/// Kind of machine learning task a task-agnostic helper is asked to run.
/// </summary>
public enum TaskType
{
    /// <summary>Classification task.</summary>
    Classification = 1,

    /// <summary>Regression task.</summary>
    Regression = 2
}

/// <summary>
/// Makes AutoFit and Score calls uniform across task types, so callers can run a
/// multiclass classification or a regression experiment through the same API.
/// </summary>
internal class TaskAgnosticAutoFit
{
    private readonly TaskType taskType;
    private readonly MLContext context;

    /// <summary>
    /// Progress handler that can receive run details from both regression and
    /// multiclass classification experiments.
    /// </summary>
    internal interface IUniversalProgressHandler : IProgress<RunDetail<RegressionMetrics>>, IProgress<RunDetail<MulticlassClassificationMetrics>>
    {
    }

    /// <summary>
    /// Creates a wrapper bound to one task type and one <see cref="MLContext"/>.
    /// </summary>
    /// <param name="taskType">Task type that selects which experiment and evaluator are used.</param>
    /// <param name="context">Context used to create experiments and evaluate models.</param>
    internal TaskAgnosticAutoFit(TaskType taskType, MLContext context)
    {
        this.taskType = taskType;
        this.context = context;
    }

    /// <summary>
    /// Runs an AutoML experiment for the configured task type and wraps every run in a
    /// <see cref="TaskAgnosticIterationResult"/>.
    /// </summary>
    /// <param name="trainData">Training data.</param>
    /// <param name="label">Name of the label column.</param>
    /// <param name="maxModels">Upper bound on the number of models, passed to the experiment settings.</param>
    /// <param name="maxExperimentTimeInSeconds">Experiment time budget, passed to the experiment settings.</param>
    /// <param name="validationData">Validation data passed to the experiment; may be null.</param>
    /// <param name="preFeaturizers">Optional estimator forwarded to the experiment as its pre-featurizer.</param>
    /// <param name="columnPurposes">Not consulted by this method at present; accepted for interface compatibility.</param>
    /// <param name="progressHandler">Optional handler that is forwarded to the experiment.</param>
    /// <returns>One result per run detail reported by the experiment, materialized as a list.</returns>
    /// <exception cref="ArgumentException">The configured task type is not supported.</exception>
    internal IEnumerable<TaskAgnosticIterationResult> AutoFit(
        IDataView trainData,
        string label,
        int maxModels,
        uint maxExperimentTimeInSeconds,
        IDataView validationData = null,
        IEstimator<ITransformer> preFeaturizers = null,
        IEnumerable<(string, ColumnPurpose)> columnPurposes = null,
        IUniversalProgressHandler progressHandler = null)
    {
        var columnInformation = new ColumnInformation() { LabelColumnName = label };

        switch (this.taskType)
        {
            case TaskType.Classification:
            {
                var settings = new MulticlassExperimentSettings
                {
                    OptimizingMetric = MulticlassClassificationMetric.MicroAccuracy,
                    MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds,
                    MaxModels = maxModels
                };

                // preFeaturizers is passed positionally so that it is no longer silently dropped.
                var experimentResult = this.context.Auto()
                    .CreateMulticlassClassificationExperiment(settings)
                    .Execute(
                        trainData,
                        validationData,
                        columnInformation,
                        preFeaturizers,
                        progressHandler: progressHandler);

                return experimentResult.RunDetails
                    .Select(run => new TaskAgnosticIterationResult(run))
                    .ToList();
            }

            case TaskType.Regression:
            {
                var settings = new RegressionExperimentSettings
                {
                    OptimizingMetric = RegressionMetric.RSquared,
                    MaxExperimentTimeInSeconds = maxExperimentTimeInSeconds,
                    MaxModels = maxModels
                };

                // preFeaturizers is passed positionally so that it is no longer silently dropped.
                var experimentResult = this.context.Auto()
                    .CreateRegressionExperiment(settings)
                    .Execute(
                        trainData,
                        validationData,
                        columnInformation,
                        preFeaturizers,
                        progressHandler: progressHandler);

                return experimentResult.RunDetails
                    .Select(run => new TaskAgnosticIterationResult(run))
                    .ToList();
            }

            default:
                throw new ArgumentException($"Unknown task type {this.taskType}.", "TaskType");
        }
    }

    /// <summary>
    /// Outcome of scoring a model on a test set.
    /// </summary>
    internal struct ScoreResult
    {
        /// <summary>Test data after the model has been applied to it.</summary>
        public IDataView ScoredTestData;

        /// <summary>Value of the primary metric (MicroAccuracy or RSquared, depending on task type).</summary>
        public double PrimaryMetricResult;

        /// <summary>Metric values keyed by metric property name.</summary>
        public Dictionary<string, double> Metrics;
    }

    /// <summary>
    /// Applies <paramref name="model"/> to <paramref name="testData"/> and evaluates the
    /// scored data with the evaluator that matches the configured task type.
    /// </summary>
    /// <param name="testData">Data to score.</param>
    /// <param name="model">Model used to transform the test data.</param>
    /// <param name="label">Name of the label column used by the evaluator.</param>
    /// <returns>The scored data together with the primary metric and all reported metrics.</returns>
    /// <exception cref="ArgumentException">The configured task type is not supported.</exception>
    internal ScoreResult Score(
        IDataView testData,
        ITransformer model,
        string label)
    {
        var result = new ScoreResult
        {
            ScoredTestData = model.Transform(testData)
        };

        switch (this.taskType)
        {
            case TaskType.Classification:
            {
                var classificationMetrics = this.context.MulticlassClassification.Evaluate(result.ScoredTestData, labelColumnName: label);

                result.PrimaryMetricResult = classificationMetrics.MicroAccuracy; // TODO: don't hardcode metric
                result.Metrics = TaskAgnosticIterationResult.MetricValuesToDictionary(classificationMetrics);
                break;
            }

            case TaskType.Regression:
            {
                var regressionMetrics = this.context.Regression.Evaluate(result.ScoredTestData, labelColumnName: label);

                result.PrimaryMetricResult = regressionMetrics.RSquared; // TODO: don't hardcode metric
                result.Metrics = TaskAgnosticIterationResult.MetricValuesToDictionary(regressionMetrics);
                break;
            }

            default:
                throw new ArgumentException($"Unknown task type {this.taskType}.", "TaskType");
        }

        return result;
    }
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Microsoft.ML.Data;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Microsoft.ML.AutoML.Test
{
/// <summary>
/// Task-independent view of a single AutoML run: trainer, pipeline, timings, model or
/// exception, and the validation metrics flattened into a name-to-value dictionary.
/// </summary>
internal class TaskAgnosticIterationResult
{
    /// <summary>Value of the primary metric, or -1 when the run has no validation metrics.</summary>
    internal double PrimaryMetricValue;

    /// <summary>Validation metrics keyed by property name; empty when the run has no validation metrics.</summary>
    internal Dictionary<string, double> MetricValues = new Dictionary<string, double>();

    /// <summary>Model of the run; only read from the run detail when the run has no exception.</summary>
    internal readonly ITransformer Model;

    /// <summary>Exception reported by the run detail, if any.</summary>
    internal readonly Exception Exception;

    internal string TrainerName;
    internal double RuntimeInSeconds;
    internal IEstimator<ITransformer> Estimator;
    internal Pipeline Pipeline;
    internal int PipelineInferenceTimeInSeconds;

    private readonly string primaryMetricName;

    /// <summary>
    /// Copies the task-independent parts of a run detail and, when metrics are available,
    /// converts them to a dictionary and looks up the primary metric.
    /// </summary>
    /// <param name="baseRunDetail">Run detail to copy trainer, estimator, pipeline and timings from.</param>
    /// <param name="validationMetrics">Metrics object of the run; null leaves the defaults in place.</param>
    /// <param name="primaryMetricName">Property name of the metric stored in <see cref="PrimaryMetricValue"/>.</param>
    private TaskAgnosticIterationResult(RunDetail baseRunDetail, object validationMetrics, string primaryMetricName)
    {
        this.TrainerName = baseRunDetail.TrainerName;
        this.Estimator = baseRunDetail.Estimator;
        this.Pipeline = baseRunDetail.Pipeline;

        // The inference time field is an int, so it is narrowed on purpose.
        this.PipelineInferenceTimeInSeconds = (int)baseRunDetail.PipelineInferenceTimeInSeconds;

        // The runtime field is a double: assign directly so fractional seconds are not truncated.
        this.RuntimeInSeconds = baseRunDetail.RuntimeInSeconds;

        this.primaryMetricName = primaryMetricName;
        this.PrimaryMetricValue = -1; // default value in case of exception. TODO: won't work for minimizing metrics, use nullable?

        if (validationMetrics == null)
        {
            return;
        }

        this.MetricValues = MetricValuesToDictionary(validationMetrics);

        this.PrimaryMetricValue = this.MetricValues[this.primaryMetricName];
    }

    /// <summary>
    /// Wraps a regression run detail.
    /// </summary>
    /// <param name="runDetail">Run detail to wrap.</param>
    /// <param name="primaryMetricName">Property name of the primary metric.</param>
    public TaskAgnosticIterationResult(RunDetail<RegressionMetrics> runDetail, string primaryMetricName = "RSquared")
        : this(runDetail, runDetail.ValidationMetrics, primaryMetricName)
    {
        // The model is only read for runs that did not report an exception.
        if (runDetail.Exception == null)
        {
            this.Model = runDetail.Model;
        }

        this.Exception = runDetail.Exception;
    }

    /// <summary>
    /// Wraps a multiclass classification run detail.
    /// </summary>
    /// <param name="runDetail">Run detail to wrap.</param>
    /// <param name="primaryMetricName">Property name of the primary metric.</param>
    public TaskAgnosticIterationResult(RunDetail<MulticlassClassificationMetrics> runDetail, string primaryMetricName = "MicroAccuracy")
        : this(runDetail, runDetail.ValidationMetrics, primaryMetricName)
    {
        // The model is only read for runs that did not report an exception.
        if (runDetail.Exception == null)
        {
            this.Model = runDetail.Model;
        }

        this.Exception = runDetail.Exception;
    }

    /// <summary>
    /// Flattens every public <see cref="double"/> property of a metrics object into a
    /// dictionary keyed by property name.
    /// </summary>
    /// <typeparam name="T">Static type of the metrics argument; the runtime type is what gets checked.</typeparam>
    /// <param name="metric">A <see cref="MulticlassClassificationMetrics"/> or <see cref="RegressionMetrics"/> instance.</param>
    /// <returns>Property name to value for each double-typed property.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="metric"/> is null.</exception>
    /// <exception cref="ArgumentException">The runtime type of <paramref name="metric"/> is not supported.</exception>
    public static Dictionary<string, double> MetricValuesToDictionary<T>(T metric)
    {
        if (metric == null)
        {
            throw new ArgumentNullException(nameof(metric));
        }

        // Use the runtime type throughout: T may be object when called with an untyped metrics value.
        var metricType = metric.GetType();

        if (metricType != typeof(MulticlassClassificationMetrics) && metricType != typeof(RegressionMetrics))
        {
            throw new ArgumentException($"Unsupported metric type {metricType.Name}.");
        }

        return metricType.GetProperties()
            .Where(p => p.PropertyType == typeof(double))
            .ToDictionary(p => p.Name, p => (double)p.GetValue(metric));
    }
}
}