Skip to content

Add SetMaximumMemoryUsageInMegaByte in AutoMLExperiment #6305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/Microsoft.ML.AutoML/API/AutoCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -654,18 +654,18 @@ internal SweepablePipeline ImagePathFeaturizer(string outputColumnName, string i
/// them into a single feature column as output.
/// </summary>
/// <param name="data">input data.</param>
/// <param name="catalogColumns">columns that should be treated as catalog. If not specified, it will automatically infer if a column is catalog or not.</param>
/// <param name="catelogicalColumns">columns that should be treated as catalog. If not specified, it will automatically infer if a column is catalog or not.</param>
/// <param name="numericColumns">columns that should be treated as numeric. If not specified, it will automatically infer if a column is numeric or not.</param>
/// <param name="textColumns">columns that should be treated as text. If not specified, it will automatically infer if a column is text or not.</param>
/// <param name="imagePathColumns">columns that should be treated as image path. If not specified, it will automatically infer if a column is image path or not.</param>
/// <param name="outputColumnName">output feature column.</param>
/// <param name="excludeColumns">columns that won't be included when featurizing, like label</param>
public SweepablePipeline Featurizer(IDataView data, string outputColumnName = "Features", string[] catalogColumns = null, string[] numericColumns = null, string[] textColumns = null, string[] imagePathColumns = null, string[] excludeColumns = null)
public SweepablePipeline Featurizer(IDataView data, string outputColumnName = "Features", string[] catelogicalColumns = null, string[] numericColumns = null, string[] textColumns = null, string[] imagePathColumns = null, string[] excludeColumns = null)
{
Contracts.CheckValue(data, nameof(data));

// validate if there's any overlap among catelogicalColumns, numericColumns, textColumns and excludeColumns
var overallColumns = new string[][] { catalogColumns, numericColumns, textColumns, excludeColumns }
var overallColumns = new string[][] { catelogicalColumns, numericColumns, textColumns, excludeColumns }
.Where(c => c != null)
.SelectMany(c => c);

Expand All @@ -684,9 +684,9 @@ public SweepablePipeline Featurizer(IDataView data, string outputColumnName = "F
}
}

if (catalogColumns != null)
if (catelogicalColumns != null)
{
foreach (var catalogColumn in catalogColumns)
foreach (var catalogColumn in catelogicalColumns)
{
columnInfo.CategoricalColumnNames.Add(catalogColumn);
}
Expand Down
13 changes: 13 additions & 0 deletions src/Microsoft.ML.AutoML/API/AutoMLExperimentExtension.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Collections.Generic;
using System.Text;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.ML.Runtime;
using static Microsoft.ML.DataOperationsCatalog;

namespace Microsoft.ML.AutoML
Expand Down Expand Up @@ -144,6 +145,18 @@ public static AutoMLExperiment SetPipeline(this AutoMLExperiment experiment, Swe
return experiment;
}

/// <summary>
/// Configures <paramref name="experiment"/> to use a <see cref="DefaultPerformanceMonitor"/>.
/// </summary>
/// <param name="experiment">The experiment to configure.</param>
/// <param name="checkIntervalInMilliseconds">Value forwarded to the <see cref="DefaultPerformanceMonitor"/> constructor; going by its name, the check interval in milliseconds.</param>
/// <returns>The same <paramref name="experiment"/> instance, so calls can be chained.</returns>
public static AutoMLExperiment SetPerformanceMonitor(this AutoMLExperiment experiment, int checkIntervalInMilliseconds = 1000)
{
    // The monitor is created by the factory below, which resolves the IChannel
    // from the service provider it is handed.
    experiment.SetPerformanceMonitor(serviceProvider =>
        new DefaultPerformanceMonitor(serviceProvider.GetService<IChannel>(), checkIntervalInMilliseconds));

    return experiment;
}

private static AutoMLExperiment SetEvaluateMetric<TEvaluateMetricManager>(this AutoMLExperiment experiment, TEvaluateMetricManager metricManager)
where TEvaluateMetricManager : class, IEvaluateMetricManager
{
Expand Down
36 changes: 35 additions & 1 deletion src/Microsoft.ML.AutoML/API/BinaryClassificationExperiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;
Expand Down Expand Up @@ -144,6 +146,10 @@ internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSetti
TrainerExtensionUtil.GetTrainerNames(settings.Trainers))
{
_experiment = context.Auto().CreateExperiment();
if (settings.MaximumMemoryUsageInMegaByte is double d)
{
_experiment.SetMaximumMemoryUsageInMegaByte(d);
}
}

public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<BinaryClassificationMetrics>> progressHandler = null)
Expand Down Expand Up @@ -332,7 +338,7 @@ private SweepablePipeline CreateBinaryClassificationPipeline(IDataView trainData

internal class BinaryClassificationRunner : ITrialRunner
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BinaryClassificationRunner

Should we use IDisposable with this class?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a great question. I used the IDisposable pattern mainly because I want to make sure MLContext.CancelExecution gets called and the context is set to null after the trial is finished, without having to explicitly call its destructor or invoke the GC. But I can go another route if you have any recommendation.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dispose pattern is not only for GC finalization or deconstruction. It is used to clean up any object when done using it. You already implemented the Dispose method. Having IDisposable will benefit with the using () code pattern.

{
private readonly MLContext _context;
private MLContext _context;
private readonly IDatasetManager _datasetManager;
private readonly IMetricManager _metricManager;
private readonly SweepablePipeline _pipeline;
Expand All @@ -346,6 +352,12 @@ public BinaryClassificationRunner(MLContext context, IDatasetManager datasetMana
_rnd = settings.Seed.HasValue ? new Random(settings.Seed.Value) : new Random();
}

/// <summary>
/// Cancels any execution on the runner's <see cref="MLContext"/> and releases the reference to it.
/// </summary>
/// <remarks>
/// Safe to call more than once: after the first call the context reference is null,
/// so subsequent calls do nothing instead of throwing.
/// </remarks>
public void Dispose()
{
    // Null-conditional call keeps a repeated Dispose from throwing NullReferenceException.
    _context?.CancelExecution();
    _context = null;
}

public TrialResult Run(TrialSettings settings)
{
if (_metricManager is BinaryMetricManager metricManager)
Expand Down Expand Up @@ -421,5 +433,27 @@ public TrialResult Run(TrialSettings settings)

throw new ArgumentException($"The runner metric manager is of type {_metricManager.GetType()} which expected to be of type {typeof(ITrainTestDatasetManager)} or {typeof(ICrossValidateDatasetManager)}");
}

/// <summary>
/// Runs the trial described by <paramref name="settings"/> on a thread-pool thread and
/// cancels the runner's <see cref="MLContext"/> execution when <paramref name="ct"/> is cancelled.
/// </summary>
/// <param name="settings">The trial settings passed through to <c>Run</c>.</param>
/// <param name="ct">Token whose cancellation triggers <c>CancelExecution</c> on the context.</param>
/// <returns>A task that completes with the result of <c>Run</c>.</returns>
/// <exception cref="OperationCanceledException">
/// The trial threw while <paramref name="ct"/> was in the cancelled state.
/// </exception>
public async Task<TrialResult> RunAsync(TrialSettings settings, CancellationToken ct)
{
    try
    {
        // The registration must stay alive until the trial has finished; awaiting inside
        // the using block guarantees that. Returning the un-awaited task would dispose the
        // registration immediately, so a later cancellation would never reach the context.
        using (ct.Register(() => _context?.CancelExecution()))
        {
            return await Task.Run(() => Run(settings)).ConfigureAwait(false);
        }
    }
    catch (Exception ex) when (ct.IsCancellationRequested)
    {
        // A failure observed after cancellation was requested is reported as a cancellation.
        throw new OperationCanceledException(ex.Message, ex.InnerException);
    }
}
}
}
3 changes: 3 additions & 0 deletions src/Microsoft.ML.AutoML/API/ExperimentSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Threading;

namespace Microsoft.ML.AutoML
Expand Down Expand Up @@ -56,6 +57,8 @@ public abstract class ExperimentSettings
/// <value>The default value is <see cref="CacheBeforeTrainer.Auto"/>.</value>
public CacheBeforeTrainer CacheBeforeTrainer { get; set; }

/// <summary>
/// Optional maximum memory usage for the experiment, in megabytes.
/// </summary>
/// <value>
/// A nullable value; <see langword="null"/> means no limit has been specified through this setting.
/// NOTE(review): the default value and the behavior when the limit is exceeded are not
/// visible from this declaration — confirm against the experiment implementation.
/// </value>
public double? MaximumMemoryUsageInMegaByte { get; set; }

internal int MaxModels;

/// <summary>
Expand Down
38 changes: 36 additions & 2 deletions src/Microsoft.ML.AutoML/API/MulticlassClassificationExperiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;
Expand Down Expand Up @@ -135,6 +137,11 @@ internal MulticlassClassificationExperiment(MLContext context, MulticlassExperim
TrainerExtensionUtil.GetTrainerNames(settings.Trainers))
{
_experiment = context.Auto().CreateExperiment();

if (settings.MaximumMemoryUsageInMegaByte is double d)
{
_experiment.SetMaximumMemoryUsageInMegaByte(d);
}
}

public override ExperimentResult<MulticlassClassificationMetrics> Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<MulticlassClassificationMetrics>> progressHandler = null)
Expand Down Expand Up @@ -189,7 +196,6 @@ public override ExperimentResult<MulticlassClassificationMetrics> Execute(IDataV

return result;
}

public override ExperimentResult<MulticlassClassificationMetrics> Execute(IDataView trainData, IDataView validationData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<MulticlassClassificationMetrics>> progressHandler = null)
{
var label = columnInformation.LabelColumnName;
Expand Down Expand Up @@ -333,7 +339,7 @@ private SweepablePipeline CreateMulticlassClassificationPipeline(IDataView train

internal class MulticlassClassificationRunner : ITrialRunner
{
private readonly MLContext _context;
private MLContext _context;
private readonly IDatasetManager _datasetManager;
private readonly IMetricManager _metricManager;
private readonly SweepablePipeline _pipeline;
Expand Down Expand Up @@ -424,5 +430,33 @@ public TrialResult Run(TrialSettings settings)

throw new ArgumentException($"The runner metric manager is of type {_metricManager.GetType()} which expected to be of type {typeof(ITrainTestDatasetManager)} or {typeof(ICrossValidateDatasetManager)}");
}

/// <summary>
/// Runs the trial described by <paramref name="settings"/> on a thread-pool thread and
/// cancels the runner's <see cref="MLContext"/> execution when <paramref name="ct"/> is cancelled.
/// </summary>
/// <param name="settings">The trial settings passed through to <c>Run</c>.</param>
/// <param name="ct">Token whose cancellation triggers <c>CancelExecution</c> on the context.</param>
/// <returns>A task that completes with the result of <c>Run</c>.</returns>
/// <exception cref="OperationCanceledException">
/// The trial threw while <paramref name="ct"/> was in the cancelled state.
/// </exception>
public async Task<TrialResult> RunAsync(TrialSettings settings, CancellationToken ct)
{
    try
    {
        // The registration must stay alive until the trial has finished; awaiting inside
        // the using block guarantees that. Returning the un-awaited task would dispose the
        // registration immediately, so a later cancellation would never reach the context.
        using (ct.Register(() => _context?.CancelExecution()))
        {
            return await Task.Run(() => Run(settings)).ConfigureAwait(false);
        }
    }
    catch (Exception ex) when (ct.IsCancellationRequested)
    {
        // A failure observed after cancellation was requested is reported as a cancellation.
        throw new OperationCanceledException(ex.Message, ex.InnerException);
    }
}

/// <summary>
/// Cancels any execution on the runner's <see cref="MLContext"/> and releases the reference to it.
/// </summary>
/// <remarks>
/// Safe to call more than once: after the first call the context reference is null,
/// so subsequent calls do nothing instead of throwing.
/// </remarks>
public void Dispose()
{
    // Null-conditional call keeps a repeated Dispose from throwing NullReferenceException.
    _context?.CancelExecution();
    _context = null;
}
}
}
Loading