Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed & Reactivated AutoFitImageClassificationTrainTest hanging by freeing old Tensor objects #4978

Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
f4a910e
Free Tensor objects in finally statement
mstfbl Mar 27, 2020
4f8475c
Update RunnerUtil.cs
mstfbl Mar 27, 2020
f7e1337
Re-enable AutoFitImageClassificationTrainTest after fix
mstfbl Mar 30, 2020
83ad312
Added IDisposable support to ModelContainer & corrected name of model…
mstfbl Apr 7, 2020
b366fbe
Corrected name of modelContainer in used cases
mstfbl Apr 7, 2020
cb22b21
Clean up Tensor objects through finalizer/destructor of ImageClassifi…
mstfbl Apr 9, 2020
eefa76f
Dispose ExperimentResult objects at the end
mstfbl Apr 9, 2020
45681b4
Dispose only Tensor objects in models
mstfbl Apr 10, 2020
fbd3fd9
Don't free BestModel models
mstfbl Apr 12, 2020
2816ced
Merge remote-tracking branch 'upstream/master' into AutoFitImageClass…
mstfbl Apr 12, 2020
7dad242
Throw Exception if model is trying to be accessed after disposal
mstfbl Apr 14, 2020
1488d0c
Initialize IsModelDisposed inside constructors
mstfbl Apr 14, 2020
78bba9c
Model always written to disk, no longer stored in memory, simplify mo…
mstfbl Apr 16, 2020
bf84823
Model always written to disk, no longer stored in memory, simplify mo…
mstfbl Apr 16, 2020
15b6135
Merge branch 'AutoFitImageClassificationTrainTest-memoryFix' of https…
mstfbl Apr 16, 2020
087c0d5
Update ModelContainer.cs
mstfbl Apr 16, 2020
476bc0e
Added descriptions for Dispose functions and "using" instead of direc…
mstfbl Apr 16, 2020
6234fe5
Merge remote-tracking branch 'upstream/master' into AutoFitImageClass…
mstfbl May 29, 2020
ef17073
Added disposable/finalizer
mstfbl May 29, 2020
a16fccf
Run specific test 100 iterations
mstfbl May 31, 2020
3c6045f
Update AutoFitTests.cs
mstfbl Jun 1, 2020
289bc12
Update .vsts-dotnet-ci.yml
mstfbl Jun 4, 2020
5fb9450
Update AutoFitTests.cs
mstfbl Jun 4, 2020
7668f04
Update AutoFitTests.cs
mstfbl Jun 4, 2020
cb90b8f
Update AutoFitTests.cs
mstfbl Jun 4, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using Microsoft.ML.Data;

Expand All @@ -11,7 +12,7 @@ namespace Microsoft.ML.AutoML
/// Result of an AutoML experiment that includes cross validation details.
/// </summary>
/// <typeparam name="TMetrics">Metrics type for the experiment (like <see cref="BinaryClassificationMetrics"/>).</typeparam>
public class CrossValidationExperimentResult<TMetrics>
public class CrossValidationExperimentResult<TMetrics> : IDisposable
{
/// <summary>
/// Details of the cross validation runs in this experiment.
Expand All @@ -35,6 +36,46 @@ internal CrossValidationExperimentResult(IEnumerable<CrossValidationRunDetail<TM
{
RunDetails = runDetails;
BestRun = bestRun;
_disposed = false;
_disposedRunDetails = false;
mstfbl marked this conversation as resolved.
Show resolved Hide resolved
}

#region IDisposable Support
private bool _disposed;
private bool _disposedRunDetails;

/// <summary>
/// Releases unmanaged Tensor objects in models stored in RunDetail and BestRun instances
/// </summary>
/// <remarks>
/// Invocation of Dispose() is necessary to clean up remaining C library Tensor objects and
/// avoid a memory leak
/// </remarks>
public void Dispose()
{
if (_disposed)
return;
if (!_disposedRunDetails)
(RunDetails as IDisposable)?.Dispose();
(BestRun as IDisposable)?.Dispose();
_disposed = true;
_disposedRunDetails = true;
}

/// <summary>
/// Releases unmanaged Tensor objects in models stored in RunDetail instances
/// </summary>
mstfbl marked this conversation as resolved.
Show resolved Hide resolved
/// <remarks>
/// Invocation of Dispose() is necessary to clean up remaining C library Tensor objects and
/// avoid a memory leak
/// </remarks>
public void DisposeRunDetails()
{
if (_disposedRunDetails || _disposed)
return;
(RunDetails as IDisposable)?.Dispose();
_disposedRunDetails = true;
}
#endregion
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using Microsoft.ML.Data;

Expand All @@ -11,7 +12,7 @@ namespace Microsoft.ML.AutoML
/// Result of an AutoML experiment.
/// </summary>
/// <typeparam name="TMetrics">Metrics type for the experiment (like <see cref="BinaryClassificationMetrics"/>).</typeparam>
public class ExperimentResult<TMetrics>
public class ExperimentResult<TMetrics> : IDisposable
{
/// <summary>
/// Details of the runs in this experiment.
Expand All @@ -35,6 +36,46 @@ internal ExperimentResult(IEnumerable<RunDetail<TMetrics>> runDetails,
{
RunDetails = runDetails;
BestRun = bestRun;
_disposed = false;
_disposedRunDetails = false;
}

#region IDisposable Support
private bool _disposed;
private bool _disposedRunDetails;

/// <summary>
/// Releases unmanaged Tensor objects in models stored in RunDetail and BestRun instances
/// </summary>
/// <remarks>
/// Invocation of Dispose() is necessary to clean up remaining C library Tensor objects and
/// avoid a memory leak
/// </remarks>
public void Dispose()
{
if (_disposed)
mstfbl marked this conversation as resolved.
Show resolved Hide resolved
return;
if (!_disposedRunDetails)
(RunDetails as IDisposable)?.Dispose();
(BestRun as IDisposable)?.Dispose();
_disposed = true;
_disposedRunDetails = true;
}

/// <summary>
/// Releases unmanaged Tensor objects in models stored in RunDetail instances
/// </summary>
/// <remarks>
/// Invocation of Dispose() is necessary to clean up remaining C library Tensor objects and
/// avoid a memory leak
/// </remarks>
public void DisposeRunDetails()
{
mstfbl marked this conversation as resolved.
Show resolved Hide resolved
if (_disposedRunDetails || _disposed)
return;
(RunDetails as IDisposable)?.Dispose();
_disposedRunDetails = true;
}
#endregion
}
}
13 changes: 0 additions & 13 deletions src/Microsoft.ML.AutoML/Experiment/ModelContainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,6 @@ internal class ModelContainer
{
private readonly MLContext _mlContext;
private readonly FileInfo _fileInfo;
private readonly ITransformer _model;

internal ModelContainer(MLContext mlContext, ITransformer model)
{
_mlContext = mlContext;
_model = model;
}

internal ModelContainer(MLContext mlContext, FileInfo fileInfo, ITransformer model, DataViewSchema modelInputSchema)
{
Expand All @@ -32,12 +25,6 @@ internal ModelContainer(MLContext mlContext, FileInfo fileInfo, ITransformer mod

public ITransformer GetModel()
{
mstfbl marked this conversation as resolved.
Show resolved Hide resolved
// If model stored in memory, return it
if (_model != null)
{
return _model;
}

// Load model from disk
ITransformer model;
using (var stream = new FileStream(_fileInfo.FullName, FileMode.Open, FileAccess.Read, FileShare.Read))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public CrossValRunner(MLContext context,
var modelFileInfo = RunnerUtil.GetModelFileInfo(modelDirectory, iterationNum, i + 1);
var trainResult = RunnerUtil.TrainAndScorePipeline(_context, pipeline, _trainDatasets[i], _validDatasets[i],
_labelColumn, _metricsAgent, _preprocessorTransforms?[i], modelFileInfo, _modelInputSchema, _logger);
trainResults.Add(new SuggestedPipelineTrainResult<TMetrics>(trainResult.model, trainResult.metrics, trainResult.exception, trainResult.score));
trainResults.Add(new SuggestedPipelineTrainResult<TMetrics>(trainResult.modelContainer, trainResult.metrics, trainResult.exception, trainResult.score));
}

var avgScore = CalcAverageScore(trainResults.Select(r => r.Score));
Expand Down
8 changes: 3 additions & 5 deletions src/Microsoft.ML.AutoML/Experiment/Runners/RunnerUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace Microsoft.ML.AutoML
{
internal static class RunnerUtil
{
public static (ModelContainer model, TMetrics metrics, Exception exception, double score)
public static (ModelContainer modelContainer, TMetrics metrics, Exception exception, double score)
TrainAndScorePipeline<TMetrics>(MLContext context,
SuggestedPipeline pipeline,
IDataView trainData,
Expand All @@ -37,9 +37,7 @@ public static (ModelContainer model, TMetrics metrics, Exception exception, doub
}

// Build container for model
var modelContainer = modelFileInfo == null ?
new ModelContainer(context, model) :
new ModelContainer(context, modelFileInfo, model, modelInputSchema);
var modelContainer = new ModelContainer(context, modelFileInfo, model, modelInputSchema);

return (modelContainer, metrics, null, score);
}
Expand All @@ -53,7 +51,7 @@ public static (ModelContainer model, TMetrics metrics, Exception exception, doub
public static FileInfo GetModelFileInfo(DirectoryInfo modelDirectory, int iterationNum, int foldNum)
{
return modelDirectory == null ?
null :
new FileInfo(Path.Combine(Path.GetTempPath(), $"Model{iterationNum}_{foldNum}.zip")) :
new FileInfo(Path.Combine(modelDirectory.FullName, $"Model{iterationNum}_{foldNum}.zip"));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public TrainValidateRunner(MLContext context,
trainResult.score,
trainResult.exception == null,
trainResult.metrics,
trainResult.model,
trainResult.modelContainer,
trainResult.exception);
var runDetail = suggestedPipelineRunDetail.ToIterationResult(_preFeaturizer);
return (suggestedPipelineRunDetail, runDetail);
Expand All @@ -59,7 +59,7 @@ public TrainValidateRunner(MLContext context,
private static FileInfo GetModelFileInfo(DirectoryInfo modelDirectory, int iterationNum)
{
return modelDirectory == null ?
null :
new FileInfo(Path.Combine(Path.GetTempPath(), $"Model{iterationNum}.zip")) :
new FileInfo(Path.Combine(modelDirectory.FullName, $"Model{iterationNum}.zip"));
}
}
Expand Down
8 changes: 6 additions & 2 deletions test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public void AutoFitBinaryTest()
Assert.NotNull(result.BestRun.Estimator);
Assert.NotNull(result.BestRun.Model);
Assert.NotNull(result.BestRun.TrainerName);
result.Dispose();
}
mstfbl marked this conversation as resolved.
Show resolved Hide resolved

[Fact]
Expand All @@ -51,11 +52,10 @@ public void AutoFitMultiTest()
Assert.True(result.BestRun.Results.First().ValidationMetrics.MicroAccuracy >= 0.7);
var scoredData = result.BestRun.Results.First().Model.Transform(trainData);
Assert.Equal(NumberDataViewType.Single, scoredData.Schema[DefaultColumnNames.PredictedLabel].Type);
result.Dispose();
}

[TensorFlowFact]
//Skipping test temporarily. This test will be re-enabled once the cause of failures has been determined
[Trait("Category", "SkipInCI")]
public void AutoFitImageClassificationTrainTest()
{
var context = new MLContext(seed: 1);
Expand All @@ -75,6 +75,7 @@ public void AutoFitImageClassificationTrainTest()

var scoredData = result.BestRun.Model.Transform(trainData);
Assert.Equal(TextDataViewType.Instance, scoredData.Schema[DefaultColumnNames.PredictedLabel].Type);
result.Dispose();
}

[Fact(Skip ="Takes too much time, ~10 minutes.")]
Expand All @@ -96,6 +97,7 @@ public void AutoFitImageClassification()
Assert.InRange(result.BestRun.ValidationMetrics.MicroAccuracy, 0.80, 0.9);
var scoredData = result.BestRun.Model.Transform(trainData);
Assert.Equal(TextDataViewType.Instance, scoredData.Schema[DefaultColumnNames.PredictedLabel].Type);
result.Dispose();
}

private void Context_Log(object sender, LoggingEventArgs e)
Expand All @@ -119,6 +121,7 @@ public void AutoFitRegressionTest()
new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel });

Assert.True(result.RunDetails.Max(i => i.ValidationMetrics.RSquared > 0.9));
result.Dispose();
mstfbl marked this conversation as resolved.
Show resolved Hide resolved
}

[Fact]
Expand Down Expand Up @@ -165,6 +168,7 @@ public void AutoFitRecommendationTest()

var metrices = mlContext.Recommendation().Evaluate(testDataViewWithBestScore, labelColumnName: labelColumnName, scoreColumnName: scoreColumnName);
Assert.NotEqual(0, metrices.MeanSquaredError);
experimentResult.Dispose();
}

private TextLoader.Options GetLoaderArgs(string labelColumnName, string userIdColumnName, string itemIdColumnName)
Expand Down