Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stabilize the LR test #4446

Merged
merged 3 commits into from
Nov 11, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/Microsoft.ML.Vision/ImageClassificationTrainer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@ private void TrainAndEvaluateClassificationLayer(string trainBottleneckFilePath,
metrics.Train.LearningRate = learningRate;
// Update train state.
trainstate.CurrentEpoch = epoch;
using (var cursor = trainingSet.GetRowCursor(trainingSet.Schema.ToArray(), new Random()))
using (var cursor = trainingSet.GetRowCursor(trainingSet.Schema.ToArray(), Host.Rand))
Copy link
Member

@codemzs codemzs Nov 8, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

trainingSet.GetRowCursor(trainingSet.Schema.ToArray(), Host.Rand)) [](start = 36, length = 66)

You don't need to pass anything, look at the code for ShuffleRowTransformer:

        if (_forceShuffle || _forceShuffleSource)
            _forceShuffleSeed = options.ForceShuffleSeed ?? Host.Rand.NextSigned(); #Resolved

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While you are correct that if we do not pass anything then it should still be stable, RowShuffling uses host to create a seed to then create a new random, this seems unnecessary, as we would be creating a new object when we can just use an existing random which is more efficient.


In reply to: 344297118 [](ancestors = 344297118)

Copy link
Member

@codemzs codemzs Nov 8, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I discussed offline with @bpstark and we agreed that we should not be passing in Host.Rand object (as I suggested earlier) because it is not thread safe. As I explained, the shuffling transformer code will create a seed from Host.Rand object (that it has access internally) to create an local/internal Rand object and this will ensure stability and also remove concerns around thread safety. #Resolved

{
var labelGetter = cursor.GetGetter<long>(trainingSet.Schema[0]);
var featuresGetter = cursor.GetGetter<VBuffer<float>>(featureColumn);
Expand Down Expand Up @@ -1069,7 +1069,7 @@ private void TrainAndEvaluateClassificationLayer(string trainBottleneckFilePath,
metrics.Train.BatchProcessedCount = 0;
metrics.Train.Accuracy = 0;
metrics.Train.CrossEntropy = 0;
using (var cursor = validationSet.GetRowCursor(validationSet.Schema.ToArray(), new Random()))
using (var cursor = validationSet.GetRowCursor(validationSet.Schema.ToArray(), Host.Rand))
{
var labelGetter = cursor.GetGetter<long>(validationSet.Schema[0]);
var featuresGetter = cursor.GetGetter<VBuffer<float>>(featureColumn);
Expand Down
2 changes: 1 addition & 1 deletion test/Microsoft.ML.AutoML.Tests/AutoFitTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public void AutoFitMultiTest()
[TensorFlowFact]
public void AutoFitImageClassificationTrainTest()
{
var context = new MLContext();
var context = new MLContext(seed: 1);
var datasetPath = DatasetUtil.GetFlowersDataset();
var columnInference = context.Auto().InferColumns(datasetPath, "Label");
var textLoader = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1274,8 +1274,8 @@ public void TensorFlowImageClassificationDefault()
if (!(RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ||
(RuntimeInformation.IsOSPlatform(OSPlatform.OSX))))
{
Assert.InRange(metrics.MicroAccuracy, 0.3, 1);
Assert.InRange(metrics.MacroAccuracy, 0.3, 1);
Assert.InRange(metrics.MicroAccuracy, 0.2, 1);
Assert.InRange(metrics.MacroAccuracy, 0.2, 1);
}
else
{
Expand Down Expand Up @@ -1370,8 +1370,8 @@ public void TensorFlowImageClassification(ImageClassificationTrainer.Architectur
if (!(RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ||
(RuntimeInformation.IsOSPlatform(OSPlatform.OSX))))
{
Assert.InRange(metrics.MicroAccuracy, 0.3, 1);
Assert.InRange(metrics.MacroAccuracy, 0.3, 1);
Assert.InRange(metrics.MicroAccuracy, 0.2, 1);
Assert.InRange(metrics.MacroAccuracy, 0.2, 1);
}
else
{
Expand Down Expand Up @@ -1429,16 +1429,16 @@ public void TensorFlowImageClassification(ImageClassificationTrainer.Architectur
[TensorFlowFact]
public void TensorFlowImageClassificationWithExponentialLRScheduling()
{
TensorFlowImageClassificationWithLRScheduling(new ExponentialLRDecay());
TensorFlowImageClassificationWithLRScheduling(new ExponentialLRDecay(), 50);
}

[Fact(Skip ="Very unstable tests, causing many build failures.")]
[TensorFlowFact]
public void TensorFlowImageClassificationWithPolynomialLRScheduling()
{
TensorFlowImageClassificationWithLRScheduling(new PolynomialLRDecay());
TensorFlowImageClassificationWithLRScheduling(new PolynomialLRDecay(), 50);
}

internal void TensorFlowImageClassificationWithLRScheduling(LearningRateScheduler learningRateScheduler)
internal void TensorFlowImageClassificationWithLRScheduling(LearningRateScheduler learningRateScheduler, int epoch)
{
string assetsRelativePath = @"assets";
string assetsPath = GetAbsolutePath(assetsRelativePath);
Expand Down Expand Up @@ -1484,17 +1484,14 @@ internal void TensorFlowImageClassificationWithLRScheduling(LearningRateSchedule
// ResnetV2101 you can try a different architecture/
// pre-trained model.
Arch = ImageClassificationTrainer.Architecture.ResnetV2101,
Epoch = 50,
Epoch = epoch,
BatchSize = 10,
LearningRate = 0.01f,
MetricsCallback = (metric) => Console.WriteLine(metric),
ValidationSet = validationSet,
ReuseValidationSetBottleneckCachedValues = false,
ReuseTrainSetBottleneckCachedValues = false,
EarlyStoppingCriteria = null,
// Using Exponential Decay for learning rate scheduling
// You can also try other types of Learning rate scheduling methods
// available in LearningRateScheduler.cs
LearningRateScheduler = learningRateScheduler,
WorkspacePath = GetTemporaryDirectory()
};
Expand Down Expand Up @@ -1526,8 +1523,8 @@ internal void TensorFlowImageClassificationWithLRScheduling(LearningRateSchedule
if (!(RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ||
(RuntimeInformation.IsOSPlatform(OSPlatform.OSX))))
{
Assert.InRange(metrics.MicroAccuracy, 0.3, 1);
Assert.InRange(metrics.MacroAccuracy, 0.3, 1);
Assert.InRange(metrics.MicroAccuracy, 0.2, 1);
Assert.InRange(metrics.MacroAccuracy, 0.2, 1);
}
else
{
Expand Down Expand Up @@ -1669,8 +1666,8 @@ public void TensorFlowImageClassificationEarlyStoppingIncreasing()
if (!(RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ||
(RuntimeInformation.IsOSPlatform(OSPlatform.OSX))))
{
Assert.InRange(metrics.MicroAccuracy, 0.3, 1);
Assert.InRange(metrics.MacroAccuracy, 0.3, 1);
Assert.InRange(metrics.MicroAccuracy, 0.2, 1);
Assert.InRange(metrics.MacroAccuracy, 0.2, 1);
}
else
{
Expand Down Expand Up @@ -1763,8 +1760,8 @@ public void TensorFlowImageClassificationEarlyStoppingDecreasing()
if (!(RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ||
(RuntimeInformation.IsOSPlatform(OSPlatform.OSX))))
{
Assert.InRange(metrics.MicroAccuracy, 0.3, 1);
Assert.InRange(metrics.MacroAccuracy, 0.3, 1);
Assert.InRange(metrics.MicroAccuracy, 0.2, 1);
Assert.InRange(metrics.MacroAccuracy, 0.2, 1);
}
else
{
Expand Down