Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 23 additions & 7 deletions src/Microsoft.ML.AutoML/API/BinaryClassificationExperiment.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.ML.AutoML.Tuner;
using Microsoft.ML.Data;
using Microsoft.ML.Runtime;
using Microsoft.ML.Trainers;
Expand All @@ -35,13 +36,19 @@ public sealed class BinaryExperimentSettings : ExperimentSettings
/// <value>The default value is a collection auto-populated with all possible trainers (all values of <see cref="BinaryClassificationTrainer" />).</value>
public ICollection<BinaryClassificationTrainer> Trainers { get; }

/// <summary>
/// Gets or sets a value indicating whether <see cref="AutoZeroTuner"/> is used for hyper-parameter optimization.
/// </summary>
/// <value>The default value is <see langword="false"/>.</value>
public bool UseAutoZeroTuner { get; set; }

/// <summary>
/// Creates a <see cref="BinaryExperimentSettings"/> with its defaults: accuracy as the optimizing metric,
/// every <see cref="BinaryClassificationTrainer"/> value enabled, and the AutoZero tuner switched off.
/// </summary>
public BinaryExperimentSettings()
{
    OptimizingMetric = BinaryClassificationMetric.Accuracy;

    // Start from every trainer the enum declares.
    var everyTrainer = Enum.GetValues(typeof(BinaryClassificationTrainer)).Cast<BinaryClassificationTrainer>();
    Trainers = new List<BinaryClassificationTrainer>(everyTrainer);

    UseAutoZeroTuner = false;
}
}

Expand Down Expand Up @@ -133,7 +140,7 @@ public enum BinaryClassificationTrainer
/// </example>
public sealed class BinaryClassificationExperiment : ExperimentBase<BinaryClassificationMetrics, BinaryExperimentSettings>
{
private readonly AutoMLExperiment _experiment;
private AutoMLExperiment _experiment;
private const string Features = "__Features__";
private SweepablePipeline _pipeline;

Expand All @@ -151,13 +158,13 @@ internal BinaryClassificationExperiment(MLContext context, BinaryExperimentSetti
_experiment.SetMaximumMemoryUsageInMegaByte(d);
}
_experiment.SetMaxModelToExplore(settings.MaxModels);
_experiment.SetTrainingTimeInSeconds(settings.MaxExperimentTimeInSeconds);
}

public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView trainData, ColumnInformation columnInformation, IEstimator<ITransformer> preFeaturizer = null, IProgress<RunDetail<BinaryClassificationMetrics>> progressHandler = null)
{
var label = columnInformation.LabelColumnName;
_experiment.SetBinaryClassificationMetric(Settings.OptimizingMetric, label);
_experiment.SetTrainingTimeInSeconds(Settings.MaxExperimentTimeInSeconds);

// Cross val threshold for # of dataset rows --
// If dataset has < threshold # of rows, use cross val.
Expand Down Expand Up @@ -194,7 +201,7 @@ public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView

return monitor;
});
_experiment.SetTrialRunner<BinaryClassificationRunner>();
_experiment = PostConfigureAutoMLExperiment(_experiment);
_experiment.Run();

var runDetails = monitor.RunDetails.Select(e => BestResultUtil.ToRunDetail(Context, e, _pipeline));
Expand All @@ -208,7 +215,6 @@ public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView
{
var label = columnInformation.LabelColumnName;
_experiment.SetBinaryClassificationMetric(Settings.OptimizingMetric, label);
_experiment.SetTrainingTimeInSeconds(Settings.MaxExperimentTimeInSeconds);
_experiment.SetDataset(trainData, validationData);
_pipeline = CreateBinaryClassificationPipeline(trainData, columnInformation, preFeaturizer);
_experiment.SetPipeline(_pipeline);
Expand All @@ -228,7 +234,7 @@ public override ExperimentResult<BinaryClassificationMetrics> Execute(IDataView

return monitor;
});
_experiment.SetTrialRunner<BinaryClassificationRunner>();
_experiment = PostConfigureAutoMLExperiment(_experiment);
_experiment.Run();

var runDetails = monitor.RunDetails.Select(e => BestResultUtil.ToRunDetail(Context, e, _pipeline));
Expand Down Expand Up @@ -263,7 +269,6 @@ public override CrossValidationExperimentResult<BinaryClassificationMetrics> Exe
{
var label = columnInformation.LabelColumnName;
_experiment.SetBinaryClassificationMetric(Settings.OptimizingMetric, label);
_experiment.SetTrainingTimeInSeconds(Settings.MaxExperimentTimeInSeconds);
_experiment.SetDataset(trainData, (int)numberOfCVFolds);
_pipeline = CreateBinaryClassificationPipeline(trainData, columnInformation, preFeaturizer);
_experiment.SetPipeline(_pipeline);
Expand All @@ -284,7 +289,7 @@ public override CrossValidationExperimentResult<BinaryClassificationMetrics> Exe
return monitor;
});

_experiment.SetTrialRunner<BinaryClassificationRunner>();
_experiment = PostConfigureAutoMLExperiment(_experiment);
_experiment.Run();

var runDetails = monitor.RunDetails.Select(e => BestResultUtil.ToCrossValidationRunDetail(Context, e, _pipeline));
Expand Down Expand Up @@ -335,6 +340,17 @@ private SweepablePipeline CreateBinaryClassificationPipeline(IDataView trainData
.Append(Context.Auto().BinaryClassification(labelColumnName: columnInformation.LabelColumnName, useSdcaLogisticRegression: useSdca, useFastTree: useFastTree, useLgbm: useLgbm, useLbfgsLogisticRegression: uselbfgs, useFastForest: useFastForest, featureColumnName: Features));
}
}

/// <summary>
/// Applies the configuration shared by every Execute overload just before the experiment runs:
/// registers <see cref="BinaryClassificationRunner"/> as the trial runner and, when
/// <see cref="BinaryExperimentSettings.UseAutoZeroTuner"/> is set, selects <see cref="AutoZeroTuner"/> as the tuner.
/// </summary>
/// <param name="experiment">The experiment to configure.</param>
/// <returns>The same <paramref name="experiment"/> instance that was passed in.</returns>
private AutoMLExperiment PostConfigureAutoMLExperiment(AutoMLExperiment experiment)
{
    experiment.SetTrialRunner<BinaryClassificationRunner>();

    if (!Settings.UseAutoZeroTuner)
    {
        // No tuner is registered here in this case.
        return experiment;
    }

    experiment.SetTuner<AutoZeroTuner>();
    return experiment;
}
}

internal class BinaryClassificationRunner : ITrialRunner
Expand Down
6 changes: 6 additions & 0 deletions src/Microsoft.ML.AutoML/Microsoft.ML.AutoML.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@
<AdditionalFiles Include="CodeGen\*-estimators.json" />
</ItemGroup>

<!-- Embeds Tuner\Portfolios.json into the assembly. AutoZeroTuner loads it at runtime through
Assembly.GetManifestResourceStream("Microsoft.ML.AutoML.Tuner.Portfolios.json").
NOTE(review): CopyToOutputDirectory looks redundant for an embedded resource; confirm whether the loose copy is needed. -->
<ItemGroup>
<EmbeddedResource Include="Tuner\Portfolios.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</EmbeddedResource>
</ItemGroup>

<Target DependsOnTargets="ResolveReferences" Name="CopyProjectReferencesToPackage">
<ItemGroup>
<!--Include DLLs of Project References-->
Expand Down
142 changes: 142 additions & 0 deletions src/Microsoft.ML.AutoML/Tuner/AutoZeroTuner.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Text;
using System.Text.Json;
using Microsoft.ML.AutoML.CodeGen;
using Microsoft.ML.SearchSpace;

namespace Microsoft.ML.AutoML.Tuner
{
internal class AutoZeroTuner : ITuner
{
private readonly List<Config> _configs = new List<Config>();
private readonly IEnumerator<Config> _configsEnumerator;
private readonly Dictionary<string, string> _pipelineStrings;
private readonly SweepablePipeline _sweepablePipeline;
private readonly Dictionary<int, Config> _configLookBook = new Dictionary<int, Config>();
private readonly string _metricName;

/// <summary>
/// Creates a tuner that replays the configurations stored in the embedded Portfolios.json resource,
/// keeping only those whose task matches <paramref name="evaluateMetricManager"/> and whose trainer
/// is one of the trainer estimators in <paramref name="pipeline"/>.
/// </summary>
/// <param name="pipeline">The sweepable pipeline whose schemas and estimators the configurations are matched against.</param>
/// <param name="aggregateTrainingStopManager">
/// Receives an additional <see cref="MaxModelStopManager"/> limited to the number of usable configurations.
/// </param>
/// <param name="evaluateMetricManager">
/// Determines the task: <see cref="BinaryMetricManager"/>, <see cref="MultiClassMetricManager"/> or
/// <see cref="RegressionMetricManager"/>.
/// </param>
/// <param name="settings">
/// Not read by this constructor. NOTE(review): presumably kept so the type can be created through dependency injection - confirm.
/// </param>
/// <exception cref="NotSupportedException"><paramref name="evaluateMetricManager"/> is not one of the supported metric managers.</exception>
/// <exception cref="ArgumentException">No stored configuration matches the task and the trainers in <paramref name="pipeline"/>.</exception>
public AutoZeroTuner(SweepablePipeline pipeline, AggregateTrainingStopManager aggregateTrainingStopManager, IEvaluateMetricManager evaluateMetricManager, AutoMLExperiment.AutoMLExperimentSettings settings)
{
    var allConfigs = LoadConfigsFromJson();
    _sweepablePipeline = pipeline;

    // Map every concrete schema of the pipeline to an "A=>B=>C" string of its estimator types,
    // so that Propose can pick a schema by substring match on transformer/trainer names.
    _pipelineStrings = _sweepablePipeline.Schema.ToTerms().Select(t => new
    {
        schema = t.ToString(),
        pipelineString = string.Join("=>", t.ValueEntities().Select(e => _sweepablePipeline.Estimators[e.ToString()].EstimatorType)),
    }).ToDictionary(kv => kv.schema, kv => kv.pipelineString);

    var trainerEstimators = _sweepablePipeline.Estimators
        .Where(e => e.Value.EstimatorType.IsTrainer())
        .Select(e => e.Value.EstimatorType.ToString())
        .ToList();

    // Resolve the task name used in Portfolios.json and the metric name in a single switch, and
    // fail with a specific, descriptive exception for metric managers this tuner does not know.
    var managerTypeName = evaluateMetricManager?.GetType().FullName ?? "null";
    var (taskName, metricName) = evaluateMetricManager switch
    {
        BinaryMetricManager bm => ("binary-classification", bm.Metric.ToString()),
        MultiClassMetricManager mm => ("multi-classification", mm.Metric.ToString()),
        RegressionMetricManager rm => ("regression", rm.Metric.ToString()),
        _ => throw new NotSupportedException($"{nameof(AutoZeroTuner)} does not support metric manager of type '{managerTypeName}'."),
    };
    _metricName = metricName;

    // Keep only the configurations for this task whose trainer is available in the pipeline.
    _configs = allConfigs
        .Where(c => c.Task == taskName && trainerEstimators.Contains(c.Trainer))
        .ToList();

    if (_configs.Count == 0)
    {
        throw new ArgumentException($"Fail to find available configs for given trainers: {string.Join(",", trainerEstimators)}");
    }

    _configsEnumerator = _configs.GetEnumerator();

    // Each configuration is proposed once, so cap the number of models at the configuration count.
    aggregateTrainingStopManager.AddTrainingStopManager(new MaxModelStopManager(_configs.Count, null));
}

/// <summary>
/// Reads the embedded resource "Microsoft.ML.AutoML.Tuner.Portfolios.json" from the executing assembly
/// and deserializes it into a list of <see cref="Config"/>.
/// </summary>
/// <returns>The deserialized configurations; never <see langword="null"/>.</returns>
/// <exception cref="InvalidOperationException">
/// The resource is not present in the assembly, or its content deserializes to <see langword="null"/>.
/// </exception>
private List<Config> LoadConfigsFromJson()
{
    const string resourceName = "Microsoft.ML.AutoML.Tuner.Portfolios.json";
    var assembly = Assembly.GetExecutingAssembly();

    using (Stream stream = assembly.GetManifestResourceStream(resourceName))
    {
        // GetManifestResourceStream returns null instead of throwing when the resource is missing;
        // report that explicitly rather than letting StreamReader fail with an ArgumentNullException.
        if (stream == null)
        {
            throw new InvalidOperationException($"Embedded resource '{resourceName}' was not found in assembly '{assembly.FullName}'.");
        }

        using (StreamReader reader = new StreamReader(stream))
        {
            var json = reader.ReadToEnd();
            var configs = JsonSerializer.Deserialize<List<Config>>(json);

            // A JSON payload of literal null deserializes to a null list.
            if (configs == null)
            {
                throw new InvalidOperationException($"Embedded resource '{resourceName}' does not contain a list of configs.");
            }

            return configs;
        }
    }
}

/// <summary>
/// Proposes the next stored configuration: picks a pipeline schema containing the configuration's trainer
/// (and its catalog transformer, when the pipeline has a one-hot style encoding step), samples the default
/// parameters for that schema, and replaces the trainer's parameters with the stored ones.
/// </summary>
/// <param name="settings">
/// The trial settings; its <c>Parameter</c> receives the proposed pipeline parameters under
/// <see cref="AutoMLExperiment.PipelineSearchspaceName"/>.
/// </param>
/// <returns>The updated <c>settings.Parameter</c>.</returns>
/// <exception cref="OperationCanceledException">All stored configurations have already been proposed.</exception>
/// <exception cref="InvalidOperationException">No pipeline schema matches the next configuration.</exception>
public Parameter Propose(TrialSettings settings)
{
    if (!_configsEnumerator.MoveNext())
    {
        // Every configuration has been handed out.
        throw new OperationCanceledException();
    }

    var config = _configsEnumerator.Current;

    // Only filter on the catalog transformer when the pipeline actually contains such a step.
    var hasCatalogTransformer = _pipelineStrings.Any(kv => kv.Value.Contains("OneHotHashEncoding") || kv.Value.Contains("OneHotEncoding"));
    IEnumerable<KeyValuePair<string, string>> candidates = hasCatalogTransformer
        ? _pipelineStrings.Where(kv => kv.Value.Contains(config.CatalogTransformer))
        : _pipelineStrings;

    // Dictionary keys are never null, so a null result unambiguously means "no match".
    var pipelineSchema = candidates
        .Where(kv => kv.Value.Contains(config.Trainer))
        .Select(kv => kv.Key)
        .FirstOrDefault();
    if (pipelineSchema == null)
    {
        throw new InvalidOperationException($"No pipeline schema contains trainer '{config.Trainer}' with catalog transformer '{config.CatalogTransformer}'.");
    }

    var pipeline = _sweepablePipeline.BuildSweepableEstimatorPipeline(pipelineSchema);
    var parameter = pipeline.SearchSpace.SampleFromFeatureSpace(pipeline.SearchSpace.Default);
    var trainerEstimatorName = pipeline.Estimators.First(kv => kv.Value.EstimatorType.IsTrainer()).Key;

    // The stored trainer parameters replace the sampled ones wholesale, so carry the label and
    // feature column names over from the sampled defaults.
    var label = parameter[trainerEstimatorName]["LabelColumnName"].AsType<string>();
    var feature = parameter[trainerEstimatorName]["FeatureColumnName"].AsType<string>();
    parameter[trainerEstimatorName] = config.TrainerParameter;
    parameter[trainerEstimatorName]["LabelColumnName"] = Parameter.FromString(label);
    parameter[trainerEstimatorName]["FeatureColumnName"] = Parameter.FromString(feature);

    settings.Parameter[AutoMLExperiment.PipelineSearchspaceName] = parameter;
    _configLookBook[settings.TrialId] = config;
    return settings.Parameter;
}

/// <summary>
/// Does nothing. This tuner proposes configurations from a fixed, pre-loaded list,
/// so trial results are not used to steer later proposals.
/// </summary>
/// <param name="result">The finished trial's result; ignored.</param>
public void Update(TrialResult result)
{
}

/// <summary>
/// One stored configuration, deserialized from the embedded Portfolios.json resource.
/// </summary>
class Config
{
/// <summary>
/// Name of the categorical transformer for this configuration. Propose matches it as a substring of the
/// pipeline's estimator-type string, and only when the pipeline contains a OneHotEncoding or OneHotHashEncoding step.
/// NOTE(review): the previous comment listed the values as "OneHot, HashEncoding" - confirm against Portfolios.json.
/// </summary>
public string CatalogTransformer { get; set; }

/// <summary>
/// Name of the trainer, e.g. Lgbm, Sdca or FastTree (examples taken from the previous comment - confirm against Portfolios.json).
/// The constructor compares it with the estimator-type names of the pipeline's trainers.
/// </summary>
public string Trainer { get; set; }

/// <summary>
/// Trainer parameters that Propose assigns to the trainer estimator in place of the sampled defaults.
/// </summary>
public Parameter TrainerParameter { get; set; }

/// <summary>
/// Task name; the constructor filters on "binary-classification", "multi-classification" or "regression".
/// </summary>
public string Task { get; set; }
}

/// <summary>
/// Data holder with three properties.
/// NOTE(review): not referenced anywhere else in this file - looks like a leftover; confirm whether it can be removed.
/// </summary>
class Rows
{
public string CustomDimensionsBestPipeline { get; set; }

public string CustomDimensionsOptionsTask { get; set; }

public Parameter CustomDimensionsParameter { get; set; }
}
}
}
Loading