Skip to content

Commit 9652e59

Browse files
Use SweepablePipeline (#6285)
* replacing MultiModelPipeline with SweepablePipeline * remove pipeline in TrialSettings * clean up * implement regression && multiclass evaluation metric and add tests * remove tunerFactory * rename EciCfoParameterProposer to EciCfoTuner * some clean up * remove AutoMLExperimentSetting from TrialSetting * disable lgbm trainer in test * clean up * rename SetEvaluateMetricManager * only keep backbone functions in AutoMLExperiment, and make other APIs extensions * fix di * clean up * convert vector<bool> to vector<single> * disable log_base for lgbm * clean up * clean up * fix comment and add comment in AutoMLExtension * change namespace for AutoMLExtension * fix build error * fix tests * fix test * fix test * Update job-template.yml
1 parent 8589d25 commit 9652e59

File tree

57 files changed

+2633
-1062
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+2633
-1062
lines changed

build/ci/job-template.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
${{ if eq(parameters.nightlyBuild, 'true') }}:
2121
timeoutInMinutes: 30
2222
${{ if and(eq(parameters.nightlyBuild, 'false'), eq(parameters.codeCoverage, 'false')) }}:
23-
timeoutInMinutes: 90
23+
timeoutInMinutes: 120
2424
${{ if eq(parameters.codeCoverage, 'true') }}:
2525
timeoutInMinutes: 120
2626
cancelTimeoutInMinutes: 10
@@ -239,4 +239,4 @@ jobs:
239239
displayName: Clean up runtime folder for package (Unix)
240240
- ${{ if eq(parameters.nightlyBuild, 'false') }}:
241241
- script: ${{ parameters.buildScript }} /p:Build=false -pack -ci -configuration $(_configuration) /p:TargetArchitecture=${{ parameters.architecture }} /p:TestArchitectures=${{ parameters.architecture }} $(testTargetFramework)
242-
displayName: Build Packages
242+
displayName: Build Packages

docs/samples/Microsoft.ML.AutoML.Samples/Cifar10.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public static void Run()
4040

4141
experiment.SetDataset(trainDataset, testDataset)
4242
.SetPipeline(pipeline)
43-
.SetEvaluateMetric(MulticlassClassificationMetric.MicroAccuracy)
43+
.SetMulticlassClassificationMetric(MulticlassClassificationMetric.MicroAccuracy)
4444
.SetTrainingTimeInSeconds(200);
4545

4646
var result = experiment.Run();

src/Microsoft.ML.AutoML.Interactive/AutoMLMonitorKernelExtension.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ private static void WriteSummary(NotebookMonitor monitor, TextWriter writer)
5151
var bestTrialParam = JsonSerializer.Serialize(monitor.BestTrial.TrialSettings.Parameter, new JsonSerializerOptions() { WriteIndented = true, });
5252
summary.Add(h3("Best Trial"));
5353
summary.Add(p($"Id: {monitor.BestTrial.TrialSettings.TrialId}"));
54-
summary.Add(p($"Trainer: {monitor.BestTrial.TrialSettings.Pipeline}".Replace("Unknown=>", "")));
54+
summary.Add(p($"Trainer: {monitor.SweepablePipeline.ToString(monitor.BestTrial.TrialSettings.Parameter)}".Replace("Unknown=>", "")));
5555
summary.Add(p($"Parameters: {bestTrialParam}"));
5656
}
5757
if (monitor.ActiveTrial != null)
@@ -61,7 +61,7 @@ private static void WriteSummary(NotebookMonitor monitor, TextWriter writer)
6161

6262
summary.Add(h3("Active Trial"));
6363
summary.Add(p($"Id: {monitor.ActiveTrial.TrialId}"));
64-
summary.Add(p($"Trainer: {monitor.ActiveTrial.Pipeline}".Replace("Unknown=>", "")));
64+
summary.Add(p($"Trainer: {monitor.SweepablePipeline.ToString(monitor.ActiveTrial.Parameter)}".Replace("Unknown=>", "")));
6565
summary.Add(p($"Parameters: {activeTrialParam}"));
6666
}
6767

src/Microsoft.ML.AutoML.Interactive/NotebookMonitor.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,19 @@ public class NotebookMonitor : IMonitor
1717
private readonly ActionThrottler _updateThrottler;
1818
private DisplayedValue _valueToUpdate;
1919

20+
public SweepablePipeline SweepablePipeline { get; private set; }
2021
public TrialResult BestTrial { get; set; }
2122
public TrialResult MostRecentTrial { get; set; }
2223
public TrialSettings ActiveTrial { get; set; }
2324
public List<TrialResult> CompletedTrials { get; set; }
2425
public DataFrame TrialData { get; set; }
2526

26-
public NotebookMonitor()
27+
public NotebookMonitor(SweepablePipeline pipeline)
2728
{
2829
CompletedTrials = new List<TrialResult>();
2930
TrialData = new DataFrame(new PrimitiveDataFrameColumn<int>("Trial"), new PrimitiveDataFrameColumn<float>("Metric"), new StringDataFrameColumn("Trainer"), new StringDataFrameColumn("Parameters"));
3031
_updateThrottler = new ActionThrottler(Update, TimeSpan.FromSeconds(5));
32+
SweepablePipeline = pipeline;
3133
}
3234

3335
public void ReportBestTrial(TrialResult result)
@@ -48,7 +50,7 @@ public void ReportCompletedTrial(TrialResult result)
4850
{
4951
new KeyValuePair<string, object>("Trial",result.TrialSettings.TrialId),
5052
new KeyValuePair<string, object>("Metric", result.Metric),
51-
new KeyValuePair<string, object>("Trainer",result.TrialSettings.Pipeline.ToString().Replace("Unknown=>","")),
53+
new KeyValuePair<string, object>("Trainer",SweepablePipeline.ToString(result.TrialSettings.Parameter).Replace("Unknown=>","")),
5254
new KeyValuePair<string, object>("Parameters",activeRunParam),
5355
}, true);
5456

src/Microsoft.ML.AutoML/API/AutoCatalog.cs

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ public AutoMLExperiment CreateExperiment(AutoMLExperiment.AutoMLExperimentSettin
326326
/// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise the default search space will be used.</param>
327327
/// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise the default search space will be used.</param>
328328
/// <returns></returns>
329-
public SweepableEstimator[] BinaryClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
329+
public SweepablePipeline BinaryClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
330330
FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null,
331331
SearchSpace<FastTreeOption> fastTreeSearchSpace = null, SearchSpace<LgbmOption> lgbmSearchSpace = null, SearchSpace<FastForestOption> fastForestSearchSpace = null, SearchSpace<LbfgsOption> lbfgsSearchSpace = null, SearchSpace<SdcaOption> sdcaSearchSpace = null)
332332
{
@@ -377,7 +377,7 @@ public SweepableEstimator[] BinaryClassification(string labelColumnName = Defaul
377377
res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionBinary(sdcaOption, sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption)));
378378
}
379379

380-
return res.ToArray();
380+
return new SweepablePipeline().Append(res.ToArray());
381381
}
382382

383383
/// <summary>
@@ -402,7 +402,7 @@ public SweepableEstimator[] BinaryClassification(string labelColumnName = Defaul
402402
/// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise the default search space will be used.</param>
403403
/// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise the default search space will be used.</param>
404404
/// <returns></returns>
405-
public SweepableEstimator[] MultiClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
405+
public SweepablePipeline MultiClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
406406
FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null,
407407
SearchSpace<FastTreeOption> fastTreeSearchSpace = null, SearchSpace<LgbmOption> lgbmSearchSpace = null, SearchSpace<FastForestOption> fastForestSearchSpace = null, SearchSpace<LbfgsOption> lbfgsSearchSpace = null, SearchSpace<SdcaOption> sdcaSearchSpace = null)
408408
{
@@ -455,7 +455,7 @@ public SweepableEstimator[] MultiClassification(string labelColumnName = Default
455455
res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionOva(sdcaOption, sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption)));
456456
}
457457

458-
return res.ToArray();
458+
return new SweepablePipeline().Append(res.ToArray());
459459
}
460460

461461
/// <summary>
@@ -480,7 +480,7 @@ public SweepableEstimator[] MultiClassification(string labelColumnName = Default
480480
/// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise the default search space will be used.</param>
481481
/// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise the default search space will be used.</param>
482482
/// <returns></returns>
483-
public SweepableEstimator[] Regression(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
483+
public SweepablePipeline Regression(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
484484
FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null,
485485
SearchSpace<FastTreeOption> fastTreeSearchSpace = null, SearchSpace<LgbmOption> lgbmSearchSpace = null, SearchSpace<FastForestOption> fastForestSearchSpace = null, SearchSpace<LbfgsOption> lbfgsSearchSpace = null, SearchSpace<SdcaOption> sdcaSearchSpace = null)
486486
{
@@ -531,31 +531,31 @@ public SweepableEstimator[] Regression(string labelColumnName = DefaultColumnNam
531531
res.Add(SweepableEstimatorFactory.CreateSdcaRegression(sdcaOption, sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption)));
532532
}
533533

534-
return res.ToArray();
534+
return new SweepablePipeline().Append(res.ToArray());
535535
}
536536

537537
/// <summary>
538538
/// Create a <see cref="SweepablePipeline"/> for featurizing text.
539539
/// </summary>
540540
/// <param name="outputColumnName">output column name.</param>
541541
/// <param name="inputColumnName">input column name.</param>
542-
internal SweepableEstimator[] TextFeaturizer(string outputColumnName, string inputColumnName)
542+
internal SweepablePipeline TextFeaturizer(string outputColumnName, string inputColumnName)
543543
{
544544
var option = new FeaturizeTextOption
545545
{
546546
InputColumnName = inputColumnName,
547547
OutputColumnName = outputColumnName,
548548
};
549549

550-
return new[] { SweepableEstimatorFactory.CreateFeaturizeText(option) };
550+
return new SweepablePipeline().Append(new[] { SweepableEstimatorFactory.CreateFeaturizeText(option) });
551551
}
552552

553553
/// <summary>
554554
/// Create a <see cref="SweepablePipeline"/> for featurizing numeric columns.
555555
/// </summary>
556556
/// <param name="outputColumnNames">output column names.</param>
557557
/// <param name="inputColumnNames">input column names.</param>
558-
internal SweepableEstimator[] NumericFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
558+
internal SweepablePipeline NumericFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
559559
{
560560
Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames));
561561
Contracts.CheckValue(outputColumnNames, nameof(outputColumnNames));
@@ -566,7 +566,7 @@ internal SweepableEstimator[] NumericFeaturizer(string[] outputColumnNames, stri
566566
OutputColumnNames = outputColumnNames,
567567
};
568568

569-
return new[] { SweepableEstimatorFactory.CreateReplaceMissingValues(replaceMissingValueOption) };
569+
return new SweepablePipeline().Append(new[] { SweepableEstimatorFactory.CreateReplaceMissingValues(replaceMissingValueOption) });
570570
}
571571

572572
/// <summary>
@@ -597,7 +597,7 @@ internal SweepableEstimator[] BooleanFeaturizer(string[] outputColumnNames, stri
597597
/// </summary>
598598
/// <param name="outputColumnNames">output column names.</param>
599599
/// <param name="inputColumnNames">input column names.</param>
600-
internal SweepableEstimator[] CatalogFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
600+
internal SweepablePipeline CatalogFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
601601
{
602602
Contracts.Check(outputColumnNames.Count() == inputColumnNames.Count() && outputColumnNames.Count() > 0, "outputColumnNames and inputColumnNames must have the same length and greater than 0");
603603

@@ -607,10 +607,10 @@ internal SweepableEstimator[] CatalogFeaturizer(string[] outputColumnNames, stri
607607
OutputColumnNames = outputColumnNames,
608608
};
609609

610-
return new SweepableEstimator[] { SweepableEstimatorFactory.CreateOneHotEncoding(option), SweepableEstimatorFactory.CreateOneHotHashEncoding(option) };
610+
return new SweepablePipeline().Append(new SweepableEstimator[] { SweepableEstimatorFactory.CreateOneHotEncoding(option), SweepableEstimatorFactory.CreateOneHotHashEncoding(option) });
611611
}
612612

613-
internal MultiModelPipeline ImagePathFeaturizer(string outputColumnName, string inputColumnName)
613+
internal SweepablePipeline ImagePathFeaturizer(string outputColumnName, string inputColumnName)
614614
{
615615
// load image => resize image (224, 224) => extract pixels => dnn featurizer
616616
var loadImageOption = new LoadImageOption
@@ -640,7 +640,7 @@ internal MultiModelPipeline ImagePathFeaturizer(string outputColumnName, string
640640
OutputColumnName = outputColumnName,
641641
};
642642

643-
var pipeline = new MultiModelPipeline();
643+
var pipeline = new SweepablePipeline();
644644

645645
return pipeline.Append(SweepableEstimatorFactory.CreateLoadImages(loadImageOption))
646646
.Append(SweepableEstimatorFactory.CreateResizeImages(resizeImageOption))
@@ -660,7 +660,7 @@ internal MultiModelPipeline ImagePathFeaturizer(string outputColumnName, string
660660
/// <param name="imagePathColumns">columns that should be treated as image path. If not specified, it will automatically infer if a column is image path or not.</param>
661661
/// <param name="outputColumnName">output feature column.</param>
662662
/// <param name="excludeColumns">columns that won't be included when featurizing, like label</param>
663-
public MultiModelPipeline Featurizer(IDataView data, string outputColumnName = "Features", string[] catalogColumns = null, string[] numericColumns = null, string[] textColumns = null, string[] imagePathColumns = null, string[] excludeColumns = null)
663+
public SweepablePipeline Featurizer(IDataView data, string outputColumnName = "Features", string[] catalogColumns = null, string[] numericColumns = null, string[] textColumns = null, string[] imagePathColumns = null, string[] excludeColumns = null)
664664
{
665665
Contracts.CheckValue(data, nameof(data));
666666

@@ -727,16 +727,18 @@ public MultiModelPipeline Featurizer(IDataView data, string outputColumnName = "
727727
/// <param name="data">input data.</param>
728728
/// <param name="columnInformation">column information.</param>
729729
/// <param name="outputColumnName">output feature column.</param>
730-
/// <returns>A <see cref="MultiModelPipeline"/> for featurization.</returns>
731-
public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
730+
/// <returns>A <see cref="SweepablePipeline"/> for featurization.</returns>
731+
public SweepablePipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
732732
{
733733
Contracts.CheckValue(data, nameof(data));
734734
Contracts.CheckValue(columnInformation, nameof(columnInformation));
735735

736736
var columnPurposes = PurposeInference.InferPurposes(this._context, data, columnInformation);
737737
var textFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.TextFeature);
738-
var numericFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature && data.Schema[c.ColumnIndex].Type != BooleanDataViewType.Instance);
739-
var booleanFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature && data.Schema[c.ColumnIndex].Type == BooleanDataViewType.Instance);
738+
var numericFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature
739+
&& data.Schema[c.ColumnIndex].Type != BooleanDataViewType.Instance
740+
&& !(data.Schema[c.ColumnIndex].Type is VectorDataViewType vt && vt.ItemType == BooleanDataViewType.Instance)).ToArray();
741+
var booleanFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature && !numericFeatures.Contains(c));
740742
var catalogFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.CategoricalFeature);
741743
var imagePathFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.ImagePath);
742744
var textFeatureColumnNames = textFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
@@ -745,7 +747,7 @@ public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInf
745747
var imagePathColumnNames = imagePathFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
746748
var booleanFeatureColumnNames = booleanFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
747749

748-
var pipeline = new MultiModelPipeline();
750+
var pipeline = new SweepablePipeline();
749751
if (numericFeatureColumnNames.Length > 0)
750752
{
751753
pipeline = pipeline.Append(this.NumericFeaturizer(numericFeatureColumnNames, numericFeatureColumnNames));

0 commit comments

Comments
 (0)