Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use SweepablePipeline #6285

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
2722c86
replacing MultiModel Pipeline with SweepablePipeline
LittleLittleCloud Aug 16, 2022
b580667
remove pipeline in TrialSettings
LittleLittleCloud Aug 16, 2022
192b266
clean up
LittleLittleCloud Aug 17, 2022
cd92de5
implement regression && multiclass evaluation metric and add tests
LittleLittleCloud Aug 17, 2022
9ea3f43
remove tunarFactory
LittleLittleCloud Aug 17, 2022
0c1d0d4
rename EciCfoParameterProposer to EciCfoTuner
LittleLittleCloud Aug 17, 2022
bbe4858
some clean up
LittleLittleCloud Aug 17, 2022
5892e9a
Merge branch 'main' into u/xiaoyun/ISearchSpaceProposer
LittleLittleCloud Aug 17, 2022
9b41c1a
remove AutoMLExperimentSetting from TrialSetting
LittleLittleCloud Aug 17, 2022
6f4c95b
disable lgbm trainer in test
LittleLittleCloud Aug 17, 2022
2eab5cb
clean up
LittleLittleCloud Aug 17, 2022
928f98f
rename SetEvaluateMetricManager
LittleLittleCloud Aug 18, 2022
c1bc1b9
only keep back-to-bone function in AutoMLExperiment, and make other A…
LittleLittleCloud Aug 19, 2022
31362ff
fix di
LittleLittleCloud Aug 19, 2022
4c65fa3
clean up
LittleLittleCloud Aug 19, 2022
2b17060
Merge branch 'main' into u/xiaoyun/ISearchSpaceProposer
LittleLittleCloud Aug 21, 2022
c236830
convert vector<bool> to vector<single>
LittleLittleCloud Aug 21, 2022
daa3db7
disable log_base for lgbm
LittleLittleCloud Aug 21, 2022
dec85d8
clean up
LittleLittleCloud Aug 22, 2022
6de7519
clean up
LittleLittleCloud Aug 22, 2022
ce58257
fix comment and add comment in AutoMLExtension
LittleLittleCloud Aug 24, 2022
df379c5
change namespace for AutoMLExtension
LittleLittleCloud Aug 24, 2022
6be0a48
fix build error
LittleLittleCloud Aug 24, 2022
c4b51f6
fix tests
LittleLittleCloud Aug 24, 2022
6c201a8
fix test
LittleLittleCloud Aug 24, 2022
6834273
fix test
LittleLittleCloud Aug 24, 2022
4281116
Update job-template.yml
LittleLittleCloud Aug 24, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions build/ci/job-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
${{ if eq(parameters.nightlyBuild, 'true') }}:
timeoutInMinutes: 30
${{ if and(eq(parameters.nightlyBuild, 'false'), eq(parameters.codeCoverage, 'false')) }}:
timeoutInMinutes: 90
timeoutInMinutes: 120
${{ if eq(parameters.codeCoverage, 'true') }}:
timeoutInMinutes: 120
cancelTimeoutInMinutes: 10
Expand Down Expand Up @@ -239,4 +239,4 @@ jobs:
displayName: Clean up runtime folder for package (Unix)
- ${{ if eq(parameters.nightlyBuild, 'false') }}:
- script: ${{ parameters.buildScript }} /p:Build=false -pack -ci -configuration $(_configuration) /p:TargetArchitecture=${{ parameters.architecture }} /p:TestArchitectures=${{ parameters.architecture }} $(testTargetFramework)
displayName: Build Packages
displayName: Build Packages
2 changes: 1 addition & 1 deletion docs/samples/Microsoft.ML.AutoML.Samples/Cifar10.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public static void Run()

experiment.SetDataset(trainDataset, testDataset)
.SetPipeline(pipeline)
.SetEvaluateMetric(MulticlassClassificationMetric.MicroAccuracy)
.SetMulticlassClassificationMetric(MulticlassClassificationMetric.MicroAccuracy)
.SetTrainingTimeInSeconds(200);

var result = experiment.Run();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ private static void WriteSummary(NotebookMonitor monitor, TextWriter writer)
var bestTrialParam = JsonSerializer.Serialize(monitor.BestTrial.TrialSettings.Parameter, new JsonSerializerOptions() { WriteIndented = true, });
summary.Add(h3("Best Trial"));
summary.Add(p($"Id: {monitor.BestTrial.TrialSettings.TrialId}"));
summary.Add(p($"Trainer: {monitor.BestTrial.TrialSettings.Pipeline}".Replace("Unknown=>", "")));
summary.Add(p($"Trainer: {monitor.SweepablePipeline.ToString(monitor.BestTrial.TrialSettings.Parameter)}".Replace("Unknown=>", "")));
summary.Add(p($"Parameters: {bestTrialParam}"));
}
if (monitor.ActiveTrial != null)
Expand All @@ -61,7 +61,7 @@ private static void WriteSummary(NotebookMonitor monitor, TextWriter writer)

summary.Add(h3("Active Trial"));
summary.Add(p($"Id: {monitor.ActiveTrial.TrialId}"));
summary.Add(p($"Trainer: {monitor.ActiveTrial.Pipeline}".Replace("Unknown=>", "")));
summary.Add(p($"Trainer: {monitor.SweepablePipeline.ToString(monitor.ActiveTrial.Parameter)}".Replace("Unknown=>", "")));
summary.Add(p($"Parameters: {activeTrialParam}"));
}

Expand Down
6 changes: 4 additions & 2 deletions src/Microsoft.ML.AutoML.Interactive/NotebookMonitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,19 @@ public class NotebookMonitor : IMonitor
private readonly ActionThrottler _updateThrottler;
private DisplayedValue _valueToUpdate;

public SweepablePipeline SweepablePipeline { get; private set; }
public TrialResult BestTrial { get; set; }
public TrialResult MostRecentTrial { get; set; }
public TrialSettings ActiveTrial { get; set; }
public List<TrialResult> CompletedTrials { get; set; }
public DataFrame TrialData { get; set; }

public NotebookMonitor()
public NotebookMonitor(SweepablePipeline pipeline)
{
CompletedTrials = new List<TrialResult>();
TrialData = new DataFrame(new PrimitiveDataFrameColumn<int>("Trial"), new PrimitiveDataFrameColumn<float>("Metric"), new StringDataFrameColumn("Trainer"), new StringDataFrameColumn("Parameters"));
_updateThrottler = new ActionThrottler(Update, TimeSpan.FromSeconds(5));
SweepablePipeline = pipeline;
}

public void ReportBestTrial(TrialResult result)
Expand All @@ -48,7 +50,7 @@ public void ReportCompletedTrial(TrialResult result)
{
new KeyValuePair<string, object>("Trial",result.TrialSettings.TrialId),
new KeyValuePair<string, object>("Metric", result.Metric),
new KeyValuePair<string, object>("Trainer",result.TrialSettings.Pipeline.ToString().Replace("Unknown=>","")),
new KeyValuePair<string, object>("Trainer",SweepablePipeline.ToString(result.TrialSettings.Parameter).Replace("Unknown=>","")),
new KeyValuePair<string, object>("Parameters",activeRunParam),
}, true);

Expand Down
42 changes: 22 additions & 20 deletions src/Microsoft.ML.AutoML/API/AutoCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ public AutoMLExperiment CreateExperiment(AutoMLExperiment.AutoMLExperimentSettin
/// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise the default search space will be used.</param>
/// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise the default search space will be used.</param>
/// <returns></returns>
public SweepableEstimator[] BinaryClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
public SweepablePipeline BinaryClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null,
SearchSpace<FastTreeOption> fastTreeSearchSpace = null, SearchSpace<LgbmOption> lgbmSearchSpace = null, SearchSpace<FastForestOption> fastForestSearchSpace = null, SearchSpace<LbfgsOption> lbfgsSearchSpace = null, SearchSpace<SdcaOption> sdcaSearchSpace = null)
{
Expand Down Expand Up @@ -377,7 +377,7 @@ public SweepableEstimator[] BinaryClassification(string labelColumnName = Defaul
res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionBinary(sdcaOption, sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption)));
}

return res.ToArray();
return new SweepablePipeline().Append(res.ToArray());
}

/// <summary>
Expand All @@ -402,7 +402,7 @@ public SweepableEstimator[] BinaryClassification(string labelColumnName = Defaul
/// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise the default search space will be used.</param>
/// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise the default search space will be used.</param>
/// <returns></returns>
public SweepableEstimator[] MultiClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
public SweepablePipeline MultiClassification(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null,
SearchSpace<FastTreeOption> fastTreeSearchSpace = null, SearchSpace<LgbmOption> lgbmSearchSpace = null, SearchSpace<FastForestOption> fastForestSearchSpace = null, SearchSpace<LbfgsOption> lbfgsSearchSpace = null, SearchSpace<SdcaOption> sdcaSearchSpace = null)
{
Expand Down Expand Up @@ -455,7 +455,7 @@ public SweepableEstimator[] MultiClassification(string labelColumnName = Default
res.Add(SweepableEstimatorFactory.CreateSdcaLogisticRegressionOva(sdcaOption, sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption)));
}

return res.ToArray();
return new SweepablePipeline().Append(res.ToArray());
}

/// <summary>
Expand All @@ -480,7 +480,7 @@ public SweepableEstimator[] MultiClassification(string labelColumnName = Default
/// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise the default search space will be used.</param>
/// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise the default search space will be used.</param>
/// <returns></returns>
public SweepableEstimator[] Regression(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
public SweepablePipeline Regression(string labelColumnName = DefaultColumnNames.Label, string featureColumnName = DefaultColumnNames.Features, string exampleWeightColumnName = null, bool useFastForest = true, bool useLgbm = true, bool useFastTree = true, bool useLbfgs = true, bool useSdca = true,
FastTreeOption fastTreeOption = null, LgbmOption lgbmOption = null, FastForestOption fastForestOption = null, LbfgsOption lbfgsOption = null, SdcaOption sdcaOption = null,
SearchSpace<FastTreeOption> fastTreeSearchSpace = null, SearchSpace<LgbmOption> lgbmSearchSpace = null, SearchSpace<FastForestOption> fastForestSearchSpace = null, SearchSpace<LbfgsOption> lbfgsSearchSpace = null, SearchSpace<SdcaOption> sdcaSearchSpace = null)
{
Expand Down Expand Up @@ -531,31 +531,31 @@ public SweepableEstimator[] Regression(string labelColumnName = DefaultColumnNam
res.Add(SweepableEstimatorFactory.CreateSdcaRegression(sdcaOption, sdcaSearchSpace ?? new SearchSpace<SdcaOption>(sdcaOption)));
}

return res.ToArray();
return new SweepablePipeline().Append(res.ToArray());
}

/// <summary>
/// Create a <see cref="SweepablePipeline"/> for featurizing text.
/// </summary>
/// <param name="outputColumnName">output column name.</param>
/// <param name="inputColumnName">input column name.</param>
internal SweepableEstimator[] TextFeaturizer(string outputColumnName, string inputColumnName)
internal SweepablePipeline TextFeaturizer(string outputColumnName, string inputColumnName)
{
var option = new FeaturizeTextOption
{
InputColumnName = inputColumnName,
OutputColumnName = outputColumnName,
};

return new[] { SweepableEstimatorFactory.CreateFeaturizeText(option) };
return new SweepablePipeline().Append(new[] { SweepableEstimatorFactory.CreateFeaturizeText(option) });
}

/// <summary>
/// Create a <see cref="SweepablePipeline"/> for featurizing numeric columns.
/// </summary>
/// <param name="outputColumnNames">output column names.</param>
/// <param name="inputColumnNames">input column names.</param>
internal SweepableEstimator[] NumericFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
internal SweepablePipeline NumericFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
{
Contracts.CheckValue(inputColumnNames, nameof(inputColumnNames));
Contracts.CheckValue(outputColumnNames, nameof(outputColumnNames));
Expand All @@ -566,7 +566,7 @@ internal SweepableEstimator[] NumericFeaturizer(string[] outputColumnNames, stri
OutputColumnNames = outputColumnNames,
};

return new[] { SweepableEstimatorFactory.CreateReplaceMissingValues(replaceMissingValueOption) };
return new SweepablePipeline().Append(new[] { SweepableEstimatorFactory.CreateReplaceMissingValues(replaceMissingValueOption) });
}

/// <summary>
Expand Down Expand Up @@ -597,7 +597,7 @@ internal SweepableEstimator[] BooleanFeaturizer(string[] outputColumnNames, stri
/// </summary>
/// <param name="outputColumnNames">output column names.</param>
/// <param name="inputColumnNames">input column names.</param>
internal SweepableEstimator[] CatalogFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
internal SweepablePipeline CatalogFeaturizer(string[] outputColumnNames, string[] inputColumnNames)
{
Contracts.Check(outputColumnNames.Count() == inputColumnNames.Count() && outputColumnNames.Count() > 0, "outputColumnNames and inputColumnNames must have the same length and greater than 0");

Expand All @@ -607,10 +607,10 @@ internal SweepableEstimator[] CatalogFeaturizer(string[] outputColumnNames, stri
OutputColumnNames = outputColumnNames,
};

return new SweepableEstimator[] { SweepableEstimatorFactory.CreateOneHotEncoding(option), SweepableEstimatorFactory.CreateOneHotHashEncoding(option) };
return new SweepablePipeline().Append(new SweepableEstimator[] { SweepableEstimatorFactory.CreateOneHotEncoding(option), SweepableEstimatorFactory.CreateOneHotHashEncoding(option) });
}

internal MultiModelPipeline ImagePathFeaturizer(string outputColumnName, string inputColumnName)
internal SweepablePipeline ImagePathFeaturizer(string outputColumnName, string inputColumnName)
{
// load image => resize image (224, 224) => extract pixels => dnn featurizer
var loadImageOption = new LoadImageOption
Expand Down Expand Up @@ -640,7 +640,7 @@ internal MultiModelPipeline ImagePathFeaturizer(string outputColumnName, string
OutputColumnName = outputColumnName,
};

var pipeline = new MultiModelPipeline();
var pipeline = new SweepablePipeline();

return pipeline.Append(SweepableEstimatorFactory.CreateLoadImages(loadImageOption))
.Append(SweepableEstimatorFactory.CreateResizeImages(resizeImageOption))
Expand All @@ -660,7 +660,7 @@ internal MultiModelPipeline ImagePathFeaturizer(string outputColumnName, string
/// <param name="imagePathColumns">columns that should be treated as image path. If not specified, it will automatically infer if a column is catalog or not.</param>
/// <param name="outputColumnName">output feature column.</param>
/// <param name="excludeColumns">columns that won't be included when featurizing, like label</param>
public MultiModelPipeline Featurizer(IDataView data, string outputColumnName = "Features", string[] catalogColumns = null, string[] numericColumns = null, string[] textColumns = null, string[] imagePathColumns = null, string[] excludeColumns = null)
public SweepablePipeline Featurizer(IDataView data, string outputColumnName = "Features", string[] catalogColumns = null, string[] numericColumns = null, string[] textColumns = null, string[] imagePathColumns = null, string[] excludeColumns = null)
{
Contracts.CheckValue(data, nameof(data));

Expand Down Expand Up @@ -727,16 +727,18 @@ public MultiModelPipeline Featurizer(IDataView data, string outputColumnName = "
/// <param name="data">input data.</param>
/// <param name="columnInformation">column information.</param>
/// <param name="outputColumnName">output feature column.</param>
/// <returns>A <see cref="MultiModelPipeline"/> for featurization.</returns>
public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
/// <returns>A <see cref="SweepablePipeline"/> for featurization.</returns>
public SweepablePipeline Featurizer(IDataView data, ColumnInformation columnInformation, string outputColumnName = "Features")
{
Contracts.CheckValue(data, nameof(data));
Contracts.CheckValue(columnInformation, nameof(columnInformation));

var columnPurposes = PurposeInference.InferPurposes(this._context, data, columnInformation);
var textFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.TextFeature);
var numericFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature && data.Schema[c.ColumnIndex].Type != BooleanDataViewType.Instance);
var booleanFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature && data.Schema[c.ColumnIndex].Type == BooleanDataViewType.Instance);
var numericFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature
&& data.Schema[c.ColumnIndex].Type != BooleanDataViewType.Instance
&& !(data.Schema[c.ColumnIndex].Type is VectorDataViewType vt && vt.ItemType == BooleanDataViewType.Instance)).ToArray();
var booleanFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.NumericFeature && !numericFeatures.Contains(c));
var catalogFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.CategoricalFeature);
var imagePathFeatures = columnPurposes.Where(c => c.Purpose == ColumnPurpose.ImagePath);
var textFeatureColumnNames = textFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
Expand All @@ -745,7 +747,7 @@ public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInf
var imagePathColumnNames = imagePathFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();
var booleanFeatureColumnNames = booleanFeatures.Select(c => data.Schema[c.ColumnIndex].Name).ToArray();

var pipeline = new MultiModelPipeline();
var pipeline = new SweepablePipeline();
if (numericFeatureColumnNames.Length > 0)
{
pipeline = pipeline.Append(this.NumericFeaturizer(numericFeatureColumnNames, numericFeatureColumnNames));
Expand Down
Loading