@@ -326,7 +326,7 @@ public AutoMLExperiment CreateExperiment(AutoMLExperiment.AutoMLExperimentSettin
326
326
/// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise the default search space will be used.</param>
327
327
/// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise the default search space will be used.</param>
328
328
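/// <returns>A <see cref="SweepablePipeline"/> to which the array of created binary classification estimators has been appended.</returns>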
329
- public SweepableEstimator [ ] BinaryClassification ( string labelColumnName = DefaultColumnNames . Label , string featureColumnName = DefaultColumnNames . Features , string exampleWeightColumnName = null , bool useFastForest = true , bool useLgbm = true , bool useFastTree = true , bool useLbfgs = true , bool useSdca = true ,
329
+ public SweepablePipeline BinaryClassification ( string labelColumnName = DefaultColumnNames . Label , string featureColumnName = DefaultColumnNames . Features , string exampleWeightColumnName = null , bool useFastForest = true , bool useLgbm = true , bool useFastTree = true , bool useLbfgs = true , bool useSdca = true ,
330
330
FastTreeOption fastTreeOption = null , LgbmOption lgbmOption = null , FastForestOption fastForestOption = null , LbfgsOption lbfgsOption = null , SdcaOption sdcaOption = null ,
331
331
SearchSpace < FastTreeOption > fastTreeSearchSpace = null , SearchSpace < LgbmOption > lgbmSearchSpace = null , SearchSpace < FastForestOption > fastForestSearchSpace = null , SearchSpace < LbfgsOption > lbfgsSearchSpace = null , SearchSpace < SdcaOption > sdcaSearchSpace = null )
332
332
{
@@ -377,7 +377,7 @@ public SweepableEstimator[] BinaryClassification(string labelColumnName = Defaul
377
377
res . Add ( SweepableEstimatorFactory . CreateSdcaLogisticRegressionBinary ( sdcaOption , sdcaSearchSpace ?? new SearchSpace < SdcaOption > ( sdcaOption ) ) ) ;
378
378
}
379
379
380
- return res . ToArray ( ) ;
380
+ return new SweepablePipeline ( ) . Append ( res . ToArray ( ) ) ;
381
381
}
382
382
383
383
/// <summary>
@@ -402,7 +402,7 @@ public SweepableEstimator[] BinaryClassification(string labelColumnName = Defaul
402
402
/// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise the default search space will be used.</param>
403
403
/// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise the default search space will be used.</param>
404
404
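/// <returns>A <see cref="SweepablePipeline"/> to which the array of created multi-class classification estimators has been appended.</returns>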
405
- public SweepableEstimator [ ] MultiClassification ( string labelColumnName = DefaultColumnNames . Label , string featureColumnName = DefaultColumnNames . Features , string exampleWeightColumnName = null , bool useFastForest = true , bool useLgbm = true , bool useFastTree = true , bool useLbfgs = true , bool useSdca = true ,
405
+ public SweepablePipeline MultiClassification ( string labelColumnName = DefaultColumnNames . Label , string featureColumnName = DefaultColumnNames . Features , string exampleWeightColumnName = null , bool useFastForest = true , bool useLgbm = true , bool useFastTree = true , bool useLbfgs = true , bool useSdca = true ,
406
406
FastTreeOption fastTreeOption = null , LgbmOption lgbmOption = null , FastForestOption fastForestOption = null , LbfgsOption lbfgsOption = null , SdcaOption sdcaOption = null ,
407
407
SearchSpace < FastTreeOption > fastTreeSearchSpace = null , SearchSpace < LgbmOption > lgbmSearchSpace = null , SearchSpace < FastForestOption > fastForestSearchSpace = null , SearchSpace < LbfgsOption > lbfgsSearchSpace = null , SearchSpace < SdcaOption > sdcaSearchSpace = null )
408
408
{
@@ -455,7 +455,7 @@ public SweepableEstimator[] MultiClassification(string labelColumnName = Default
455
455
res . Add ( SweepableEstimatorFactory . CreateSdcaLogisticRegressionOva ( sdcaOption , sdcaSearchSpace ?? new SearchSpace < SdcaOption > ( sdcaOption ) ) ) ;
456
456
}
457
457
458
- return res . ToArray ( ) ;
458
+ return new SweepablePipeline ( ) . Append ( res . ToArray ( ) ) ;
459
459
}
460
460
461
461
/// <summary>
@@ -480,7 +480,7 @@ public SweepableEstimator[] MultiClassification(string labelColumnName = Default
480
480
/// <param name="lbfgsSearchSpace">if provided, use it as search space for lbfgs, otherwise the default search space will be used.</param>
481
481
/// <param name="sdcaSearchSpace">if provided, use it as search space for sdca, otherwise the default search space will be used.</param>
482
482
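/// <returns>A <see cref="SweepablePipeline"/> to which the array of created regression estimators has been appended.</returns>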
483
- public SweepableEstimator [ ] Regression ( string labelColumnName = DefaultColumnNames . Label , string featureColumnName = DefaultColumnNames . Features , string exampleWeightColumnName = null , bool useFastForest = true , bool useLgbm = true , bool useFastTree = true , bool useLbfgs = true , bool useSdca = true ,
483
+ public SweepablePipeline Regression ( string labelColumnName = DefaultColumnNames . Label , string featureColumnName = DefaultColumnNames . Features , string exampleWeightColumnName = null , bool useFastForest = true , bool useLgbm = true , bool useFastTree = true , bool useLbfgs = true , bool useSdca = true ,
484
484
FastTreeOption fastTreeOption = null , LgbmOption lgbmOption = null , FastForestOption fastForestOption = null , LbfgsOption lbfgsOption = null , SdcaOption sdcaOption = null ,
485
485
SearchSpace < FastTreeOption > fastTreeSearchSpace = null , SearchSpace < LgbmOption > lgbmSearchSpace = null , SearchSpace < FastForestOption > fastForestSearchSpace = null , SearchSpace < LbfgsOption > lbfgsSearchSpace = null , SearchSpace < SdcaOption > sdcaSearchSpace = null )
486
486
{
@@ -531,31 +531,31 @@ public SweepableEstimator[] Regression(string labelColumnName = DefaultColumnNam
531
531
res . Add ( SweepableEstimatorFactory . CreateSdcaRegression ( sdcaOption , sdcaSearchSpace ?? new SearchSpace < SdcaOption > ( sdcaOption ) ) ) ;
532
532
}
533
533
534
- return res . ToArray ( ) ;
534
+ return new SweepablePipeline ( ) . Append ( res . ToArray ( ) ) ;
535
535
}
536
536
537
537
/// <summary>
538
538
/// Create a <see cref="SweepablePipeline"/> holding a single text-featurizing <see cref="SweepableEstimator"/>.
539
539
/// </summary>
540
540
/// <param name="outputColumnName">output column name.</param>
541
541
/// <param name="inputColumnName">input column name.</param>
/// <returns>A new <see cref="SweepablePipeline"/> with a one-element array containing the featurize-text estimator appended.</returns>
542
- internal SweepableEstimator [ ] TextFeaturizer ( string outputColumnName , string inputColumnName )
542
+ internal SweepablePipeline TextFeaturizer ( string outputColumnName , string inputColumnName )
543
543
{
544
544
var option = new FeaturizeTextOption
545
545
{
546
546
InputColumnName = inputColumnName ,
547
547
OutputColumnName = outputColumnName ,
548
548
} ;
549
549
550
- return new [ ] { SweepableEstimatorFactory . CreateFeaturizeText ( option ) } ;
550
+ return new SweepablePipeline ( ) . Append ( new [ ] { SweepableEstimatorFactory . CreateFeaturizeText ( option ) } ) ;
551
551
}
552
552
553
553
/// <summary>
554
554
/// Create a <see cref="SweepablePipeline"/> for featurizing numeric columns.
555
555
/// </summary>
556
556
/// <param name="outputColumnNames">output column names.</param>
557
557
/// <param name="inputColumnNames">input column names.</param>
558
- internal SweepableEstimator [ ] NumericFeaturizer ( string [ ] outputColumnNames , string [ ] inputColumnNames )
558
+ internal SweepablePipeline NumericFeaturizer ( string [ ] outputColumnNames , string [ ] inputColumnNames )
559
559
{
560
560
Contracts . CheckValue ( inputColumnNames , nameof ( inputColumnNames ) ) ;
561
561
Contracts . CheckValue ( outputColumnNames , nameof ( outputColumnNames ) ) ;
@@ -566,7 +566,7 @@ internal SweepableEstimator[] NumericFeaturizer(string[] outputColumnNames, stri
566
566
OutputColumnNames = outputColumnNames ,
567
567
} ;
568
568
569
- return new [ ] { SweepableEstimatorFactory . CreateReplaceMissingValues ( replaceMissingValueOption ) } ;
569
+ return new SweepablePipeline ( ) . Append ( new [ ] { SweepableEstimatorFactory . CreateReplaceMissingValues ( replaceMissingValueOption ) } ) ;
570
570
}
571
571
572
572
/// <summary>
@@ -597,7 +597,7 @@ internal SweepableEstimator[] BooleanFeaturizer(string[] outputColumnNames, stri
597
597
/// </summary>
598
598
/// <param name="outputColumnNames">output column names.</param>
599
599
/// <param name="inputColumnNames">input column names.</param>
600
- internal SweepableEstimator [ ] CatalogFeaturizer ( string [ ] outputColumnNames , string [ ] inputColumnNames )
600
+ internal SweepablePipeline CatalogFeaturizer ( string [ ] outputColumnNames , string [ ] inputColumnNames )
601
601
{
602
602
Contracts . Check ( outputColumnNames . Count ( ) == inputColumnNames . Count ( ) && outputColumnNames . Count ( ) > 0 , "outputColumnNames and inputColumnNames must have the same length and greater than 0" ) ;
603
603
@@ -607,10 +607,10 @@ internal SweepableEstimator[] CatalogFeaturizer(string[] outputColumnNames, stri
607
607
OutputColumnNames = outputColumnNames ,
608
608
} ;
609
609
610
- return new SweepableEstimator [ ] { SweepableEstimatorFactory . CreateOneHotEncoding ( option ) , SweepableEstimatorFactory . CreateOneHotHashEncoding ( option ) } ;
610
+ return new SweepablePipeline ( ) . Append ( new SweepableEstimator [ ] { SweepableEstimatorFactory . CreateOneHotEncoding ( option ) , SweepableEstimatorFactory . CreateOneHotHashEncoding ( option ) } ) ;
611
611
}
612
612
613
- internal MultiModelPipeline ImagePathFeaturizer ( string outputColumnName , string inputColumnName )
613
+ internal SweepablePipeline ImagePathFeaturizer ( string outputColumnName , string inputColumnName )
614
614
{
615
615
// load image => resize image (224, 224) => extract pixels => dnn featurizer
616
616
var loadImageOption = new LoadImageOption
@@ -640,7 +640,7 @@ internal MultiModelPipeline ImagePathFeaturizer(string outputColumnName, string
640
640
OutputColumnName = outputColumnName ,
641
641
} ;
642
642
643
- var pipeline = new MultiModelPipeline ( ) ;
643
+ var pipeline = new SweepablePipeline ( ) ;
644
644
645
645
return pipeline . Append ( SweepableEstimatorFactory . CreateLoadImages ( loadImageOption ) )
646
646
. Append ( SweepableEstimatorFactory . CreateResizeImages ( resizeImageOption ) )
@@ -660,7 +660,7 @@ internal MultiModelPipeline ImagePathFeaturizer(string outputColumnName, string
660
660
/// <param name="imagePathColumns">columns that should be treated as image path. If not specified, it will automatically infer whether a column is an image path or not.</param>
661
661
/// <param name="outputColumnName">output feature column.</param>
662
662
/// <param name="excludeColumns">columns that won't be included when featurizing, like label</param>
663
- public MultiModelPipeline Featurizer ( IDataView data , string outputColumnName = "Features" , string [ ] catalogColumns = null , string [ ] numericColumns = null , string [ ] textColumns = null , string [ ] imagePathColumns = null , string [ ] excludeColumns = null )
663
+ public SweepablePipeline Featurizer ( IDataView data , string outputColumnName = "Features" , string [ ] catalogColumns = null , string [ ] numericColumns = null , string [ ] textColumns = null , string [ ] imagePathColumns = null , string [ ] excludeColumns = null )
664
664
{
665
665
Contracts . CheckValue ( data , nameof ( data ) ) ;
666
666
@@ -727,16 +727,18 @@ public MultiModelPipeline Featurizer(IDataView data, string outputColumnName = "
727
727
/// <param name="data">input data.</param>
728
728
/// <param name="columnInformation">column information.</param>
729
729
/// <param name="outputColumnName">output feature column.</param>
730
- /// <returns>A <see cref="MultiModelPipeline "/> for featurization.</returns>
731
- public MultiModelPipeline Featurizer ( IDataView data , ColumnInformation columnInformation , string outputColumnName = "Features" )
730
+ /// <returns>A <see cref="SweepablePipeline"/> for featurization.</returns>
731
+ public SweepablePipeline Featurizer ( IDataView data , ColumnInformation columnInformation , string outputColumnName = "Features" )
732
732
{
733
733
Contracts . CheckValue ( data , nameof ( data ) ) ;
734
734
Contracts . CheckValue ( columnInformation , nameof ( columnInformation ) ) ;
735
735
736
736
var columnPurposes = PurposeInference . InferPurposes ( this . _context , data , columnInformation ) ;
737
737
var textFeatures = columnPurposes . Where ( c => c . Purpose == ColumnPurpose . TextFeature ) ;
738
- var numericFeatures = columnPurposes . Where ( c => c . Purpose == ColumnPurpose . NumericFeature && data . Schema [ c . ColumnIndex ] . Type != BooleanDataViewType . Instance ) ;
739
- var booleanFeatures = columnPurposes . Where ( c => c . Purpose == ColumnPurpose . NumericFeature && data . Schema [ c . ColumnIndex ] . Type == BooleanDataViewType . Instance ) ;
738
+ var numericFeatures = columnPurposes . Where ( c => c . Purpose == ColumnPurpose . NumericFeature
739
+ && data . Schema [ c . ColumnIndex ] . Type != BooleanDataViewType . Instance
740
+ && ! ( data . Schema [ c . ColumnIndex ] . Type is VectorDataViewType vt && vt . ItemType == BooleanDataViewType . Instance ) ) . ToArray ( ) ;
741
+ var booleanFeatures = columnPurposes . Where ( c => c . Purpose == ColumnPurpose . NumericFeature && ! numericFeatures . Contains ( c ) ) ;
740
742
var catalogFeatures = columnPurposes . Where ( c => c . Purpose == ColumnPurpose . CategoricalFeature ) ;
741
743
var imagePathFeatures = columnPurposes . Where ( c => c . Purpose == ColumnPurpose . ImagePath ) ;
742
744
var textFeatureColumnNames = textFeatures . Select ( c => data . Schema [ c . ColumnIndex ] . Name ) . ToArray ( ) ;
@@ -745,7 +747,7 @@ public MultiModelPipeline Featurizer(IDataView data, ColumnInformation columnInf
745
747
var imagePathColumnNames = imagePathFeatures . Select ( c => data . Schema [ c . ColumnIndex ] . Name ) . ToArray ( ) ;
746
748
var booleanFeatureColumnNames = booleanFeatures . Select ( c => data . Schema [ c . ColumnIndex ] . Name ) . ToArray ( ) ;
747
749
748
- var pipeline = new MultiModelPipeline ( ) ;
750
+ var pipeline = new SweepablePipeline ( ) ;
749
751
if ( numericFeatureColumnNames . Length > 0 )
750
752
{
751
753
pipeline = pipeline . Append ( this . NumericFeaturizer ( numericFeatureColumnNames , numericFeatureColumnNames ) ) ;
0 commit comments