Skip to content

Commit

Permalink
add auto featurizer api
Browse files Browse the repository at this point in the history
  • Loading branch information
LittleLittleCloud committed May 5, 2022
1 parent 97a920a commit 50facc5
Showing 1 changed file with 58 additions and 0 deletions.
58 changes: 58 additions & 0 deletions src/Microsoft.ML.AutoML/API/AutoCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -530,5 +530,63 @@ public SweepableEstimator[] Regression(string labelColumnName = DefaultColumnNam

return res.ToArray();
}

/// <summary>
/// Builds the set of <see cref="SweepableEstimator"/> candidates used to featurize a text column.
/// </summary>
/// <param name="outputColumnName">Name of the column that receives the featurized output.</param>
/// <param name="inputColumnName">Name of the text column to featurize.</param>
/// <returns>Declared to return an array of <see cref="SweepableEstimator"/>; the current placeholder never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown: the method has no implementation yet.</exception>
internal SweepableEstimator[] TextFeaturizer(string outputColumnName, string inputColumnName)
    => throw new NotImplementedException();

/// <summary>
/// Builds the set of <see cref="SweepableEstimator"/> candidates used to featurize a numeric column.
/// </summary>
/// <param name="outputColumnName">Name of the column that receives the featurized output.</param>
/// <param name="inputColumnName">Name of the numeric column to featurize.</param>
/// <returns>Declared to return an array of <see cref="SweepableEstimator"/>; the current placeholder never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown: the method has no implementation yet.</exception>
internal SweepableEstimator[] NumericFeaturizer(string outputColumnName, string inputColumnName)
    => throw new NotImplementedException();

/// <summary>
/// Builds the set of <see cref="SweepableEstimator"/> candidates used to featurize a catalog column.
/// </summary>
/// <param name="outputColumnName">Name of the column that receives the featurized output.</param>
/// <param name="inputColumnName">Name of the catalog column to featurize.</param>
/// <returns>Declared to return an array of <see cref="SweepableEstimator"/>; the current placeholder never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown: the method has no implementation yet.</exception>
internal SweepableEstimator[] CatalogFeaturizer(string outputColumnName, string inputColumnName)
    => throw new NotImplementedException();

/// <summary>
/// Builds a single featurization pipeline from the columns of <paramref name="data"/>.
/// Every column present in <paramref name="data"/> and absent from <paramref name="excludeColumns"/> is
/// featurized with <see cref="CatalogFeaturizer(string, string)"/>, <see cref="NumericFeaturizer(string, string)"/>
/// or <see cref="TextFeaturizer(string, string)"/>, and the results are combined into one output feature column.
/// </summary>
/// <param name="data">Input data whose columns are featurized.</param>
/// <param name="outputColumnName">Name of the combined output feature column.</param>
/// <param name="catalogColumns">Columns to treat as catalog. When not specified, whether a column is catalog is inferred automatically.</param>
/// <param name="excludeColumns">Columns left out of featurization, such as the label.</param>
/// <returns>Declared to return a <see cref="MultiModelPipeline"/>; the current placeholder never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown: the method has no implementation yet.</exception>
internal MultiModelPipeline Featurizer(IDataView data, string outputColumnName = "Features", string[] catalogColumns = null, string[] excludeColumns = null)
    => throw new NotImplementedException();

/// <summary>
/// Builds a single featurization pipeline from <paramref name="columnInformation"/>.
/// Every column listed in <paramref name="columnInformation"/> and absent from <paramref name="excludeColumns"/> is
/// featurized with <see cref="CatalogFeaturizer(string, string)"/>, <see cref="NumericFeaturizer(string, string)"/>
/// or <see cref="TextFeaturizer(string, string)"/>, and the results are combined into one output feature column.
/// </summary>
/// <param name="columnInformation">Column information describing the columns to featurize.</param>
/// <param name="outputColumnName">Name of the combined output feature column.</param>
/// <param name="excludeColumns">Columns left out of featurization, such as the label.</param>
/// <returns>Declared to return a <see cref="MultiModelPipeline"/>; the current placeholder never returns normally.</returns>
/// <exception cref="NotImplementedException">Always thrown: the method has no implementation yet.</exception>
internal MultiModelPipeline Featurizer(ColumnInformation columnInformation, string outputColumnName = "Features", string[] excludeColumns = null)
    => throw new NotImplementedException();
}
}

0 comments on commit 50facc5

Please sign in to comment.