Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add auto featurizer api #6187

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions src/Microsoft.ML.AutoML/API/AutoCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -530,5 +530,63 @@ public SweepableEstimator[] Regression(string labelColumnName = DefaultColumnNam

return res.ToArray();
}

/// <summary>
/// Builds the collection of <see cref="SweepableEstimator"/> candidates intended for featurizing a text column.
/// </summary>
/// <param name="outputColumnName">name of the column that receives the featurized output.</param>
/// <param name="inputColumnName">name of the column to featurize.</param>
/// <exception cref="NotImplementedException">always thrown; this method is currently a placeholder without an implementation.</exception>
internal SweepableEstimator[] TextFeaturizer(string outputColumnName, string inputColumnName)
    => throw new NotImplementedException();

/// <summary>
/// Builds the collection of <see cref="SweepableEstimator"/> candidates intended for featurizing a numeric column.
/// </summary>
/// <param name="outputColumnName">name of the column that receives the featurized output.</param>
/// <param name="inputColumnName">name of the column to featurize.</param>
/// <exception cref="NotImplementedException">always thrown; this method is currently a placeholder without an implementation.</exception>
internal SweepableEstimator[] NumericFeaturizer(string outputColumnName, string inputColumnName)
    => throw new NotImplementedException();

/// <summary>
/// Builds the collection of <see cref="SweepableEstimator"/> candidates intended for featurizing a catalog column.
/// </summary>
/// <param name="outputColumnName">name of the column that receives the featurized output.</param>
/// <param name="inputColumnName">name of the column to featurize.</param>
/// <exception cref="NotImplementedException">always thrown; this method is currently a placeholder without an implementation.</exception>
internal SweepableEstimator[] CatalogFeaturizer(string outputColumnName, string inputColumnName)
    => throw new NotImplementedException();

/// <summary>
/// Builds one featurization pipeline from <paramref name="data"/>. Every column of <paramref name="data"/> that is not listed in
/// <paramref name="excludeColumns"/> is meant to be featurized through <see cref="CatalogFeaturizer(string, string)"/>,
/// <see cref="NumericFeaturizer(string, string)"/> or <see cref="TextFeaturizer(string, string)"/>, and the results are
/// combined into a single feature column.
/// </summary>
/// <param name="data">input data whose columns are featurized.</param>
/// <param name="outputColumnName">name of the combined output feature column.</param>
/// <param name="catalogColumns">columns to treat as catalog. When not specified, whether a column is catalog is inferred automatically.</param>
/// <param name="excludeColumns">columns left out of featurization, for example the label column.</param>
/// <exception cref="NotImplementedException">always thrown; this method is currently a placeholder without an implementation.</exception>
internal MultiModelPipeline Featurizer(
    IDataView data,
    string outputColumnName = "Features",
    string[] catalogColumns = null,
    string[] excludeColumns = null)
    => throw new NotImplementedException();

/// <summary>
/// Builds one featurization pipeline from <paramref name="columnInformation"/>. Every column described by
/// <paramref name="columnInformation"/> that is not listed in <paramref name="excludeColumns"/> is meant to be featurized through
/// <see cref="CatalogFeaturizer(string, string)"/>, <see cref="NumericFeaturizer(string, string)"/> or
/// <see cref="TextFeaturizer(string, string)"/>, and the results are combined into a single feature column.
/// </summary>
/// <param name="columnInformation">column information describing the columns to featurize.</param>
/// <param name="outputColumnName">name of the combined output feature column.</param>
/// <param name="excludeColumns">columns left out of featurization, for example the label column.</param>
/// <returns>the featurization pipeline as a <see cref="MultiModelPipeline"/> (nothing is returned yet; see the exception below).</returns>
/// <exception cref="NotImplementedException">always thrown; this method is currently a placeholder without an implementation.</exception>
internal MultiModelPipeline Featurizer(
    ColumnInformation columnInformation,
    string outputColumnName = "Features",
    string[] excludeColumns = null)
    => throw new NotImplementedException();
}
}