Skip to content

Commit 218813d

Browse files
committed
Move the Normalizer extension methods from the experimental to the stable NuGet package.
1 parent 3663320 commit 218813d

File tree

2 files changed

+100
-112
lines changed

2 files changed

+100
-112
lines changed

src/Microsoft.ML.Experimental/TransformsCatalogExtensions.cs

Lines changed: 0 additions & 112 deletions
This file was deleted.

src/Microsoft.ML.Transforms/NormalizerCatalog.cs

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,106 @@ internal static NormalizingEstimator Normalize(this TransformsCatalog catalog,
5151
return new NormalizingEstimator(env, mode, InputOutputColumnPair.ConvertToValueTuples(columns));
5252
}
5353

54+
/// <summary>
/// Creates a <see cref="NormalizingEstimator"/> that rescales a column using the
/// <see cref="NormalizingEstimator.NormalizationMode.MinMax"/> mode, i.e. based on the
/// minimum and maximum values observed in the data.
/// </summary>
/// <param name="catalog">The transform catalog; its environment is handed to the estimator.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>,
/// the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
/// <returns>A <see cref="NormalizingEstimator"/> built from a single <see cref="NormalizingEstimator.MinMaxColumnOptions"/>.</returns>
public static NormalizingEstimator NormalizeMinMax(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched)
{
    // The options object is built before the environment lookup, matching the original evaluation order.
    var minMaxOptions = new NormalizingEstimator.MinMaxColumnOptions(
        outputColumnName,
        inputColumnName,
        maximumExampleCount,
        fixZero);

    var env = CatalogUtils.GetEnvironment(catalog);
    return new NormalizingEstimator(env, minMaxOptions);
}
71+
72+
/// <summary>
/// Creates a <see cref="NormalizingEstimator"/> that rescales a column using the
/// <see cref="NormalizingEstimator.NormalizationMode.MeanVariance"/> mode, i.e. based on the
/// computed mean and variance of the data.
/// </summary>
/// <param name="catalog">The transform catalog; its environment is handed to the estimator.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>,
/// the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
/// <param name="useCdf">Whether to use CDF as the output.</param>
/// <returns>A <see cref="NormalizingEstimator"/> built from a single <see cref="NormalizingEstimator.MeanVarianceColumnOptions"/>.</returns>
public static NormalizingEstimator NormalizeMeanVariance(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
    bool useCdf = NormalizingEstimator.Defaults.MeanVarCdf)
{
    // The options object is built before the environment lookup, matching the original evaluation order.
    var meanVarianceOptions = new NormalizingEstimator.MeanVarianceColumnOptions(
        outputColumnName,
        inputColumnName,
        maximumExampleCount,
        fixZero,
        useCdf);

    var env = CatalogUtils.GetEnvironment(catalog);
    return new NormalizingEstimator(env, meanVarianceOptions);
}
91+
92+
/// <summary>
/// Creates a <see cref="NormalizingEstimator"/> that rescales a column using the
/// <see cref="NormalizingEstimator.NormalizationMode.LogMeanVariance"/> mode, i.e. based on the
/// computed mean and variance of the logarithm of the data.
/// </summary>
/// <param name="catalog">The transform catalog; its environment is handed to the estimator.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>,
/// the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
/// <param name="useCdf">Whether to use CDF as the output.</param>
/// <returns>A <see cref="NormalizingEstimator"/> built from a single <see cref="NormalizingEstimator.LogMeanVarianceColumnOptions"/>.</returns>
public static NormalizingEstimator NormalizeLogMeanVariance(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    bool useCdf = NormalizingEstimator.Defaults.LogMeanVarCdf)
{
    // The options object is built before the environment lookup, matching the original evaluation order.
    var logMeanVarianceOptions = new NormalizingEstimator.LogMeanVarianceColumnOptions(
        outputColumnName,
        inputColumnName,
        maximumExampleCount,
        useCdf);

    var env = CatalogUtils.GetEnvironment(catalog);
    return new NormalizingEstimator(env, logMeanVarianceOptions);
}
109+
110+
/// <summary>
/// Creates a <see cref="NormalizingEstimator"/> that rescales a column using the
/// <see cref="NormalizingEstimator.NormalizationMode.Binning"/> mode, in which values are
/// assigned into bins with equal density.
/// </summary>
/// <param name="catalog">The transform catalog; its environment is handed to the estimator.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>,
/// the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
/// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param>
/// <returns>A <see cref="NormalizingEstimator"/> built from a single <see cref="NormalizingEstimator.BinningColumnOptions"/>.</returns>
public static NormalizingEstimator NormalizeBinning(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
    int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount)
{
    // The options object is built before the environment lookup, matching the original evaluation order.
    var binningOptions = new NormalizingEstimator.BinningColumnOptions(
        outputColumnName,
        inputColumnName,
        maximumExampleCount,
        fixZero,
        maximumBinCount);

    var env = CatalogUtils.GetEnvironment(catalog);
    return new NormalizingEstimator(env, binningOptions);
}
129+
130+
/// <summary>
/// Creates a <see cref="NormalizingEstimator"/> that rescales a column using the
/// <see cref="NormalizingEstimator.NormalizationMode.SupervisedBinning"/> mode, in which values are
/// assigned into bins based on correlation with the <paramref name="labelColumnName"/> column.
/// </summary>
/// <param name="catalog">The transform catalog; its environment is handed to the estimator.</param>
/// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>,
/// the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="labelColumnName">Name of the label column for supervised binning.</param>
/// <param name="maximumExampleCount">Maximum number of examples used to train the normalizer.</param>
/// <param name="fixZero">Whether to map zero to zero, preserving sparsity.</param>
/// <param name="maximumBinCount">Maximum number of bins (power of 2 recommended).</param>
/// <param name="mininimumExamplesPerBin">Minimum number of examples per bin.</param>
/// <returns>A <see cref="NormalizingEstimator"/> built from a single <see cref="NormalizingEstimator.SupervisedBinningColumOptions"/>.</returns>
public static NormalizingEstimator NormalizeSupervisedBinning(
    this TransformsCatalog catalog,
    string outputColumnName,
    string inputColumnName = null,
    string labelColumnName = DefaultColumnNames.Label,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    bool fixZero = NormalizingEstimator.Defaults.EnsureZeroUntouched,
    int maximumBinCount = NormalizingEstimator.Defaults.MaximumBinCount,
    int mininimumExamplesPerBin = NormalizingEstimator.Defaults.MininimumBinSize)
{
    // The options object is built before the environment lookup, matching the original evaluation order.
    var supervisedBinningOptions = new NormalizingEstimator.SupervisedBinningColumOptions(
        outputColumnName,
        inputColumnName,
        labelColumnName,
        maximumExampleCount,
        fixZero,
        maximumBinCount,
        mininimumExamplesPerBin);

    var env = CatalogUtils.GetEnvironment(catalog);
    return new NormalizingEstimator(env, supervisedBinningOptions);
}
153+
54154
/// <summary>
55155
/// Normalize (rescale) columns according to specified custom parameters.
56156
/// </summary>

0 commit comments

Comments
 (0)