Skip to content

Move Normalizer extension method from experimental to stable nuget and remove Normalizer generic APIs #3118

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions docs/code/MlNetCookBook.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ var cachedTrainData = mlContext.Data.Cache(trainData);
var pipeline =
// First 'normalize' the data (rescale to be
// between -1 and 1 for all examples)
mlContext.Transforms.Normalize("FeatureVector")
mlContext.Transforms.NormalizeMinMax("FeatureVector")
// We add a step for caching data in memory so that the downstream iterative training
// algorithm can efficiently scan through the data multiple times. Otherwise, the following
// trainer will load data from disk multiple times. The caching mechanism uses an on-demand strategy.
Expand Down Expand Up @@ -625,18 +625,15 @@ var trainData = mlContext.Data.LoadFromTextFile<IrisInputAllFeatures>(dataPath,
separatorChar: ','
);

// Apply all kinds of standard ML.NET normalization to the raw features.
// Apply MinMax normalization to the raw features.
var pipeline =
mlContext.Transforms.Normalize(
new NormalizingEstimator.MinMaxColumnOptions("MinMaxNormalized", "Features", fixZero: true),
new NormalizingEstimator.MeanVarianceColumnOptions("MeanVarNormalized", "Features", fixZero: true),
new NormalizingEstimator.BinningColumnOptions("BinNormalized", "Features", maximumBinCount: 256));
mlContext.Transforms.NormalizeMinMax("MinMaxNormalized", "Features");

// Let's train our normalization pipeline, and then apply it to the same data.
var normalizedData = pipeline.Fit(trainData).Transform(trainData);

// Inspect one column of the resulting dataset.
var meanVarValues = normalizedData.GetColumn<float[]>(normalizedData.Schema["MeanVarNormalized"]).ToArray();
var meanVarValues = normalizedData.GetColumn<float[]>(normalizedData.Schema["MinMaxNormalized"]).ToArray();
```

## How do I train my model on categorical data?
Expand Down
8 changes: 3 additions & 5 deletions docs/samples/Microsoft.ML.Samples/Dynamic/Normalizer.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms;

namespace Microsoft.ML.Samples.Dynamic
{
Expand All @@ -28,7 +26,7 @@ public static void Example()
// 35 1 6-11yrs 1 3 32 5 ...

// A pipeline for normalizing the Induced column.
var pipeline = ml.Transforms.Normalize("Induced");
var pipeline = ml.Transforms.NormalizeMinMax("Induced");
// Fit the pipeline to obtain the transformer that applies min-max normalization to the data.
var transformer = pipeline.Fit(trainData);

Expand Down Expand Up @@ -58,8 +56,8 @@ public static void Example()

// Composing a different pipeline if we wanted to normalize more than one column at a time.
// Using min-max as the normalization mode.
var multiColPipeline = ml.Transforms.Normalize("LogInduced", "Induced", NormalizingEstimator.NormalizationMode.LogMeanVariance)
.Append(ml.Transforms.Normalize("LogSpontaneous", "Spontaneous", NormalizingEstimator.NormalizationMode.LogMeanVariance));
var multiColPipeline = ml.Transforms.NormalizeMinMax("LogInduced", "Induced")
.Append(ml.Transforms.NormalizeMinMax("LogSpontaneous", "Spontaneous"));
// The transformed data.
var multiColtransformer = multiColPipeline.Fit(trainData);
var multiColtransformedData = multiColtransformer.Transform(trainData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public static void Example()
// Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0.
// Then append a linear regression trainer.
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.Regression.Trainers.Ols(
labelColumnName: labelName, featureColumnName: "Features"));
var model = pipeline.Fit(data);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public static void Example()
// Normalize the data set so that for each feature, its maximum value is 1 while its minimum value is 0.
// Then append a logistic regression trainer.
var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
.Append(mlContext.Transforms.Normalize("Features"))
.Append(mlContext.Transforms.NormalizeMinMax("Features"))
.Append(mlContext.BinaryClassification.Trainers.LbfgsLogisticRegression(
labelColumnName: labelName, featureColumnName: "Features"));
var model = pipeline.Fit(data);
Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ML.Data/Transforms/Normalizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ internal static class Defaults
public const long MaximumExampleCount = 1000000000;
}

public enum NormalizationMode
[BestFriend]
internal enum NormalizationMode
{
/// <summary>
/// Linear rescale such that minimum and maximum values are mapped between -1 and 1.
Expand Down
112 changes: 0 additions & 112 deletions src/Microsoft.ML.Experimental/TransformsCatalogExtensions.cs

This file was deleted.

2 changes: 1 addition & 1 deletion src/Microsoft.ML.SamplesUtils/SamplesDatasetUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ public static IDataView LoadFeaturizedAdultDataset(MLContext mlContext)
"occupation", "relationship", "ethnicity", "native-country", "age", "education-num",
"capital-gain", "capital-loss", "hours-per-week"))
// Min-max normalize all the features
.Append(mlContext.Transforms.Normalize("Features"));
.Append(mlContext.Transforms.NormalizeMinMax("Features"));

var data = loader.Load(dataFile);
var featurizedData = pipeline.Fit(data).Transform(data);
Expand Down
Loading