-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Samples for CustomMapping, IndicateMissingValues, ReplaceMissingValues #3216
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
75105b2
samples custom, missingindicator, missingreplace
artidoro 0bf25ee
review comments
artidoro 9473b46
remove sample from file name
artidoro ce038f9
update to test.snk
artidoro 779f5f6
making a few corrections and updating proj csproj file
artidoro da90716
review comments and remove signing altogether
artidoro 7e11f86
review comments
artidoro 18ce098
merge master
artidoro File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
70 changes: 70 additions & 0 deletions
70
docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMapping.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.ML; | ||
|
||
namespace Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to add a derived column to a data set with a
    /// CustomMapping transformation that is not meant to be saved.
    /// </summary>
    public static class CustomMapping
    {
        /// <summary>
        /// Loads four ages into an IDataView, applies a custom mapping that adds an
        /// IsUnderThirty column, and prints every transformed row to the console.
        /// </summary>
        public static void Example()
        {
            // The MLContext is the starting point for ML.NET operations. It can be used for
            // exception tracking and logging, and it is the source of randomness.
            var mlContext = new MLContext();

            // Build a small in-memory data set and expose it as an IDataView.
            var people = new List<InputData>();
            foreach (var age in new float[] { 26, 35, 34, 28 })
            {
                people.Add(new InputData { Age = age });
            }
            var data = mlContext.Data.LoadFromEnumerable(people);

            // The custom mapping from an input row to an output row, applied by the transformation.
            Action<InputData, CustomMappingOutput> mapping = FlagUnderThirty;

            // A custom transformation can be applied on its own or as one step in a pipeline of estimators.
            // Note: when contractName is null, any pipeline of estimators that contains this
            // CustomMapping estimator cannot be saved and loaded back.
            var estimator = mlContext.Transforms.CustomMapping(mapping, contractName: null);

            // Fit and transform. Nothing is evaluated here; the data is only
            // processed when the rows are enumerated below.
            var transformedData = estimator.Fit(data).Transform(data);

            var rows = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: true);
            Console.WriteLine("Age\t IsUnderThirty");
            foreach (var row in rows)
            {
                Console.WriteLine($"{row.Age}\t {row.IsUnderThirty}");
            }

            // Expected output (columns are tab-separated):
            //  Age     IsUnderThirty
            //  26      True
            //  35      False
            //  34      False
            //  28      True
        }

        // Sets IsUnderThirty on the output row to true when the input Age is below 30.
        private static void FlagUnderThirty(InputData input, CustomMappingOutput output)
        {
            output.IsUnderThirty = input.Age < 30;
        }

        // Holds only the column produced by the custom mapping; the columns
        // already present in the input are carried over in addition to it.
        private class CustomMappingOutput
        {
            public bool IsUnderThirty { get; set; }
        }

        // Schema of the input data.
        private class InputData
        {
            public float Age { get; set; }
        }

        // Schema of the transformed data: the input columns plus the new IsUnderThirty column.
        private class TransformedData : InputData
        {
            public bool IsUnderThirty { get; set; }
        }
    }
}
82 changes: 0 additions & 82 deletions
82
docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSample.cs
This file was deleted.
Oops, something went wrong.
87 changes: 87 additions & 0 deletions
87
docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/CustomMappingSaveAndLoad.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.ML; | ||
using Microsoft.ML.Transforms; | ||
|
||
namespace Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to define a CustomMapping transformation that can be
    /// saved to disk and loaded back.
    /// </summary>
    public static class CustomMappingSaveAndLoad
    {
        // Contract name shared by the CustomMapping estimator and the factory attribute.
        // Both must use the same value, so it is defined once here.
        private const string ContractName = "IsUnderThirty";

        // File the fitted transformer is saved to and loaded from.
        private const string ModelPath = "customTransform.zip";

        /// <summary>
        /// Fits a custom mapping that adds an IsUnderThirty column, saves the fitted
        /// transformer to <c>customTransform.zip</c>, loads it back, applies the loaded
        /// transformer to the data, and prints every transformed row to the console.
        /// </summary>
        public static void Example()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var mlContext = new MLContext();

            // Get a small dataset as an IEnumerable and convert it to an IDataView.
            var samples = new List<InputData>
            {
                new InputData { Age = 26 },
                new InputData { Age = 35 },
                new InputData { Age = 34 },
                new InputData { Age = 28 },
            };
            var data = mlContext.Data.LoadFromEnumerable(samples);

            // Custom transformations can be used to transform data directly, or as part of a pipeline of estimators.
            var pipeline = mlContext.Transforms.CustomMapping(new IsUnderThirtyCustomAction().GetMapping(), contractName: ContractName);
            var transformer = pipeline.Fit(data);

            // To save and load the CustomMapping estimator, the assembly in which the custom action is defined
            // needs to be registered in the environment. The following registers the assembly where
            // IsUnderThirtyCustomAction is defined.
            mlContext.ComponentCatalog.RegisterAssembly(typeof(IsUnderThirtyCustomAction).Assembly);

            // Now the transform pipeline can be saved and loaded through the usual MLContext methods.
            // The input schema returned by Load is not needed in this sample, so it is discarded.
            mlContext.Model.Save(transformer, data.Schema, ModelPath);
            var loadedTransform = mlContext.Model.Load(ModelPath, out _);

            // Now we can transform the data and look at the output to confirm the behavior of the estimator.
            // This operation doesn't actually evaluate data until we read the data below.
            var transformedData = loadedTransform.Transform(data);

            var dataEnumerable = mlContext.Data.CreateEnumerable<TransformedData>(transformedData, reuseRowObject: true);

            // The header uses the same "\t " separator as the rows so the columns line up.
            Console.WriteLine("Age\t IsUnderThirty");
            foreach (var row in dataEnumerable)
            {
                Console.WriteLine($"{row.Age}\t {row.IsUnderThirty}");
            }

            // Expected output (columns are tab-separated):
            //  Age     IsUnderThirty
            //  26      True
            //  35      False
            //  34      False
            //  28      True
        }

        // The custom action needs to implement the abstract class CustomMappingFactory, and needs to have the
        // CustomMappingFactoryAttribute with an argument equal to the contractName used to define the
        // CustomMapping estimator which uses the action.
        [CustomMappingFactoryAttribute(ContractName)]
        private class IsUnderThirtyCustomAction : CustomMappingFactory<InputData, CustomMappingOutput>
        {
            // The custom mapping between input and output rows that will be applied by the transformation.
            public static void CustomAction(InputData input, CustomMappingOutput output)
                => output.IsUnderThirty = input.Age < 30;

            // Returns the mapping as a delegate.
            public override Action<InputData, CustomMappingOutput> GetMapping()
                => CustomAction;
        }

        // Defines only the column to be generated by the custom mapping transformation,
        // in addition to the columns already present.
        private class CustomMappingOutput
        {
            public bool IsUnderThirty { get; set; }
        }

        // Defines the schema of the input data.
        private class InputData
        {
            public float Age { get; set; }
        }

        // Defines the schema of the transformed data, which includes the new column IsUnderThirty.
        private class TransformedData : InputData
        {
            public bool IsUnderThirty { get; set; }
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/IndicateMissingValuesMultiColumn.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using Microsoft.ML; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Samples.Dynamic
{
    /// <summary>
    /// Sample showing how to flag missing values in several vector columns
    /// with a single IndicateMissingValues estimator.
    /// </summary>
    public static class IndicateMissingValuesMultiColumn
    {
        /// <summary>
        /// Loads three rows with two float-vector columns, adds one boolean indicator
        /// column per input column, and prints every transformed row to the console.
        /// </summary>
        public static void Example()
        {
            // The MLContext is the starting point for ML.NET operations. It can be used for
            // exception tracking and logging, and it is the source of randomness.
            var mlContext = new MLContext();

            // Build a small in-memory data set and expose it as an IDataView.
            var samples = new List<DataPoint>
            {
                new DataPoint { Features1 = new float[3] { 1, 1, 0 }, Features2 = new float[2] { 1, 1 } },
                new DataPoint { Features1 = new float[3] { 0, float.NaN, 1 }, Features2 = new float[2] { float.NaN, 1 } },
                new DataPoint { Features1 = new float[3] { -1, float.NaN, -3 }, Features2 = new float[2] { 1, float.PositiveInfinity } },
            };
            var data = mlContext.Data.LoadFromEnumerable(samples);

            // IndicateMissingValues produces a boolean vector holding 'true' wherever the value in the
            // input column is missing. For floats and doubles, missing values are NaN.
            // Passing an array of InputOutputColumnPair applies the MissingValueIndicatorEstimator
            // to multiple columns in one pass over the data.
            var columnPairs = new[]
            {
                new InputOutputColumnPair("MissingIndicator1", "Features1"),
                new InputOutputColumnPair("MissingIndicator2", "Features2")
            };
            var pipeline = mlContext.Transforms.IndicateMissingValues(columnPairs);

            // Fit and transform. Nothing is evaluated here; the data is only
            // processed when the rows are enumerated below.
            var transformer = pipeline.Fit(data);
            var transformedData = transformer.Transform(data);

            // Read the result back as an IEnumerable of SampleDataTransformed, the class defined below.
            var rowEnumerable = mlContext.Data.CreateEnumerable<SampleDataTransformed>(transformedData, reuseRowObject: false);

            // Finally, write out the rows of the data set, showing the columns of interest.
            foreach (var row in rowEnumerable)
            {
                var features1 = string.Join(", ", row.Features1);
                var indicator1 = string.Join(", ", row.MissingIndicator1);
                var features2 = string.Join(", ", row.Features2);
                var indicator2 = string.Join(", ", row.MissingIndicator2);

                Console.WriteLine($"Features1: [{features1}]\t MissingIndicator1: [{indicator1}]\t Features2: [{features2}]\t MissingIndicator2: [{indicator2}]");
            }

            // Expected output (fields are tab-separated in the real output; padded here for readability):
            //  Features1: [1, 1, 0]      MissingIndicator1: [False, False, False]  Features2: [1, 1]    MissingIndicator2: [False, False]
            //  Features1: [0, NaN, 1]    MissingIndicator1: [False, True, False]   Features2: [NaN, 1]  MissingIndicator2: [True, False]
            //  Features1: [-1, NaN, -3]  MissingIndicator1: [False, True, False]   Features2: [1, ∞]    MissingIndicator2: [False, False]
        }

        // Schema of the input data: two float vectors, declared with sizes 3 and 2.
        private class DataPoint
        {
            [VectorType(3)]
            public float[] Features1 { get; set; }

            [VectorType(2)]
            public float[] Features2 { get; set; }
        }

        // Schema of the transformed data: the input columns plus one indicator vector per input column.
        private sealed class SampleDataTransformed : DataPoint
        {
            public bool[] MissingIndicator1 { get; set; }
            public bool[] MissingIndicator2 { get; set; }
        }
    }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.