Skip to content

Samples for FeatureSelection transform estimators #3184

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 0 additions & 120 deletions docs/samples/Microsoft.ML.Samples/Dynamic/FeatureSelectionTransform.cs

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic
{
/// <summary>
/// Sample for the SelectFeaturesBasedOnCount estimator, applied to one column at a time.
/// Two selectors (one for a numeric vector column, one for a text vector column) are
/// chained into a single pipeline.
/// </summary>
public static class SelectFeaturesBasedOnCount
{
    /// <summary>
    /// Prints the sample rows, fits and applies the two chained count-based feature
    /// selectors, and prints the rows again so the dropped slots can be compared.
    /// </summary>
    public static void Example()
    {
        // The ML context is the entry point for ML.NET operations; it also serves for
        // exception tracking and logging, and as the source of randomness.
        var mlContext = new MLContext();

        // A handful of in-memory rows to work on.
        IEnumerable<InputData> samples = GetData();

        // Show the input columns before any transformation.
        PrintHeader();
        foreach (var sample in samples)
        {
            PrintRow(sample.NumericVector, sample.StringVector);
        }

        // NumericVector StringVector
        // 4,NaN,6 A,WA,Male
        // 4,5,6 A,,Female
        // 4,5,6 A,NY,
        // 4,NaN,NaN A,,Male

        // Wrap the in-memory rows in an IDataView so that estimators can consume them.
        var dataView = mlContext.Data.LoadFromEnumerable(samples);

        // SelectFeaturesBasedOnCount keeps only those slots which have at least 'count'
        // non-default values per slot. As the expected output below shows, the NaN, empty
        // and null entries of this dataset do not count towards that threshold.
        var numericSelector = mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(
            outputColumnName: "NumericVector", count: 3); // Usage on numeric column.
        var textSelector = mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(
            outputColumnName: "StringVector", count: 3); // Usage on text column.
        var pipeline = numericSelector.Append(textSelector);

        // Fit on the data, then apply the fitted transformer to the same data.
        var model = pipeline.Fit(dataView);
        var transformedData = model.Transform(dataView);

        // Read the transformed view back as typed rows, reusing a single row object.
        var selectedRows = mlContext.Data.CreateEnumerable<TransformedData>(
            transformedData, reuseRowObject: true);

        // Show the columns after the transformation.
        PrintHeader();
        foreach (var row in selectedRows)
        {
            PrintRow(row.NumericVector, row.StringVector);
        }

        // NumericVector StringVector
        // 4,6 A,Male
        // 4,6 A,Female
        // 4,6 A,
        // 4,NaN A,Male
    }

    /// <summary>
    /// Row shape used to read the transformed data back out of the pipeline.
    /// </summary>
    public class TransformedData
    {
        public float[] NumericVector { get; set; }

        public string[] StringVector { get; set; }
    }

    /// <summary>
    /// Row shape of the input data: one numeric and one text vector column,
    /// each annotated with <c>VectorType(3)</c>.
    /// </summary>
    public class InputData
    {
        [VectorType(3)]
        public float[] NumericVector { get; set; }

        [VectorType(3)]
        public string[] StringVector { get; set; }
    }

    /// <summary>
    /// Builds the four sample rows used by <see cref="Example"/>.
    /// </summary>
    /// <returns>A list of rows containing some NaN, empty and null entries.</returns>
    public static IEnumerable<InputData> GetData()
    {
        var rows = new List<InputData>();
        rows.Add(CreateRow(new[] { 4f, float.NaN, 6f }, new string[] { "A", "WA", "Male" }));
        rows.Add(CreateRow(new[] { 4f, 5f, 6f }, new string[] { "A", "", "Female" }));
        rows.Add(CreateRow(new[] { 4f, 5f, 6f }, new string[] { "A", "NY", null }));
        rows.Add(CreateRow(new[] { 4f, float.NaN, float.NaN }, new string[] { "A", null, "Male" }));
        return rows;
    }

    // Bundles one numeric vector and one text vector into an input row.
    private static InputData CreateRow(float[] numericVector, string[] stringVector)
    {
        return new InputData
        {
            NumericVector = numericVector,
            StringVector = stringVector
        };
    }

    // Writes the column captions of the printed table.
    private static void PrintHeader()
    {
        Console.WriteLine("NumericVector StringVector");
    }

    // Writes one table line: both vectors comma-joined and left-aligned in 25-character columns.
    private static void PrintRow(float[] numericVector, string[] stringVector)
    {
        var numericText = string.Join(",", numericVector);
        var stringText = string.Join(",", stringVector);
        Console.WriteLine("{0,-25} {1,-25}", numericText, stringText);
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
using System;
using System.Collections.Generic;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace Samples.Dynamic
{
/// <summary>
/// Sample for the multi-column overload of the SelectFeaturesBasedOnCount estimator:
/// a single estimator processes a numeric and a text vector column with the same threshold.
/// </summary>
public static class SelectFeaturesBasedOnCountMultiColumn
{
    /// <summary>
    /// Prints the sample rows, fits and applies one count-based feature selector over
    /// both columns, and prints the rows again so the dropped slots can be compared.
    /// </summary>
    public static void Example()
    {
        // The ML context is the entry point for ML.NET operations; it also serves for
        // exception tracking and logging, and as the source of randomness.
        var mlContext = new MLContext();

        // A handful of in-memory rows to work on.
        IEnumerable<InputData> samples = GetData();

        // Show the input columns before any transformation.
        PrintHeader();
        foreach (var sample in samples)
        {
            PrintRow(sample.NumericVector, sample.StringVector);
        }

        // NumericVector StringVector
        // 4,NaN,6 A,WA,Male
        // 4,5,6 A,,Female
        // 4,5,6 A,NY,
        // 4,NaN,NaN A,,Male

        // Wrap the in-memory rows in an IDataView so that estimators can consume them.
        var dataView = mlContext.Data.LoadFromEnumerable(samples);

        // The SelectFeaturesBasedOnCount transform estimator keeps only those slots which
        // have at least 'count' non-default values per slot. As the expected output below
        // shows, the NaN, empty and null entries of this dataset do not count towards
        // that threshold.

        // Multi column example: this pipeline transforms both columns with the same parameters.
        var columnPairs = new[]
        {
            new InputOutputColumnPair("NumericVector"),
            new InputOutputColumnPair("StringVector")
        };
        var pipeline = mlContext.Transforms.FeatureSelection.SelectFeaturesBasedOnCount(
            columnPairs, count: 3);

        // Fit on the data, then apply the fitted transformer to the same data.
        var model = pipeline.Fit(dataView);
        var transformedData = model.Transform(dataView);

        // Read the transformed view back as typed rows, reusing a single row object.
        var selectedRows = mlContext.Data.CreateEnumerable<TransformedData>(
            transformedData, reuseRowObject: true);

        // Show the columns after the transformation.
        PrintHeader();
        foreach (var row in selectedRows)
        {
            PrintRow(row.NumericVector, row.StringVector);
        }

        // NumericVector StringVector
        // 4,6 A,Male
        // 4,6 A,Female
        // 4,6 A,
        // 4,NaN A,Male
    }

    /// <summary>
    /// Row shape used to read the transformed data back out of the pipeline.
    /// </summary>
    private class TransformedData
    {
        public float[] NumericVector { get; set; }

        public string[] StringVector { get; set; }
    }

    /// <summary>
    /// Row shape of the input data: one numeric and one text vector column,
    /// each annotated with <c>VectorType(3)</c>.
    /// </summary>
    public class InputData
    {
        [VectorType(3)]
        public float[] NumericVector { get; set; }

        [VectorType(3)]
        public string[] StringVector { get; set; }
    }

    /// <summary>
    /// Builds the four sample rows used by <see cref="Example"/>.
    /// </summary>
    /// <returns>A list of rows containing some NaN, empty and null entries.</returns>
    public static IEnumerable<InputData> GetData()
    {
        var rows = new List<InputData>();
        rows.Add(CreateRow(new[] { 4f, float.NaN, 6f }, new string[] { "A", "WA", "Male" }));
        rows.Add(CreateRow(new[] { 4f, 5f, 6f }, new string[] { "A", "", "Female" }));
        rows.Add(CreateRow(new[] { 4f, 5f, 6f }, new string[] { "A", "NY", null }));
        rows.Add(CreateRow(new[] { 4f, float.NaN, float.NaN }, new string[] { "A", null, "Male" }));
        return rows;
    }

    // Bundles one numeric vector and one text vector into an input row.
    private static InputData CreateRow(float[] numericVector, string[] stringVector)
    {
        return new InputData
        {
            NumericVector = numericVector,
            StringVector = stringVector
        };
    }

    // Writes the column captions of the printed table.
    private static void PrintHeader()
    {
        Console.WriteLine("NumericVector StringVector");
    }

    // Writes one table line: both vectors comma-joined and left-aligned in 25-character columns.
    private static void PrintRow(float[] numericVector, string[] stringVector)
    {
        var numericText = string.Join(",", numericVector);
        var stringText = string.Join(",", stringVector);
        Console.WriteLine("{0,-25} {1,-25}", numericText, stringText);
    }
}
}
Loading