Open
Description
opened on Feb 27, 2019
System information
- OS version/distro: 0.11.0-preview-27427-9
Issue
OneHotEncoding with OutputKind.Bin produces output vectors with 3 elements per row, while 2-element vectors are expected. OutputKind.Ind and OutputKind.Bag correctly produce 2-element vectors. See the repro source code below.
Source code / logs
using System;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms.Categorical;
namespace ConsoleApp1
{
/// <summary>
/// Input row for the repro: a single categorical string value.
/// </summary>
internal class TestData
{
    // The field name doubles as the input column name ("data") referenced by the pipeline.
    public string data;
}
/// <summary>
/// Console repro that compares the per-row vector length produced by the
/// Bag, Ind and Bin output kinds of the OneHotEncoding transform.
/// </summary>
internal class Program
{
    /// <summary>
    /// Fits a four-stage OneHotEncoding pipeline (Bag, Key, Ind, Bin) on two categories,
    /// transforms a test set that also contains two values absent from training,
    /// and prints the length of the first output vector for Bag, Ind and Bin.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    private static void Main(string[] args)
    {
        // Training set: exactly two distinct categories.
        var trainData = new[]
        {
            new TestData() { data = "male" },
            new TestData() { data = "female" },
        };

        // Test set: the two training categories plus two values not present in training.
        var testData = new[]
        {
            new TestData() { data = "male" },
            new TestData() { data = "female" },
            new TestData() { data = "fem" },
            new TestData() { data = "fem1" },
        };

        var mlContext = new MLContext();
        var trainDataView = mlContext.Data.LoadFromEnumerable(trainData);
        var testDataView = mlContext.Data.LoadFromEnumerable(testData);

        // One encoding stage per output kind, all reading the same "data" column.
        var categorical = mlContext.Transforms.Categorical;
        var bagStage = categorical.OneHotEncoding("Bag", "data", OneHotEncodingTransformer.OutputKind.Bag);
        var pipeline = bagStage
            .Append(categorical.OneHotEncoding("Key", "data", OneHotEncodingTransformer.OutputKind.Key))
            .Append(categorical.OneHotEncoding("Ind", "data", OneHotEncodingTransformer.OutputKind.Ind))
            .Append(categorical.OneHotEncoding("Bin", "data", OneHotEncodingTransformer.OutputKind.Bin));

        var fitted = pipeline.Fit(trainDataView);
        var encoded = fitted.Transform(testDataView);

        // Materializes every row of the named vector column.
        float[][] ReadVectors(string columnName)
        {
            return encoded.GetColumn<float[]>(mlContext, columnName).ToArray();
        }

        // The "Key" column is produced by the pipeline but not inspected here.
        float[][] bagRows = ReadVectors("Bag");
        float[][] indRows = ReadVectors("Ind");
        float[][] binRows = ReadVectors("Bin");

        Console.WriteLine(
            "Number of dimensions (should be 2) in each output kind: {0} {1} {2}",
            bagRows[0].Length,
            indRows[0].Length,
            binRows[0].Length);
        // Reported output:
        // Number of dimensions (should be 2) in each output kind: 2 2 3
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment