Skip to content

Fix a value-mapping bug #3180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 4, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 46 additions & 10 deletions src/Microsoft.ML.Data/Transforms/ConversionsExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,14 @@ public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType
{
var keys = keyValuePairs.Select(pair => pair.Key);
var values = keyValuePairs.Select(pair => pair.Value);
return new ValueMappingEstimator<TInputType, TOutputType>(CatalogUtils.GetEnvironment(catalog), keys, values, treatValuesAsKeyType,

var lookupMap = DataViewHelper.CreateDataView(catalog.GetEnvironment(), keys, values,
ValueMappingTransformer.DefaultKeyColumnName,
ValueMappingTransformer.DefaultValueColumnName, treatValuesAsKeyType);

return new ValueMappingEstimator<TInputType, TOutputType>(catalog.GetEnvironment(), lookupMap,
lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
new[] { (outputColumnName, inputColumnName ?? outputColumnName) });
}

Expand Down Expand Up @@ -293,7 +300,15 @@ internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputTy
env.CheckValue(columns, nameof(columns));
var keys = keyValuePairs.Select(pair => pair.Key);
var values = keyValuePairs.Select(pair => pair.Value);
return new ValueMappingEstimator<TInputType, TOutputType>(env, keys, values, InputOutputColumnPair.ConvertToValueTuples(columns));

var lookupMap = DataViewHelper.CreateDataView(catalog.GetEnvironment(), keys, values,
ValueMappingTransformer.DefaultKeyColumnName,
ValueMappingTransformer.DefaultValueColumnName, false);

return new ValueMappingEstimator<TInputType, TOutputType>(catalog.GetEnvironment(), lookupMap,
lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
InputOutputColumnPair.ConvertToValueTuples(columns));
}

/// <summary>
Expand Down Expand Up @@ -323,8 +338,15 @@ internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputTy
env.CheckValue(columns, nameof(columns));
var keys = keyValuePairs.Select(pair => pair.Key);
var values = keyValuePairs.Select(pair => pair.Value);
return new ValueMappingEstimator<TInputType, TOutputType>(env, keys, values, treatValuesAsKeyType,
InputOutputColumnPair.ConvertToValueTuples(columns));

var lookupMap = DataViewHelper.CreateDataView(catalog.GetEnvironment(), keys, values,
ValueMappingTransformer.DefaultKeyColumnName,
ValueMappingTransformer.DefaultValueColumnName, treatValuesAsKeyType);

return new ValueMappingEstimator<TInputType, TOutputType>(catalog.GetEnvironment(), lookupMap,
lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
InputOutputColumnPair.ConvertToValueTuples(columns));
}

/// <summary>
Expand Down Expand Up @@ -354,7 +376,15 @@ public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType
{
var keys = keyValuePairs.Select(pair => pair.Key);
var values = keyValuePairs.Select(pair => pair.Value);
return new ValueMappingEstimator<TInputType, TOutputType>(CatalogUtils.GetEnvironment(catalog), keys, values,

// Convert parallel key and value lists to IDataView with two columns, so that the underlying infra can use it.
var lookupMap = DataViewHelper.CreateDataView(catalog.GetEnvironment(), keys, values,
ValueMappingTransformer.DefaultKeyColumnName,
ValueMappingTransformer.DefaultValueColumnName);

return new ValueMappingEstimator<TInputType, TOutputType>(catalog.GetEnvironment(), lookupMap,
lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
new[] { (outputColumnName, inputColumnName ?? outputColumnName) });
}

Expand Down Expand Up @@ -386,8 +416,15 @@ internal static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputTy
env.CheckValue(columns, nameof(columns));
var keys = keyValuePairs.Select(pair => pair.Key);
var values = keyValuePairs.Select(pair => pair.Value);
return new ValueMappingEstimator<TInputType, TOutputType>(env, keys, values,
InputOutputColumnPair.ConvertToValueTuples(columns));

var lookupMap = DataViewHelper.CreateDataView(catalog.GetEnvironment(), keys, values,
ValueMappingTransformer.DefaultKeyColumnName,
ValueMappingTransformer.DefaultValueColumnName);

return new ValueMappingEstimator<TInputType, TOutputType>(catalog.GetEnvironment(), lookupMap,
lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
InputOutputColumnPair.ConvertToValueTuples(columns));
Copy link
Member

@sfilipi sfilipi Apr 4, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a bit of repetition here. Does it make sense to make it part of the ctor? I mean: create the lookup map inside the ctor and just pass the columns.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel the columns must go with their associated IDataView. There should be no column if its IDataView hasn't been created.

}

/// <summary>
Expand All @@ -413,7 +450,7 @@ public static ValueMappingEstimator MapValue(
this TransformsCatalog.ConversionTransforms catalog,
string outputColumnName, IDataView lookupMap, DataViewSchema.Column keyColumn, DataViewSchema.Column valueColumn, string inputColumnName = null)
{
return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn.Name, valueColumn.Name,
return new ValueMappingEstimator(CatalogUtils.GetEnvironment(catalog), lookupMap, keyColumn, valueColumn,
new[] { (outputColumnName, inputColumnName ?? outputColumnName) });
}

Expand Down Expand Up @@ -442,8 +479,7 @@ internal static ValueMappingEstimator MapValue(
{
var env = CatalogUtils.GetEnvironment(catalog);
env.CheckValue(columns, nameof(columns));
return new ValueMappingEstimator(env, lookupMap, keyColumn.Name, valueColumn.Name,
InputOutputColumnPair.ConvertToValueTuples(columns));
return new ValueMappingEstimator(env, lookupMap, keyColumn, valueColumn, InputOutputColumnPair.ConvertToValueTuples(columns));
}
}
}
Loading