Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added slot names support for OnnxTransformer #4857

Merged
merged 2 commits into from
Feb 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion src/Microsoft.ML.OnnxConverter/SaveOnnxCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Google.Protobuf;
using Microsoft.ML;
using Microsoft.ML.Command;
Expand Down Expand Up @@ -188,7 +190,9 @@ internal static ModelProto ConvertTransformListToOnnxModel(OnnxContextImpl ctx,
if (outputData.Schema[i].IsHidden)
continue;

var idataviewColumnName = outputData.Schema[i].Name;
var column = outputData.Schema[i];

var idataviewColumnName = column.Name;

// Since the last IDataView also contains columns of the initial IDataView, last IDataView's columns found in
// _inputToDrop should be removed too.
Expand All @@ -204,11 +208,39 @@ internal static ModelProto ConvertTransformListToOnnxModel(OnnxContextImpl ctx,
var trueVariableName = ctx.AddIntermediateVariable(null, idataviewColumnName + ".output", true);
ctx.CreateNode("Identity", variableName, trueVariableName, ctx.GetNodeName("Identity"), "");
ctx.AddOutputVariable(outputData.Schema[i].Type, trueVariableName);

if (column.HasSlotNames())
AddSlotNames(ctx, column);
}

// Add metadata graph outputs

return ctx.MakeModel();
}

// Persists the column's slot names into the ONNX model. ONNX has no native notion of
// slot-name metadata, so the names are carried as the "keys_strings" attribute of a
// LabelEncoder node named "mlnet.<column>.SlotNames"; the node's own input/output are unused.
private static void AddSlotNames(OnnxContextImpl ctx, DataViewSchema.Column column)
{
// Read the column's slot-name annotations and materialize them as plain strings.
VBuffer<ReadOnlyMemory<char>> slotNames = default;
column.GetSlotNames(ref slotNames);
IEnumerable<string> slotNamesAsStrings = slotNames.DenseValues().Select(name => name.ToString());

string opType = "LabelEncoder";
// NOTE(review): labelEncoderInputName appears unused below — the node's input is the
// "one" initializer added via AddInitializer; confirm and consider removing.
string labelEncoderInputName = $"mlnet.{column.Name}.unusedInput";
string labelEncoderOutputName = $"mlnet.{column.Name}.unusedOutput";
string labelEncoderNodeName = $"mlnet.{column.Name}.SlotNames";

// Dummy 1x1 string initializer ("one") that serves as the LabelEncoder's input tensor.
string[] oneVals = new string[] { "one" };
long[] dims = new long[] { 1, 1 };
var one = ctx.AddInitializer(oneVals, dims, labelEncoderNodeName);

var labelEncoderOutput = ctx.AddIntermediateVariable(NumberDataViewType.Int64, labelEncoderOutputName, true);
var node = ctx.CreateNode(opType, one, labelEncoderOutput, labelEncoderNodeName);
// The slot names themselves; consumers look this attribute up by node name.
node.AddAttribute("keys_strings", slotNamesAsStrings);
// values_int64s is not consumed by ML.NET; it is supplied only so ONNX Runtime accepts the node.
node.AddAttribute("values_int64s", Enumerable.Range(0, slotNames.Length).Select(x => (long)x));
Copy link

@yaeldekel yaeldekel Feb 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

values_int64s [](start = 31, length = 13)

Why do we need this? #Resolved

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are unused. But are specified only to satisfy ORT.


In reply to: 381867237 [](ancestors = 381867237)


// NOTE(review): presumably the dummy output is registered as a graph output so the
// carrier node survives graph pruning — confirm against the converter's output handling.
ctx.AddOutputVariable(NumberDataViewType.Int64, labelEncoderOutput);
}

private void Run(IChannel ch)
{
ILegacyDataLoader loader = null;
Expand Down
32 changes: 31 additions & 1 deletion src/Microsoft.ML.OnnxTransformer/OnnxTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.Runtime;
using Microsoft.ML.Transforms.Onnx;
using static Microsoft.ML.Model.OnnxConverter.OnnxCSharpToProtoWrapper;
using OnnxShape = System.Collections.Generic.List<int>;

[assembly: LoadableClass(OnnxTransformer.Summary, typeof(IDataTransform), typeof(OnnxTransformer),
Expand Down Expand Up @@ -416,11 +417,40 @@ protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore()
{
var onnxOutputName = _parent.Outputs[i];
var columnName = onnxOutputName.EndsWith(stdSuffix) ? onnxOutputName.Replace(stdSuffix, "") : onnxOutputName;
info[i] = new DataViewSchema.DetachedColumn(columnName, _parent.OutputTypes[i], null);

var builder = new DataViewSchema.Annotations.Builder();
AddSlotNames(columnName, builder);

info[i] = new DataViewSchema.DetachedColumn(columnName, _parent.OutputTypes[i], builder.ToAnnotations());
}
return info;
}

/// <summary>
/// Restores slot-name annotations for <paramref name="columnName"/> when the loaded ONNX
/// graph contains a LabelEncoder node named "mlnet.{column}.SlotNames" whose
/// "keys_strings" attribute carries the names (as written by the ML.NET ONNX converter).
/// </summary>
/// <param name="columnName">Output column whose slot names are being restored.</param>
/// <param name="builder">Annotations builder the slot names are appended to.</param>
private void AddSlotNames(string columnName, DataViewSchema.Annotations.Builder builder)
{
    string expectedNodeName = $"mlnet.{columnName}.SlotNames";

    // Locate the slot-name carrier node and its "keys_strings" attribute, if present.
    var carrierNode = _parent.Model.Graph.Node.FirstOrDefault(n => n.Name == expectedNodeName);
    var keysAttribute = carrierNode?.Attribute.FirstOrDefault(a => a.Name == "keys_strings");
    if (keysAttribute == null)
        return; // No slot names were stored for this column.

    int slotCount = keysAttribute.Strings.Count();

    // Decode the UTF-8 byte strings into the dense slot-name vector on demand.
    ValueGetter<VBuffer<ReadOnlyMemory<char>>> getter = (ref VBuffer<ReadOnlyMemory<char>> dst) =>
    {
        var editor = VBufferEditor.Create(ref dst, slotCount);
        for (int slot = 0; slot < slotCount; slot++)
            editor.Values[slot] = keysAttribute.Strings[slot].ToString(Encoding.UTF8).AsMemory();
        dst = editor.Commit();
    };

    builder.AddSlotNames(slotCount, getter);
}

private protected override Func<int, bool> GetDependenciesCore(Func<int, bool> activeOutput)
{
return col => Enumerable.Range(0, _parent.Outputs.Length).Any(i => activeOutput(i)) && _inputColIndices.Any(i => i == col);
Expand Down
9 changes: 9 additions & 0 deletions src/Microsoft.ML.OnnxTransformer/OnnxUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using Microsoft.ML.Runtime;
using static Microsoft.ML.Model.OnnxConverter.OnnxCSharpToProtoWrapper;
using OnnxShape = System.Collections.Generic.List<int>;

namespace Microsoft.ML.Transforms.Onnx
Expand Down Expand Up @@ -157,6 +158,8 @@ public OnnxVariableInfo(string name, OnnxShape shape, Type typeInOnnxRuntime, Da
/// </summary>
internal OnnxModelInfo ModelInfo { get; }

internal GraphProto Graph { get; }

/// <summary>
/// Constructs OnnxModel object from file.
/// </summary>
Expand Down Expand Up @@ -217,6 +220,8 @@ public OnnxModel(string modelFile, int? gpuDeviceId = null, bool fallbackToCpu =

// Create a view to the used ONNX model from ONNXRuntime's perspective.
ModelInfo = new OnnxModelInfo(inputInfos, outputInfos, overrideableInitializers);

Graph = model.Graph;
}

private List<OnnxVariableInfo> GetOnnxVariablesFromMetadata(IReadOnlyDictionary<string, NodeMetadata> nodeMetadata,
Expand All @@ -233,6 +238,10 @@ private List<OnnxVariableInfo> GetOnnxVariablesFromMetadata(IReadOnlyDictionary<
var dataViewType = typePool[name];
var caster = casterPool?[name];

if (name.StartsWith("mlnet.") &&
(name.EndsWith(".unusedInput") || name.EndsWith(".unusedOutput")))
continue;

OnnxVariableInfo info = null;
if (shapeDictionary != null && shapeDictionary.ContainsKey(name))
{
Expand Down
3 changes: 0 additions & 3 deletions src/Microsoft.ML.Transforms/Text/NgramTransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -770,9 +770,6 @@ public void SaveAsOnnx(OnnxContext ctx)

private void SaveAsOnnxCore(OnnxContext ctx, int iinfo, string srcVariableName, string dstVariableName)
{
VBuffer<ReadOnlyMemory<char>> slotNames = default;
GetSlotNames(iinfo, 0, ref slotNames);

var transformInfo = _parent._transformInfos[iinfo];

// TfIdfVectorizer accepts strings, int32 and int64 tensors.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,8 @@
{
"name": "target_weights",
"floats": [
0.50476193,
-0.97911227
0.504761934,
-0.979112267
Copy link

@yaeldekel yaeldekel Feb 20, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How come these changed? #Resolved

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure, but I have seen this happen off and on: the baseline numbers change when we run them locally. I am ignoring them because the change is only in the 7th decimal place.


In reply to: 381858622 [](ancestors = 381858622)

],
"type": "FLOATS"
}
Expand Down Expand Up @@ -428,6 +428,51 @@
"name": "Identity1",
"opType": "Identity"
},
{
"input": [
"mlnet.F2.SlotNames"
],
"output": [
"mlnet.F2.unusedOutput"
],
"name": "mlnet.F2.SlotNames",
"opType": "LabelEncoder",
"attribute": [
{
"name": "keys_strings",
"strings": [
"NA==",
"MQ==",
"OA==",
"MTA=",
"Mg==",
"Mw==",
"Nw==",
"NQ==",
"Ng==",
"OQ=="
],
"type": "STRINGS"
},
{
"name": "values_int64s",
"ints": [
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9"
],
"type": "INTS"
}
],
"domain": "ai.onnx.ml"
},
{
"input": [
"Features"
Expand All @@ -438,6 +483,53 @@
"name": "Identity2",
"opType": "Identity"
},
{
"input": [
"mlnet.Features.SlotNames"
],
"output": [
"mlnet.Features.unusedOutput"
],
"name": "mlnet.Features.SlotNames",
"opType": "LabelEncoder",
"attribute": [
{
"name": "keys_strings",
"strings": [
"RjE=",
"RjIuNA==",
"RjIuMQ==",
"RjIuOA==",
"RjIuMTA=",
"RjIuMg==",
"RjIuMw==",
"RjIuNw==",
"RjIuNQ==",
"RjIuNg==",
"RjIuOQ=="
],
"type": "STRINGS"
},
{
"name": "values_int64s",
"ints": [
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"10"
],
"type": "INTS"
}
],
"domain": "ai.onnx.ml"
},
{
"input": [
"PredictedLabel"
Expand Down Expand Up @@ -484,6 +576,28 @@
0
],
"name": "Offset"
},
{
"dims": [
"1",
"1"
],
"dataType": 8,
"stringData": [
"b25l"
],
"name": "mlnet.F2.SlotNames"
},
{
"dims": [
"1",
"1"
],
"dataType": 8,
"stringData": [
"b25l"
],
"name": "mlnet.Features.SlotNames"
}
],
"input": [
Expand Down Expand Up @@ -597,6 +711,24 @@
}
}
},
{
"name": "mlnet.F2.unusedOutput",
"type": {
"tensorType": {
"elemType": 7,
"shape": {
"dim": [
{
"dimValue": "-1"
},
{
"dimValue": "1"
}
]
}
}
}
},
{
"name": "Features.output",
"type": {
Expand All @@ -615,6 +747,24 @@
}
}
},
{
"name": "mlnet.Features.unusedOutput",
"type": {
"tensorType": {
"elemType": 7,
"shape": {
"dim": [
{
"dimValue": "-1"
},
{
"dimValue": "1"
}
]
}
}
}
},
{
"name": "PredictedLabel.output",
"type": {
Expand Down
Loading