
Commit c2d6100

C#: Add CreateFromMemory to FixedBufferOnnxValue to allow binding user buffers and passing custom binary-compatible types (#5886)

Add CreateFromMemory to FixedBufferOnnxValue so users can bind their own custom binary-compatible buffers to feed/fetch data.
1 parent 705d093 commit c2d6100
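
For context, a minimal usage sketch of the API this commit adds is shown below. The model path, tensor shapes and buffer sizes are illustrative placeholders, not part of the change; the FixedBufferOnnxValue.CreateFromMemory and InferenceSession.Run calls mirror the doc-comment example and the test added in this commit.

using System;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

// Illustrative sketch only: "model.onnx", the shapes and the buffer sizes are placeholders.
using (var session = new InferenceSession("model.onnx"))
{
    var memInfo = OrtMemoryInfo.DefaultInstance;          // CPU-resident managed buffers
    long[] inputShape = { 1, 3, 224, 224 };
    long[] outputShape = { 1, 1000, 1, 1 };
    float[] inputData = new float[1 * 3 * 224 * 224];     // filled by the caller
    float[] outputData = new float[1000];                 // receives results in place

    using (var input = FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, inputData,
               TensorElementType.Float, inputShape, inputData.Length * sizeof(float)))
    using (var output = FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, outputData,
               TensorElementType.Float, outputShape, outputData.Length * sizeof(float)))
    {
        // The session reads from and writes to the pinned buffers directly.
        session.Run(session.InputMetadata.Keys.ToArray(), new[] { input },
                    session.OutputMetadata.Keys.ToArray(), new[] { output });
        // outputData now holds the model output.
    }
}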

8 files changed: 173 additions & 34 deletions

csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs

Lines changed: 87 additions & 3 deletions
@@ -1,11 +1,14 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
 using Microsoft.ML.OnnxRuntime.Tensors;
 using System;
 using System.Buffers;

 namespace Microsoft.ML.OnnxRuntime
 {
     /// <summary>
-    /// Represents an Onnx Value with its underlying buffer pinned
+    /// Represents an OrtValue with its underlying buffer pinned
     /// </summary>
     public class FixedBufferOnnxValue : IDisposable
     {
@@ -28,11 +31,14 @@ private FixedBufferOnnxValue(MemoryHandle pinnedMemory, OrtValue ortValue, OnnxV
         /// </summary>
         /// <typeparam name="T"></typeparam>
         /// <param name="value"></param>
-        /// <returns></returns>
+        /// <returns>a disposable instance of FixedBufferOnnxValue</returns>
         public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
         {
             MemoryHandle? memHandle;
             var ortValue = OrtValue.CreateFromTensorObject(value, out memHandle, out TensorElementType elementType);
+            // memHandle will have a value when CreateFromTensorObject() pins managed memory, and that will have to be
+            // disposed (unpinned) when all is said and done. This is the case for blittable types, but it does not
+            // happen for the string type, where each element has its own allocation.
             if (memHandle.HasValue)
             {
                 return new FixedBufferOnnxValue((MemoryHandle)memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
@@ -43,6 +49,84 @@ public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
             }
         }

+        /// <summary>
+        /// This is a factory method that creates a disposable instance of FixedBufferOnnxValue
+        /// on top of a buffer. Internally, it will pin the managed buffer and create
+        /// an OrtValue containing a tensor that does not own the memory.
+        /// Such an instance of FixedBufferOnnxValue can be used as both input and output in the InferenceSession.Run()
+        /// overload. Compared to CreateFromTensor(), this allows you to pass in buffers with custom data types
+        /// that are blittable, as defined in https://docs.microsoft.com/en-us/dotnet/framework/interop/blittable-and-non-blittable-types,
+        /// i.e. those that have the same binary representation as the original type. This includes all existing types
+        /// but may also allow using custom types for Float16 and BFloat16, provided they have the same layout and size.
+        /// The resulting instance must be disposed of to release the pinned memory and deallocate the native OrtValue.
+        /// See the example below.
+        /// </summary>
+        /// <typeparam name="T">Blittable data type, compatible with supported types</typeparam>
+        /// <param name="memoryInfo">memoryInfo. For managed buffers simply use OrtMemoryInfo.DefaultInstance</param>
+        /// <param name="memory">managed memory buffer to pin and use as the tensor data</param>
+        /// <param name="elementType">TensorElementType</param>
+        /// <param name="shape">shape of the tensor to be created</param>
+        /// <param name="bytesSize">size of the allocation in bytes</param>
+        /// <returns>a disposable instance of FixedBufferOnnxValue</returns>
+        /// <example>
+        /// Here is an example of using a 3rd party library class for processing float16/bfloat16.
+        /// Currently, to pass tensor data and create a tensor, one must copy the data into Float16/BFloat16 structures
+        /// so that DenseTensor can recognize it.
+        ///
+        /// If you are using a library that has a class Half and it is blittable, that is, its managed in-memory representation
+        /// matches the native one and its size is 16 bits, you can use the following conceptual example
+        /// to feed/fetch data for inference using a Half array. This allows you to avoid copying data from your Half[] to Float16[].
+        ///
+        /// \code{.cs}
+        /// unsafe { Debug.Assert(sizeof(ushort) == sizeof(Half)); }
+        /// Half[] input = new Half[] { 5646, 12345 };
+        /// var input_shape = new long[] { input.Length };
+        /// Half[] output = new Half[40]; // Whatever the expected len/shape is must match
+        /// var output_shape = new long[] { output.Length };
+        ///
+        /// var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
+        ///
+        /// using (var fixedBufferInput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
+        ///     input, TensorElementType.Float16, input_shape, input.Length * sizeof(ushort)))
+        /// using (var fixedBufferOutput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
+        ///     output, TensorElementType.Float16, output_shape, output.Length * sizeof(ushort)))
+        /// {
+        ///     FixedBufferOnnxValue[] inputValues = new FixedBufferOnnxValue[] { fixedBufferInput };
+        ///     FixedBufferOnnxValue[] outputValues = new FixedBufferOnnxValue[] { fixedBufferOutput };
+        ///     session.Run(inputNames, inputValues, outputNames, outputValues);
+        ///     // Output is now in output[]
+        /// }
+        /// \endcode
+        /// </example>
+        public static FixedBufferOnnxValue CreateFromMemory<T>(OrtMemoryInfo memoryInfo, Memory<T> memory,
+            TensorElementType elementType, long[] shape, long bytesSize)
+        {
+            if (elementType == TensorElementType.String)
+            {
+                throw new ArgumentException("String data type is not supported");
+            }
+
+            var memHandle = memory.Pin();
+            try
+            {
+                IntPtr memPtr;
+                unsafe
+                {
+                    memPtr = (IntPtr)memHandle.Pointer;
+                }
+                var ortValue = OrtValue.CreateTensorValueWithData(memoryInfo,
+                    elementType,
+                    shape,
+                    memPtr, bytesSize);
+                return new FixedBufferOnnxValue(memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
+            }
+            catch (Exception e)
+            {
+                memHandle.Dispose();
+                throw e;
+            }
+        }
+
         #region IDisposable Support

         /// <summary>
@@ -51,7 +135,7 @@ public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
         /// <param name="disposing">true if invoked from Dispose()</param>
         protected virtual void Dispose(bool disposing)
         {
-            if(_disposed)
+            if (_disposed)
             {
                 return;
             }

csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs

Lines changed: 3 additions & 3 deletions
@@ -455,7 +455,7 @@ public void Run(
         /// Create OrtIoBinding instance to bind pre-allocated buffers
         /// to input/output
         /// </summary>
-        /// <returns></returns>
+        /// <returns>A new instance of OrtIoBinding</returns>
         public OrtIoBinding CreateIoBinding()
         {
             return new OrtIoBinding(this);
@@ -469,8 +469,8 @@ public OrtIoBinding CreateIoBinding()
         /// the expense of fetching them and pairing with names.
         /// You can still fetch the outputs by calling OrtIOBinding.GetOutputValues()
         /// </summary>
-        /// <param name="runOptions"></param>
-        /// <param name="ioBinding"></param>
+        /// <param name="runOptions">runOptions</param>
+        /// <param name="ioBinding">ioBinding instance to use</param>
         public void RunWithBinding(RunOptions runOptions, OrtIoBinding ioBinding)
         {
             NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle));

csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs

Lines changed: 0 additions & 1 deletion
@@ -2,7 +2,6 @@
 // Licensed under the MIT License.

 using System;
-using System.Runtime.InteropServices;

 namespace Microsoft.ML.OnnxRuntime
 {

csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs

Lines changed: 13 additions & 2 deletions
@@ -10,9 +10,20 @@ namespace Microsoft.ML.OnnxRuntime
     /// <summary>
     /// This class enables binding inputs and outputs to pre-allocated
     /// memory. This enables interesting scenarios. For example, if your input
-    /// already resides in some pre-allocated memory even if on a device you bind
+    /// already resides in some pre-allocated memory, such as on a GPU, you can bind
     /// that piece of memory to an input name and shape and onnxruntime will use that as input.
-    /// Other traditional inputs can also be bound that already exists as Tensors
+    /// Other traditional inputs that already exist as Tensors can also be bound.
+    ///
+    /// Note that this arrangement is designed to minimize data copies, and to that effect
+    /// your memory allocations must match what is expected by the model, whether you run on
+    /// CPU or GPU. A data copy will still be made if your pre-allocated memory location does not
+    /// match the one expected by the model. However, copies with OrtIoBinding are only made once,
+    /// at the time of binding, not at run time. This means that if your input data required a copy,
+    /// further modifications of the input would not be seen by onnxruntime unless you rebind it, even if it is
+    /// the same buffer. If you require the scenario where data is copied, OrtIoBinding may not be the best match
+    /// for your use case.
+    ///
+    /// The fact that no data copy is made at run time also has performance implications.
     /// </summary>
     public class OrtIoBinding : SafeHandle
     {
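
To illustrate the bind-once semantics the comment above describes, here is a hedged conceptual sketch; session, inputName, outputName, fixedInputBuffer and fixedOutputBuffer are assumed to exist already (for example, FixedBufferOnnxValue instances such as those created earlier) and are not defined by this diff.

// Conceptual sketch: bind the buffers once, then run; rebind if the data location changes.
using (var runOptions = new RunOptions())
using (var ioBinding = session.CreateIoBinding())
{
    ioBinding.BindInput(inputName, fixedInputBuffer);     // FixedBufferOnnxValue over caller-owned memory
    ioBinding.BindOutput(outputName, fixedOutputBuffer);  // results are written into the bound buffer
    session.RunWithBinding(runOptions, ioBinding);        // no outputs returned; read the bound output buffer
}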

csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs

Lines changed: 3 additions & 3 deletions
@@ -90,14 +90,14 @@ internal IntPtr Disown()
        public static OrtValue CreateTensorValueWithData(OrtMemoryInfo memInfo, TensorElementType elementType,
                                                         long[] shape,
                                                         IntPtr dataBuffer,
-                                                        uint bufferLength)
+                                                        long bufferLength)
        {
            Type type;
            int width;
            TensorElementTypeConverter.GetTypeAndWidth(elementType, out type, out width);
-            if(width == 0)
+            if(width < 1)
            {
-                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unknown tensor type");
+                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unsupported data type (such as string)");
            }

            var shapeSize = ArrayUtilities.GetSizeForShape(shape);
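
A small, hedged sketch of calling the widened signature follows; the data, dims and the AddInitializer("W", ...) usage are placeholders modeled on the weight-sharing test further down, and the buffer length is now passed as a long byte count.

using System;
using System.Runtime.InteropServices;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

// Sketch only: pin a managed array and wrap it in an OrtValue that does not own the memory.
float[] data = { 1f, 2f, 3f, 4f };
long[] dims = { 2, 2 };
GCHandle handle = GCHandle.Alloc(data, GCHandleType.Pinned);
try
{
    long byteLen = data.Length * sizeof(float);           // bufferLength is a long after this change
    using (var ortValue = OrtValue.CreateTensorValueWithData(OrtMemoryInfo.DefaultInstance,
               TensorElementType.Float, dims, handle.AddrOfPinnedObject(), byteLen))
    using (var options = new SessionOptions())
    {
        options.AddInitializer("W", ortValue);            // e.g. share the tensor as a pre-allocated initializer
    }
}
finally
{
    handle.Free();
}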

csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs

Lines changed: 0 additions & 2 deletions
@@ -4,8 +4,6 @@
 using System;
 using System.Runtime.InteropServices;
 using System.Text;
-using System.Runtime.InteropServices;
-using System.IO;

 namespace Microsoft.ML.OnnxRuntime
 {

csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs

Lines changed: 58 additions & 15 deletions
@@ -229,16 +229,26 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev
         {
             string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");

-            // Set the graph optimization level for this session.
-            SessionOptions options = new SessionOptions();
-            options.GraphOptimizationLevel = graphOptimizationLevel;
-            if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
-
-            using (var session = new InferenceSession(modelPath, options))
+            using (var cleanUp = new DisposableList<IDisposable>())
             {
+                // Set the graph optimization level for this session.
+                SessionOptions options = new SessionOptions();
+                options.GraphOptimizationLevel = graphOptimizationLevel;
+                if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
+                cleanUp.Add(options);
+
+                var session = new InferenceSession(modelPath, options);
+                cleanUp.Add(session);
+
                 var inputMeta = session.InputMetadata;
+                var outputMeta = session.OutputMetadata;
                 var container = new List<NamedOnnxValue>();

+                float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
+                int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
+                ReadOnlySpan<int> expectedOutputDimensions = expectedDimensions;
+                string[] expectedOutputNames = new string[] { "softmaxout_1" };
+
                 float[] inputData = LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model

                 foreach (var name in inputMeta.Keys)
@@ -249,8 +259,6 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev
                     container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
                 }

-                ReadOnlySpan<int> expectedOutputDimensions = new int[] { 1, 1000, 1, 1 };
-                string[] expectedOutputNames = new string[] { "softmaxout_1" };

                 // Run inference with named inputs and outputs created with in Run()
                 using (var results = session.Run(container)) // results is an IReadOnlyList<NamedOnnxValue> container
@@ -291,9 +299,40 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev
                     }
                 }

+                // Run inference with outputs pinned from buffers
+                using (var pinnedInputs = new DisposableListTest<FixedBufferOnnxValue>())
+                using(var pinnedOutputs = new DisposableListTest<FixedBufferOnnxValue>())
+                {
+                    var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
+
+                    // Create inputs
+                    Assert.Single(inputMeta.Keys);
+                    var inputNames = inputMeta.Keys.ToArray();
+                    var inputName = inputNames[0];
+                    Assert.Equal(typeof(float), inputMeta[inputName].ElementType);
+                    Assert.True(inputMeta[inputName].IsTensor);
+                    var longShape = Array.ConvertAll<int, long>(inputMeta[inputName].Dimensions, d => d);
+                    var byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
+                    pinnedInputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, inputData,
+                        TensorElementType.Float, longShape, byteSize));
+
+
+                    // Prepare output buffer
+                    Assert.Single(outputMeta.Keys);
+                    var outputNames = outputMeta.Keys.ToArray();
+                    var outputName = outputNames[0];
+                    Assert.Equal(typeof(float), outputMeta[outputName].ElementType);
+                    Assert.True(outputMeta[outputName].IsTensor);
+                    longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, d => d);
+                    byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
+                    float[] outputBuffer = new float[expectedOutput.Length];
+                    pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, outputBuffer,
+                        TensorElementType.Float, longShape, byteSize));
+
+                    session.Run(inputNames, pinnedInputs, outputNames, pinnedOutputs);
+                    Assert.Equal(expectedOutput, outputBuffer, new floatComparer());
+                }

-                float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
-                int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
                 // Run inference with named inputs and named outputs
                 {
                     // correct pre-allocated outputs
@@ -1954,6 +1993,10 @@ private void TestIOBinding()
            var inputTensor = tuple.Item3;
            var outputData = tuple.Item4;
            dispList.Add(session);
+            var runOptions = new RunOptions();
+            dispList.Add(runOptions);
+
+            var inputMeta = session.InputMetadata;
            var outputMeta = session.OutputMetadata;
            var outputTensor = new DenseTensor<float>(outputData, outputMeta[outputName].Dimensions);

@@ -1967,8 +2010,8 @@ private void TestIOBinding()
            {
                var cyrName = "несуществующийВыход";
                var longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, i => i);
-                ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
-                ioBinding.BindOutput(cyrName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
+                ioBinding.BindOutput(outputName, TensorElementType.Float, longShape, ortAllocationOutput);
+                ioBinding.BindOutput(cyrName, TensorElementType.Float, longShape, ortAllocationOutput);
                string[] outputs = ioBinding.GetOutputNames();
                Assert.Equal(2, outputs.Length);
                Assert.Equal(outputName, outputs[0]);
@@ -1982,7 +2025,7 @@ private void TestIOBinding()
            {
                ioBinding.BindInput(inputName, fixeInputBuffer);
                ioBinding.BindOutput(outputName, fixedOutputBuffer);
-                using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+                using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
                {
                    Assert.Equal(1, outputs.Count);
                    var output = outputs.First();
@@ -2000,7 +2043,7 @@ private void TestIOBinding()
                ioBinding.BindInput(inputName, fixedInputBuffer);
                ioBinding.BindOutputToDevice(outputName, allocator.Info);

-                using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+                using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
                {
                    Assert.Equal(1, outputs.Count);
                    var output = outputs.First();
@@ -2040,7 +2083,7 @@ private void TestWeightSharingBetweenSessions()
            }
            var dataBufferNumBytes = (uint)dataBuffer.Length * sizeof(float);
            var sharedInitializer = OrtValue.CreateTensorValueWithData(ortCpuMemInfo, Tensors.TensorElementType.Float,
-                dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);
+                    dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);

            SessionOptions options = new SessionOptions();
            options.AddInitializer("W", sharedInitializer);
