
Commit c2d6100

C#: Add CreateFromMemory to FixedBufferOnnxValue to allow binding user buffers and passing custom binary-compatible types (#5886)

Add CreateFromMemory to FixedBufferOnnxValue so users can bind their own custom binary-compatible buffers to feed/fetch data.
1 parent 705d093 commit c2d6100
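
For context, a minimal usage sketch of the API this commit adds is shown below. The model path, tensor shapes and buffer sizes are illustrative placeholders, not part of the change; the FixedBufferOnnxValue.CreateFromMemory and InferenceSession.Run calls mirror the doc-comment example and the test added in this commit.

using System;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

// Illustrative sketch only: "model.onnx", the shapes and the buffer sizes are placeholders.
using (var session = new InferenceSession("model.onnx"))
{
    var memInfo = OrtMemoryInfo.DefaultInstance;          // CPU-resident managed buffers
    long[] inputShape = { 1, 3, 224, 224 };
    long[] outputShape = { 1, 1000, 1, 1 };
    float[] inputData = new float[1 * 3 * 224 * 224];     // filled by the caller
    float[] outputData = new float[1000];                 // receives results in place

    using (var input = FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, inputData,
               TensorElementType.Float, inputShape, inputData.Length * sizeof(float)))
    using (var output = FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, outputData,
               TensorElementType.Float, outputShape, outputData.Length * sizeof(float)))
    {
        // The session reads from and writes to the pinned buffers directly.
        session.Run(session.InputMetadata.Keys.ToArray(), new[] { input },
                    session.OutputMetadata.Keys.ToArray(), new[] { output });
        // outputData now holds the model output.
    }
}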

8 files changed: 173 additions & 34 deletions

csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs

Lines changed: 87 additions & 3 deletions
@@ -1,11 +1,14 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
 using Microsoft.ML.OnnxRuntime.Tensors;
 using System;
 using System.Buffers;

 namespace Microsoft.ML.OnnxRuntime
 {
     /// <summary>
-    /// Represents an Onnx Value with its underlying buffer pinned
+    /// Represents an OrtValue with its underlying buffer pinned
     /// </summary>
     public class FixedBufferOnnxValue : IDisposable
     {
@@ -28,11 +31,14 @@ private FixedBufferOnnxValue(MemoryHandle pinnedMemory, OrtValue ortValue, OnnxV
         /// </summary>
         /// <typeparam name="T"></typeparam>
         /// <param name="value"></param>
-        /// <returns></returns>
+        /// <returns>a disposable instance of FixedBufferOnnxValue</returns>
         public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
         {
             MemoryHandle? memHandle;
             var ortValue = OrtValue.CreateFromTensorObject(value, out memHandle, out TensorElementType elementType);
+            // memHandle will have a value when CreateFromTensorObject() pins managed memory, and that will have to be
+            // disposed (unpinned) when all is said and done. This is the case for blittable types, but it does not
+            // happen for the string type, where each element has its own allocation.
             if (memHandle.HasValue)
             {
                 return new FixedBufferOnnxValue((MemoryHandle)memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
@@ -43,6 +49,84 @@ public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
             }
         }

+        /// <summary>
+        /// This is a factory method that creates a disposable instance of FixedBufferOnnxValue
+        /// on top of a buffer. Internally, it will pin the managed buffer and create
+        /// an OrtValue containing a tensor that does not own the memory.
+        /// Such an instance of FixedBufferOnnxValue can be used as both input and output in the InferenceSession.Run()
+        /// overload. Compared to CreateFromTensor(), this allows you to pass in buffers with custom data types
+        /// that are blittable, as defined in https://docs.microsoft.com/en-us/dotnet/framework/interop/blittable-and-non-blittable-types,
+        /// i.e. those that have the same binary representation as the original type. This includes all existing types
+        /// but may also allow using custom types for Float16 and BFloat16, provided they have the same layout and size.
+        /// The resulting instance must be disposed of to release the pinned memory and deallocate the native OrtValue.
+        /// See the example below.
+        /// </summary>
+        /// <typeparam name="T">Blittable data type, compatible with supported types</typeparam>
+        /// <param name="memoryInfo">memoryInfo. For managed buffers simply use OrtMemoryInfo.DefaultInstance</param>
+        /// <param name="memory">managed memory buffer to pin and use as the tensor data</param>
+        /// <param name="elementType">TensorElementType</param>
+        /// <param name="shape">shape of the tensor to be created</param>
+        /// <param name="bytesSize">size of the allocation in bytes</param>
+        /// <returns>a disposable instance of FixedBufferOnnxValue</returns>
+        /// <example>
+        /// Here is an example of using a 3rd party library class for processing float16/bfloat16.
+        /// Currently, to pass tensor data and create a tensor, one must copy the data into Float16/BFloat16 structures
+        /// so that DenseTensor can recognize it.
+        ///
+        /// If you are using a library that has a class Half and it is blittable, that is, its managed in-memory representation
+        /// matches the native one and its size is 16 bits, you can use the following conceptual example
+        /// to feed/fetch data for inference using a Half array. This allows you to avoid copying data from your Half[] to Float16[].
+        ///
+        /// \code{.cs}
+        /// unsafe { Debug.Assert(sizeof(ushort) == sizeof(Half)); }
+        /// Half[] input = new Half[] { 5646, 12345 };
+        /// var input_shape = new long[] { input.Length };
+        /// Half[] output = new Half[40]; // Whatever the expected len/shape is must match
+        /// var output_shape = new long[] { output.Length };
+        ///
+        /// var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
+        ///
+        /// using (var fixedBufferInput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
+        ///     input, TensorElementType.Float16, input_shape, input.Length * sizeof(ushort)))
+        /// using (var fixedBufferOutput = FixedBufferOnnxValue.CreateFromMemory<Half>(memInfo,
+        ///     output, TensorElementType.Float16, output_shape, output.Length * sizeof(ushort)))
+        /// {
+        ///     FixedBufferOnnxValue[] inputValues = new FixedBufferOnnxValue[] { fixedBufferInput };
+        ///     FixedBufferOnnxValue[] outputValues = new FixedBufferOnnxValue[] { fixedBufferOutput };
+        ///     session.Run(inputNames, inputValues, outputNames, outputValues);
+        ///     // Output is now in output[]
+        /// }
+        /// \endcode
+        /// </example>
+        public static FixedBufferOnnxValue CreateFromMemory<T>(OrtMemoryInfo memoryInfo, Memory<T> memory,
+            TensorElementType elementType, long[] shape, long bytesSize)
+        {
+            if (elementType == TensorElementType.String)
+            {
+                throw new ArgumentException("String data type is not supported");
+            }
+
+            var memHandle = memory.Pin();
+            try
+            {
+                IntPtr memPtr;
+                unsafe
+                {
+                    memPtr = (IntPtr)memHandle.Pointer;
+                }
+                var ortValue = OrtValue.CreateTensorValueWithData(memoryInfo,
+                    elementType,
+                    shape,
+                    memPtr, bytesSize);
+                return new FixedBufferOnnxValue(memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType);
+            }
+            catch (Exception e)
+            {
+                memHandle.Dispose();
+                throw e;
+            }
+        }
+
         #region IDisposable Support

         /// <summary>
@@ -51,7 +135,7 @@ public static FixedBufferOnnxValue CreateFromTensor<T>(Tensor<T> value)
         /// <param name="disposing">true if invoked from Dispose()</param>
         protected virtual void Dispose(bool disposing)
         {
-            if(_disposed)
+            if (_disposed)
             {
                 return;
             }

csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs

Lines changed: 3 additions & 3 deletions
@@ -455,7 +455,7 @@ public void Run(
         /// Create OrtIoBinding instance to bind pre-allocated buffers
         /// to input/output
         /// </summary>
-        /// <returns></returns>
+        /// <returns>A new instance of OrtIoBinding</returns>
         public OrtIoBinding CreateIoBinding()
         {
             return new OrtIoBinding(this);
@@ -469,8 +469,8 @@ public OrtIoBinding CreateIoBinding()
         /// the expense of fetching them and pairing with names.
         /// You can still fetch the outputs by calling OrtIOBinding.GetOutputValues()
         /// </summary>
-        /// <param name="runOptions"></param>
-        /// <param name="ioBinding"></param>
+        /// <param name="runOptions">runOptions</param>
+        /// <param name="ioBinding">ioBinding instance to use</param>
         public void RunWithBinding(RunOptions runOptions, OrtIoBinding ioBinding)
         {
             NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle));

csharp/src/Microsoft.ML.OnnxRuntime/NativeApiStatus.cs

Lines changed: 0 additions & 1 deletion
@@ -2,7 +2,6 @@
 // Licensed under the MIT License.

 using System;
-using System.Runtime.InteropServices;

 namespace Microsoft.ML.OnnxRuntime
 {

csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs

Lines changed: 13 additions & 2 deletions
@@ -10,9 +10,20 @@ namespace Microsoft.ML.OnnxRuntime
     /// <summary>
     /// This class enables binding inputs and outputs to pre-allocated
     /// memory. This enables interesting scenarios. For example, if your input
-    /// already resides in some pre-allocated memory even if on a device you bind
+    /// already resides in some pre-allocated memory, such as on a GPU, you can bind
     /// that piece of memory to an input name and shape and onnxruntime will use that as input.
-    /// Other traditional inputs can also be bound that already exists as Tensors
+    /// Other traditional inputs that already exist as Tensors can also be bound.
+    ///
+    /// Note that this arrangement is designed to minimize data copies, and to that effect
+    /// your memory allocations must match what is expected by the model, whether you run on
+    /// CPU or GPU. A data copy will still be made if your pre-allocated memory location does not
+    /// match the one expected by the model. However, copies with OrtIoBinding are only made once,
+    /// at the time of binding, not at run time. This means that if your input data required a copy,
+    /// further modifications of the input would not be seen by onnxruntime unless you rebind it, even if it is
+    /// the same buffer. If you require the scenario where data is copied, OrtIoBinding may not be the best match
+    /// for your use case.
+    ///
+    /// The fact that no data copy is made at run time also has performance implications.
     /// </summary>
     public class OrtIoBinding : SafeHandle
     {
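
To illustrate the bind-once semantics the comment above describes, here is a hedged conceptual sketch; session, inputName, outputName, fixedInputBuffer and fixedOutputBuffer are assumed to exist already (for example, FixedBufferOnnxValue instances such as those created earlier) and are not defined by this diff.

// Conceptual sketch: bind the buffers once, then run; rebind if the data location changes.
using (var runOptions = new RunOptions())
using (var ioBinding = session.CreateIoBinding())
{
    ioBinding.BindInput(inputName, fixedInputBuffer);     // FixedBufferOnnxValue over caller-owned memory
    ioBinding.BindOutput(outputName, fixedOutputBuffer);  // results are written into the bound buffer
    session.RunWithBinding(runOptions, ioBinding);        // no outputs returned; read the bound output buffer
}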

csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs

Lines changed: 3 additions & 3 deletions
@@ -90,14 +90,14 @@ internal IntPtr Disown()
        public static OrtValue CreateTensorValueWithData(OrtMemoryInfo memInfo, TensorElementType elementType,
                                                         long[] shape,
                                                         IntPtr dataBuffer,
-                                                        uint bufferLength)
+                                                        long bufferLength)
        {
            Type type;
            int width;
            TensorElementTypeConverter.GetTypeAndWidth(elementType, out type, out width);
-            if(width == 0)
+            if(width < 1)
            {
-                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unknown tensor type");
+                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unsupported data type (such as string)");
            }

            var shapeSize = ArrayUtilities.GetSizeForShape(shape);
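
A small, hedged sketch of calling the widened signature follows; the data, dims and the AddInitializer("W", ...) usage are placeholders modeled on the weight-sharing test further down, and the buffer length is now passed as a long byte count.

using System;
using System.Runtime.InteropServices;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

// Sketch only: pin a managed array and wrap it in an OrtValue that does not own the memory.
float[] data = { 1f, 2f, 3f, 4f };
long[] dims = { 2, 2 };
GCHandle handle = GCHandle.Alloc(data, GCHandleType.Pinned);
try
{
    long byteLen = data.Length * sizeof(float);           // bufferLength is a long after this change
    using (var ortValue = OrtValue.CreateTensorValueWithData(OrtMemoryInfo.DefaultInstance,
               TensorElementType.Float, dims, handle.AddrOfPinnedObject(), byteLen))
    using (var options = new SessionOptions())
    {
        options.AddInitializer("W", ortValue);            // e.g. share the tensor as a pre-allocated initializer
    }
}
finally
{
    handle.Free();
}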

csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs

Lines changed: 0 additions & 2 deletions
@@ -4,8 +4,6 @@
 using System;
 using System.Runtime.InteropServices;
 using System.Text;
-using System.Runtime.InteropServices;
-using System.IO;

 namespace Microsoft.ML.OnnxRuntime
 {

csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs

Lines changed: 58 additions & 15 deletions
@@ -229,16 +229,26 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev
         {
             string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx");

-            // Set the graph optimization level for this session.
-            SessionOptions options = new SessionOptions();
-            options.GraphOptimizationLevel = graphOptimizationLevel;
-            if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
-
-            using (var session = new InferenceSession(modelPath, options))
+            using (var cleanUp = new DisposableList<IDisposable>())
             {
+                // Set the graph optimization level for this session.
+                SessionOptions options = new SessionOptions();
+                options.GraphOptimizationLevel = graphOptimizationLevel;
+                if (enableParallelExecution) options.ExecutionMode = ExecutionMode.ORT_PARALLEL;
+                cleanUp.Add(options);
+
+                var session = new InferenceSession(modelPath, options);
+                cleanUp.Add(session);
+
                 var inputMeta = session.InputMetadata;
+                var outputMeta = session.OutputMetadata;
                 var container = new List<NamedOnnxValue>();

+                float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
+                int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
+                ReadOnlySpan<int> expectedOutputDimensions = expectedDimensions;
+                string[] expectedOutputNames = new string[] { "softmaxout_1" };
+
                 float[] inputData = LoadTensorFromFile(@"bench.in"); // this is the data for only one input tensor for this model

                 foreach (var name in inputMeta.Keys)
@@ -249,8 +259,6 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev
                     container.Add(NamedOnnxValue.CreateFromTensor<float>(name, tensor));
                 }

-                ReadOnlySpan<int> expectedOutputDimensions = new int[] { 1, 1000, 1, 1 };
-                string[] expectedOutputNames = new string[] { "softmaxout_1" };

                 // Run inference with named inputs and outputs created with in Run()
                 using (var results = session.Run(container)) // results is an IReadOnlyList<NamedOnnxValue> container
@@ -291,9 +299,40 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev
                     }
                 }

+                // Run inference with outputs pinned from buffers
+                using (var pinnedInputs = new DisposableListTest<FixedBufferOnnxValue>())
+                using(var pinnedOutputs = new DisposableListTest<FixedBufferOnnxValue>())
+                {
+                    var memInfo = OrtMemoryInfo.DefaultInstance; // CPU
+
+                    // Create inputs
+                    Assert.Single(inputMeta.Keys);
+                    var inputNames = inputMeta.Keys.ToArray();
+                    var inputName = inputNames[0];
+                    Assert.Equal(typeof(float), inputMeta[inputName].ElementType);
+                    Assert.True(inputMeta[inputName].IsTensor);
+                    var longShape = Array.ConvertAll<int, long>(inputMeta[inputName].Dimensions, d => d);
+                    var byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
+                    pinnedInputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, inputData,
+                        TensorElementType.Float, longShape, byteSize));
+
+
+                    // Prepare output buffer
+                    Assert.Single(outputMeta.Keys);
+                    var outputNames = outputMeta.Keys.ToArray();
+                    var outputName = outputNames[0];
+                    Assert.Equal(typeof(float), outputMeta[outputName].ElementType);
+                    Assert.True(outputMeta[outputName].IsTensor);
+                    longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, d => d);
+                    byteSize = ArrayUtilities.GetSizeForShape(longShape) * sizeof(float);
+                    float[] outputBuffer = new float[expectedOutput.Length];
+                    pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromMemory<float>(memInfo, outputBuffer,
+                        TensorElementType.Float, longShape, byteSize));
+
+                    session.Run(inputNames, pinnedInputs, outputNames, pinnedOutputs);
+                    Assert.Equal(expectedOutput, outputBuffer, new floatComparer());
+                }

-                float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out");
-                int[] expectedDimensions = { 1, 1000, 1, 1 }; // hardcoded for now for the test data
                 // Run inference with named inputs and named outputs
                 {
                     // correct pre-allocated outputs
@@ -1954,6 +1993,10 @@ private void TestIOBinding()
            var inputTensor = tuple.Item3;
            var outputData = tuple.Item4;
            dispList.Add(session);
+            var runOptions = new RunOptions();
+            dispList.Add(runOptions);
+
+            var inputMeta = session.InputMetadata;
            var outputMeta = session.OutputMetadata;
            var outputTensor = new DenseTensor<float>(outputData, outputMeta[outputName].Dimensions);

@@ -1967,8 +2010,8 @@ private void TestIOBinding()
            {
                var cyrName = "несуществующийВыход";
                var longShape = Array.ConvertAll<int, long>(outputMeta[outputName].Dimensions, i => i);
-                ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
-                ioBinding.BindOutput(cyrName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput);
+                ioBinding.BindOutput(outputName, TensorElementType.Float, longShape, ortAllocationOutput);
+                ioBinding.BindOutput(cyrName, TensorElementType.Float, longShape, ortAllocationOutput);
                string[] outputs = ioBinding.GetOutputNames();
                Assert.Equal(2, outputs.Length);
                Assert.Equal(outputName, outputs[0]);
@@ -1982,7 +2025,7 @@ private void TestIOBinding()
            {
                ioBinding.BindInput(inputName, fixeInputBuffer);
                ioBinding.BindOutput(outputName, fixedOutputBuffer);
-                using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+                using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
                {
                    Assert.Equal(1, outputs.Count);
                    var output = outputs.First();
@@ -2000,7 +2043,7 @@ private void TestIOBinding()
                ioBinding.BindInput(inputName, fixedInputBuffer);
                ioBinding.BindOutputToDevice(outputName, allocator.Info);

-                using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding))
+                using (var outputs = session.RunWithBindingAndNames(runOptions, ioBinding))
                {
                    Assert.Equal(1, outputs.Count);
                    var output = outputs.First();
@@ -2040,7 +2083,7 @@ private void TestWeightSharingBetweenSessions()
            }
            var dataBufferNumBytes = (uint)dataBuffer.Length * sizeof(float);
            var sharedInitializer = OrtValue.CreateTensorValueWithData(ortCpuMemInfo, Tensors.TensorElementType.Float,
-                dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);
+                    dims, dataHandle.AddrOfPinnedObject(), dataBufferNumBytes);

            SessionOptions options = new SessionOptions();
            options.AddInitializer("W", sharedInitializer);
