diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp index 3cb05beba5ca5..0f65c11945dc4 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphDescBuilder.cpp @@ -124,8 +124,12 @@ namespace Dml::GraphDescBuilder // Check whether this specific node requested support for constant CPU inputs if (std::find(requiredConstantCpuInputs.begin(), requiredConstantCpuInputs.end(), inputIndex) != requiredConstantCpuInputs.end()) { - const onnxruntime::NodeArg* arg = node.InputDefs()[inputIndex]; - tensor = constantCpuGraphInputGetter(arg->Name()); + auto inputDefs = node.InputDefs(); + if (inputIndex < inputDefs.size()) + { + const onnxruntime::NodeArg* arg = inputDefs[inputIndex]; + tensor = constantCpuGraphInputGetter(arg->Name()); + } } return tensor; diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphTransformer.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphTransformer.cpp index 7390022a03295..b029b56bb1b43 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphTransformer.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/GraphTransformer.cpp @@ -9,7 +9,7 @@ #include "GraphPartitioner.h" #include "core/providers/dml/OperatorAuthorHelper/Attributes.h" #include "core/providers/dml/OperatorAuthorHelper/OperatorHelper.h" -#include "core/providers/dml/OperatorAuthorHelper/OperatorRegistration.h" +#include "core/providers/dml/OperatorAuthorHelper/OperatorVersions.h" #include "core/framework/kernel_registry.h" #include "core/graph/graph_utils.h" diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorElementWise.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorElementWise.cpp index 08398e4b8e3c3..428ebffe678bf 100644 --- 
a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorElementWise.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorElementWise.cpp @@ -505,41 +505,53 @@ class DmlOperatorElementwiseQLinear : public DmlOperator std::vector outputShape = kernelInfo.GetTensorShapeDescription().GetOutputTensorShape(0); const uint32_t outputShapeDimCount = gsl::narrow_cast(outputShape.size()); - Initialize(kernelInfo, std::nullopt, std::nullopt, outputShape); + Initialize(kernelInfo, std::nullopt, std::nullopt); - // If the axis attribute is explicitly provided, then broadcasting must be performed along that axis. - // So massage the actual shapes of the scale and zero-point tensors (1D with length equal to the input - // axis being broadcast to) into broadcastable shapes. + uint32_t axis = 0; + uint32_t broadcastAxisLength = 0; + + // If an axis was given explicitly passed (or the default value 1 is set from the schema), + // then other inputs are broadcasting to the shape of the input data tensor. if (kernelInfo.HasAttribute(AttrName::Axis, MLOperatorAttributeType::Int)) { const int32_t signedAxis = gsl::narrow_cast(kernelInfo.GetAttribute(AttrName::Axis)); - const uint32_t axis = Dml::HandleNegativeAxis(signedAxis, outputShapeDimCount); - const uint32_t broadcastAxisLength = outputShape[axis]; + axis = Dml::HandleNegativeAxis(signedAxis, outputShapeDimCount); + broadcastAxisLength = outputShape[axis]; + } + + + // Explicitly reshape each of the inputs after the first input (scale and zero point tensors). + for (uint32_t index = 1, inputCount = gsl::narrow_cast(m_inputTensorDescs.size()); index < inputCount; ++index) + { + auto edgeDesc = kernelInfo.GetInputEdgeDescription(index); + assert(edgeDesc.edgeType == MLOperatorEdgeType::Tensor); + + // Fix up the the tensor shape by filling with trailing ones. So input[2,3] with axis=0 and scale[2] + // becomes scale[2,1], so that broadcasting works correctly. 
+ std::vector inputTensorShape = kernelInfo.GetTensorShapeDescription().GetInputTensorShape(index); - // Explicitly reshape each of the inputs after the first input (scale and zero point tensors). - for (uint32_t index = 1, inputCount = gsl::narrow_cast(m_inputTensorDescs.size()); index < inputCount; ++index) + // If the input tensor is a 1D vector, then extra massaging is needed to project their + // 1D vectors back to the full shape for broadcasting along the given axis. + // The 1D vector should have a length equal to the output tensor's dimension on that axis. + if (inputTensorShape.size() == 1 && inputTensorShape != outputShape) { - auto edgeDesc = kernelInfo.GetInputEdgeDescription(index); - assert(edgeDesc.edgeType == MLOperatorEdgeType::Tensor); - - // Fix up the the tensor shape by filling with trailing ones. So input[2,3] with axis=0 and scale[2] - // becomes scale[2,1], so that broadcasting works correctly. - std::vector adjustedInputTensorShape = kernelInfo.GetTensorShapeDescription().GetInputTensorShape(index); - ML_CHECK_VALID_ARGUMENT(adjustedInputTensorShape.size() == 1); - ML_CHECK_VALID_ARGUMENT(adjustedInputTensorShape[0] == broadcastAxisLength); - adjustedInputTensorShape.insert(adjustedInputTensorShape.end(), outputShapeDimCount - 1 - axis, 1); - - m_inputTensorDescs[index] = TensorDesc( - edgeDesc.tensorDataType, - gsl::make_span(outputShape), - gsl::make_span(adjustedInputTensorShape), - TensorAxis::DoNotCoerce, - TensorAxis::W, - TensorAxis::RightAligned, - NchwDimensionCount, // minDimensionCount - 0 // guaranteedBaseOffsetAlignment - ); + ML_CHECK_VALID_ARGUMENT(inputTensorShape[0] == broadcastAxisLength); + inputTensorShape.insert(inputTensorShape.begin(), axis, 1); + inputTensorShape.insert(inputTensorShape.end(), outputShapeDimCount - 1 - axis, 1); } + // For any other shape (scalar/ND), leave it alone, and the TensorDesc constructor + // will apply broadcasting with standard elementwise alignment. 
+ + m_inputTensorDescs[index] = TensorDesc( + edgeDesc.tensorDataType, + gsl::make_span(outputShape), + gsl::make_span(inputTensorShape), + TensorAxis::DoNotCoerce, + TensorAxis::W, + TensorAxis::RightAligned, + NchwDimensionCount, // minDimensionCount + 0 // guaranteedBaseOffsetAlignment + ); } std::vector inputDescs = GetDmlInputDescs(); diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorReduce.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorReduce.cpp index 2e02df98b1d5d..18c8657921556 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorReduce.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorReduce.cpp @@ -14,44 +14,51 @@ class DmlOperatorReduce : public DmlOperator, public ReduceHelperBase DML_REDUCE_FUNCTION function ) : DmlOperator(kernelInfo), - ReduceHelperBase(kernelInfo, - kernelInfo.GetTensorShapeDescription(), - (function != DML_REDUCE_FUNCTION_ARGMAX && function != DML_REDUCE_FUNCTION_ARGMIN)) + ReduceHelperBase( + kernelInfo, + kernelInfo.GetTensorShapeDescription(), + (function != DML_REDUCE_FUNCTION_ARGMAX && function != DML_REDUCE_FUNCTION_ARGMIN) + ) { - ML_CHECK_VALID_ARGUMENT(kernelInfo.GetInputCount() == 1); + ML_CHECK_VALID_ARGUMENT(kernelInfo.GetInputCount() >= 1); ML_CHECK_VALID_ARGUMENT(kernelInfo.GetOutputCount() == 1); - DmlOperator::Initialize(kernelInfo); + std::vector> inputIndices = { 0 }; + std::vector> outputIndices = { 0 }; + DmlOperator::Initialize(kernelInfo, inputIndices, outputIndices, std::nullopt, std::nullopt, 1u); std::vector dmlAxes; std::vector reducedDims = kernelInfo.GetTensorShapeDescription().GetInputTensorShape(0); - int dimOffset = gsl::narrow_cast(m_inputTensorDescs[0].GetDimensionCount() - reducedDims.size()); for (auto& dim : m_axes) { + // Replace all reduced axes with 1 for their size. 
assert(dim < static_cast(reducedDims.size())); // ReduceHelperBase already validated this. reducedDims[dim] = 1; - dmlAxes.push_back(static_cast(dim + dimOffset)); + dmlAxes.push_back(static_cast(dim)); // Signed to unsigned which DML expects. } if (!m_keepDims) { - // DML doesn't know about keepDim and always assume the dim is preserved after reduce. + // DML expects the input and output tensors to have identical counts and doesn't know about + // ONNX's 'keepdims' attribute, keeping all dimensions anyway rather removing those of size 1. // So if m_keepDims is false, the ONNX output dim is different than DML tensor desc dim. + // // ReduceSum example: - // input dims: {3, 2, 2} - // axes: 1 - // keepDims: 0 + // input dims: {3, 2, 2} + // axes: 1 + // keepDims: 0 // - // the ONNX output expect to be of dim {3, 2}, while DML expect the output tensor desc - // dim to be {3, 1, 2}. - // + // The ONNX output expects output dims of {3, 2}, + // while DML expect the output tensor desc of {3, 1, 2}. 
m_outputTensorDescs[0] = CreateTensorDescFromOutput( - kernelInfo, - 0, - TensorAxis::DoNotCoerce, - TensorAxis::W, + kernelInfo, + 0, + TensorAxis::DoNotCoerce, + TensorAxis::W, TensorAxis::RightAligned, - reducedDims); + reducedDims, + 1 // minimumDimensionCount + ); } std::vector inputDescs = GetDmlInputDescs(); diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorResize.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorResize.cpp index f1c6531a1dddd..1eb7532742b29 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorResize.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorResize.cpp @@ -19,9 +19,10 @@ constexpr NameAndIndex coordinateTransformationModes[] = constexpr NameAndIndex nearestNeighborRoundingModes[] = { {"", 0}, - {"round_prefer_floor", 0}, - {"round_prefer_ceil", 1}, - {"floor", 2}, + {"round_prefer_floor", 0}, // round halves down + {"round_prefer_ceil", 1}, // round halves up + {"floor", 2}, // round always down + // {"ceil", 3}, // round always up (requires a DirectML API addition) }; void ComputePixelOffsetsAndScales( @@ -338,9 +339,9 @@ void CALLBACK QueryResize(IMLOperatorSupportQueryContextPrivate* context, bool* DML_OP_DEFINE_CREATION_FUNCTION(Resize10, VersionedKernel); DML_OP_DEFINE_CREATION_FUNCTION(Resize11, VersionedKernel); +DML_OP_DEFINE_CREATION_FUNCTION(Resize13, VersionedKernel); DML_OP_DEFINE_CREATION_FUNCTION(Upsample7, VersionedKernel); DML_OP_DEFINE_CREATION_FUNCTION(Upsample9, VersionedKernel); DML_OP_DEFINE_CREATION_FUNCTION(Upsample10, VersionedKernel); -DML_OP_DEFINE_CREATION_FUNCTION(Upsample13, VersionedKernel); } // namespace Dml diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSplit.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSplit.cpp index c1bde5533328b..df99a83c7cce5 100644 --- 
a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSplit.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSplit.cpp @@ -11,13 +11,18 @@ class DmlOperatorSplit : public DmlOperator, public SplitHelper public: using Self = DmlOperatorSplit; - DmlOperatorSplit(const MLOperatorKernelCreationContext& kernelInfo) + DmlOperatorSplit(const MLOperatorKernelCreationContext& kernelInfo, uint32_t opsetVersion) : DmlOperator(kernelInfo), - SplitHelper(kernelInfo, kernelInfo.GetTensorShapeDescription()) + SplitHelper(kernelInfo, kernelInfo.GetTensorShapeDescription(), opsetVersion) { - ML_CHECK_VALID_ARGUMENT(kernelInfo.GetInputCount() == 1, "DML only supports split on a single input tensor."); - ML_CHECK_VALID_ARGUMENT(kernelInfo.GetOutputCount() > 0, "Runtime error no output stream specified."); - DmlOperator::Initialize(kernelInfo); + ML_CHECK_VALID_ARGUMENT(kernelInfo.GetInputCount() > 0, "Splits needs an input tensor."); + ML_CHECK_VALID_ARGUMENT(kernelInfo.GetOutputCount() > 0, "Splits needs an output tensor."); + + // Use only the first input tensor. Later opset versions may pass parameters + // like splits as dynamic parameters via tensors rather than constants, + // and that second parameter is CPU based. 
+ std::vector> inputIndices = {0}; + DmlOperator::Initialize(kernelInfo, inputIndices, std::nullopt); uint32_t dmlAxis = GetDmlAdjustedAxis(m_axis, kernelInfo, m_inputTensorDescs.front().GetDimensionCount()); @@ -36,6 +41,8 @@ class DmlOperatorSplit : public DmlOperator, public SplitHelper } }; -DML_OP_DEFINE_CREATION_FUNCTION(Split, DmlOperatorSplit); +DML_OP_DEFINE_CREATION_FUNCTION(Split7, VersionedKernel); +DML_OP_DEFINE_CREATION_FUNCTION(Split11, VersionedKernel); +DML_OP_DEFINE_CREATION_FUNCTION(Split13, VersionedKernel); } // namespace Dml diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp index 4c2937ee49489..0974b006dd3f9 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp @@ -4,7 +4,7 @@ #include "precomp.h" #include "OperatorRegistration.h" #include "core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h" -#include "core/providers/dml/OperatorAuthorHelper/OperatorRegistration.h" +#include "core/providers/dml/OperatorAuthorHelper/OperatorVersions.h" #include "core/providers/dml/DmlExecutionProvider/inc/IWinmlExecutionProvider.h" #include "core/framework/customregistry.h" #include "onnx/defs/operator_sets.h" @@ -103,7 +103,9 @@ DML_OP_EXTERN_CREATION_FUNCTION(GRU); DML_OP_EXTERN_CREATION_FUNCTION(LSTM); DML_OP_EXTERN_CREATION_FUNCTION(Gather); DML_OP_EXTERN_CREATION_FUNCTION(Flatten); -DML_OP_EXTERN_CREATION_FUNCTION(Split); +DML_OP_EXTERN_CREATION_FUNCTION(Split7); +DML_OP_EXTERN_CREATION_FUNCTION(Split11); +DML_OP_EXTERN_CREATION_FUNCTION(Split13); DML_OP_EXTERN_CREATION_FUNCTION(Transpose); DML_OP_EXTERN_CREATION_FUNCTION(Tile); DML_OP_EXTERN_CREATION_FUNCTION(Concat); @@ -165,7 +167,6 @@ DML_OP_EXTERN_CREATION_FUNCTION(ImageScaler); 
DML_OP_EXTERN_CREATION_FUNCTION(Upsample7); DML_OP_EXTERN_CREATION_FUNCTION(Upsample9); DML_OP_EXTERN_CREATION_FUNCTION(Upsample10); -DML_OP_EXTERN_CREATION_FUNCTION(Upsample13); DML_OP_EXTERN_CREATION_FUNCTION(Sigmoid); DML_OP_EXTERN_CREATION_FUNCTION(HardSigmoid); DML_OP_EXTERN_CREATION_FUNCTION(Tanh); @@ -229,6 +230,7 @@ DML_OP_EXTERN_CREATION_FUNCTION(Scatter11); DML_OP_EXTERN_CREATION_FUNCTION(Scatter13); DML_OP_EXTERN_CREATION_FUNCTION(Resize10); DML_OP_EXTERN_CREATION_FUNCTION(Resize11); +DML_OP_EXTERN_CREATION_FUNCTION(Resize13); DML_OP_EXTERN_CREATION_FUNCTION(ConstantOfShape); DML_OP_EXTERN_CREATION_FUNCTION(IsInf); DML_OP_EXTERN_CREATION_FUNCTION(Mod); @@ -303,6 +305,7 @@ constexpr static std::array supportedTypeListLogica constexpr static std::array supportedTypeListSigned = { SupportedTensorDataTypes::Float16to32 | SupportedTensorDataTypes::Int64 | SupportedTensorDataTypes::Int32 | SupportedTensorDataTypes::Int16 | SupportedTensorDataTypes::Int8 }; constexpr static std::array supportedTypeListRange = {SupportedTensorDataTypes::Int16|SupportedTensorDataTypes::Int32|SupportedTensorDataTypes::Int64|SupportedTensorDataTypes::Float32}; constexpr static std::array supportedTypeListResize11 = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Float16to32 /* ROI read by CPU */}; +constexpr static std::array supportedTypeListResize13 = supportedTypeListResize11; constexpr static std::array supportedTypeListInteger = {SupportedTensorDataTypes::Int8|SupportedTensorDataTypes::UInt8, SupportedTensorDataTypes::Int8|SupportedTensorDataTypes::UInt8, SupportedTensorDataTypes::Int32 }; constexpr static std::array supportedTypeListInteger8 = {SupportedTensorDataTypes::Int8|SupportedTensorDataTypes::UInt8 }; constexpr static std::array supportedTypeListRoiAlign = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Int32|SupportedTensorDataTypes::Int64 }; @@ -333,7 +336,7 @@ constexpr auto requiredConstantCpuInputs(Args... 
args) return std::make_pair(inputs, static_cast(sizeof...(args))); } -// Define a single row of registration information. +// Define a single row of OperatorRegistrationInformation. #define REG_INFO(version, operatorName, ...) \ #operatorName, OnnxOperatorSet##version::sc_sinceVer_##operatorName, onnxruntime::kOnnxDomain, Create##operatorName, ShapeInferenceFunction, false, ##__VA_ARGS__, @@ -341,12 +344,10 @@ constexpr auto requiredConstantCpuInputs(Args... args) #define REG_INFO_VER(version, operatorName, ...) \ #operatorName, OnnxOperatorSet##version::sc_sinceVer_##operatorName, onnxruntime::kOnnxDomain, Create##operatorName##version, ShapeInferenceFunction, false, ##__VA_ARGS__, -// Identity operators use Copy, alias their first input, and require floating point formats -// for usage in the graph, besides constant inputs. This is because they currently use -// element-wise identity operators in the graph for striding support, but issue actual copies -// outside the graph. Element-wise identity currently only supports floating point types. +// Identity operators use Copy, alias their first input, and use elementwise identity operators +// when needed for striding support, but issue actual copies outside the graph. #define REG_INFO_ID(version, operatorName, ...) \ - #operatorName, OnnxOperatorSet##version::sc_sinceVer_##operatorName, onnxruntime::kOnnxDomain, CreateCopy, ShapeInferenceFunction, true, ##__VA_ARGS__, + #operatorName, OnnxOperatorSet##version::sc_sinceVer_##operatorName, onnxruntime::kOnnxDomain, CreateCopy, ShapeInferenceFunction, true, ##__VA_ARGS__, // MS-domain operators #define REG_INFO_MS(version, operatorName, ...) 
\ @@ -398,8 +399,9 @@ constexpr static OperatorRegistrationInformation operatorRegistrationInformation {REG_INFO_MS( 1, ConvTransposeWithDynamicPads, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported, requiredConstantCpuInputs(2))}, // Data Reorganization Layers - {REG_INFO( 7, Split, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, - {REG_INFO( 11, Split, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, // Adds negative axis. + {REG_INFO_VER( 7, Split, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, + {REG_INFO_VER( 11, Split, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, // Adds negative axis. + {REG_INFO_VER( 13, Split, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported, requiredConstantCpuInputs(1))}, // Moves splits from constant parameter to dynamic input. {REG_INFO( 7, Transpose, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, {REG_INFO( 13, Transpose, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, {REG_INFO( 7, Concat, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, @@ -448,10 +450,12 @@ constexpr static OperatorRegistrationInformation operatorRegistrationInformation {REG_INFO_ID( 13, Flatten, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, {REG_INFO_ID( 7, Squeeze, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, {REG_INFO_ID( 11, Squeeze, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, + {REG_INFO_ID( 13, Squeeze, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported, requiredConstantCpuInputs(1))}, {REG_INFO_ID( 7, Unsqueeze, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported)}, {REG_INFO_ID( 11, Unsqueeze, typeNameListDefault, 
supportedTypeListAllScalars, DmlGraphSupport::Supported)}, - {REG_INFO_ID( 7, Reshape, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported, requiredConstantCpuInputs(1))}, - {REG_INFO_ID( 13, Reshape, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported, requiredConstantCpuInputs(1))}, + {REG_INFO_ID( 13, Unsqueeze, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported, requiredConstantCpuInputs(1))}, + {REG_INFO_ID( 7, Reshape, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported, requiredConstantCpuInputs(1))}, + {REG_INFO_ID( 13, Reshape, typeNameListDefault, supportedTypeListAllScalars, DmlGraphSupport::Supported, requiredConstantCpuInputs(1))}, // Elementwise {REG_INFO( 7, Sqrt, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)}, @@ -505,7 +509,9 @@ constexpr static OperatorRegistrationInformation operatorRegistrationInformation {REG_INFO( 7, Atan, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)}, {REG_INFO( 7, Affine, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)}, {REG_INFO( 10, QuantizeLinear, typeNameListTwo, supportedTypeListQuantizeLinear, DmlGraphSupport::Supported)}, + {REG_INFO( 13, QuantizeLinear, typeNameListTwo, supportedTypeListQuantizeLinear, DmlGraphSupport::Supported)}, {REG_INFO( 10, DequantizeLinear, typeNameListTwo, supportedTypeListDequantizeLinear, DmlGraphSupport::Supported)}, + {REG_INFO( 13, DequantizeLinear, typeNameListTwo, supportedTypeListDequantizeLinear, DmlGraphSupport::Supported)}, {REG_INFO_MS( 1, QuantizeLinear, typeNameListTwo, supportedTypeListQuantize, DmlGraphSupport::Supported)}, {REG_INFO_MS( 1, DequantizeLinear, typeNameListTwo, supportedTypeListQuantize, DmlGraphSupport::Supported)}, {REG_INFO( 9, Sign, typeNameListDefault, supportedTypeListNumericDefault, DmlGraphSupport::Supported)}, @@ -522,6 +528,7 @@ constexpr static 
OperatorRegistrationInformation operatorRegistrationInformation {REG_INFO( 9, Where, typeNameListWhere, supportedTypeListWhere, DmlGraphSupport::Supported)}, {REG_INFO( 7, ReduceSum, typeNameListDefault, supportedTypeListFloat16to32Ints32to64, DmlGraphSupport::Supported)}, {REG_INFO( 11, ReduceSum, typeNameListDefault, supportedTypeListFloat16to32Ints32to64, DmlGraphSupport::Supported)}, + {REG_INFO( 13, ReduceSum, typeNameListDefault, supportedTypeListFloat16to32Ints32to64, DmlGraphSupport::Supported, requiredConstantCpuInputs(1))}, {REG_INFO_VER( 12, Einsum, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported, requiredConstantCpuInputs(), std::nullopt, QueryEinSum )}, {REG_INFO( 7, ReduceMean, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)}, {REG_INFO( 11, ReduceMean, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)}, @@ -588,9 +595,9 @@ constexpr static OperatorRegistrationInformation operatorRegistrationInformation {REG_INFO_VER( 7, Upsample, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)}, {REG_INFO_VER( 9, Upsample, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported, requiredConstantCpuInputs(1) /*scales*/)}, {REG_INFO_VER( 10, Upsample, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported, requiredConstantCpuInputs(1) /*scales*/)}, - {REG_INFO_VER( 13, Upsample, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported, requiredConstantCpuInputs(1) /*scales*/)}, {REG_INFO_VER( 10, Resize, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported, requiredConstantCpuInputs(1) /*scales*/)}, {REG_INFO_VER( 11, Resize, typeNameListTwo, supportedTypeListResize11, DmlGraphSupport::Supported, requiredConstantCpuInputs(1, 2, 3) /*roi, scales, sizes*/, std::nullopt, QueryResize)}, + {REG_INFO_VER( 13, Resize, typeNameListTwo, supportedTypeListResize13, 
DmlGraphSupport::Supported, requiredConstantCpuInputs(1, 2, 3) /*roi, scales, sizes*/, std::nullopt, QueryResize)}, // Activation Functions {REG_INFO( 7, Sigmoid, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)}, diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorUtility.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorUtility.cpp index 2349f1c9bd68b..b9302ed01e159 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorUtility.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorUtility.cpp @@ -3,7 +3,7 @@ #include "precomp.h" #include "OperatorUtility.h" -#include "core/providers/dml/OperatorAuthorHelper/OperatorRegistration.h" +#include "core/providers/dml/OperatorAuthorHelper/OperatorVersions.h" namespace Dml { diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/precomp.h b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/precomp.h index d5259e732e109..b787bbbcaed5c 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/precomp.h +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/precomp.h @@ -7,4 +7,4 @@ #include "core/providers/dml/OperatorAuthorHelper/Attributes.h" #include "core/providers/dml/OperatorAuthorHelper/OperatorHelper.h" #include "DmlOperator.h" -#include "OperatorRegistration.h" \ No newline at end of file +#include "OperatorRegistration.h" diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/Attributes.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/Attributes.h index 95ce76a9bf95b..71392bf155595 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/Attributes.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/Attributes.h @@ -47,7 +47,6 @@ namespace AttrName static constexpr const char* InputForget = "input_forget"; static constexpr const char* K = "k"; static 
constexpr const char* KeepDims = "keepdims"; - static constexpr const char* SelectLastIndex = "select_last_index"; static constexpr const char* KernelShape = "kernel_shape"; static constexpr const char* LinearBeforeReset = "linear_before_reset"; static constexpr const char* Lambda = "lambd"; // Deliberate typo to match ONNX spec. @@ -58,6 +57,7 @@ namespace AttrName static constexpr const char* Min = "min"; static constexpr const char* Mode = "mode"; static constexpr const char* NearestMode = "nearest_mode"; + static constexpr const char* NoopWithEmptyAxes = "noop_with_empty_axes"; static constexpr const char* NormalizeVariance = "normalize_variance"; static constexpr const char* P = "p"; static constexpr const char* OutputHeight = "output_height"; @@ -72,6 +72,7 @@ namespace AttrName static constexpr const char* Scale = "scale"; static constexpr const char* Scales = "scales"; static constexpr const char* Seed = "seed"; + static constexpr const char* SelectLastIndex = "select_last_index"; static constexpr const char* Shape = "shape"; static constexpr const char* Size = "size"; static constexpr const char* Sorted = "sorted"; diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h index cf149601b3ce5..41a4866755805 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/MLOperatorAuthorHelper.h @@ -190,10 +190,10 @@ class MLOperatorTensorShapeDescription return ret; } - Microsoft::WRL::ComPtr GetInterface() const { return m_impl; } + Microsoft::WRL::ComPtr GetInterface() const noexcept { return m_impl; } protected: - Microsoft::WRL::ComPtr m_impl ; + Microsoft::WRL::ComPtr m_impl; }; class MLOperatorAttributes @@ -203,6 +203,12 @@ class MLOperatorAttributes { } + // For cases of interop where the caller needs to pass the unwrapped class across a boundary. 
+ Microsoft::WRL::ComPtr GetInterface() const noexcept + { + return m_impl; + } + uint32_t GetAttributeElementCount( _In_z_ MLConstStringParam name, MLOperatorAttributeType type) const @@ -638,7 +644,7 @@ class MLOperatorTypeInferenceContext : public MLOperatorAttributes MLOperatorTypeInferenceContext(IMLOperatorTypeInferenceContext* impl) : MLOperatorAttributes(impl), m_impl(impl) {} // For cases of interop where the caller needs to pass the unwrapped class across a boundary. - Microsoft::WRL::ComPtr GetInterface() const noexcept + Microsoft::WRL::ComPtr GetInterface() const noexcept { return m_impl; } diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.cpp b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.cpp index 1e7f1be00bdd5..646d1d7a969f2 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.cpp +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.cpp @@ -617,6 +617,122 @@ namespace OperatorHelper return outputShape; } + void ConvolutionHelperBase::InitializeKernelAndShapes(const IShapeInformationAdapter& shapeInformation) + { + const std::vector inputDimensions = shapeInformation.GetInputTensorShape(m_inputTensorIndex); + const std::vector filterDims = shapeInformation.GetInputTensorShape(m_filterTensorIndex); + + ML_CHECK_VALID_ARGUMENT( + inputDimensions.size() >= 3 && inputDimensions.size() <= 5, + "Input dimensions must be: 3, 4, 5." 
+ ); + + ResolvingPadding(inputDimensions); + + m_outputShapes.resize(1); + m_outputShapes[0] = InitializeKernelOutputDimensions(inputDimensions, m_kernel); + m_outputShapes[0].GetShape()[C] = filterDims[K]; + } + + void ConvolutionHelperBase::InitializeKernelAndShapesTransposed( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + bool hasDynamicPads + ) + { + auto& attributes = kernelInformation.GetAttributes(); + std::vector outputShape = attributes.GetOptionalAttributeVectorInt32(AttrName::OutputShape); + if (!outputShape.empty()) + { + ML_CHECK_VALID_ARGUMENT( + outputShape.size() >= m_kernel.spatialDimensionCount, + "The output shape must equal the number of spatial dimensions" + ); + } + + const std::vector inputDimensions = shapeInformation.GetInputTensorShape(m_inputTensorIndex); + const std::vector filterDims = shapeInformation.GetInputTensorShape(m_filterTensorIndex); + + ML_CHECK_VALID_ARGUMENT(inputDimensions.size() > NonspatialDimensionCount, "Input dimensions must be >= 3"); + + if (hasDynamicPads) + { + MLOperatorTensor padsTensor = kernelInformation.GetConstantInputTensor(2); + const std::vector& padsTensorDimensions = padsTensor.GetShape(); + ML_CHECK_VALID_ARGUMENT(padsTensorDimensions.size() == 1, "Pads dimensions must equal 1"); + const size_t dimCount = padsTensorDimensions[0]; + ML_CHECK_VALID_ARGUMENT(dimCount == 2 * NchwSpatialDimensionCount, "Pads count must equal 4"); + const int64_t* padsData = padsTensor.GetData(); + + for (size_t i = 0; i < dimCount; ++i) + { + ML_CHECK_VALID_ARGUMENT(padsData[i] >= 0, "Padding values must be greater than or equal to 0"); + if (i < dimCount / 2) + { + m_kernel.startPadding[i] = gsl::narrow_cast(padsData[i]); + } + else + { + m_kernel.endPadding[i - dimCount/2] = gsl::narrow_cast(padsData[i]); + } + } + } + else + { + ResolvingPadding(inputDimensions); + } + + m_outputShapes.resize(1); + m_outputShapes[0] = 
InitializeKernelOutputDimsTranspose(inputDimensions, m_kernel); + static_assert(C < NonspatialDimensionCount); + assert(m_outputShapes[0].GetShape().size() > C); + m_outputShapes[0].GetShape()[C] = filterDims[C] * m_groupCount; + + if (!outputShape.empty()) + { + // Start padding, end padding, and output padding are all ignored if output shape is set. + std::fill(m_kernel.outputPadding, m_kernel.outputPadding + m_kernel.spatialDimensionCount, 0); + + if (outputShape.size() > 2) + { + ML_CHECK_VALID_ARGUMENT(outputShape[outputShape.size() - 3] == gsl::narrow_cast(m_outputShapes[0].GetShape()[C]), "Output channel must be equivalent to filter channel."); + } + + for (size_t i = 0; i < m_kernel.spatialDimensionCount; ++i) + { + size_t outputIndex = outputShape.size() - m_kernel.spatialDimensionCount + i; + ML_CHECK_VALID_ARGUMENT(outputShape[outputIndex] >= gsl::narrow_cast(inputDimensions[H + i]), "Output dimension cannot be smaller than input dimension."); + m_outputShapes[0].GetShape()[H + i] = outputShape[outputIndex]; + } + + const int dimOffset = gsl::narrow_cast(inputDimensions.size() - m_kernel.spatialDimensionCount); + + for (size_t i = 0; i < m_kernel.spatialDimensionCount; ++i) + { + int stride = m_kernel.strides[i]; + int windowSize = m_kernel.windowSize[i]; + + // Compute padding such that in reverse order, the logical input (m_outputShapes below) is fully defined + // for a convolution over the logical output region (inputDimensions below). + // + // The padding required is the first windowSize element (for the first logical output element), + // plus (logicalOutput - 1) steps of stride (the distance between each windowed set of logical + // input elements), minus the actual logical input size. + int paddings = gsl::narrow_cast((inputDimensions[i + dimOffset] - 1) * stride + windowSize - m_outputShapes[0].GetShape()[i + dimOffset]); + paddings = std::max(0, paddings); + + m_kernel.startPadding[i] = m_kernel.autoPadSameUpper ? 
(paddings + 1) / 2 : paddings / 2; + m_kernel.endPadding[i] = paddings - m_kernel.startPadding[i]; + } + } + } + + std::vector ConvolutionHelperBase::GetOutputShapes(const MLShapeInferenceContext& shapeInformation) const + { + ORT_UNUSED_PARAMETER(shapeInformation); + return m_outputShapes; + } + void ConvolutionHelperBase::ResolvingPadding(gsl::span inputDimensions) { ResolveAutoPadding(m_kernel, inputDimensions); @@ -638,13 +754,30 @@ namespace OperatorHelper } void SplitHelper::Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions + IKernelInformationAdapter const& kernelInformation, + IShapeInformationAdapter const& shapeInformation, + uint32_t opsetVersion ) { + auto& operatorAttributes = kernelInformation.GetAttributes(); + if (opsetVersion >= 13) // Axes are a dynamic input parameter. + { + // The tensor is optional, which if empty, means to default to equal splits. + if (kernelInformation.IsInputValid(1)) + { + ReadCpuLocalTensorIntoInt32(kernelInformation.GetConstantInputTensor(1), /*out*/ m_split); + } + } + else // Axes were a constant attribute parameter. 
+ { + m_split = operatorAttributes.GetOptionalAttributeVectorInt32(AttrName::Split); + } + + const std::vector inputDimensions = shapeInformation.GetInputTensorShape(0); + const uint32_t inputDimCount = gsl::narrow_cast(inputDimensions.size()); - m_axis = static_cast(HandleNegativeAxis(operatorAttributes.GetOptionalAttribute(AttrName::Axis, 0), inputDimCount)); - m_split = operatorAttributes.GetOptionalAttributeVectorInt32(AttrName::Split); + const uint32_t axis = operatorAttributes.GetOptionalAttribute(AttrName::Axis, 0); + m_axis = static_cast(HandleNegativeAxis(axis, inputDimCount)); } std::vector SplitHelper::GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const @@ -699,13 +832,124 @@ namespace OperatorHelper return edgeShapes; } + void SliceHelper::Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + uint32_t opsetVersion + ) + { + auto& attributes = kernelInformation.GetAttributes(); + std::vector inputDimensions = shapeInformation.GetInputTensorShape(0); + + std::vector starts; + std::vector ends; + std::vector axes; + std::vector steps; + + if (opsetVersion >= 10) + { + // Get starts, ends, optional axes, and optional steps from constant tensor inputs. + ReadCpuLocalTensorIntoInt32(kernelInformation.GetConstantInputTensor(1), /*out*/ starts); + ReadCpuLocalTensorIntoInt32(kernelInformation.GetConstantInputTensor(2), /*out*/ ends); + if (kernelInformation.IsInputValid(3)) + { + ReadCpuLocalTensorIntoInt32(kernelInformation.GetConstantInputTensor(3), /*out*/ axes); + } + if (kernelInformation.IsInputValid(4)) + { + ReadCpuLocalTensorIntoInt32(kernelInformation.GetConstantInputTensor(4), /*out*/ steps); + } + } + else if (opsetVersion >= 7) + { + // Read starts, ends, and axes from attributes. 
+ starts = attributes.GetOptionalAttributeVectorInt32(AttrName::Starts); + ends = attributes.GetOptionalAttributeVectorInt32(AttrName::Ends); + axes = attributes.GetOptionalAttributeVectorInt32(AttrName::Axes); + } + + const uint32_t inputDimensionCount = gsl::narrow_cast(inputDimensions.size()); + HandleNegativeAxes(/*inout*/ axes, inputDimensionCount); + + ML_CHECK_VALID_ARGUMENT(starts.size() == ends.size(), "'starts' must equal 'ends' in size."); + ML_CHECK_VALID_ARGUMENT(steps.empty() || steps.size() == axes.size(), "'steps' must equal 'axes' in size, or 'steps' must be empty."); + ML_CHECK_VALID_ARGUMENT(axes.empty() || starts.size() == axes.size(), "'axes' must equal 'starts' in size, or 'axes' must be empty."); + + m_outputDimensions.assign(inputDimensions.begin(), inputDimensions.end()); + m_offsets.resize(m_outputDimensions.size()); + m_sizes.resize(m_outputDimensions.size()); + m_strides.resize(m_outputDimensions.size(), 1); // Default initialize to all steps to 1's. + + // Set initial defaults lest 'starts' and 'ends' arrays are shorter than the dimension count. + std::copy(inputDimensions.begin(), inputDimensions.begin() + m_outputDimensions.size(), m_sizes.begin()); + + // Clamp selected dimensions to given 'starts' and 'ends'. + for (int i = 0, ci = gsl::narrow_cast(starts.size()); i < ci; ++i) + { + int dimIndex = axes.empty() ? i : axes[i]; + int stride = steps.empty() ? 1 : steps[i]; + ML_CHECK_VALID_ARGUMENT(static_cast(dimIndex) < static_cast(inputDimensions.size()), "'axes' must be valid with within actual input dimensions."); + ML_CHECK_VALID_ARGUMENT(stride != 0, "'steps' must not be 0."); + + // Positive values are offsets from 0. + // Negative values are offsets from back of the dimension's size. + // INT_MIN is a special value in ONNX which means to treat it as the smallest + // possible value, rather than the usual reversed from-the-back semantics. 
+ int dim = gsl::narrow_cast(inputDimensions[dimIndex]); + int start = (starts[i] < 0 && starts[i] > INT_MIN) ? (starts[i] + dim) : starts[i]; + int end = (ends[i] < 0 && ends[i] > INT_MIN) ? (ends[i] + dim) : ends[i]; + + // For negative strides, the ONNX start and end values are off-by-one. + // So fix them such that the start value remains the minimum extent + // of the slice window, and end remains the maximum exclusive extent. + if (stride < 0) + { + std::swap(start, end); + start += (start < INT_MAX) ? 1 : 0; // Avoid overflow wrap. + end += (end < INT_MAX) ? 1 : 0; + } + + // Clamp the dimensions to the slice extents. + // Clamp negative numbers to 0, per case test_slice_start_out_of_bounds. + start = std::max(start, 0); + end = std::min(end, dim); + int size = std::max(end - start, 0); + + // Set the input window offsets/sizes, and compute output size based on input + // window size (rounding up). + // e.g. a window size 13 and step 3 yields 5 output elements. + int absoluteStride = abs(stride); + m_outputDimensions[dimIndex] = (size / absoluteStride) + (size % absoluteStride != 0); + m_offsets[dimIndex] = start; + m_strides[dimIndex] = stride; + m_sizes[dimIndex] = gsl::narrow_cast(size); + } + } + std::vector SliceHelper::GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const { return { m_outputDimensions }; } - void PaddingHelper::Initialize(const MLOperatorAttributes& operatorAttributes, gsl::span padding, uint32_t opsetVersion) + void PaddingHelper::Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + uint32_t opsetVersion + ) { + auto& attributes = kernelInformation.GetAttributes(); + + std::vector padding; + if (opsetVersion >= 11) + { + MLOperatorTensor padsTensor = kernelInformation.GetConstantInputTensor(1); + ReadCpuLocalTensorIntoInt32(padsTensor, /*out*/ padding); + } + else + { + padding = attributes.GetOptionalAttributeVectorInt32(AttrName::Pads); + } + 
ML_CHECK_VALID_ARGUMENT(padding.size() % 2 == 0, "Padding must be even count, including begin/end pairs."); uint32_t dimCount = gsl::narrow_cast(padding.size() / 2); @@ -746,10 +990,13 @@ namespace OperatorHelper } void GatherHelper::Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation ) { + auto& operatorAttributes = kernelInformation.GetAttributes(); + std::vector inputDimensions = shapeInformation.GetInputTensorShape(0); + int32_t signedOnnxAxis = operatorAttributes.GetOptionalAttribute(AttrName::Axis, 0); uint32_t inputRank = gsl::narrow_cast(inputDimensions.size()); m_axis = HandleNegativeAxis(signedOnnxAxis, inputRank); @@ -865,6 +1112,39 @@ namespace OperatorHelper return { EdgeShapes(std::move(outputDimensions)) }; } + void ReduceHelperBase::Initialize( + IKernelInformationAdapter const& kernelInformation, + IShapeInformationAdapter const& shapeInformation, + bool usingMultipleAxes + ) + { + auto& attributes = kernelInformation.GetAttributes(); + m_keepDims = attributes.GetOptionalAttribute(AttrName::KeepDims, 1); + m_selectLastIndex = attributes.GetOptionalAttribute(AttrName::SelectLastIndex, 0); + m_noopWithEmptyAxes = attributes.GetOptionalAttribute(AttrName::NoopWithEmptyAxes, 0); + + if (usingMultipleAxes) // Read full axis list. e.g. ReduceSum. + { + if (kernelInformation.IsInputValid(1)) // Axes are from a dynamic input parameter. + { + ReadCpuLocalTensorIntoInt32(kernelInformation.GetConstantInputTensor(1), /*out*/ m_axes); + } + else // Axes were a constant attribute parameter. + { + m_axes = attributes.GetOptionalAttributeVectorInt32(AttrName::Axes); + } + } + else // Only read a single axis. e.g. ArgMin/ArgMax. 
+ { + int axis = attributes.GetOptionalAttribute(AttrName::Axis, 0); + m_axes.push_back(axis); + } + + std::vector inputShape = shapeInformation.GetInputTensorShape(0); + HandleNegativeAxes(/*inout*/ m_axes, gsl::narrow_cast(inputShape.size())); + HandleEmptyAxes(/*inout*/ m_axes, inputShape, m_noopWithEmptyAxes); + } + std::vector ReduceHelperBase::GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const { // Example: @@ -909,13 +1189,18 @@ namespace OperatorHelper } } - void ReduceHelperBase::AdjustAxesAndOutputShape(const std::vector& inputShape) + void ReduceHelperBase::HandleEmptyAxes( + /*inout*/std::vector& axes, + gsl::span inputShape, + bool treatEmptyAsNop + ) { - // If axes is not specified, reduce over all the dimensions - if (m_axes.empty()) + // If axes is not specified, reduce over all the dimensions. + // If empty axes should be treated as a nop, then just leave them as-is. + if (axes.empty() && !treatEmptyAsNop) { - m_axes.resize(inputShape.size()); - std::iota(m_axes.begin(), m_axes.end(), 0); + axes.resize(inputShape.size()); + std::iota(axes.begin(), axes.end(), 0); } } @@ -1236,7 +1521,32 @@ namespace OperatorHelper return {std::move(outputDims)}; } - + + void TopKHelper::Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + uint32_t opsetVersion + ) + { + auto& attributes = kernelInformation.GetAttributes(); + int32_t k; + if (opsetVersion >= 10) + { + MLOperatorTensor kTensor = kernelInformation.GetConstantInputTensor(1); + k = gsl::narrow_cast(ReadScalarTensorCastToInt64(kTensor)); + } + else + { + k = attributes.template GetOptionalAttribute(AttrName::K, -1); + } + ML_CHECK_VALID_ARGUMENT(k >= 0, "Attribute k is missing or negative."); + m_k = k; + + auto inputShape = shapeInformation.GetInputTensorShape(0); + int32_t axis = attributes.template GetOptionalAttribute(AttrName::Axis, -1); + m_axis = HandleNegativeAxis(axis, gsl::narrow_cast(inputShape.size())); + } + 
std::vector TopKHelper::GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const { assert(m_axis >= 0); @@ -1528,11 +1838,20 @@ namespace OperatorHelper } void SqueezeHelper::Initialize( - gsl::span axes, - gsl::span inputDimensions + IKernelInformationAdapter const& kernelInformation, + IShapeInformationAdapter const& shapeInformation, + uint32_t opsetVersion ) { - m_axes.assign(axes.begin(), axes.end()); + if (opsetVersion >= 13) // Axes are a dynamic input parameter. + { + ReadCpuLocalTensorIntoInt32(kernelInformation.GetConstantInputTensor(1), /*out*/ m_axes); + } + else // Axes were a constant attribute parameter. + { + m_axes = kernelInformation.GetAttributes().GetOptionalAttributeVectorInt32(AttrName::Axes); + } + std::vector inputDimensions = shapeInformation.GetInputTensorShape(0); HandleNegativeAxes(/*inout*/ m_axes, gsl::narrow_cast(inputDimensions.size())); std::sort(m_axes.begin(), m_axes.end()); } @@ -1571,12 +1890,22 @@ namespace OperatorHelper } void UnsqueezeHelper::Initialize( - gsl::span axes, - gsl::span inputDimensions + IKernelInformationAdapter const& kernelInformation, + IShapeInformationAdapter const& shapeInformation, + uint32_t opsetVersion ) { - m_axes.assign(axes.begin(), axes.end()); - const uint32_t outputDimensionCount = gsl::narrow_cast(inputDimensions.size() + axes.size()); + if (opsetVersion >= 13) // Axes are a dynamic input parameter. + { + ReadCpuLocalTensorIntoInt32(kernelInformation.GetConstantInputTensor(1), /*out*/ m_axes); + } + else // Axes were a constant attribute parameter. 
+ { + m_axes = kernelInformation.GetAttributes().GetOptionalAttributeVectorInt32(AttrName::Axes); + } + std::vector inputDimensions = shapeInformation.GetInputTensorShape(0); + + const uint32_t outputDimensionCount = gsl::narrow_cast(inputDimensions.size() + m_axes.size()); HandleNegativeAxes(/*inout*/ m_axes, outputDimensionCount); std::sort(m_axes.begin(), m_axes.end()); } @@ -1615,7 +1944,19 @@ namespace OperatorHelper return { EdgeShapes(outputDimensions) }; } - + + void ReshapeHelper::Initialize(IKernelInformationAdapter const& kernelInformation) + { + ML_CHECK_VALID_ARGUMENT(kernelInformation.GetInputCount() >= 2); + ML_CHECK_VALID_ARGUMENT(kernelInformation.GetOutputCount() >= 1); + + // The 'shape' tensor is a 1D tensor holding the new shape to reshape to, + // and the first element of its own shape holds how many dimensions there + // will be for the output. + MLOperatorTensor shapeTensor = kernelInformation.GetConstantInputTensor(1); + ReadCpuLocalTensorIntoInt32(shapeTensor, /*out*/ m_shapeDims); + } + std::vector ReshapeHelper::GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const { // Fill in the output dimensions. The shape may have -1 in a single dimension, @@ -1709,15 +2050,84 @@ namespace OperatorHelper return { EdgeShapes(desiredTensorShape) }; } + void TileHelper::Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation + ) + { + m_inputDimensions = shapeInformation.GetInputTensorShape(0); + + // Read the repeats tensor. 
+ const std::vector repeatsTensorDimensions = shapeInformation.GetInputTensorShape(1); + ML_CHECK_VALID_ARGUMENT(repeatsTensorDimensions.size() == 1, "Tile's repeats tensor must be 1D."); + const size_t dimCount = repeatsTensorDimensions[0]; + + MLOperatorTensor repeatsTensor = kernelInformation.GetConstantInputTensor(1); + const int64_t* repeatsData = repeatsTensor.GetData(); + ML_CHECK_VALID_ARGUMENT(m_inputDimensions.size() == dimCount, "Tile's repeats tensor must be the same dimension count as the input tensor."); + ML_CHECK_VALID_ARGUMENT(repeatsTensor.IsCpuData(), "Tile's repeats tensor must be CPU Tensor."); + + for (size_t i = 0; i < dimCount; ++i) + { + ML_CHECK_VALID_ARGUMENT(repeatsData[i] >= 0, "Repeat values should be >= 0."); + m_repeatsData.push_back(gsl::narrow_cast(repeatsData[i])); + } + + // Update the computed output shape accordingly, repeat every axis's length by the repeat count. + m_outputDimensions.assign(m_inputDimensions.begin(), m_inputDimensions.end()); + + for (size_t dimIndex = 0; dimIndex < dimCount; ++dimIndex) + { + m_outputDimensions[dimIndex] *= m_repeatsData[dimIndex]; + } + } + std::vector TileHelper::GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const { return { EdgeShapes(m_outputDimensions) }; } void ResizeHelper::Initialize( - gsl::span outputSizes + IKernelInformationAdapter const& kernelInformation, + IShapeInformationAdapter const& shapeInformation, + uint32_t opsetVersion ) { + auto& attributes = kernelInformation.GetAttributes(); + m_inputDimensions = shapeInformation.GetInputTensorShape(0); + std::vector outputSizes; + + if (opsetVersion >= 11) + { + if (kernelInformation.IsInputValid(1)) + { + MLOperatorTensor regionOfInterestTensor = kernelInformation.GetConstantInputTensor(1); + ReadCpuLocalTensorIntoFloat32(regionOfInterestTensor, /*out*/ m_regionOfInterest); + } + if (kernelInformation.IsInputValid(2)) + { + MLOperatorTensor scalesTensor = kernelInformation.GetConstantInputTensor(2); + 
ReadCpuLocalTensorIntoFloat32(scalesTensor, /*out*/ m_scales); + } + if (kernelInformation.IsInputValid(3)) + { + MLOperatorTensor outputSizesTensor = kernelInformation.GetConstantInputTensor(3); + ReadCpuLocalTensorIntoInt32(outputSizesTensor, /*out*/ outputSizes); + } + } + else if (opsetVersion >= 9) + { + // Read the scales from the 2nd tensor. + // Compatible with Upsample-9/Upsample-10 and Resize-10. + MLOperatorTensor scalesTensor = kernelInformation.GetConstantInputTensor(1); + ReadCpuLocalTensorIntoFloat32(scalesTensor, /*out*/ m_scales); + } + else + { + // From attribute, compatible with Upsample-7. + m_scales = attributes.template GetOptionalAttribute>(AttrName::Scales, std::vector()); + } + assert(m_outputDimensions.empty()); ML_CHECK_VALID_ARGUMENT(m_scales.empty() || outputSizes.empty(), "scales and roi cannot both be present."); @@ -1797,6 +2207,35 @@ namespace OperatorHelper return { m_outputDimensions }; } + void OneHotHelper::Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation + ) + { + ML_CHECK_VALID_ARGUMENT(kernelInformation.GetInputCount() == 3); + ML_CHECK_VALID_ARGUMENT(kernelInformation.GetOutputCount() == 1); + + auto& attributes = kernelInformation.GetAttributes(); + const std::vector inputDimensions = shapeInformation.GetInputTensorShape(0); + std::vector outputDimensions; + + m_onnxAxis = attributes.template GetOptionalAttribute(AttrName::Axis, -1); + + // Get 'depth' tensor, which is really a scalar for the output size along the given axis. + MLOperatorTensor shapeTensor = kernelInformation.GetConstantInputTensor(1); + + auto indicesShape = shapeInformation.GetInputTensorShape(0); + m_absoluteAxis = HandleNegativeAxis(m_onnxAxis, gsl::narrow_cast(indicesShape.size() + 1)); + + // The shape tensor ('depth') is a 0D tensor holding the size for the output tensor along the specified axis. + // It must be registered as OrtMemType::OrtMemTypeCPUInput for CPU read access. 
+ const int64_t depth64 = ReadScalarTensorCastToInt64(shapeTensor); + ML_CHECK_VALID_ARGUMENT(depth64 > 0, "Negative or zero 'depth' values for OneHot are illegal."); + const uint32_t depth = gsl::narrow_cast(depth64); + m_outputDimensions.assign(indicesShape.begin(), indicesShape.end()); + m_outputDimensions.insert(m_outputDimensions.begin() + m_absoluteAxis, depth); + } + std::vector OneHotHelper::GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const { return { EdgeShapes(m_outputDimensions) }; diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h index dbaa14d2d5863..32c34a19fa540 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h @@ -9,7 +9,9 @@ #include "../DmlExecutionProvider/src/ErrorHandling.h" #include "MLOperatorAuthorHelper.h" -namespace OperatorHelper { +namespace OperatorHelper +{ + bool ContainsEmptyDimensions(gsl::span dimensions); std::vector BroadcastTensorShape( @@ -25,12 +27,15 @@ std::vector BroadcastTensorShape( #pragma optimize("", off) #endif template -void FindValueIndices(gsl::span values, T value, /*out*/ std::vector& indices) { +void FindValueIndices(gsl::span values, T value, /*out*/ std::vector& indices) +{ indices.clear(); - for (size_t i = 0, valuesCount = values.size(); i < valuesCount; ++i) { + for (size_t i = 0, valuesCount = values.size(); i < valuesCount; ++i) + { // Work around compiler bug on x86 release by using data() rather than operator [] directly. 
// cl.exe 19.20.27412.4 for x86 - if (values.data()[i] == value) { + if (values.data()[i] == value) + { indices.push_back(gsl::narrow_cast(i)); } } @@ -53,11 +58,13 @@ void HandleNegativeAxes(gsl::span onnxAxes, uint32_t dimCount); // ellidable input indices = {1,3,4} // output values = {2,3,5} template -void RemoveValuesByIndex(gsl::span indices, bool keepOneValue, /*inout*/ std::vector& values) { +void RemoveValuesByIndex(gsl::span indices, bool keepOneValue, /*inout*/ std::vector& values) +{ assert(std::is_sorted(indices.begin(), indices.end())); // Keep the last value at least, if all values would otherwise be removed. - if (keepOneValue && !indices.empty() && indices.size() == values.size()) { + if (keepOneValue && !indices.empty() && indices.size() == values.size()) + { indices = indices.first(indices.size() - 1); } @@ -68,7 +75,8 @@ void RemoveValuesByIndex(gsl::span indices, bool keepOneValue, / size_t nextIndex = (indicesIterator == indicesEnd) ? SIZE_MAX : *(indicesIterator++); // For every value, either skip the entry, or copy it to the output. - for (size_t i = 0; i < oldValuesCount; ++i) { + for (size_t i = 0; i < oldValuesCount; ++i) + { if (i == nextIndex) // Skip and remove entry. { nextIndex = (indicesIterator == indicesEnd) ? SIZE_MAX : *(indicesIterator++); @@ -108,7 +116,8 @@ double ReadScalarTensorCastToFloat64(const MLOperatorTensor& tensor); void ReadCpuLocalTensorIntoInt32(const MLOperatorTensor& tensor, std::vector& result); void ReadCpuLocalTensorIntoFloat32(const MLOperatorTensor& tensor, std::vector& result); -class EdgeShapes { +class EdgeShapes +{ public: EdgeShapes() = default; EdgeShapes(const std::vector& dim) { m_shapes = dim; } @@ -124,7 +133,8 @@ class EdgeShapes { std::vector m_shapes; }; -struct KernelArgs { +struct KernelArgs +{ // Initialize arrays up to NcdhwSpatialDimensionCount to avoid vector allocations, // but it's important to use .spatialDimensionCount when accessing them because // values beyond that may be bogus. 
@@ -149,7 +159,8 @@ struct KernelArgs { ML_CHECK_VALID_ARGUMENT(spatialDimensionCount <= NcdhwSpatialDimensionCount); } - void FillWithLeadingValues(gsl::span input, gsl::span output, uint32_t fillCount, uint32_t value) { + void FillWithLeadingValues(gsl::span input, gsl::span output, uint32_t fillCount, uint32_t value) + { // e.g. // input = [5,6,7,8] // fillcount = 2 @@ -166,9 +177,11 @@ struct KernelArgs { // Create a copy of an existing kernel args with a minimum dimension count, // filling the leading attribute values with 1's or 0's respectively. - KernelArgs(KernelArgs const& kernelArgs, uint32_t minimumDimensionCount) : autoPad(kernelArgs.autoPad), + KernelArgs(KernelArgs const& kernelArgs, uint32_t minimumDimensionCount) + : autoPad(kernelArgs.autoPad), autoPadSameUpper(kernelArgs.autoPadSameUpper), - spatialDimensionCount(std::max(kernelArgs.spatialDimensionCount, minimumDimensionCount)) { + spatialDimensionCount(std::max(kernelArgs.spatialDimensionCount, minimumDimensionCount)) + { ML_CHECK_VALID_ARGUMENT(spatialDimensionCount <= NcdhwSpatialDimensionCount); uint32_t fillCount = (minimumDimensionCount > kernelArgs.spatialDimensionCount) ? minimumDimensionCount - kernelArgs.spatialDimensionCount : 0; @@ -205,13 +218,15 @@ void MatMulShapeMapping( std::vector& inputShape1, std::vector& outputShape); -class GetOutputShapeAsInputShapeHelper { +class GetOutputShapeAsInputShapeHelper +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. // Default to first input tensor. 
template - GetOutputShapeAsInputShapeHelper(const Info_t& info, const Shape_t& shape) { + GetOutputShapeAsInputShapeHelper(const Info_t& info, const Shape_t& shape) + { ORT_UNUSED_PARAMETER(info); ORT_UNUSED_PARAMETER(shape); }; @@ -233,7 +248,8 @@ class GetOutputShapeAsInputShapeHelper { }; template -class GetOutputShapeAsSpecificInputShapeHelper : public GetOutputShapeAsInputShapeHelper { +class GetOutputShapeAsSpecificInputShapeHelper : public GetOutputShapeAsInputShapeHelper +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. @@ -243,7 +259,8 @@ class GetOutputShapeAsSpecificInputShapeHelper : public GetOutputShapeAsInputSha {} }; -class GetBroadcastedOutputShapeHelper { +class GetBroadcastedOutputShapeHelper +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
@@ -253,18 +270,74 @@ class GetBroadcastedOutputShapeHelper { std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; }; -class RandomUniformHelperBase { +struct IKernelInformationAdapter +{ + virtual uint32_t GetInputCount() const noexcept = 0; + virtual uint32_t GetOutputCount() const noexcept = 0; + virtual bool IsInputValid(uint32_t inputIndex) const noexcept = 0; + virtual bool IsOutputValid(uint32_t outputIndex) const noexcept = 0; + virtual MLOperatorTensor GetConstantInputTensor(uint32_t inputIndex) const = 0; + virtual bool HasAttribute(_In_z_ MLConstStringParam name, MLOperatorAttributeType type) const noexcept = 0; + virtual MLOperatorAttributes const& GetAttributes() const noexcept = 0; +}; + +struct IShapeInformationAdapter +{ + virtual uint32_t GetInputTensorDimensionCount(uint32_t inputIndex) const = 0; + virtual std::vector GetInputTensorShape(uint32_t inputIndex) const = 0; +}; + +// To avoid duplicating dozens of templated functions that vary only on the source of the kernel +// information, provide a thin abstraction. +// +// InformationSourceType may be MLOperatorKernelCreationContext or MLShapeInferenceContext. 
+template +struct KernelInformationAdapter : IKernelInformationAdapter +{ + KernelInformationAdapter(InformationSourceType& informationSource) : m_informationSource(informationSource) {} + + virtual uint32_t GetInputCount() const noexcept { return m_informationSource.GetInputCount(); } + virtual uint32_t GetOutputCount() const noexcept { return m_informationSource.GetOutputCount(); } + virtual bool IsInputValid(uint32_t inputIndex) const noexcept { return m_informationSource.IsInputValid(inputIndex); } + virtual bool IsOutputValid(uint32_t outputIndex) const noexcept { return m_informationSource.IsOutputValid(outputIndex); } + virtual MLOperatorTensor GetConstantInputTensor(uint32_t inputIndex) const { return m_informationSource.GetConstantInputTensor(inputIndex); } + virtual bool HasAttribute(_In_z_ MLConstStringParam name, MLOperatorAttributeType type) const noexcept { return m_informationSource.HasAttribute(name, type); } + virtual MLOperatorAttributes const& GetAttributes() const noexcept { return m_informationSource; } + + InformationSourceType& m_informationSource; +}; + +// To avoid duplicating dozens of templated functions that vary only on the source of the kernel +// information, provide a thin abstraction (light enough to just be passed by value). +// +// InformationSourceType may be MLOperatorKernelCreationContext or MLShapeInferenceContext. 
+template +struct ShapeInformationAdapter : IShapeInformationAdapter +{ + ShapeInformationAdapter(InformationSourceType& informationSource) : m_informationSource(informationSource) {} + + virtual uint32_t GetInputTensorDimensionCount(uint32_t inputIndex) const { return m_informationSource.GetInputTensorDimensionCount(inputIndex); } + virtual std::vector GetInputTensorShape(uint32_t inputIndex) const { return m_informationSource.GetInputTensorShape(inputIndex); } + + InformationSourceType& m_informationSource; +}; + +class RandomUniformHelperBase +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. template - RandomUniformHelperBase(const Info_t& info) { + RandomUniformHelperBase(const Info_t& info) + { m_high = info.GetOptionalAttribute(AttrName::High, 1.0f); m_low = info.GetOptionalAttribute(AttrName::Low, 0.0f); - if (info.HasAttribute(AttrName::Seed, MLOperatorAttributeType::Float)) { + if (info.HasAttribute(AttrName::Seed, MLOperatorAttributeType::Float)) + { m_seed = info.GetAttribute(AttrName::Seed); } - else { + else + { m_seed = static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count()); } } @@ -275,10 +348,12 @@ class RandomUniformHelperBase { float m_seed; }; -class RandomUniformHelper : public RandomUniformHelperBase { +class RandomUniformHelper : public RandomUniformHelperBase +{ public: template - RandomUniformHelper(const Info_t& info, const Shape_t& shape) : RandomUniformHelperBase(info) { + RandomUniformHelper(const Info_t& info, const Shape_t& shape) : RandomUniformHelperBase(info) + { auto shapeAttribute = info.GetOptionalAttributeVectorInt32(AttrName::Shape); ML_CHECK_VALID_ARGUMENT(!shapeAttribute.empty(), "Attribute shape is missing."); m_tensorShape.assign(shapeAttribute.begin(), shapeAttribute.end()); @@ -291,18 +366,22 @@ class RandomUniformHelper : public RandomUniformHelperBase { std::vector m_tensorShape; }; -class RandomNormalHelperBase { +class 
RandomNormalHelperBase +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. template - RandomNormalHelperBase(const Info_t& info) { + RandomNormalHelperBase(const Info_t& info) + { m_mean = info.GetOptionalAttribute(AttrName::Mean, 0.0f); m_scale = info.GetOptionalAttribute(AttrName::Scale, 1.0f); - if (info.HasAttribute(AttrName::Seed, MLOperatorAttributeType::Float)) { + if (info.HasAttribute(AttrName::Seed, MLOperatorAttributeType::Float)) + { m_seed = info.GetAttribute(AttrName::Seed); } - else { + else + { m_seed = static_cast(std::chrono::high_resolution_clock::now().time_since_epoch().count()); } } @@ -313,10 +392,12 @@ class RandomNormalHelperBase { float m_seed; }; -class RandomNormalHelper : public RandomNormalHelperBase { +class RandomNormalHelper : public RandomNormalHelperBase +{ public: template - RandomNormalHelper(const Info_t& info, const Shape_t& shape) : RandomNormalHelperBase(info) { + RandomNormalHelper(const Info_t& info, const Shape_t& shape) : RandomNormalHelperBase(info) + { auto shapeAttribute = info.GetOptionalAttributeVectorInt32(AttrName::Shape); ML_CHECK_VALID_ARGUMENT(!shapeAttribute.empty(), "Attribute shape is missing."); m_tensorShape.assign(shapeAttribute.begin(), shapeAttribute.end()); @@ -347,123 +428,25 @@ class ConvolutionHelperBase if (!transpose) { - InitializeKernelAndShapes(shape); + InitializeKernelAndShapes(ShapeInformationAdapter(shape)); } else { - InitializeKernelAndShapesTransposed(info, shape, hasDynamicPads); + InitializeKernelAndShapesTransposed(KernelInformationAdapter(info), ShapeInformationAdapter(shape), hasDynamicPads); } } - void ResolvingPadding(gsl::span inputDimensions); - - const std::vector& GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const { - ORT_UNUSED_PARAMETER(shapeInfo); - return m_outputShapes; - } - - template - void InitializeKernelAndShapes(const Shape_t& shapeInfo) { - const std::vector inputDimensions = 
shapeInfo.GetInputTensorShape(m_inputTensorIndex); - const std::vector filterDims = shapeInfo.GetInputTensorShape(m_filterTensorIndex); - - ML_CHECK_VALID_ARGUMENT( - inputDimensions.size() >= 3 && inputDimensions.size() <= 5, - "Input dimensions must be: 3, 4, 5." - ); - - ResolvingPadding(inputDimensions); - - m_outputShapes.resize(1); - m_outputShapes[0] = InitializeKernelOutputDimensions(inputDimensions, m_kernel); - m_outputShapes[0].GetShape()[C] = filterDims[K]; - } - - - template - void InitializeKernelAndShapesTransposed(const Info_t& info, const Shape_t& shapeInfo, bool hasDynamicPads) - { - std::vector outputShape = info.GetOptionalAttributeVectorInt32(AttrName::OutputShape); - if (!outputShape.empty()) - { - ML_CHECK_VALID_ARGUMENT( - outputShape.size() >= m_kernel.spatialDimensionCount, - "The output shape must equal the number of spatial dimensions" - ); - } + void ResolvingPadding(gsl::span inputDimensions); - const std::vector inputDimensions = shapeInfo.GetInputTensorShape(m_inputTensorIndex); - const std::vector filterDims = shapeInfo.GetInputTensorShape(m_filterTensorIndex); - - ML_CHECK_VALID_ARGUMENT(inputDimensions.size() > NonspatialDimensionCount, "Input dimensions must be >= 3"); + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; - if (hasDynamicPads) - { - MLOperatorTensor padsTensor = info.GetConstantInputTensor(2); - const std::vector& padsTensorDimensions = padsTensor.GetShape(); - ML_CHECK_VALID_ARGUMENT(padsTensorDimensions.size() == 1, "Pads dimensions must equal 1"); - const size_t dimCount = padsTensorDimensions[0]; - ML_CHECK_VALID_ARGUMENT(dimCount == 2 * NchwSpatialDimensionCount, "Pads count must equal 4"); - const int64_t* padsData = padsTensor.GetData(); - - for (size_t i = 0; i < dimCount; ++i) - { - ML_CHECK_VALID_ARGUMENT(padsData[i] >= 0, "Padding values must be greater than or equal to 0"); - if (i < dimCount / 2) - { - m_kernel.startPadding[i] = gsl::narrow_cast(padsData[i]); - } - else - { - 
m_kernel.endPadding[i - dimCount/2] = gsl::narrow_cast(padsData[i]); - } - } - } - else - { - ResolvingPadding(inputDimensions); - } + void InitializeKernelAndShapes(const IShapeInformationAdapter& shapeInfo); - m_outputShapes.resize(1); - m_outputShapes[0] = InitializeKernelOutputDimsTranspose(inputDimensions, m_kernel); - static_assert(C < NonspatialDimensionCount); - assert(m_outputShapes[0].GetShape().size() > C); - m_outputShapes[0].GetShape()[C] = filterDims[C] * m_groupCount; - - if (!outputShape.empty()) { - // Start padding, end padding, and output padding are all ignored if output shape is set. - std::fill(m_kernel.outputPadding, m_kernel.outputPadding + m_kernel.spatialDimensionCount, 0); - - if (outputShape.size() > 2) { - ML_CHECK_VALID_ARGUMENT(outputShape[outputShape.size() - 3] == gsl::narrow_cast(m_outputShapes[0].GetShape()[C]), "Output channel must be equivalent to filter channel."); - } - - for (size_t i = 0; i < m_kernel.spatialDimensionCount; ++i) { - size_t outputIndex = outputShape.size() - m_kernel.spatialDimensionCount + i; - ML_CHECK_VALID_ARGUMENT(outputShape[outputIndex] >= gsl::narrow_cast(inputDimensions[H + i]), "Output dimension cannot be smaller than input dimension."); - m_outputShapes[0].GetShape()[H + i] = outputShape[outputIndex]; - } - - const int dimOffset = gsl::narrow_cast(inputDimensions.size() - m_kernel.spatialDimensionCount); - - for (size_t i = 0; i < m_kernel.spatialDimensionCount; ++i) { - int stride = m_kernel.strides[i]; - int windowSize = m_kernel.windowSize[i]; - - // Compute padding such that in reverse order, the logical input (m_outputShapes below) is fully defined - // for a convolution over the logical output region (inputDimensions below). - // - // The padding required is the first windowSize element (for the first logical output element), - // plus (logicalOutput - 1) steps of stride (the distance between each windowed set of logical - // input elements), minus the actual logical input size. 
- int paddings = gsl::narrow_cast((inputDimensions[i + dimOffset] - 1) * stride + windowSize - m_outputShapes[0].GetShape()[i + dimOffset]); - paddings = std::max(0, paddings); - - m_kernel.startPadding[i] = m_kernel.autoPadSameUpper ? (paddings + 1) / 2 : paddings / 2; - m_kernel.endPadding[i] = paddings - m_kernel.startPadding[i]; - } - } - } + void InitializeKernelAndShapesTransposed( + const IKernelInformationAdapter& info, + const IShapeInformationAdapter& shapeInfo, + bool hasDynamicPads + ); protected: uint32_t m_groupCount; @@ -519,7 +502,8 @@ class GemmHelper std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; - enum InputTensors { + enum InputTensors + { IN_A, IN_B, IN_C @@ -533,7 +517,8 @@ class GemmHelper float m_beta = 0.0f; }; -class TransposeHelper { +class TransposeHelper +{ public: void Initialize( const MLOperatorAttributes& operatorAttributes, @@ -542,7 +527,8 @@ class TransposeHelper { // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. template - TransposeHelper(const Info_t& info, const Shape_t& shape) { + TransposeHelper(const Info_t& info, const Shape_t& shape) + { Initialize(info, shape.GetInputTensorShape(0)); } @@ -552,17 +538,21 @@ class TransposeHelper { std::vector m_permutations; }; -class SplitHelper { +class SplitHelper +{ public: void Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions); + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + uint32_t opsetVersion + ); // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
template - SplitHelper(const Info_t& info, const Shape_t& shape) { - Initialize(info, shape.GetInputTensorShape(0)); + SplitHelper(const Info_t& info, const Shape_t& shape, uint32_t opsetVersion) + { + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shape), opsetVersion); } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -574,118 +564,19 @@ class SplitHelper { class SliceHelper { -public: - template - void ReadIndexTensors( - const Info_t& operatorInfo, - /*out*/ std::vector& starts, - /*out*/ std::vector& ends, - /*out*/ std::vector& axes, - /*out*/ std::vector& steps - ) - { - // Get starts, ends, optional axes, and optional steps from constant inputs. - ReadCpuLocalTensorIntoInt32(operatorInfo.GetConstantInputTensor(1), /*out*/ starts); - ReadCpuLocalTensorIntoInt32(operatorInfo.GetConstantInputTensor(2), /*out*/ ends); - if (operatorInfo.IsInputValid(3)) - { - ReadCpuLocalTensorIntoInt32(operatorInfo.GetConstantInputTensor(3), /*out*/ axes); - } - if (operatorInfo.IsInputValid(4)) - { - ReadCpuLocalTensorIntoInt32(operatorInfo.GetConstantInputTensor(4), /*out*/ steps); - } - } - - template void Initialize( - const Info_t& operatorInfo, - gsl::span inputDimensions, + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, uint32_t opsetVersion - ) - { - std::vector starts; - std::vector ends; - std::vector axes; - std::vector steps; - - if (opsetVersion >= 10) - { - // Read starts, ends, and axes from tensors. - ReadIndexTensors(operatorInfo, /*out*/ starts, /*out*/ ends, /*out*/ axes, /*out*/ steps); - } - else if (opsetVersion >= 7) - { - // Read starts, ends, and axes from attributes. 
- starts = operatorInfo.GetOptionalAttributeVectorInt32(AttrName::Starts); - ends = operatorInfo.GetOptionalAttributeVectorInt32(AttrName::Ends); - axes = operatorInfo.GetOptionalAttributeVectorInt32(AttrName::Axes); - } - - const uint32_t inputDimensionCount = gsl::narrow_cast(inputDimensions.size()); - HandleNegativeAxes(/*inout*/ axes, inputDimensionCount); - - ML_CHECK_VALID_ARGUMENT(starts.size() == ends.size(), "'starts' must equal 'ends' in size."); - ML_CHECK_VALID_ARGUMENT(steps.empty() || steps.size() == axes.size(), "'steps' must equal 'axes' in size, or 'steps' must be empty."); - ML_CHECK_VALID_ARGUMENT(axes.empty() || starts.size() == axes.size(), "'axes' must equal 'starts' in size, or 'axes' must be empty."); - - m_outputDimensions.assign(inputDimensions.begin(), inputDimensions.end()); - m_offsets.resize(m_outputDimensions.size()); - m_sizes.resize(m_outputDimensions.size()); - m_strides.resize(m_outputDimensions.size(), 1); // Default initialize to all steps to 1's. - - // Set initial defaults lest 'starts' and 'ends' arrays are shorter than the dimension count. - std::copy(inputDimensions.begin(), inputDimensions.begin() + m_outputDimensions.size(), m_sizes.begin()); - - // Clamp selected dimensions to given 'starts' and 'ends'. - for (int i = 0, ci = gsl::narrow_cast(starts.size()); i < ci; ++i) - { - int dimIndex = axes.empty() ? i : axes[i]; - int stride = steps.empty() ? 1 : steps[i]; - ML_CHECK_VALID_ARGUMENT(static_cast(dimIndex) < static_cast(inputDimensions.size()), "'axes' must be valid with within actual input dimensions."); - ML_CHECK_VALID_ARGUMENT(stride != 0, "'steps' must not be 0."); - - // Positive values are offsets from 0. - // Negative values are offsets from back of the dimension's size. - // INT_MIN is a special value in ONNX which means to treat it as the smallest - // possible value, rather than the usual reversed from-the-back semantics. 
- int dim = gsl::narrow_cast(inputDimensions[dimIndex]); - int start = (starts[i] < 0 && starts[i] > INT_MIN) ? (starts[i] + dim) : starts[i]; - int end = (ends[i] < 0 && starts[i] > INT_MIN) ? (ends[i] + dim) : ends[i]; - - // For negative strides, the ONNX start and end values are off-by-one. - // So fix them such that the start value remains the minimum extent - // of the slice window, and end remains the maximum exclusive extent. - if (stride < 0) - { - std::swap(start, end); - start += (start < INT_MAX) ? 1 : 0; // Avoid overflow wrap. - end += (end < INT_MAX) ? 1 : 0; - } - - // Clamp the dimensions to the slice extents. - // Clamp negative numbers to 0, per case test_slice_start_out_of_bounds. - start = std::max(start, 0); - end = std::min(end, dim); - int size = std::max(end - start, 0); - - // Set the input window offsets/sizes, and compute output size based on input - // window size (rounding up). - // e.g. a window size 13 and step 3 yields 5 output elements. - int absoluteStride = abs(stride); - m_outputDimensions[dimIndex] = (size / absoluteStride) + (size % absoluteStride != 0); - m_offsets[dimIndex] = start; - m_strides[dimIndex] = stride; - m_sizes[dimIndex] = gsl::narrow_cast(size); - } - } + ); +public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
template SliceHelper(const Info_t& info, const Shape_t& shape, uint32_t opsetVersion) { - Initialize(info, shape.GetInputTensorShape(0), opsetVersion); + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shape), opsetVersion); } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -699,25 +590,20 @@ class SliceHelper class PaddingHelper { + void Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + uint32_t opsetVersion + ); + public: - void Initialize(const MLOperatorAttributes& operatorAttributes, gsl::span padding, uint32_t opsetVersion); // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. template - PaddingHelper(const Info_t& info, const Shape_t& shape, uint32_t opsetVersion) { - std::vector padding; - if (opsetVersion >= 11) - { - MLOperatorTensor padsTensor = info.GetConstantInputTensor(1); - ReadCpuLocalTensorIntoInt32(padsTensor, /*out*/ padding); - } - else - { - padding = info.GetOptionalAttributeVectorInt32(AttrName::Pads); - } - - Initialize(info, padding, opsetVersion); + PaddingHelper(const Info_t& info, const Shape_t& shape, uint32_t opsetVersion) + { + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shape), opsetVersion); } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -735,37 +621,41 @@ class VersionedOpsetHelper : public OpsetHelper VersionedOpsetHelper(const Info_t& info, const Shape_t& shape) : OpsetHelper(info, shape, OpsetVersion) {} }; -class ReduceHelperBase { +class ReduceHelperBase +{ + void Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + bool usingMultipleAxes + ); + public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. 
// Shape_t is used to obtain input shape which will be used for adjusting attribute value. template - ReduceHelperBase(const Info_t& info, const Shape_t& shape, bool usingAxes) { - m_keepDims = info.template GetOptionalAttribute(AttrName::KeepDims, 1); - m_selectLastIndex = info.template GetOptionalAttribute(AttrName::SelectLastIndex, 0); - if (usingAxes) { - m_axes = info.GetOptionalAttributeVectorInt32(AttrName::Axes); - } - else { - int axis = info.template GetOptionalAttribute(AttrName::Axis, 0); - m_axes.push_back(axis); - } - std::vector inputShape = shape.GetInputTensorShape(0); - HandleNegativeAxes(/*inout*/ m_axes, gsl::narrow_cast(inputShape.size())); - AdjustAxesAndOutputShape(inputShape); + ReduceHelperBase(const Info_t& info, const Shape_t& shape, bool usingMultipleAxes) + { + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shape), usingMultipleAxes); } + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; private: - void AdjustAxesAndOutputShape(const std::vector& inputShape); + static void HandleEmptyAxes( + /*inout*/std::vector& onnxAxes, + gsl::span inputShape, + bool treatEmptyAsNop + ); protected: - std::vector m_axes; - int m_keepDims = 0; - int m_selectLastIndex = 0; + std::vector m_axes; + int m_keepDims = 0; // Keep the dimensions rather than removing size 1 dimensions. + int m_selectLastIndex = 0; // Prefer the higher index if there is a tie between element values. + int m_noopWithEmptyAxes = 0; // Reduce nothing if axis list is empty. }; -class ArgMinArgMaxHelper : public ReduceHelperBase { +class ArgMinArgMaxHelper : public ReduceHelperBase +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
@@ -773,7 +663,8 @@ class ArgMinArgMaxHelper : public ReduceHelperBase { ArgMinArgMaxHelper(const Info_t& info, const Shape_t& shape) : ReduceHelperBase(info, shape, false) {} }; -class ReduceHelper : public ReduceHelperBase { +class ReduceHelper : public ReduceHelperBase +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. @@ -847,7 +738,8 @@ class EinSumHelper RecognizedOperatorType m_recognizedOperatorType = RecognizedOperatorType::None; }; -class MatMulHelperBase { +class MatMulHelperBase +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. @@ -878,26 +770,21 @@ class QLinearMatMulHelper : public MatMulHelperBase }; -class TopKHelper { +class TopKHelper +{ + void Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + uint32_t opsetVersion + ); + public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
template - TopKHelper(const Info_t& info, const Shape_t& shape, uint32_t opsetVersion) { - int32_t k; - if (opsetVersion >= 10) { - MLOperatorTensor kTensor = info.GetConstantInputTensor(1); - k = gsl::narrow_cast(ReadScalarTensorCastToInt64(kTensor)); - } - else { - k = info.template GetOptionalAttribute(AttrName::K, -1); - } - ML_CHECK_VALID_ARGUMENT(k >= 0, "Attribute k is missing or negative."); - m_k = k; - - auto inputShape = shape.GetInputTensorShape(0); - int32_t axis = info.template GetOptionalAttribute(AttrName::Axis, -1); - m_axis = HandleNegativeAxis(axis, gsl::narrow_cast(inputShape.size())); + TopKHelper(const Info_t& info, const Shape_t& shape, uint32_t opsetVersion) + { + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shape), opsetVersion); } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -907,12 +794,14 @@ class TopKHelper { uint32_t m_axis; }; -class RecurrentHelper { +class RecurrentHelper +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. template - RecurrentHelper(const Info_t& info, const Shape_t& shape) { + RecurrentHelper(const Info_t& info, const Shape_t& shape) + { m_hiddenSize = info.template GetOptionalAttribute(AttrName::HiddenSize, 1); } @@ -922,16 +811,19 @@ class RecurrentHelper { int m_hiddenSize = 0; }; -class ConcatHelper { +class ConcatHelper +{ public: void Initialize( const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions); + gsl::span inputDimensions + ); // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
template - ConcatHelper(const Info_t& info, const Shape_t& shape) { + ConcatHelper(const Info_t& info, const Shape_t& shape) + { Initialize(info, shape.GetInputTensorShape(0)); } @@ -941,27 +833,32 @@ class ConcatHelper { int m_axis; }; -class CropHelper { +class CropHelper +{ public: - enum BorderDim { + enum BorderDim + { Left, Top, Right, Bottom }; - enum ScaleDim { + enum ScaleDim + { Height, Width }; void Initialize( const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions); + gsl::span inputDimensions + ); // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. template - CropHelper(const Info_t& info, const Shape_t& shape) { + CropHelper(const Info_t& info, const Shape_t& shape) + { Initialize(info, shape.GetInputTensorShape(0)); } @@ -972,12 +869,14 @@ class CropHelper { uint32_t m_sizes[NchwSpatialDimensionCount]; }; -class DepthToSpaceHelper { +class DepthToSpaceHelper +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. template - DepthToSpaceHelper(const Info_t& info, const Shape_t& shape) { + DepthToSpaceHelper(const Info_t& info, const Shape_t& shape) + { m_blockSize = info.template GetOptionalAttribute(AttrName::BlockSize, -1); ML_CHECK_VALID_ARGUMENT(m_blockSize > 0, "Attribute blocksize is missing or equal to zero."); } @@ -988,12 +887,14 @@ class DepthToSpaceHelper { int32_t m_blockSize; }; -class SpaceToDepthHelper { +class SpaceToDepthHelper +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
template - SpaceToDepthHelper(const Info_t& info, const Shape_t& shape) { + SpaceToDepthHelper(const Info_t& info, const Shape_t& shape) + { m_blockSize = info.template GetOptionalAttribute(AttrName::BlockSize, -1); ML_CHECK_VALID_ARGUMENT(m_blockSize > 0, "Attribute blocksize is missing or equal to zero."); } @@ -1004,16 +905,19 @@ class SpaceToDepthHelper { int32_t m_blockSize; }; -class FlattenHelper { +class FlattenHelper +{ public: void Initialize( const MLOperatorAttributes& operatorAttributes, - gsl::span inputDimensions); + gsl::span inputDimensions + ); // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. template - FlattenHelper(const Info_t& info, const Shape_t& shape) { + FlattenHelper(const Info_t& info, const Shape_t& shape) + { Initialize(info, shape.GetInputTensorShape(0)); } @@ -1023,7 +927,8 @@ class FlattenHelper { int m_axis = 1; }; -class MultinomialHelper { +class MultinomialHelper +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. @@ -1033,17 +938,21 @@ class MultinomialHelper { std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; }; -class GatherHelper { -public: +class GatherHelper +{ void Initialize( - const MLOperatorAttributes& operatorAttributes, - gsl::span dataDimensions); + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation + ); + +public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
template - GatherHelper(const Info_t& info, const Shape_t& shape) { - Initialize(info, shape.GetInputTensorShape(0)); + GatherHelper(const Info_t& info, const Shape_t& shape) + { + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shape)); } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -1052,12 +961,14 @@ class GatherHelper { int m_axis = 0; }; -class GatherNdHelper { +class GatherNdHelper +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. template - GatherNdHelper(const Info_t& info, const Shape_t& shape) { + GatherNdHelper(const Info_t& info, const Shape_t& shape) + { m_batchCount = info.template GetOptionalAttribute(AttrName::BatchDimensions, 0); } @@ -1067,7 +978,8 @@ class GatherNdHelper { int32_t m_batchCount; }; -class PoolingHelperBase { +class PoolingHelperBase +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. @@ -1075,10 +987,14 @@ class PoolingHelperBase { PoolingHelperBase( const Info_t& info, const Shape_t& shape, - bool useGlobalPooling) : m_kernel(useGlobalPooling + bool useGlobalPooling + ) + : m_kernel(useGlobalPooling ? 
InitializeGlobalKernel(shape.GetInputTensorShape(0)) - : InitializeKernel(info, static_cast(shape.GetInputTensorShape(0).size()), gsl::span())) { - if (!useGlobalPooling) { + : InitializeKernel(info, static_cast(shape.GetInputTensorShape(0).size()), gsl::span())) + { + if (!useGlobalPooling) + { ResolveAutoPadding(m_kernel, shape.GetInputTensorShape(0)); } } @@ -1099,7 +1015,7 @@ class UnpoolingHelper const Info_t& info, const Shape_t& shape ) - : m_inputShape(shape.GetInputTensorShape(0)), + : m_inputShape(shape.GetInputTensorShape(0)), m_kernel(InitializeKernel(info, static_cast(m_inputShape.size()), gsl::span())) { Initialize(); @@ -1115,13 +1031,15 @@ class UnpoolingHelper KernelArgs m_kernel; }; -class GlobalPoolingHelper : public PoolingHelperBase { +class GlobalPoolingHelper : public PoolingHelperBase +{ public: template GlobalPoolingHelper(const Info_t& info, const Shape_t& shape) : PoolingHelperBase(info, shape, true) {} }; -class PoolingHelper : public PoolingHelperBase { +class PoolingHelper : public PoolingHelperBase +{ public: template PoolingHelper(const Info_t& info, const Shape_t& shape) : PoolingHelperBase(info, shape, false) {} @@ -1174,19 +1092,21 @@ class RoiAlignHelper : public RoiPoolingHelperBase std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; }; -class SqueezeHelper { +class SqueezeHelper +{ public: void Initialize( - gsl::span axes, - gsl::span inputDimensions); + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + uint32_t opsetVersion + ); // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
template - SqueezeHelper(const Info_t& info, const Shape_t& shape) { - Initialize( - info.GetOptionalAttributeVectorInt32(AttrName::Axes), - shape.GetInputTensorShape(0)); + SqueezeHelper(const Info_t& info, const Shape_t& shape, uint32_t opsetVersion) + { + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shape), opsetVersion); } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -1195,19 +1115,21 @@ class SqueezeHelper { std::vector m_axes; }; -class UnsqueezeHelper { +class UnsqueezeHelper +{ public: void Initialize( - gsl::span axes, - gsl::span inputDimensions); + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + uint32_t opsetVersion + ); // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. template - UnsqueezeHelper(const Info_t& info, const Shape_t& shape) { - Initialize( - info.GetOptionalAttributeVectorInt32(AttrName::Axes), - shape.GetInputTensorShape(0)); + UnsqueezeHelper(const Info_t& info, const Shape_t& shape, uint32_t opsetVersion) + { + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shape), opsetVersion); } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -1217,32 +1139,32 @@ class UnsqueezeHelper { }; template -void CALLBACK ShapeInferenceFunction(IMLOperatorShapeInferenceContext* inference_context) { +void CALLBACK ShapeInferenceFunction(IMLOperatorShapeInferenceContext* inference_context) +{ MLShapeInferenceContext helperContext(inference_context); T opHelper(helperContext, helperContext); // EdgeInfo to contain whether tensor, whether unused, and what shape is std::vector outputShapes = opHelper.GetOutputShapes(helperContext); - for (uint32_t i = 0; i < outputShapes.size(); ++i) { - if (outputShapes[i].IsTensor() && !outputShapes[i].IsUnused()) { + for 
(uint32_t i = 0; i < outputShapes.size(); ++i) + { + if (outputShapes[i].IsTensor() && !outputShapes[i].IsUnused()) + { helperContext.SetOutputTensorShape(i, outputShapes[i].GetShape()); } } } -class ReshapeHelper { +class ReshapeHelper +{ + void Initialize(const IKernelInformationAdapter& kernelInformation); + public: template - ReshapeHelper(const Info_t& info, const Shape_t& shape) { - ML_CHECK_VALID_ARGUMENT(info.GetInputCount() >= 2); - ML_CHECK_VALID_ARGUMENT(info.GetOutputCount() >= 1); - - // The 'shape' tensor is a 1D tensor holding the new shape to reshape to, - // and the first element of its own shape holds how many dimensions there - // will be for the output. - MLOperatorTensor shapeTensor = info.GetConstantInputTensor(1); - ReadCpuLocalTensorIntoInt32(shapeTensor, /*out*/ m_shapeDims); + ReshapeHelper(const Info_t& info, const Shape_t& shape) + { + Initialize(KernelInformationAdapter(info)); } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -1251,10 +1173,12 @@ class ReshapeHelper { std::vector m_shapeDims; }; -class ExpandHelper { +class ExpandHelper +{ public: template - ExpandHelper(const Info_t& info, const Shape_t& shape) { + ExpandHelper(const Info_t& info, const Shape_t& shape) + { } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -1262,10 +1186,12 @@ class ExpandHelper { protected: }; -class ConstantOfShapeHelper { +class ConstantOfShapeHelper +{ public: template - ConstantOfShapeHelper(const Info_t& info, const Shape_t& shape) { + ConstantOfShapeHelper(const Info_t& info, const Shape_t& shape) + { } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -1273,33 +1199,18 @@ class ConstantOfShapeHelper { protected: }; -class TileHelper { +class TileHelper +{ + void Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation + ); + public: template - TileHelper(const Info_t& info, const Shape_t& 
shapeInfo) { - m_inputDimensions = shapeInfo.GetInputTensorShape(0); - - // Read the repeats tensor. - const std::vector repeatsTensorDimensions = shapeInfo.GetInputTensorShape(1); - ML_CHECK_VALID_ARGUMENT(repeatsTensorDimensions.size() == 1, "Tile's repeats tensor must be 1D."); - const size_t dimCount = repeatsTensorDimensions[0]; - - MLOperatorTensor repeatsTensor = info.GetConstantInputTensor(1); - const int64_t* repeatsData = repeatsTensor.GetData(); - ML_CHECK_VALID_ARGUMENT(m_inputDimensions.size() == dimCount, "Tile's repeats tensor must be the same dimension count as the input tensor."); - ML_CHECK_VALID_ARGUMENT(repeatsTensor.IsCpuData(), "Tile's repeats tensor must be CPU Tensor."); - - for (size_t i = 0; i < dimCount; ++i) { - ML_CHECK_VALID_ARGUMENT(repeatsData[i] >= 0, "Repeat values should be >= 0."); - m_repeatsData.push_back(gsl::narrow_cast(repeatsData[i])); - } - - // Update the computed output shape accordingly, repeat every axis's length by the repeat count. - m_outputDimensions.assign(m_inputDimensions.begin(), m_inputDimensions.end()); - - for (size_t dimIndex = 0; dimIndex < dimCount; ++dimIndex) { - m_outputDimensions[dimIndex] *= m_repeatsData[dimIndex]; - } + TileHelper(const Info_t& info, const Shape_t& shapeInfo) + { + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shapeInfo)); } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -1310,60 +1221,34 @@ class TileHelper { std::vector m_outputDimensions; }; -class ResizeHelper { - public: - // Info_t is used to obtain attributes which will be used for calculating the output shape later. - // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
- template - ResizeHelper(const Info_t& info, const Shape_t& shape, uint32_t opsetVersion) { - - m_inputDimensions = shape.GetInputTensorShape(0); - std::vector outputSizes; - - if (opsetVersion >= 11) { - if (info.IsInputValid(1)) - { - MLOperatorTensor regionOfInterestTensor = info.GetConstantInputTensor(1); - ReadCpuLocalTensorIntoFloat32(regionOfInterestTensor, /*out*/ m_regionOfInterest); - } - if (info.IsInputValid(2)) - { - MLOperatorTensor scalesTensor = info.GetConstantInputTensor(2); - ReadCpuLocalTensorIntoFloat32(scalesTensor, /*out*/ m_scales); - } - if (info.IsInputValid(3)) - { - MLOperatorTensor outputSizesTensor = info.GetConstantInputTensor(3); - ReadCpuLocalTensorIntoInt32(outputSizesTensor, /*out*/ outputSizes); - } - } - else if (opsetVersion >= 9) { - // Read the scales from the 2nd tensor. - // Compatible with Upsample-9/Upsample-10 and Resize-10. - MLOperatorTensor scalesTensor = info.GetConstantInputTensor(1); - ReadCpuLocalTensorIntoFloat32(scalesTensor, /*out*/ m_scales); - } - else - { - // From attribute, compatible with Upsample-7. - m_scales = info.template GetOptionalAttribute>(AttrName::Scales, std::vector()); - } - - Initialize(outputSizes); - } - - void Initialize(gsl::span outputSizes); - - std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; - - protected: - std::vector m_inputDimensions; - std::vector m_outputDimensions; - std::vector m_scales; - std::vector m_regionOfInterest; // Stored as [start1, ..., startN, end1, ..., endN], where N is the input rank. +class ResizeHelper +{ + void Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation, + uint32_t opsetVersion + ); + +public: + // Info_t is used to obtain attributes which will be used for calculating the output shape later. + // Shape_t is used to obtain input shape which will be used for adjusting attribute value. 
+ template + ResizeHelper(const Info_t& info, const Shape_t& shape, uint32_t opsetVersion) + { + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shape), opsetVersion); + } + + std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; + +protected: + std::vector m_inputDimensions; + std::vector m_outputDimensions; + std::vector m_scales; + std::vector m_regionOfInterest; // Stored as [start1, ..., startN, end1, ..., endN], where N is the input rank. }; -class RangeHelper { +class RangeHelper +{ public: // Info_t is used to obtain attributes which will be used for calculating the output shape later. // Shape_t is used to obtain input shape which will be used for adjusting attribute value. @@ -1394,31 +1279,18 @@ class RangeHelper { TensorScalarData m_valueDelta; }; -class OneHotHelper { +class OneHotHelper +{ + void Initialize( + const IKernelInformationAdapter& kernelInformation, + const IShapeInformationAdapter& shapeInformation + ); + public: template - OneHotHelper(const Info_t& info, const Shape_t& shapeInfo) { - ML_CHECK_VALID_ARGUMENT(info.GetInputCount() == 3); - ML_CHECK_VALID_ARGUMENT(info.GetOutputCount() == 1); - - const std::vector inputDimensions = shapeInfo.GetInputTensorShape(0); - std::vector outputDimensions; - - m_onnxAxis = info.template GetOptionalAttribute(AttrName::Axis, -1); - - // Get 'depth' tensor, which is really a scalar for the output size along the given axis. - MLOperatorTensor shapeTensor = info.GetConstantInputTensor(1); - - auto indicesShape = shapeInfo.GetInputTensorShape(0); - m_absoluteAxis = HandleNegativeAxis(m_onnxAxis, gsl::narrow_cast(indicesShape.size() + 1)); - - // The shape tensor ('depth') is a 0D tensor holding the size for the output tensor along the specified axis. - // It must be registered as OrtMemType::OrtMemTypeCPUInput for CPU read access. 
- const int64_t depth64 = ReadScalarTensorCastToInt64(shapeTensor); - ML_CHECK_VALID_ARGUMENT(depth64 > 0, "Negative or zero 'depth' values for OneHot are illegal."); - const uint32_t depth = gsl::narrow_cast(depth64); - m_outputDimensions.assign(indicesShape.begin(), indicesShape.end()); - m_outputDimensions.insert(m_outputDimensions.begin() + m_absoluteAxis, depth); + OneHotHelper(const Info_t& info, const Shape_t& shapeInfo) + { + Initialize(KernelInformationAdapter(info), ShapeInformationAdapter(shapeInfo)); } std::vector GetOutputShapes(const MLShapeInferenceContext& shapeInfo) const; @@ -1463,8 +1335,13 @@ using ShapeInferenceHelper_Scatter13 = ShapeInferenceHelper_ScatterElements; // using ShapeInferenceHelper_GatherND = GatherNdHelper; using ShapeInferenceHelper_ScatterND = GetOutputShapeAsInputShapeHelper; -using ShapeInferenceHelper_Flatten = FlattenHelper; -using ShapeInferenceHelper_Split = SplitHelper; +using ShapeInferenceHelper_Flatten7 = FlattenHelper; +using ShapeInferenceHelper_Flatten9 = FlattenHelper; +using ShapeInferenceHelper_Flatten11 = FlattenHelper; +using ShapeInferenceHelper_Flatten13 = FlattenHelper; +using ShapeInferenceHelper_Split7 = VersionedOpsetHelper; +using ShapeInferenceHelper_Split11 = VersionedOpsetHelper; +using ShapeInferenceHelper_Split13 = VersionedOpsetHelper; using ShapeInferenceHelper_Transpose = TransposeHelper; using ShapeInferenceHelper_Concat = ConcatHelper; using ShapeInferenceHelper_Slice7 = VersionedOpsetHelper; @@ -1477,16 +1354,22 @@ using ShapeInferenceHelper_Pad13 = VersionedOpsetHelper; using ShapeInferenceHelper_SpaceToDepth = SpaceToDepthHelper; using ShapeInferenceHelper_DepthToSpace = DepthToSpaceHelper; -using ShapeInferenceHelper_Squeeze = SqueezeHelper; -using ShapeInferenceHelper_Unsqueeze = UnsqueezeHelper; +using ShapeInferenceHelper_Squeeze7 = VersionedOpsetHelper; +using ShapeInferenceHelper_Squeeze11 = VersionedOpsetHelper; +using ShapeInferenceHelper_Squeeze13 = VersionedOpsetHelper; +using 
ShapeInferenceHelper_Unsqueeze7 = VersionedOpsetHelper; +using ShapeInferenceHelper_Unsqueeze11 = VersionedOpsetHelper; +using ShapeInferenceHelper_Unsqueeze13 = VersionedOpsetHelper; using ShapeInferenceHelper_EyeLike = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_Expand = ExpandHelper; -using ShapeInferenceHelper_Reshape = ReshapeHelper; +using ShapeInferenceHelper_Reshape7 = ReshapeHelper; +using ShapeInferenceHelper_Reshape13 = ReshapeHelper; using ShapeInferenceHelper_ConstantOfShape = ConstantOfShapeHelper; using ShapeInferenceHelper_Tile = TileHelper; using ShapeInferenceHelper_Resize10 = VersionedOpsetHelper; using ShapeInferenceHelper_Resize11 = VersionedOpsetHelper; +using ShapeInferenceHelper_Resize13 = VersionedOpsetHelper; using ShapeInferenceHelper_OneHot = OneHotHelper; using ShapeInferenceHelper_Sqrt = GetOutputShapeAsInputShapeHelper; @@ -1562,7 +1445,6 @@ using ShapeInferenceHelper_ImageScaler = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_Upsample7 = VersionedOpsetHelper; using ShapeInferenceHelper_Upsample9 = VersionedOpsetHelper; using ShapeInferenceHelper_Upsample10 = VersionedOpsetHelper; -using ShapeInferenceHelper_Upsample13 = VersionedOpsetHelper; using ShapeInferenceHelper_Sigmoid = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_HardSigmoid = GetOutputShapeAsInputShapeHelper; @@ -1584,7 +1466,8 @@ using ShapeInferenceHelper_ParametricSoftplus = GetOutputShapeAsInputShapeHelper using ShapeInferenceHelper_Dropout = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_Shrink = GetOutputShapeAsInputShapeHelper; -using ShapeInferenceHelper_Identity = GetOutputShapeAsInputShapeHelper; +using ShapeInferenceHelper_Identity7 = GetOutputShapeAsInputShapeHelper; +using ShapeInferenceHelper_Identity13 = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_MatMul = MatMulHelper; using ShapeInferenceHelper_MatMulInteger = MatMulHelper; using ShapeInferenceHelper_QLinearMatMul = 
QLinearMatMulHelper; diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorRegistration.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h similarity index 97% rename from onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorRegistration.h rename to onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h index 0185d466f96ac..303f3ffe20671 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorRegistration.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h @@ -276,6 +276,7 @@ namespace OperatorHelper static const int sc_sinceVer_Concat = 13; static const int sc_sinceVer_Constant = 13; static const int sc_sinceVer_DepthToSpace = 13; + static const int sc_sinceVer_DequantizeLinear = 13; static const int sc_sinceVer_Div = 13; static const int sc_sinceVer_Equal = 13; static const int sc_sinceVer_Erf = 13; @@ -303,6 +304,7 @@ namespace OperatorHelper static const int sc_sinceVer_Neg = 13; static const int sc_sinceVer_Pad = 13; static const int sc_sinceVer_Pow = 13; + static const int sc_sinceVer_QuantizeLinear = 13; static const int sc_sinceVer_Reciprocal = 13; static const int sc_sinceVer_ReduceL1 = 13; static const int sc_sinceVer_ReduceL2 = 13; @@ -312,23 +314,28 @@ namespace OperatorHelper static const int sc_sinceVer_ReduceMean = 13; static const int sc_sinceVer_ReduceMin = 13; static const int sc_sinceVer_ReduceProd = 13; + static const int sc_sinceVer_ReduceSum = 13; static const int sc_sinceVer_ReduceSumSquare = 13; static const int sc_sinceVer_Relu = 13; static const int sc_sinceVer_Reshape = 13; + static const int sc_sinceVer_Resize = 13; static const int sc_sinceVer_Scatter = 13; static const int sc_sinceVer_ScatterElements = 13; static const int sc_sinceVer_ScatterND = 13; static const int sc_sinceVer_Sigmoid = 13; static const int sc_sinceVer_Sign = 13; static const int sc_sinceVer_Slice = 13; + static const int sc_sinceVer_Split = 13; static const 
int sc_sinceVer_SpaceToDepth = 13; static const int sc_sinceVer_Sqrt = 13; + static const int sc_sinceVer_Squeeze = 13; static const int sc_sinceVer_Sub = 13; static const int sc_sinceVer_Sum = 13; static const int sc_sinceVer_Tanh = 13; static const int sc_sinceVer_Tile = 13; static const int sc_sinceVer_Transpose = 13; - static const int sc_sinceVer_Upsample = 13; + static const int sc_sinceVer_Unsqueeze = 13; + static const int sc_sinceVer_ReduseSum = 13; // NOTE(review): typo of "ReduceSum" — sc_sinceVer_ReduceSum is already declared in this namespace above; remove this duplicate in a follow-up change } // namespace OnnxOperatorSet13 namespace MsftOperatorSet1 diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/SchemaInferenceOverrider.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/SchemaInferenceOverrider.h index 0970ce868903a..1dc650c5d6f00 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/SchemaInferenceOverrider.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/SchemaInferenceOverrider.h @@ -4,7 +4,7 @@ #pragma once #include "OperatorHelper.h" -#include "OperatorRegistration.h" +#include "OperatorVersions.h" namespace SchemaInferenceOverrider { @@ -61,7 +61,7 @@ namespace SchemaInferenceOverrider } } -#pragma push_macro("REGISTER_FUSED_OP_SCHEMA") +#pragma push_macro("OVERRIDE_SCHEMA") #define OVERRIDE_SCHEMA(version, isLatest, opName) \ OverrideSchemaInferenceFunction( \ #opName, OperatorHelper::OnnxOperatorSet##version##::sc_sinceVer_##opName, isLatest, gsl::span()); @@ -82,7 +82,8 @@ OverrideSchemaInferenceFunction