Sync ORTModule branch with master and fix tests #6526

Merged
Changes from 1 commit
Commits (202)
64709b1
Deprecate Python global configuration functions [Part 1] (#5923)
edgchen1 Dec 15, 2020
297c824
remove dnnl_dll_path from post build copy (#6142)
jywu-msft Dec 15, 2020
980a93c
Model Fusion For Bart (#6105)
liuziyue Dec 15, 2020
ac62cf8
Unify IExecutionProvider and IExecutionProviderFactory interfaces (#6…
RyanUnderhill Dec 16, 2020
939cc9b
Enable running the mnist_training sample without cuda (#6085)
georgen117 Dec 16, 2020
b648bf6
nnapi add min max support (#6117)
guoyu-wang Dec 16, 2020
0978d2b
Fix CUDA test hang: (#6138)
toothache Dec 16, 2020
aa49e47
Fix TensorRT kernel conflict issue for subgraphs of control flow oper…
stevenlix Dec 16, 2020
8fd0858
Add gradient registration for Abs. (#6139)
Dec 16, 2020
8269048
Partition initial optimizer state for Zero-1 (#6093)
ashbhandare Dec 16, 2020
7250562
Fix edge case in BFCArena where allocation failures could lead to an …
skottmckay Dec 16, 2020
344a2a8
Revert "work around of the build break in mac (#6069)" (#6150)
snnn Dec 16, 2020
0fa04bd
Fix clean_docker_image_cache.py detection of image pushes. (#6151)
edgchen1 Dec 17, 2020
503b61d
MLAS: add NEON version of int8 depthwise convolution (#6152)
tracysh Dec 17, 2020
36c03b3
Using a map of of ops to stages as input of partition function. (#5940)
Dec 17, 2020
efa1b0d
Minor fix to satisfy c++14 (#6162)
pranavsharma Dec 17, 2020
32c67c2
Deprecating Horovod and refactored Adasum computations (#5468)
Dec 18, 2020
dec703b
Update TensorRT-ExecutionProvider.md (#6161)
jayrodge Dec 18, 2020
34725ae
Bugfix for topk cuda kernel (#6164)
duli2012 Dec 18, 2020
98d8a3e
Revert "Fuse MatMulIntegerToFloat only when scales are scalar (#6008)…
yufenglee Dec 18, 2020
c339bb2
Remove ignored build warnings for pybind on Mac (#6165)
guoyu-wang Dec 18, 2020
adc2071
save_checkpoint, load_checkpoint and aggregate_checkpoints (#6136)
baijumeswani Dec 18, 2020
824ef9a
Don't try to bind unused inputs in the Training frontend (#6166)
Dec 18, 2020
86493e6
Update documentation for contributing a PR and add deprecation notice…
pranavsharma Dec 18, 2020
39aedbc
aggregate model states only for the case when mixed precision was tru…
baijumeswani Dec 18, 2020
bbb52e9
[NNAPI EP] Enable per-channel quantization for QlinearConv (#6155)
guoyu-wang Dec 19, 2020
11b0a54
Fix typo in BERT pretraining script (#6175)
Dec 19, 2020
cd3a5ac
Update get_docker_image.py to enable use without image cache containe…
edgchen1 Dec 19, 2020
2da8060
Helper for compiling EP to generate deterministic unique ids for use …
skottmckay Dec 21, 2020
f874260
Backend APIs for checkpointing (#5803)
jingyanwangms Dec 21, 2020
201d0db
Android coverage dashboard (#6163)
satyajandhyala Dec 21, 2020
ea9cfa5
Add usage details of unified MCR container image (#6182)
smkarlap Dec 21, 2020
53307a5
improve perf for softmax (#6128)
weixingzhang Dec 21, 2020
67ac6ae
Tune fast Gelu to use exp(x) instead of tanh(x) on Rocm platform (#6174)
Dec 22, 2020
234e94b
Add Status.csv to EP Perf Tool (#6167)
oliviajain Dec 22, 2020
945fae8
Lochi/quantization tool for trt (#6103)
chilo-ms Dec 22, 2020
fc27074
Implement ScatterND for CUDA EP (#6184)
hariharans29 Dec 22, 2020
04b3e0e
Condition fix in Resize operator (#6193)
hariharans29 Dec 22, 2020
a8b4826
Clean up checkpoint tests to use the new checkpoint functions (#6188)
baijumeswani Dec 22, 2020
21395f8
Implement comparing outputs that are sequence of maps of strings to f…
Dec 22, 2020
c562952
Dockerfile to build onnxruntime with ROCm 4.0
jessebenson Dec 21, 2020
0494a0f
Add ability to skip GPU tests based on GPU adapter name (#6198)
Dec 22, 2020
7347996
Openvino ep 2021.2 (#6196)
sfatimar Dec 23, 2020
1fc7f92
Fix a memory leak in test_inference.cc (#6201)
snnn Dec 25, 2020
52228a7
Use TArray in AMD element-wise kernels, rather than manually copying …
jessebenson Dec 22, 2020
7ccdfed
Remove most ROCm-specific element-wise code and reuse CUDA element-wi…
jessebenson Dec 22, 2020
8a0f5c5
Minor change to improve performance for operator Pad. (#5537)
xadupre Dec 28, 2020
2d09db6
Support double for operators Log, Reciprocal, Sum (CPU) (#6032)
xadupre Dec 28, 2020
111ac29
Support double for operators Where, LpNormalisation (#6034)
xadupre Dec 28, 2020
df7e2f3
Support double for operators Relu, Tanh, Sigmoid (#6221)
xadupre Dec 29, 2020
bbb6b41
Fix ImportError in build.py (#6231)
mgoin Dec 30, 2020
5c584b2
Removed executor todo that looks dead. (#6234)
michaelgiba Dec 31, 2020
1b23b28
Remove MKLML/openblas/jemalloc build config (#6212)
snnn Dec 31, 2020
3911105
Remove python 3.5
snnn Dec 31, 2020
c15a858
Update the readme file
snnn Dec 31, 2020
39a988c
Upgrade build.py to assert for python 3.6+
WilliamTambellini Dec 1, 2020
4cc2ffe
Support MLFloat16 type in Pow opset-12 CUDA kernel (#6233)
hariharans29 Dec 31, 2020
ecb2e11
MLAS: handle MlasGemm(M/N/K==0) cases (#6238)
tracysh Dec 31, 2020
70e2f96
Support double for operator TopK + fix one bug in TopK implementation…
xadupre Dec 31, 2020
5968a91
Support double for operator Gemm + fix bug in gemm implementation for…
xadupre Dec 31, 2020
84addcd
Support double for operator ReduceMean, ReduceLogSumExp (#6217)
xadupre Dec 31, 2020
cd14c1a
Support double for operator ArgMin (#6222)
xadupre Dec 31, 2020
d5cb17c
Update BUILD.md
snnn Dec 31, 2020
1685167
Update manylinux docker image to the latest (#6242)
snnn Jan 1, 2021
ffb4b62
Fix allocator issue for TensorRT IOBinding (#6240)
HectorSVC Jan 1, 2021
46e0e4e
Tune BiasGeluGradDx kernel in approximation mode to avoid tanh(...) o…
Jan 2, 2021
c8de3f3
Refactor EP Perf Tool (#6202)
oliviajain Jan 4, 2021
93bf7c4
Documentation for distributed CI tests pipeline (#6140)
baijumeswani Jan 4, 2021
6fd9d34
Remove a debug log in provider_test_utils.cc (#6200)
snnn Jan 4, 2021
493bf93
Add the Concat Slice Elimination transform, fix constant_folding tran…
ashbhandare Jan 5, 2021
ce6161c
Add MakeStringLite which uses current locale, update some MakeString …
edgchen1 Jan 5, 2021
addb4b8
Liqun/speech model loop to scan (#6070)
liqunfu Jan 5, 2021
eea3806
model parallel refinement (#6244)
pengwa Jan 6, 2021
d42399e
Allow querying a GraphProto's doc_string as part of ModelMetadata (#6…
hariharans29 Jan 6, 2021
2347de4
Fix Linux/Mac error message on input type mismatch (#6256)
hariharans29 Jan 6, 2021
431604e
add bfloat16 to gathergrad type constrains (#6267)
souptc Jan 6, 2021
bbc9ed9
Fix VS 2017 build break (#6276)
hariharans29 Jan 7, 2021
d761571
Deprecate Python global configuration functions [Part 2] (#6171)
edgchen1 Jan 7, 2021
481a2cd
Add script to preprocess python documentation before publishing (#6129)
xadupre Jan 7, 2021
b80e8ce
rename past to past_key_values for GPT-2 (#6269)
tianleiwu Jan 7, 2021
c109486
Rename MakeString and ParseString functions. (#6272)
edgchen1 Jan 7, 2021
04287ec
Increase timeout for Linux GPU CUDA11 build. (#6280)
edgchen1 Jan 7, 2021
a72fcbd
Add helper to compare model with different precision (#6270)
wangyems Jan 8, 2021
7fc827a
Fix Min/Max CPU kernels for float16 type (#6205)
hariharans29 Jan 8, 2021
ac5ca2b
fix data_ptr assertion error for past_sequence_length=0 in GPT-2 (#6284)
tianleiwu Jan 8, 2021
da952a9
A list of changes in transformers tool (#6224)
wangyems Jan 8, 2021
1059bfa
Workaround for static_cast<double>(half)
jessebenson Jan 8, 2021
fa851bf
Add workaround to remove ROCm-specific binary-elementwise files.
jessebenson Jan 8, 2021
5084ce0
Update nuget build (#6297)
snnn Jan 11, 2021
84024bd
Enable ONNX backend test of SequenceProto input/output (#6043)
jcwchen Jan 11, 2021
938e65d
add --sequence_lengths option (#6285)
tianleiwu Jan 11, 2021
ac5b5e5
more dtype for Equal CUDA kernel (#6288)
centwang Jan 12, 2021
c43ca45
Force reinstall onnx python package on Windows (#6309)
snnn Jan 12, 2021
a038924
update transformers required package versions (#6315)
tianleiwu Jan 12, 2021
3b3e698
Remove abs in LpPool (#6303)
luyaor Jan 12, 2021
a825766
Support 1D input for Conv + Mul/Add fusion optimizer with test (#6295)
zhanghuanrong Jan 12, 2021
ec81e29
Add longformer to python package (#6314)
tianleiwu Jan 12, 2021
b491d7c
Avoid false sharing on thread pool data structures (#6298)
tlh20 Jan 12, 2021
0ed56d4
fix opset imports for function body (#6287)
askhade Jan 12, 2021
aacc8db
Remove false positive prefast warning from threadpool (#6324)
tlh20 Jan 12, 2021
6b73bae
Java: add Semmle to Java publishing pipelines (#6326)
yuslepukhin Jan 12, 2021
f77ff1b
Quantization support for split operator with its NHWC support (#6107)
zhanghuanrong Jan 13, 2021
aeca96c
Liqun/enable pipeline parallel test (#6331)
liqunfu Jan 13, 2021
5623cc6
Use onnxruntime_USE_FULL_PROTOBUF=OFF for the cuda execution provider…
alberto-magni Jan 13, 2021
87ec1f6
MLAS: add fallback implementation for quantized GEMM (#6335)
tracysh Jan 13, 2021
56ab216
Delete float16.py (#6336)
oliviajain Jan 13, 2021
62e4045
Enable add + softmax fusion for Rocm platform (#6259)
Jan 13, 2021
f7034b9
add external data support to tensor proto utils (#6257)
askhade Jan 13, 2021
d367941
changed wording. (#6337)
Jan 13, 2021
cfd6f10
Remove OpSchema dummy definition. Only needed for Function now, and w…
skottmckay Jan 13, 2021
fcd9fc9
remove gemmlowp submodule (#6341)
tracysh Jan 13, 2021
b220fee
[NNAPI] Add pow support (#6310)
guoyu-wang Jan 14, 2021
042053c
Add support for running Android emulator from build.py on Windows. (#…
edgchen1 Jan 14, 2021
e35db19
fix the pipeline failure (#6346)
guoyu-wang Jan 14, 2021
4df356d
Train BERT Using BFloat16 on A100 (#6090)
centwang Jan 14, 2021
5b9d993
Fix DerefNullPtr issues raised by SDLNativeRules. (#6348)
pranavsharma Jan 14, 2021
c24f295
update quantize to support basic optimization and e2e example for ima…
yufenglee Jan 14, 2021
fd21c84
Enable graph save for orttrainer (#6333)
ashbhandare Jan 14, 2021
ea6789b
Add PREfast to python packaging pipeline (#6343)
snnn Jan 14, 2021
5d9552c
fix longformer benchmark io_binding output_buffers (#6345)
wangyems Jan 14, 2021
e54e2f9
Use readelf for minimal build binary size checks. (#6338)
skottmckay Jan 14, 2021
6d0fb3e
Java: Set C language warnings to W4 and adjust JNI code (#6347)
yuslepukhin Jan 14, 2021
8ce252c
Pipeline Parallel Experimental Python API (#5815)
wschin Jan 15, 2021
961bb62
Add create session to WinML telemetry to track WinML Usage (#6356)
Jan 15, 2021
c8e37e3
Fix one more SDL warning (#6359)
pranavsharma Jan 15, 2021
f5a4f7f
fix -Wdangling-gsl (#6357)
askhade Jan 15, 2021
eab164e
Add python example of TensorRT INT8 inference on ResNet model (#6255)
stevenlix Jan 15, 2021
4db4982
This added telemetry isn't needed (#6363)
Jan 16, 2021
5b6753c
Wezuo/memory analysis (#5658)
wezuo Jan 19, 2021
baac7c9
Support MLFloat16 in CumSum Cuda op for Opset 14 (#6355)
tianleiwu Jan 19, 2021
ac36596
fix convert_common version retrival (#6382)
wangyems Jan 19, 2021
d7bdd96
Refine auto_pad based pad computation in ConvTranspose (#6305)
hariharans29 Jan 20, 2021
a1b5bfc
Fix SDL warning (#6390)
hariharans29 Jan 20, 2021
453431f
Add max_norm for gradient clipping. (#6289)
pengwa Jan 20, 2021
69af044
Add the custom op project information (#6334)
wenbingl Jan 20, 2021
33f60a0
Dont use default string marshalling in C# (#6219)
hariharans29 Jan 21, 2021
d9e4795
Fix Windows x86 compiler warnings in the optimizers project (#6377)
hariharans29 Jan 21, 2021
8574854
[Perf] Optimize Tile CPU and CUDA kernels for a corner case (#6376)
hariharans29 Jan 21, 2021
eb946c4
Unblock Android CI code coverage failure (#6393)
guoyu-wang Jan 21, 2021
99a38f4
fix build on cuda11 (#6394)
centwang Jan 21, 2021
98cc7b5
Load the model path correctly (#6369)
MartinMoon Jan 21, 2021
bba185a
Fix some compile warnings (#6316)
snnn Jan 22, 2021
4442d94
OpenVino docker file changes to bypass privileged mode
smkarlap Jan 22, 2021
60c772e
Megatron checkpointing (#6293)
ashbhandare Jan 22, 2021
61ecf52
Fix generate_submodule_cgmanifest.py Windows issues. (#6404)
edgchen1 Jan 22, 2021
3c3d363
Continue memory planning when unknown shape tensor is encountered. (#…
codemzs Jan 22, 2021
6507b4f
Reintroduce experimental api changes and fix remote build break (#6385)
Jan 22, 2021
e1dc268
Add support for custom ops to minimal build. (#6228)
skottmckay Jan 25, 2021
c20965f
enable pipeline to run quantization tests (#6416)
yufenglee Jan 25, 2021
24f1bd6
Minor cmake change (#6431)
hariharans29 Jan 25, 2021
6ed1240
Liqun/liqun/enable pipeline parallel test2 (#6399)
liqunfu Jan 25, 2021
f3a0344
Farewell TrainableDropout (#5793)
codemzs Jan 26, 2021
7e42840
fix null dereference warning (#6437)
yufenglee Jan 26, 2021
76dbd88
Expose graph ModelPath to TensorRT shared library (#6353)
stevenlix Jan 26, 2021
afd7b8b
add tool for generating test data for longformer (#6415)
tianleiwu Jan 27, 2021
0d20104
only build experimental api in redist (#6465)
smk2007 Jan 27, 2021
9835b46
Add an option to save the training graph after optimization (#6410)
ryotatomioka Jan 27, 2021
b5d1a49
Share allocator between CUDA EP & TRT EP. (#6332)
HectorSVC Jan 27, 2021
fd43806
fix max norm clipping test in python packaging pipeline test (#6468)
pengwa Jan 27, 2021
c05adb1
Initial version of CoreML EP (#6392)
guoyu-wang Jan 27, 2021
d5f51c4
Bug 31463811: Servicing: Redist (Nuget) conflicts with Microsoft.AI.M…
smk2007 Jan 27, 2021
f68eb35
dequantize 1st input of lstm back if it is quantized (#6444)
yufenglee Jan 27, 2021
0100f33
[java] Adds support for OrtEnvironment thread pools (#6406)
Craigacp Jan 27, 2021
1ce1a51
fix SDL native rule warning #6246 (#6461)
fs-eire Jan 27, 2021
ed1ebd2
fix SDL rule (#6464)
fs-eire Jan 27, 2021
b6ac35f
use tickcount64 (#6447)
Jan 27, 2021
7a0ab9c
Update pypi package metadata (#6354)
faxu Jan 28, 2021
91b19b8
Delete nuget extra configs (#6477)
snnn Jan 28, 2021
d850fa6
Op kernel type reduction infrastructure. (#6466)
edgchen1 Jan 28, 2021
77d0eb3
Fixing a leak in OnnxSequences with String keys or values. (#6473)
Craigacp Jan 28, 2021
2e228d7
Increase the distributes tests pipeline timeout to 120 minutes (#6479)
baijumeswani Jan 28, 2021
752627c
[CoreML EP] Add CI for CoreML EP (macOS) and add coreml_flags for EP …
guoyu-wang Jan 28, 2021
c84bb9d
Add ability to track per operator types in reduced build config. (#6428)
skottmckay Jan 28, 2021
00afd00
merge e2e with distributed pipeline (#6443)
liqunfu Jan 28, 2021
ea2b560
Fix test breaks in Windows ingestion pipeline (#6476)
smk2007 Jan 28, 2021
3f60b27
Speed up the Mac CI runs (#6483)
guoyu-wang Jan 28, 2021
ce46f37
expose learningmodelpixelrange property (#5877)
zhangxiang1993 Jan 28, 2021
d4e1f5a
Fix of support api version bug for [de]quantize (#6492)
guoyu-wang Jan 29, 2021
21b4842
SDL fixes: add proper casts/format specifiers (#6446)
Jan 29, 2021
3b1227c
SDL annotation fixes (#6448)
Jan 29, 2021
1a5b75a
[OpenVINO-EP] Remove support for OpenVINO 2020.2 (#6493)
suryasidd Jan 29, 2021
7abb5b6
Support pad operator in quantization and quantized nhwc transformer. …
zhanghuanrong Jan 29, 2021
066520f
Improve work distribution for Expand operator, and sharded LoopCounte…
tlh20 Jan 29, 2021
d3203ad
Update document of transformer optimization (#6487)
tianleiwu Jan 29, 2021
71389ff
nuphar test to avoid test data download to improve passing rate (#6467)
liqunfu Jan 29, 2021
a19c48f
Fuse cuda conv with activation (#6351)
RandySheriffH Jan 29, 2021
06a6c63
[CoreML EP] Add support for some activations/Transpose, move some sha…
guoyu-wang Jan 29, 2021
8306150
Refine transformers profiler output (#6502)
tianleiwu Jan 29, 2021
8c6d76a
Update to match new test setup. (#6496)
skottmckay Jan 29, 2021
76bc0e4
Enable dense sequence optimized version of Pytorch exported BERT-L on…
Jan 29, 2021
7f57317
Optimize GatherGrad for AMD GPU (#6381)
weixingzhang Jan 29, 2021
76f5d9e
add explicit barriers for buffer overread and overrwrite (#6484)
Jan 29, 2021
531eb06
fix sdl bugs for uninitialized variables and returns (#6450)
Jan 29, 2021
3a30ad7
handle hr error conditions (#6449)
Jan 29, 2021
a36f627
Dnnl training (#6045)
georgen117 Jan 30, 2021
7c5bfba
Lochi/refactor yolov3 quantization (#6290)
chilo-ms Jan 30, 2021
f2872ff
Print a warning message for using newer c_api header on old binary (#…
guoyu-wang Jan 30, 2021
e5cbcec
Fix issues with ArmNN build setup (#6495)
skottmckay Jan 30, 2021
5b69cbe
Fix Windows CI builds by updating test scripts to work with numpy 1.2…
skottmckay Feb 1, 2021
891181d
Fix ORTModule branch for orttraining-* pipelines
Jan 29, 2021
6b890c2
Merge remote-tracking branch 'origin/master' into thiagofc/fix-orttra…
Feb 1, 2021
0432fa7
Update pytorch nightly version dependency
Feb 1, 2021
add external data support to tensor proto utils (#6257)
* update unpack tensor utilities to support loading external data

* more updates

* fix test

* fix nuphar build

* minor build fix

* add tests

* fix Android CI

* fix warning

* fix DML build failure and some warnings

* more updates

* more updates

* plus few updates

* plus some refactoring

* changes per review

* plus some change

* remove temp code

* plus updates to safeint usage

* build fix

* fix for safeint
askhade authored Jan 13, 2021
commit f7034b9bca705b31fa69f31486416795e0eccbd9
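
Taken together, the changes below thread the owning model's Path through the tensor-proto utilities so that initializers whose data_location is EXTERNAL can be resolved relative to the model file. As a minimal sketch of the resulting call pattern — the surrounding setup (a loaded Graph named graph and one of its initializers init) is hypothetical, not part of this diff:

#include <memory>

#include "core/framework/tensorprotoutils.h"
#include "core/graph/graph.h"

namespace onnxruntime {

// Hypothetical caller: unpack an initializer that may reference external data.
// The model path tells the utility where to resolve relative external files;
// an empty path falls back to the current directory.
common::Status ReadInitializer(const Graph& graph,
                               const ONNX_NAMESPACE::TensorProto& init) {
  std::unique_ptr<uint8_t[]> data;
  size_t byte_size = 0;
  ORT_RETURN_IF_ERROR(
      utils::UnpackInitializerData(init, graph.ModelPath(), data, byte_size));
  // data.get() now holds byte_size bytes of the tensor's raw data.
  return common::Status::OK();
}

}  // namespace onnxruntime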
4 changes: 4 additions & 0 deletions include/onnxruntime/core/graph/graph.h
@@ -108,6 +108,9 @@ class Node {
/** Gets the domain of the OperatorSet that specifies the operator returned by #OpType. */
const std::string& Domain() const noexcept { return domain_; }

/** Gets the path of the owning model if any. */
const Path& ModelPath() const noexcept;

/** Gets the Node's execution priority.
@remarks Lower value means higher priority */
int Priority() const noexcept { return priority_; };
@@ -149,6 +152,7 @@ class Node {

/** Gets the function body if applicable otherwise nullptr. */
const Function* GetFunctionBody() const noexcept { return func_body_; }

#endif

/**
3 changes: 3 additions & 0 deletions include/onnxruntime/core/graph/graph_viewer.h
@@ -42,6 +42,9 @@ class GraphViewer {
/** Gets the Graph description. */
const std::string& Description() const noexcept;

/** Gets the path of the owning model if any **/
const Path& ModelPath() const noexcept { return graph_->ModelPath(); }

/**
Gets a tensor created from an initializer.
@param tensor_name The tensor name
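
GraphViewer::ModelPath gives execution providers, which only see a read-only view of the graph, a way to locate the model on disk and resolve externally stored weights. A hedged sketch of EP-side usage — the graph_viewer variable and the surrounding provider code are assumptions, not part of this diff; IsEmpty, ParentPath, and ToPathString are the Path accessors used elsewhere in the change:

// Sketch: find the directory against which external tensors are resolved.
// An empty path means no model file location is known, and external data
// lookups fall back to the current directory.
const Path& model_path = graph_viewer.ModelPath();
if (!model_path.IsEmpty()) {
  auto external_data_dir = model_path.ParentPath().ToPathString();
}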
201 changes: 164 additions & 37 deletions onnxruntime/core/framework/tensorprotoutils.cc

Large diffs are not rendered by default.

69 changes: 59 additions & 10 deletions onnxruntime/core/framework/tensorprotoutils.h
@@ -7,7 +7,9 @@
#include <type_traits>

#include "core/common/common.h"
#include "core/common/path.h"
#include "core/common/status.h"
#include "core/framework/endian_utils.h"
#include "core/framework/allocator.h"
#include "core/framework/ml_value.h"
#include "core/framework/mem_buffer.h"
@@ -58,22 +60,33 @@ ONNXTensorElementDataType GetTensorElementType(const ONNX_NAMESPACE::TensorProto
template <size_t alignment>
common::Status GetSizeInBytesFromTensorProto(const ONNX_NAMESPACE::TensorProto& tensor_proto, size_t* out);

template <typename T>
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
/*out*/ T* p_data, size_t expected_size);

// Convert the NodeProto from a Constant node into a TensorProto that can be used as an initializer
// Convert the AttributeProto from a Constant node into a TensorProto that can be used as an initializer
// If AttributeProto contains a TensorProto, this tensor proto is converted as is, including the case when
// the data location is external; i.e. it does not load the external data.
// However if AttributeProto contains SparseTensorProto then it converts the data into dense tensor proto
// (including loading external data when applicable).
// model_path is used for constructing the full path for external_data
common::Status ConstantNodeProtoToTensorProto(const ONNX_NAMESPACE::NodeProto& node,
const Path& model_path,
ONNX_NAMESPACE::TensorProto& tensor);

// Convert a SparseTensorProto to a dense TensorProto
// If the SparseTensorProto contains external data then it loads the data and converts to dense tensor proto
// The resulting TensorProto will contain the data as raw data.
// model_path is used for constructing the full path for external_data
common::Status SparseTensorProtoToDenseTensorProto(const ONNX_NAMESPACE::SparseTensorProto& sparse,
const Path& model_path,
ONNX_NAMESPACE::TensorProto& dense);

#if !defined(ORT_MINIMAL_BUILD)
// Convert a TensorProto to a SparseTensorProto
// If the tensorproto contains external data then it loads the data and converts to sparse tensor
// The resulting SparseTensorProto will contain the data as raw data
// model_path is used for constructing the full path for external_data
common::Status DenseTensorToSparseTensorProto(const ONNX_NAMESPACE::TensorProto& dense,
const Path& model_path,
ONNX_NAMESPACE::SparseTensorProto& sparse);
#endif  // !ORT_MINIMAL_BUILD

inline bool HasDimValue(const ONNX_NAMESPACE::TensorShapeProto_Dimension& dim) {
return dim.value_case() == ONNX_NAMESPACE::TensorShapeProto_Dimension::kDimValue;
@@ -109,6 +122,13 @@ inline bool HasRawData(const ONNX_NAMESPACE::TensorProto& ten_proto) {
ten_proto.has_raw_data(); // XXX: Figure out how to do in proto3
}

inline bool HasExternalData(const ONNX_NAMESPACE::TensorProto& ten_proto) {
// Cannot be UNDEFINED and cannot be STRING, but the test for STRING is usually performed separately
// to return an error
return ten_proto.data_type() != ONNX_NAMESPACE::TensorProto::UNDEFINED &&
ten_proto.data_location() == ONNX_NAMESPACE::TensorProto_DataLocation_EXTERNAL;
}

inline bool HasDataType(const ONNX_NAMESPACE::TensorProto& ten_proto) {
return ten_proto.data_type() != ONNX_NAMESPACE::TensorProto::UNDEFINED;
}
@@ -126,10 +146,9 @@ inline bool HasElemType(const ONNX_NAMESPACE::TypeProto_SparseTensor& ten_proto)
}

inline bool HasName(const ONNX_NAMESPACE::SparseTensorProto& ten_proto) {
return ten_proto.values().has_name();  // XXX
}


inline bool HasKeyType(const ONNX_NAMESPACE::TypeProto_Map& map_proto) {
return map_proto.key_type() != ONNX_NAMESPACE::TensorProto::UNDEFINED;
}
@@ -219,9 +238,37 @@ inline bool HasName(const ONNX_NAMESPACE::NodeProto& node_proto) {
return node_proto.has_name();
}

// UnpackTensor from either raw data or the type specific data field.
#if !defined(ORT_MINIMAL_BUILD)
// Unpack tensor which contains external data. Uses the tensor_proto_dir to construct the full path for external data.
// If tensor_proto_dir == nullptr then uses the current directory instead.
// This function does not unpack string_data of a tensor
template <typename T>
Status UnpackTensorWithExternalData(const ONNX_NAMESPACE::TensorProto& tensor,
const ORTCHAR_T* tensor_proto_dir, size_t expected_size,
/*out*/ T* p_data);
#endif // !defined(ORT_MINIMAL_BUILD)

// UnpackTensor from raw data or the type specific data field. Does not handle external data.
// If the tensor does not contain raw data then raw_data should be nullptr and raw_data_len should be 0.
template <typename T>
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, /*out*/ T* p_data, size_t expected_size) {
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const void* raw_data, size_t raw_data_len,
/*out*/ T* p_data, size_t expected_size);

// UnpackTensor from raw data, external data or the type specific data field.
// Uses the model path to construct the full path for loading external data. When model_path is empty,
// the current directory is used.
template <typename T>
Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, const Path& model_path, /*out*/ T* p_data, size_t expected_size) {
#if !defined(ORT_MINIMAL_BUILD)
if (HasExternalData(tensor)) {
auto tensor_proto_path = model_path.IsEmpty() ? nullptr : model_path.ParentPath().ToPathString().c_str();
return UnpackTensorWithExternalData(tensor, tensor_proto_path, expected_size, p_data);
}
#else
ORT_UNUSED_PARAMETER(model_path);
ORT_RETURN_IF(HasExternalData(tensor), "TensorProto with external data is not supported in ORT minimal build.");
#endif

return HasRawData(tensor)
? UnpackTensor(tensor, tensor.raw_data().data(), tensor.raw_data().size(), p_data, expected_size)
: UnpackTensor(tensor, nullptr, 0, p_data, expected_size);
@@ -231,11 +278,13 @@ Status UnpackTensor(const ONNX_NAMESPACE::TensorProto& tensor, /*out*/ T* p_data
* Unpack the data from an initializer tensor
* Please note, this function does not unpack string_data of an initializer tensor
* @param initializer given initializer tensor
* @param model_path the model path, used to construct the external data dir path. When this is empty, the current dir is used.
* @param unpacked_tensor the data from the initializer in uint8_t* form
* @param tensor_byte_size the byte size of the unpacked_tensor
* @returns Status::OK() if data is unpacked successfully
*/
common::Status UnpackInitializerData(const ONNX_NAMESPACE::TensorProto& initializer,
const Path& model_path,
std::unique_ptr<uint8_t[]>& unpacked_tensor,
size_t& tensor_byte_size) ORT_MUST_USE_RESULT;

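
To make the dispatch above concrete, here is a hedged sketch of the Path-aware UnpackTensor overload in use; tensor, model_path, and element_count are assumed to come from a loaded model. In a full build an EXTERNAL tensor is routed through UnpackTensorWithExternalData, while a minimal build returns an error:

// Sketch: unpack a float tensor that may carry inline, raw, or external data.
std::vector<float> values(element_count);
ORT_RETURN_IF_ERROR(
    utils::UnpackTensor(tensor, model_path, values.data(), values.size()));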
30 changes: 22 additions & 8 deletions onnxruntime/core/graph/graph.cc
@@ -430,6 +430,10 @@ void Node::SetPriority(int priority) noexcept {
priority_ = priority;
}

const Path& Node::ModelPath() const noexcept {
return graph_->ModelPath();
}

#if !defined(ORT_MINIMAL_BUILD)

const Function* Node::GetFunctionBody(bool try_init_func_body) {
@@ -966,6 +970,7 @@ Graph::Graph(const Model& owning_model,
is_loaded_from_model_file_(GraphLoadedFromModelFile(graph_proto_)) {
ORT_ENFORCE(graph_proto != nullptr, "graph_proto cannot be null");
ArgNameToTypeMap name_to_type_map;
const auto& model_path = ModelPath();

// Process 'Constant' nodes
// Put the 'TensorProto' stored in the 'Constant' node's attribute into the graph's initializer list
@@ -975,7 +980,7 @@
}

const gsl::not_null<TensorProto*> tensor{graph_proto_->add_initializer()};
auto status = utils::ConstantNodeProtoToTensorProto(node, *tensor);
auto status = utils::ConstantNodeProtoToTensorProto(node, model_path, *tensor);
ORT_ENFORCE(status.IsOK(), status.ToString());
if (node.attribute(0).type() == AttributeProto_AttributeType_SPARSE_TENSOR) {
auto p = sparse_tensor_names_.emplace(tensor->name());
@@ -1000,7 +1005,7 @@
for (const auto& sparse_tensor : graph_proto_->sparse_initializer()) {
ORT_ENFORCE(utils::HasName(sparse_tensor), "Sparse initializer must have a name. This model is invalid");
const gsl::not_null<TensorProto*> tensor{graph_proto_->add_initializer()};
auto status = utils::SparseTensorProtoToDenseTensorProto(sparse_tensor, *tensor);
auto status = utils::SparseTensorProtoToDenseTensorProto(sparse_tensor, model_path, *tensor);
ORT_ENFORCE(status.IsOK(), status.ToString());
auto p = sparse_tensor_names_.emplace(tensor->name());
ORT_ENFORCE(p.second, "Duplicate sparse_tensor_initializer: '", tensor->name(), "' Model is invalid.");
@@ -2810,18 +2815,20 @@ common::Status Graph::SaveToOrtFormat(flatbuffers::FlatBufferBuilder& builder,
std::vector<flatbuffers::Offset<fbs::Tensor>> initializers_data;
assert(sparse_tensor_names_.size() <= name_to_initial_tensor_.size());
initializers_data.reserve(name_to_initial_tensor_.size() - sparse_tensor_names_.size());
const auto& model_path = ModelPath();

for (const auto& pair : name_to_initial_tensor_) {
if (sparse_tensor_names_.find(pair.first) == sparse_end) {
flatbuffers::Offset<fbs::Tensor> fbs_tensor;
ORT_RETURN_IF_ERROR(
experimental::utils::SaveInitializerOrtFormat(builder, *pair.second, fbs_tensor));
experimental::utils::SaveInitializerOrtFormat(builder, *pair.second, model_path, fbs_tensor));
initializers_data.push_back(fbs_tensor);
} else {
SparseTensorProto sparse_initializer;
ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(*pair.second, sparse_initializer));
ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(*pair.second, model_path, sparse_initializer));
flatbuffers::Offset<fbs::SparseTensor> fbs_sparse_tensor;
ORT_RETURN_IF_ERROR(
experimental::utils::SaveSparseInitializerOrtFormat(builder, sparse_initializer, fbs_sparse_tensor));
experimental::utils::SaveSparseInitializerOrtFormat(builder, sparse_initializer, model_path, fbs_sparse_tensor));
sparse_initializers_data.push_back(fbs_sparse_tensor);
}
}
@@ -2995,6 +3002,10 @@ ONNX_NAMESPACE::GraphProto Graph::ToGraphProto() const {

GraphProto result;
ToGraphProtoInternal(result);
// Path of the owning model
// This is used for constructing full path for external data
// if it exists
const auto& model_path = ModelPath();

// We want to make sure that sparse initializers do not appear
// as dense duplicates within the initializers list.
@@ -3006,7 +3017,7 @@
*mutable_initializer->Add() = initializer;
} else {
auto& sparse_initializer = *result.add_sparse_initializer();
auto status = utils::DenseTensorToSparseTensorProto(initializer, sparse_initializer);
auto status = utils::DenseTensorToSparseTensorProto(initializer, model_path, sparse_initializer);
ORT_ENFORCE(status.IsOK(), "Failed to convert dense initializer to sparse");
}
}
@@ -3495,13 +3506,14 @@ Status Graph::InlineFunction(Node& node) {
}

RemoveNode(node.Index());
const auto& model_path = ModelPath();
for (const auto& subgraph_node : subgraph.Nodes()) {
if (subgraph_node.OpType() == kConstant) {
// Copy constant nodes _value to name_to_initial_tensor_
ONNX_NAMESPACE::NodeProto subgraph_node_proto{};
subgraph_node.ToProto(subgraph_node_proto);
const gsl::not_null<TensorProto*> tensor{graph_proto_->add_initializer()};
ORT_RETURN_IF_ERROR(utils::ConstantNodeProtoToTensorProto(subgraph_node_proto, *tensor));
ORT_RETURN_IF_ERROR(utils::ConstantNodeProtoToTensorProto(subgraph_node_proto, model_path, *tensor));
name_to_initial_tensor_[tensor->name()] = tensor;
} else {
std::vector<NodeArg*> inputs, outputs;
@@ -3697,12 +3709,14 @@ common::Status Graph::LoadFromOrtFormat(const onnxruntime::experimental::fbs::Gr

if (fbs_sparse_initializers) {
sparse_tensor_names_.reserve(fbs_sparse_initializers->size());
const auto& model_path = ModelPath();

for (const auto* fbs_sparse_tensor : *fbs_sparse_initializers) {
ORT_RETURN_IF(nullptr == fbs_sparse_tensor, "Sparse Initializer tensor is missing. Invalid ORT format model.");
SparseTensorProto sparse_initializer;
ORT_RETURN_IF_ERROR(experimental::utils::LoadSparseInitializerOrtFormat(*fbs_sparse_tensor, sparse_initializer));
TensorProto& initializer = *deserialized_proto_data_.add_initializer();
ORT_RETURN_IF_ERROR(utils::SparseTensorProtoToDenseTensorProto(sparse_initializer, initializer));
ORT_RETURN_IF_ERROR(utils::SparseTensorProtoToDenseTensorProto(sparse_initializer, model_path, initializer));
auto p = name_to_initial_tensor_.emplace(initializer.name(), &initializer);
if (!p.second) {
LOGS(logger_, WARNING) << "Duplicate initializer (dense, sparse or ConstantNode): '" << initializer.name()
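
The graph-level changes above all follow one pattern: fetch ModelPath() once per scope, then hand it to every conversion that may touch external data. A minimal round-trip sketch with the two sparse/dense converters — sparse and model_path are assumed, and the dense-to-sparse direction is only compiled outside ORT_MINIMAL_BUILD:

// Sketch: sparse -> dense loads external data if present; dense -> sparse
// re-packs it, using model_path to build full paths in both directions.
ONNX_NAMESPACE::TensorProto dense;
ORT_RETURN_IF_ERROR(utils::SparseTensorProtoToDenseTensorProto(sparse, model_path, dense));

ONNX_NAMESPACE::SparseTensorProto round_tripped;
ORT_RETURN_IF_ERROR(utils::DenseTensorToSparseTensorProto(dense, model_path, round_tripped));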
12 changes: 7 additions & 5 deletions onnxruntime/core/graph/graph_flatbuffers_utils.cc
@@ -28,6 +28,7 @@ SaveDims(flatbuffers::FlatBufferBuilder& builder, const DimsFieldType& dims) {

Status SaveInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder,
const TensorProto& initializer,
const Path& model_path,
flatbuffers::Offset<fbs::Tensor>& fbs_tensor) {
auto name = SaveStringToOrtFormat(builder, initializer.has_name(), initializer.name());
auto doc_string = SaveStringToOrtFormat(builder, initializer.has_doc_string(), initializer.doc_string());
@@ -46,7 +47,7 @@ Status SaveInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder,
std::unique_ptr<uint8_t[]> unpacked_tensor;
size_t tensor_byte_size = 0;
ORT_RETURN_IF_ERROR(
onnxruntime::utils::UnpackInitializerData(initializer, unpacked_tensor, tensor_byte_size));
onnxruntime::utils::UnpackInitializerData(initializer, model_path, unpacked_tensor, tensor_byte_size));
raw_data = builder.CreateVector(unpacked_tensor.get(), tensor_byte_size);
}

@@ -65,16 +66,17 @@

Status SaveSparseInitializerOrtFormat(flatbuffers::FlatBufferBuilder& builder,
const ONNX_NAMESPACE::SparseTensorProto& initializer,
const Path& model_path,
flatbuffers::Offset<fbs::SparseTensor>& fbs_sparse_tensor) {
// values
const auto& values = initializer.values();
flatbuffers::Offset<fbs::Tensor> values_off;
ORT_RETURN_IF_ERROR(SaveInitializerOrtFormat(builder, values, values_off));
ORT_RETURN_IF_ERROR(SaveInitializerOrtFormat(builder, values, model_path, values_off));

// Indices
const auto& indicies = initializer.indices();
flatbuffers::Offset<fbs::Tensor> indicies_off;
ORT_RETURN_IF_ERROR(SaveInitializerOrtFormat(builder, indicies, indicies_off));
ORT_RETURN_IF_ERROR(SaveInitializerOrtFormat(builder, indicies, model_path, indicies_off));

// Shape
auto shape = SaveDims(builder, initializer.dims());
@@ -122,7 +124,7 @@ Status SaveAttributeOrtFormat(flatbuffers::FlatBufferBuilder& builder,
case fbs::AttributeType::TENSOR: {
flatbuffers::Offset<fbs::Tensor> fbs_tensor;
ORT_RETURN_IF_ERROR(
experimental::utils::SaveInitializerOrtFormat(builder, attr_proto.t(), fbs_tensor));
experimental::utils::SaveInitializerOrtFormat(builder, attr_proto.t(), graph->ModelPath(), fbs_tensor));
GET_FBS_ATTR(builder, type, t, fbs_tensor);
} break;
case fbs::AttributeType::GRAPH: {
@@ -152,7 +154,7 @@ Status SaveAttributeOrtFormat(flatbuffers::FlatBufferBuilder& builder,
for (const auto& tensor : attr_proto.tensors()) {
flatbuffers::Offset<fbs::Tensor> fbs_tensor;
ORT_RETURN_IF_ERROR(
experimental::utils::SaveInitializerOrtFormat(builder, tensor, fbs_tensor));
experimental::utils::SaveInitializerOrtFormat(builder, tensor, graph->ModelPath(), fbs_tensor));
fbs_tensors_vec.push_back(fbs_tensor);
}
auto tensors = builder.CreateVector(fbs_tensors_vec);
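
For the ORT-format serializers the model path rides along for the same reason: SaveInitializerOrtFormat unpacks the initializer (loading external data when needed) before embedding it as raw bytes in the flatbuffer. A hedged sketch of the updated call — builder setup is standard flatbuffers usage, and the initializer and graph variables are assumptions:

flatbuffers::FlatBufferBuilder builder;
flatbuffers::Offset<fbs::Tensor> fbs_tensor;
ORT_RETURN_IF_ERROR(experimental::utils::SaveInitializerOrtFormat(
    builder, initializer, graph.ModelPath(), fbs_tensor));
// fbs_tensor now embeds the tensor's data, external or not, as raw bytes.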
7 changes: 4 additions & 3 deletions onnxruntime/core/graph/graph_flatbuffers_utils.h
@@ -19,9 +19,10 @@ namespace onnxruntime {

class Graph;
class Node;
class Path;

namespace logging {
class Logger;
}

namespace experimental {
@@ -36,11 +37,11 @@ namespace utils {
// TODO, add ORT_MUST_USE_RESULT when it is moved to a different header
onnxruntime::common::Status SaveInitializerOrtFormat(
flatbuffers::FlatBufferBuilder& builder, const ONNX_NAMESPACE::TensorProto& initializer,
flatbuffers::Offset<fbs::Tensor>& fbs_tensor);
const Path& model_path, flatbuffers::Offset<fbs::Tensor>& fbs_tensor);

onnxruntime::common::Status SaveSparseInitializerOrtFormat(
flatbuffers::FlatBufferBuilder& builder, const ONNX_NAMESPACE::SparseTensorProto& initializer,
flatbuffers::Offset<fbs::SparseTensor>& fbs_sparse_tensor);
const Path& model_path, flatbuffers::Offset<fbs::SparseTensor>& fbs_sparse_tensor);

// Convert a given AttributeProto into fbs::Attribute
// Note: we currently do not support graphs and sparse_tensor(s)
6 changes: 3 additions & 3 deletions onnxruntime/core/optimizer/matmul_scale_fusion.cc
@@ -17,9 +17,9 @@ namespace onnxruntime {
namespace {
template <typename T>
struct ExtractScalarAsFloatDispatchTarget {
Status operator()(const ONNX_NAMESPACE::TensorProto& tensor_proto, float& scalar_float) {
Status operator()(const ONNX_NAMESPACE::TensorProto& tensor_proto, const Path& model_path, float& scalar_float) {
T scalar;
ORT_RETURN_IF_ERROR(utils::UnpackTensor(tensor_proto, &scalar, 1));
ORT_RETURN_IF_ERROR(utils::UnpackTensor(tensor_proto, model_path, &scalar, 1));
scalar_float = static_cast<float>(scalar);
return Status::OK();
}
@@ -48,7 +48,7 @@ optional<float> GetScalarConstantInitializer(const Graph& graph, const NodeArg&
Status, ExtractScalarAsFloatDispatchTarget,
uint32_t, uint64_t, int32_t, int64_t, MLFloat16, float, double, BFloat16>
dispatcher{initializer->data_type()};
ORT_THROW_IF_ERROR(dispatcher.Invoke(*initializer, scalar));
ORT_THROW_IF_ERROR(dispatcher.Invoke(*initializer, graph.ModelPath(), scalar));

return {scalar};
}