[Runtime][Pipeline Executor] Add the map logic of global input and su…

…bgraph input. (apache#9751) * [Runtime][Pipeline Executor] Add the map logic of global input and subgraph input. Users can use a "global input name" to feed input data to the pipeline runtime. A name such as "data_a" is mapped to an input interface of a subgraph. This PR adds the logic to do the following: 1. build the input map configuration; 2. in the runtime C++ module, parse the input connection configuration and create the data structure that records the connection map; 3. provide a function that returns the map information for verification. * address review comments. * address review comments. * address review comments.
ylc · Jan 7, 2022 · 56a9d4e · 56a9d4e
1 parent 0d5ed21
commit 56a9d4e
Show file tree

Hide file tree

Showing 7 changed files with 306 additions and 107 deletions.
diff --git a/python/tvm/contrib/pipeline_executor.py b/python/tvm/contrib/pipeline_executor.py
@@ -49,16 +49,26 @@ def build(pipe_configs):
         Common interface for pipeline executor factory modules.
     """
     libs = {}
-    mod_n_configs = pipe_configs.get_config()
+    config = pipe_configs.get_config()
+    if "module_connection" not in config:
+        raise RuntimeError('"module_connection" is missing')
+    if "input_connection" not in config:
+        raise RuntimeError('"input_connection" is missing')
+
+    mod_n_configs = config["module_connection"]
     config_len = len(mod_n_configs)
-    string_config = [{} for _ in range(config_len)]
+    module_string_config = [{} for _ in range(config_len)]
+    # Use hardware configurations to build backend modules for each subgraph.
     for ir_mod, mod_config in mod_n_configs.items():
-        mconf = mod_config["pipeline"].copy()
-        mod_idx = mconf["mod_idx"]
+        pipe_config = mod_config["pipeline"].copy()
+        mod_idx = pipe_config["mod_idx"]
         dev = mod_config["dev"]
         target = mod_config["target"]
         build_func = relay.build
-        # Check whether there is a customized build function.
+        # Callers may need to use a customized building function to wrap the pre-building logic
+        # and the backend building logic. For example, in order to support a backend which only
+        # can do "int8" computation, the caller may need to merge the "quantization" logic
+        # into the building logic to creat a customized building function.
         if "build" in mod_config and mod_config["build"]:
             build_func = mod_config["build"]
 
@@ -70,11 +80,20 @@ def build(pipe_configs):
             mod_name=mod_config["mod_name"],
         )
 
-        mconf["dev"] = "{},{}".format(dev.device_type, dev.device_id)
-        # Create a pipeline configuration.
-        string_config[mod_idx] = mconf
+        pipe_config["dev"] = "{},{}".format(dev.device_type, dev.device_id)
+        # Use "mod_idx" as the key to create a "module_connection" map which is not only
+        # for the module index but also for the module connection used to build the pipeline.
+        module_string_config[mod_idx] = pipe_config
         libs[mod_idx] = {"lib": lib, "dev": dev}
 
+    # Creating a text form configuration to record the "input_connection" and the
+    # "module_connection" information. The "input_connection" is used to record the
+    # map of global input and subgraph input, and the "module_connection" is used to
+    # record module dependency.
+    string_config = {}
+    string_config["input_connection"] = config["input_connection"]
+    string_config["module_connection"] = module_string_config
+
     return PipelineExecutorFactoryModule(libs, string_config)
 
 
@@ -94,6 +113,17 @@ def __init__(self, module):
             self.module = module
         # Get the packed functions from the pipeline executor.
         self._get_num_outputs = self.module["get_num_outputs"]
+        self._get_input_pipeline_map = self.module["get_input_pipeline_map"]
+
+    def get_input_pipeline_map(self, name):
+        """Look up which subgraph input a global input name is mapped to.
+
+        Parameters
+        ----------
+        name : str
+            The global input name. The packed function behind this call reports a fatal
+            error when the argument cannot be converted to a string.
+
+        Returns
+        -------
+        input map: Array[str]
+            The subgraph index, as a string, followed by the "input name" of the
+            corresponding subgraph interface.
+        """
+        return self._get_input_pipeline_map(name)
 
     @property
     def num_outputs(self):
@@ -199,12 +229,48 @@ def is_pipeline_executor_interface(self):
             return not isinstance(self.io_owner, PipelineConfig.ModuleWrapper)
 
         def __repr__(self):
-            # Get all binding information.
-            ret = "  |{}: ".format(self.name)
+            # Build the text form: the interface name followed by one "mname:dname" pair
+            # (as returned by get_name()) for every binding.
+            str_format = "  |{}: ".format(self.name)
             for binding in self.bindings:
                 mname, dname = binding.get_name()
-                ret += "{0}:{1} ".format(mname, dname)
-            return ret
+                str_format += "{0}:{1} ".format(mname, dname)
+
+            return str_format
+
+        def check_binding_dict(self, connection_dict):
+            """Validate the dictionary form of a global interface binding.
+
+            Parameters
+            ----------
+            connection_dict : Dict[str, Any]
+                It is a dictionary of module connections. It must provide "interface_name"
+                and a "connection" list holding exactly one entry that has a "mod_idx" key.
+
+            Raises
+            ------
+            RuntimeError
+                If a required key is missing, or the "connection" list is empty or has more
+                than one entry.
+            """
+            if "interface_name" not in connection_dict:
+                raise RuntimeError('"interface_name" is missing in global config!')
+            if "connection" not in connection_dict:
+                raise RuntimeError('"connection" is missing!')
+            # The global interface mapping should be one-to-one.
+            if not connection_dict["connection"]:
+                raise RuntimeError("The global interface map is empty!")
+            if len(connection_dict["connection"]) > 1:
+                raise RuntimeError("A global interface maps multiple module interfaces!")
+            if "mod_idx" not in connection_dict["connection"][0]:
+                raise RuntimeError('"mod_idx" is missing!')
+
+        def get_binding_dict(self):
+            """Describe this interface and everything bound to it as a plain dictionary.
+
+            Returns
+            -------
+            data : Dict[str, Any]
+                A dictionary holding this interface's name under "interface_name" and, under
+                "connection", one {"mod_idx", "interface_name"} entry per binding. The result
+                is validated with check_binding_dict before it is returned.
+            """
+            connections = []
+            for bound in self.bindings:
+                _, interface = bound.get_name()
+                connections.append(
+                    {"mod_idx": bound.get_owner_idx(), "interface_name": interface}
+                )
+
+            binding_info = {"interface_name": self.name, "connection": connections}
+            self.check_binding_dict(binding_info)
+            return binding_info
 
         def check_dag_acyclic(self, start, inputs):
             """This is to check whether the DAG containing these input interfaces is acyclic.
@@ -243,30 +309,34 @@ def connect(self, binding):
 
             # Check whether the binding setting is correct or not.
             if self.io_owner == binding.io_owner:
-                raise RuntimeError(f"Can not bind itself.")
+                raise RuntimeError("Can not bind itself.")
 
             if not self.is_pipeline_executor_interface() and self.io_type == "input":
-                raise RuntimeError(f"Module can only bind from output interface!")
+                raise RuntimeError("Module can only bind from output interface!")
 
             if (
                 not self.is_pipeline_executor_interface()
                 and not binding.is_pipeline_executor_interface()
                 and binding.io_type == "output"
             ):
-                raise RuntimeError(f"Can not bind module output with another module output!")
+                raise RuntimeError("Can not bind module output with another module output!")
 
             if (
                 not self.is_pipeline_executor_interface()
                 and binding.is_pipeline_executor_interface()
                 and binding.io_type == "input"
             ):
-                raise RuntimeError(f"Can not bind module output with pipeline input!")
+                raise RuntimeError("Can not bind module output with pipeline input!")
 
             if self.is_pipeline_executor_interface() and self.io_type == "output":
-                raise RuntimeError(f"Global output can not be used as binding start point.")
+                raise RuntimeError("Global output can not be used as binding start point.")
 
-            if self.is_pipeline_executor_interface() and binding.io_type != "input":
-                raise RuntimeError(f"Global input can only bind with module input.")
+            if (
+                self.is_pipeline_executor_interface()
+                and self.io_type == "input"
+                and binding.io_type != "input"
+            ):
+                raise RuntimeError("Global input can only bind with module input.")
 
             self.bindings.append(binding)
             if not self.is_pipeline_executor_interface():
@@ -288,7 +358,7 @@ def connect(self, binding):
                 if not self.check_dag_acyclic(
                     binding.io_owner, self.io_owner.input_bindings.bindings
                 ):
-                    raise RuntimeError(f"Illegal connection: Cause a cycle!")
+                    raise RuntimeError("Illegal connection: Cause a cycle!")
 
     class BindingList:
         """Container for bindings(input or output interface).
@@ -357,7 +427,9 @@ def __getitem__(self, key):
                 if key == "output":
                     return self.output_bindings
 
-            raise RuntimeError(f"{key} not found!")
+                raise RuntimeError(f"{key} not found!")
+
+            raise RuntimeError('The data type of "key" is not supported!')
 
         def get_data_type(self, key, interface_type):
             """Get the module interface data type according to the key value and interface type.
@@ -468,6 +540,8 @@ def get_config(self):
         # Use topological sort to get the correct order of modules.
         self.dag_topology_sort()
         mconfig = {}
+        module_connection = {}
+        input_connection = {}
         for mod in self.mod_wrapper:
             # Generate pipeline configuration.
             mconf = {}
@@ -495,7 +569,7 @@ def get_config(self):
             mconf["mod_idx"] = module.idx
             mconf["output"] = output_conf
 
-            mconfig[mod] = {
+            module_connection[mod] = {
                 "pipeline": mconf,
                 "target_host": module.target_host,
                 "mod_name": "default",
@@ -505,6 +579,22 @@ def get_config(self):
                 "dev": module.dev,
             }
 
+        # Create a map of pipeline input and subgraph input. The map only depends on the
+        # global input bindings, so it is built once after every module has been visited
+        # instead of being rebuilt on each iteration of the module loop. This also makes
+        # "input_connection" a list even when there is no module to iterate over.
+        input_connection = []
+        for input_name in self.input_bindings.bindings:
+            input_dict = self.input_bindings.bindings[input_name].get_binding_dict()
+            if "interface_name" not in input_dict["connection"][0]:
+                raise RuntimeError("interface_name is missing in connection config!")
+            # Creating the map of global interface and subgraph interface.
+            input_map = {
+                "global_interface_name": input_dict["interface_name"],
+                "mod_idx": input_dict["connection"][0]["mod_idx"],
+                "module_interface_name": input_dict["connection"][0]["interface_name"],
+            }
+            input_connection.append(input_map)
+
+        mconfig["module_connection"] = module_connection
+        mconfig["input_connection"] = input_connection
         return mconfig
 
     def dag_topology_sort(self):
@@ -601,11 +691,11 @@ def export_library(self, directory_path):
             Export the files to this directory.
         """
         if not self.pipeline_mods:
-            raise RuntimeError(f"The pipeline executor has not been initialized.")
+            raise RuntimeError("The pipeline executor has not been initialized.")
 
         # Check if the directory_path exists.
         if not os.path.exists(directory_path):
             raise RuntimeError(f"The directory {directory_path} does not exist.")
         # Create an load configuration.
         load_config_file_name = "{}/load_config".format(directory_path)
         pipeline_config_file_name = "{}/pipeline_config".format(directory_path)

diff --git a/src/runtime/pipeline/pipeline_executor.cc b/src/runtime/pipeline/pipeline_executor.cc
@@ -34,13 +34,32 @@ PackedFunc PipelineExecutor::GetFunction(const std::string& name,
   if (name == "get_num_outputs") {
     return PackedFunc(
         [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->NumOutputs(); });
+  } else if (name == "get_input_pipeline_map") {
+    return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+      if (String::CanConvertFrom(args[0])) {
+        *rv = this->GetInputPipeplineMapping(args[0].operator String());
+      } else {
+        LOG(FATAL) << "Function only support the input name value in the form of string";
+      }
+    });
   } else {
     LOG(FATAL) << "Unknown packed function: " << name;
     return PackedFunc();
   }
   return nullptr;
 }
 
+/*!
+ * \brief Look up, in the input connection configuration, the subgraph input that a global
+ *  input name is connected to.
+ * \param input_name The global input name.
+ * \return An array holding the subgraph index (converted to a string) followed by the input
+ *  interface name of that subgraph.
+ */
+Array<String> PipelineExecutor::GetInputPipeplineMapping(std::string input_name) {
+  const std::pair<int, std::string> subgraph_input = input_connection_config[input_name];
+  const std::string mod_idx_text = std::to_string(subgraph_input.first);
+  return {mod_idx_text, subgraph_input.second};
+}
+
 /*!
  * \brief Use the mod_config information to create a graph runtime list.
  * \param mod_config The config information that generates by the export library function call.
@@ -108,11 +127,11 @@ void PipelineExecutor::Init(const std::vector<Module>& modules, const std::strin
   // Use JSONReader to load pipeline configuration.
   std::istringstream is(pipeline_json);
   dmlc::JSONReader reader(&is);
-  PipelineConfig& pipeline_config = this->LoadPipelineConfig(&reader);
-  ICHECK(!pipeline_config.Empty()) << "The pipeline config information is empty.";
+  this->LoadConfig(&reader);
+  ICHECK(!pipeline_config_.Empty()) << "The pipeline config information is empty.";
   // Initialize the pipeline function class used for pipeline thread pool management
   // and schedule etc. This function returns the number of output.
-  num_outputs_ = pipeline_scheduler_.PipelineInit(modules, pipeline_config);
+  num_outputs_ = pipeline_scheduler_.PipelineInit(modules, pipeline_config_);
   return;
 }
 

diff --git a/src/runtime/pipeline/pipeline_executor.h b/src/runtime/pipeline/pipeline_executor.h
@@ -24,12 +24,14 @@
 #ifndef TVM_RUNTIME_PIPELINE_PIPELINE_EXECUTOR_H_
 #define TVM_RUNTIME_PIPELINE_PIPELINE_EXECUTOR_H_
 
+#include <tvm/relay/expr.h>
 #include <tvm/runtime/registry.h>
 
 #include <array>
 #include <iostream>
 #include <sstream>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "pipeline_scheduler.h"
@@ -67,7 +69,13 @@ class TVM_DLL PipelineExecutor : public ModuleNode {
    * \return The corresponding packed function.
    */
   virtual PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self);
-
+  /*!
+   * \brief Using the global input name to get the index, and also get the input interface name
+     of corresponding subgraph from the input connection configuration.
+   * \param input_name The global input name.
+   * \return Returning the index and the input interface name of corresponding subgraph.
+   */
+  Array<String> GetInputPipeplineMapping(std::string input_name);
   /*!
    * \brief Get the number of outputs.
    *
@@ -115,37 +123,27 @@ class TVM_DLL PipelineExecutor : public ModuleNode {
   /*!\brief The class used to execute and schedule the pipeline logic.*/
   PipelineScheduler pipeline_scheduler_;
   /*!\brief The dependency information of each graph runtime module of the pipeline.*/
-  PipelineConfig pipeline_config_;
+  ConfigPipelineExecution pipeline_config_;
+  /*!\brief The map of global input and subgraph input.*/
+  InputConnectionConfig input_connection_config;
   /*!\brief The module information used to create the graph runtimes.*/
   ModuleConfig mod_config_;
   /*!\brief How many outputs are in this pipeline executor.*/
   size_t num_outputs_ = 0;
   /*!\brief Json loader.*/
-  PipelineConfig& LoadPipelineConfig(dmlc::JSONReader* reader) {
-    reader->BeginArray();
-    while (reader->NextArrayItem()) {
-      std::string key;
-      reader->BeginObject();
-      int mod_idx = -1;
-      OutputMap output;
-      std::string dev;
-      while (reader->NextObjectItem(&key)) {
-        if (key == "mod_idx") {
-          reader->Read(&mod_idx);
-        } else if (key == "dev") {
-          reader->Read(&dev);
-        } else if (key == "output") {
-          reader->Read(&output);
-        } else {
-          LOG(FATAL) << "do not support key " << key;
-        }
+  void LoadConfig(dmlc::JSONReader* reader) {
+    // The configuration is read as a single JSON object; each top-level key is dispatched
+    // to the member that stores the corresponding section.
+    reader->BeginObject();
+    std::string key;
+    while (reader->NextObjectItem(&key)) {
+      if (key == "module_connection") {
+        // The dependency information of each module of the pipeline.
+        reader->Read(&pipeline_config_);
+      } else if (key == "input_connection") {
+        // The map of global input and subgraph input.
+        reader->Read(&input_connection_config);
+      } else {
+        // Any other top-level key is reported as a fatal error instead of being skipped.
+        LOG(FATAL) << "do not support key " << key;
       }
-      ICHECK(mod_idx >= 0) << "Invalid mod_idx value " << mod_idx;
-      // Check if the output is successfully read.
-      ICHECK(!output.Empty()) << "Invalid output binding result.";
-      pipeline_config_.Insert(mod_idx, output);
     }
-    return pipeline_config_;
+    return;
   }
 };
 }  // namespace runtime

diff --git a/src/runtime/pipeline/pipeline_scheduler.cc b/src/runtime/pipeline/pipeline_scheduler.cc
@@ -28,7 +28,7 @@ namespace runtime {
  * \param pipeline_conf The dependency information of each graph executor module.
  */
 size_t PipelineScheduler::PipelineInit(const std::vector<Module>& modules,
-                                       const PipelineConfig& pipeline_config) {
+                                       const ConfigPipelineExecution& pipeline_config) {
   graph_modules_ = modules;
   int num_output = pipeline_config.GetGlobalOutputNum();
   return num_output;

diff --git a/src/runtime/pipeline/pipeline_scheduler.h b/src/runtime/pipeline/pipeline_scheduler.h
@@ -41,7 +41,8 @@ class PipelineScheduler {
    * \param modules The list of graph executor module.
    * \param pipeline_config The dependency information of each graph executor module.
    */
-  size_t PipelineInit(const std::vector<Module>& modules, const PipelineConfig& pipeline_config);
+  size_t PipelineInit(const std::vector<Module>& modules,
+                      const ConfigPipelineExecution& pipeline_config);
 
  private:
   /*!\brief The list of graph executors.*/