Closed
Changes from all commits · 36 commits
425430d
[QNN] Refactor fixed point multiplication in requantize (#4073)
vinx13 Oct 8, 2019
b5bcdbb
[Fix][VM] Fix VM invoke with set_params (#4079)
icemelon Oct 8, 2019
153fd7f
[AlterOpLayout][x86] NHWC to NCHWc conv support. (#4080)
anijain2305 Oct 8, 2019
2ee0f65
[CodeGen] Disable -mfloat-abi hard option for LLVM < 6.0 (#4071)
mbaret Oct 8, 2019
be86954
[VTA] hotfix for de10-nano driver (#4081)
huajsj Oct 8, 2019
90b10b8
Fix wrong n_trial number in autotvm tutorials' progress bar (#4070)
dati91 Oct 8, 2019
ec375a8
[ARITH] Add floordiv for the deduce bound (#4025)
Oct 8, 2019
1c56c72
[topi] enable fp16 sort for arm (#4084)
yzhliu Oct 8, 2019
3a32729
[TOPI][X86] Pool operator parallel support. (#4090)
anijain2305 Oct 9, 2019
c12275e
[relay] Small refactor for context (#4091)
zhiics Oct 9, 2019
f2abd9f
[TVM] Rewrite simplification rule to eliminate unnecessary conditiona…
ajtulloch Oct 9, 2019
4d875d1
[TOPI] Add valid auto tvm for Intel Graphics (#4078)
Laurawly Oct 9, 2019
fc2713e
[Relay][VM] Fix constant folding issue in VM compiler (#4077)
wweic Oct 10, 2019
4b8cb3a
[DOCKER] torch install depends on future package (#4098)
mshawcroft Oct 10, 2019
283afac
Fixing tensor not found issue in bitserial operator (#4095)
arangrej Oct 10, 2019
9572d98
[Fix] Fix the logic of the number of nodes checking in op fusion (#4074)
yidawang Oct 10, 2019
9bbc98c
- Adding support for Mxnet flavored dequantization for both default a…
shoubhik Oct 10, 2019
f312288
correct error (#4093)
Laurawly Oct 10, 2019
aad48ff
Add a python tutorial of deploying tvm module with tvm runtime only (…
fwd4 Oct 10, 2019
47e50e1
[VTA][TSIM] Serial GEMM Application Added (#4082)
BenjaminTu Oct 10, 2019
aa42413
[TOPI] FIFO buffer op, to accelerate sequence modeling with dilated c…
hcho3 Oct 10, 2019
d69c6fd
[Relay][AlterOp] NHWC to NCHWc support for Pool, pad, concatenate, su…
anijain2305 Oct 11, 2019
ef66653
Tutorial: update Building a Graph Convolutional Network tutorial (#4060)
cylinbao Oct 11, 2019
15ae978
force code object v2 for amd gpu backend (#4099)
petrex Oct 11, 2019
9d5cba2
[tvm][any] broadcast with values other than one (#3967)
zhiics Oct 11, 2019
d08ec10
[Fix] Fix a few bugs when dtype is fp16 (#4088)
icemelon Oct 11, 2019
ce72e9b
[codegen] Add multiple operands and function support when using fp16 …
Oct 11, 2019
985d219
adding soiferj to the list of reviewers (#4108)
tmoreau89 Oct 12, 2019
068c148
Add parser support for CAST tflite operator (#4096)
inadob Oct 13, 2019
d7e30ed
add dependency of compilation with LLVM (#4117)
zhiqiu Oct 13, 2019
dcf084e
save
MarisaKirisame Oct 4, 2019
b8df23a
save
MarisaKirisame Oct 13, 2019
50ed608
refactor
MarisaKirisame Oct 14, 2019
9c679b7
save
MarisaKirisame Oct 14, 2019
5d1e05d
save
MarisaKirisame Oct 14, 2019
9ab4864
save
MarisaKirisame Oct 15, 2019
1 change: 1 addition & 0 deletions CONTRIBUTORS.md
@@ -97,6 +97,7 @@ We do encourage everyone to work anything they are interested in.
- [Junru Shao](https://github.com/junrushao1994): @junrushao1994
- [Haichen Shen](https://github.com/icemelon9): @icemelon9
- [Xingjian Shi](https://github.com/sxjscience): @sxjscience
- [Jon Soifer](https://github.com/soiferj): @soiferj
- [Andrew Tulloch](https://github.com/ajtulloch): @ajtulloch
- [Luis Vega](https://github.com/vegaluisjose): @vegaluisjose
- [Alex Weaver](https://github.com/alex-weaver): @alex-weaver
48 changes: 48 additions & 0 deletions apps/howto_deploy/python_deploy.py
@@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Copyright (c) 2017 by Contributors
# brief Example code on loading and running a TVM module.
# file python_deploy.py

import tvm
import numpy as np

def verify(mod, fname):
# Get the function from the module
f = mod.get_function(fname)
# Use tvm.nd.array to convert the numpy ndarray to the TVM
# NDArray type, so that the function can be invoked normally
N = 10
x = tvm.nd.array(np.arange(N, dtype=np.float32))
y = tvm.nd.array(np.zeros(N, dtype=np.float32))
# Invoke the function
f(x, y)
np_x = x.asnumpy()
np_y = y.asnumpy()
# Verify correctness of function
assert(np.all([xi+1 == yi for xi, yi in zip(np_x, np_y)]))
print("Finish verification...")


if __name__ == "__main__":
# The normal dynamic loading method for deployment
mod_dylib = tvm.module.load("lib/test_addone_dll.so")
print("Verify dynamic loading from test_addone_dll.so")
verify(mod_dylib, "addone")
# There might be methods to use the system lib way in
# python, but dynamic loading is good enough for now.
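For context, the shared library loaded above is produced by the how-to-deploy build step. A minimal sketch of how such an addone library could be built and exported from Python (assuming the tvm.build and export_library APIs of this release) is:

import tvm

n = tvm.var("n")
A = tvm.placeholder((n,), name="A")
B = tvm.compute(A.shape, lambda i: A[i] + 1.0, name="B")
s = tvm.create_schedule(B.op)
# Build for the host CPU and export a shared library that
# tvm.module.load can pick up at deployment time.
m = tvm.build(s, [A, B], "llvm", name="addone")
m.export_library("lib/test_addone_dll.so")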
5 changes: 4 additions & 1 deletion apps/howto_deploy/run_example.sh
@@ -25,5 +25,8 @@ export DYLD_LIBRARY_PATH=../../build:${DYLD_LIBRARY_PATH}
echo "Run the deployment with all in one packed library..."
lib/cpp_deploy_pack

echo "Run the deployment with all in normal library..."
echo "Run the cpp deployment with all in normal library..."
lib/cpp_deploy_normal

echo "Run the python deployment with all in normal library..."
python python_deploy.py
2 changes: 1 addition & 1 deletion cmake/modules/VTA.cmake
@@ -84,7 +84,7 @@ elseif(PYTHON)
# Rules for Zynq-class FPGAs with pynq OS support (see pynq.io)
if(${VTA_TARGET} STREQUAL "pynq" OR
${VTA_TARGET} STREQUAL "ultra96")
file(GLOB FPGA_RUNTIME_SRCS vta/src/pynq/pynq_driver.cc)
list(APPEND FPGA_RUNTIME_SRCS vta/src/pynq/pynq_driver.cc)
# Rules for Pynq v2.4
find_library(__cma_lib NAMES cma PATH /usr/lib)
elseif(${VTA_TARGET} STREQUAL "de10nano") # DE10-Nano rules
4 changes: 4 additions & 0 deletions docker/install/ubuntu_install_onnx.sh
@@ -23,5 +23,9 @@ set -o pipefail
# fix to certain version for now
pip3 install onnx==1.5.0

# torch depends on a number of other packages, but unhelpfully, does
# not expose that in the wheel!!!
pip3 install future

pip3 install https://download.pytorch.org/whl/cu80/torch-1.0.1.post2-cp36-cp36m-linux_x86_64.whl
pip3 install torchvision
2 changes: 1 addition & 1 deletion docs/install/from_source.rst
@@ -54,7 +54,7 @@ Our goal is to build the shared libraries:
.. code:: bash

sudo apt-get update
sudo apt-get install -y python3 python3-dev python3-setuptools gcc libtinfo-dev zlib1g-dev build-essential cmake
sudo apt-get install -y python3 python3-dev python3-setuptools gcc libtinfo-dev zlib1g-dev build-essential cmake libedit-dev libxml2-dev

The minimal building requirements are

2 changes: 2 additions & 0 deletions include/tvm/arithmetic.h
@@ -245,6 +245,8 @@ class RewriteSimplifier {
const Expr& new_expr,
bool override = false);

std::function<void()> EnterConstraint(const Expr& constraint);

private:
friend class Analyzer;
friend class ConstraintContext;
8 changes: 8 additions & 0 deletions include/tvm/ir_pass.h
@@ -206,6 +206,14 @@ Stmt StorageFlatten(Stmt stmt,
Map<Tensor, Buffer> extern_buffer,
int cache_line_size,
bool create_bound_attribute = false);
/*!
* \brief Verify whether any argument is bound to a compact buffer.
*
* \param stmt The stmt to be verified.
* \return true if there is any buffer_bind_scope attribute found,
* otherwise, false.
*/
bool VerifyCompactBuffer(Stmt stmt);

/*!
* \brief Remove No Op from the Stmt.
9 changes: 9 additions & 0 deletions include/tvm/relay/attrs/nn.h
@@ -376,6 +376,15 @@ struct SparseTransposeAttrs : public tvm::AttrsNode<SparseTransposeAttrs> {
TVM_DECLARE_ATTRS(SparseTransposeAttrs, "relay.attrs.SparseTransposeAttrs") {}
};

/*! \brief Attributes for FIFO buffer operator */
struct FIFOBufferAttrs : public tvm::AttrsNode<FIFOBufferAttrs> {
int axis;

TVM_DECLARE_ATTRS(FIFOBufferAttrs, "relay.attrs.FIFOBufferAttrs") {
TVM_ATTR_FIELD(axis).set_default(0);
}
};

/*! \brief Attributes for upsampling operator */
struct UpSamplingAttrs : public tvm::AttrsNode<UpSamplingAttrs> {
int scale;
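As an illustration outside this diff, the new axis attribute would typically be set through the Relay frontend. Assuming the op is exposed as relay.nn.fifo_buffer, a usage sketch is:

from tvm import relay

# Push one new time step into a sliding window of length 3 along
# axis 2 (assumed here to be the sequence axis).
data = relay.var("data", shape=(1, 1, 1), dtype="float32")
buf = relay.var("buffer", shape=(1, 1, 3), dtype="float32")
out = relay.nn.fifo_buffer(data, buf, axis=2)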
12 changes: 10 additions & 2 deletions include/tvm/relay/op.h
@@ -532,14 +532,18 @@ inline const TVMRetValue& GenericOpMap::operator[](const Op& op) const {
CHECK(op.defined());
const uint32_t idx = op->index_;
CHECK(idx < data_.size() && data_[idx].second != 0)
<< "Attribute " << attr_name_ << " has not been registered for Operator "
<< op->name;
<< "Attribute " << attr_name_ << " has not been registered for Operator "
<< op->name;
return data_[idx].first;
}

template <typename ValueType>
inline ValueType GenericOpMap::get(const Op& op, ValueType value) const {
CHECK(op.defined());
if (count(op) == 0) {
std::cout << "Attribute " << attr_name_ << " has not been registered for Operator "
<< op->name << std::endl;
}
const uint32_t idx = op->index_;
if (idx < data_.size() && data_[idx].second != 0) {
return data_[idx].first;
@@ -551,6 +555,10 @@ inline ValueType GenericOpMap::get(const Op& op, ValueType value) const {
template <typename ValueType>
inline ValueType GenericOpMap::get(const Expr& expr, ValueType value) const {
CHECK(expr.defined());
if (expr.as<OpNode>() && count(Downcast<Op>(expr)) == 0) {
std::cout << "Attribute " << attr_name_ << " has not been registered for Operator "
<< Downcast<Op>(expr)->name << std::endl;
}
if (const OpNode* op = expr.as<OpNode>()) {
const uint32_t idx = op->index_;
if (idx < data_.size() && data_[idx].second != 0) {
5 changes: 5 additions & 0 deletions include/tvm/relay/op_attr_types.h
@@ -79,6 +79,11 @@ using TNonComputational = bool;
*/
using TShapeDataDependant = bool;

/*!
* \brief Can we lower this even with dynamic shape?
*/
using TOpDynamicCompute = bool;

/*!
* \brief Computation description interface.
*
16 changes: 15 additions & 1 deletion include/tvm/runtime/vm.h
@@ -193,7 +193,7 @@ struct Instruction {
static Instruction Ret(RegName return_reg);
/*! \brief Construct a fatal instruction.
* \return The fatal instruction.
* */
* */
static Instruction Fatal();
/*! \brief Construct a invoke packed instruction.
* \param packed_index The index of the packed function.
@@ -419,6 +419,20 @@ class VirtualMachine : public runtime::ModuleNode {
runtime::Module lib;
/*! \brief The virtual machine's packed function table. */
std::vector<PackedFunc> packed_funcs;
size_t next_packed_func_index = 0;
size_t NewPackedFuncIndex() {
++next_packed_func_index;
return next_packed_func_index - 1;
}
/*! \brief Construct a invoke packed instruction.
* \param pf The PackedFunc.
* \param arity The arity of the function.
* \param output_size The number of outputs of the packed function.
* \param args The argument registers.
* \return The invoke packed instruction.
*/
Instruction InvokeNewPacked(const PackedFunc& pf, Index arity, Index output_size,
const std::vector<RegName>& args);
/*! \brief The virtual machine's function table. */
std::vector<VMFunction> functions;
/*! \brief The current stack of call frames. */
3 changes: 2 additions & 1 deletion nnvm/tutorials/tune_nnvm_arm.py
@@ -307,7 +307,8 @@ def tune_tasks(tasks,
tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

# do tuning
tuner_obj.tune(n_trial=min(n_trial, len(tsk.config_space)),
n_trial = min(n_trial, len(tsk.config_space))
tuner_obj.tune(n_trial=n_trial,
early_stopping=early_stopping,
measure_option=measure_option,
callbacks=[
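The clamped value presumably also feeds the progress-bar callback in the truncated lines that follow the tune() call; a sketch of the intended pattern, with callback names assumed from the autotvm tutorials, is:

# inside tune_tasks(), after tuner_obj has been created for task tsk
n_trial = min(n_trial, len(tsk.config_space))
tuner_obj.tune(n_trial=n_trial,
               early_stopping=early_stopping,
               measure_option=measure_option,
               callbacks=[
                   # pass the clamped total so the bar's denominator matches
                   # the number of trials that will actually run
                   autotvm.callback.progress_bar(n_trial, prefix=prefix),
                   autotvm.callback.log_to_file(tmp_log_file)])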
3 changes: 2 additions & 1 deletion nnvm/tutorials/tune_nnvm_cuda.py
@@ -204,7 +204,8 @@ def tune_tasks(tasks,
tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

# do tuning
tuner_obj.tune(n_trial=min(n_trial, len(tsk.config_space)),
n_trial = min(n_trial, len(tsk.config_space))
tuner_obj.tune(n_trial=n_trial,
early_stopping=early_stopping,
measure_option=measure_option,
callbacks=[
3 changes: 2 additions & 1 deletion nnvm/tutorials/tune_nnvm_mobile_gpu.py
@@ -290,7 +290,8 @@ def tune_tasks(tasks,
tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

# do tuning
tuner_obj.tune(n_trial=min(n_trial, len(tsk.config_space)),
n_trial = min(n_trial, len(tsk.config_space))
tuner_obj.tune(n_trial=n_trial,
early_stopping=early_stopping,
measure_option=measure_option,
callbacks=[
2 changes: 1 addition & 1 deletion python/tvm/autotvm/util.py
@@ -158,7 +158,7 @@ def get_const_int(exp):
if not isinstance(exp, (expr.IntImm, expr.UIntImm)):
exp = ir_pass.Simplify(exp)
if not isinstance(exp, (expr.IntImm, expr.UIntImm)):
raise ValueError("Expect value to be constant int")
raise ValueError("Expect value to be constant int, but is: " + str(exp))
return exp.value


18 changes: 15 additions & 3 deletions python/tvm/build_module.py
@@ -264,14 +264,17 @@ def build_config(**kwargs):

return config

def get_binds(args, binds=None):
def get_binds(args, compact=False, binds=None):
"""Internal function to get binds and arg_list given arguments.

Parameters
----------
args : list of Buffer or Tensor or Var
The argument lists to the function.

compact : bool
Whether the statement has already been bound to a compact buffer.

binds : dict of :any:`Tensor` to :any:`Buffer`, optional
Dictionary that maps the Tensor to Buffer which specified the data layout
requirement of the function. By default, a new compact buffer is created
@@ -290,12 +293,15 @@ def get_binds(args, binds=None):
arg_list = []
for x in args:
if isinstance(x, tensor.Tensor):
any_dim = any(isinstance(i, expr.Var) for i in x.shape)
buffer_type = "auto_broadcast" if any_dim and not compact else ""
if x not in binds:
buf = api.decl_buffer(x.shape,
dtype=x.dtype,
name=x.name,
data_alignment=cfg.data_alignment,
offset_factor=cfg.offset_factor)
offset_factor=cfg.offset_factor,
buffer_type=buffer_type)
binds[x] = buf
arg_list.append(buf)
else:
@@ -361,7 +367,6 @@ def lower(sch,
The result function, if with_api_wrapper=False
Then the Stmt before make api is returned.
"""
binds, arg_list = get_binds(args, binds)
cfg = current_build_config()
add_lower_pass = cfg.add_lower_pass if cfg.add_lower_pass else []
if cfg.dump_pass_ir:
@@ -377,11 +382,16 @@

for f in lower_phase0:
stmt = f(stmt)

compact = ir_pass.VerifyCompactBuffer(stmt)
binds, arg_list = get_binds(args, compact, binds)

# Phase 1
stmt = ir_pass.StorageFlatten(stmt, binds, 64, cfg.instrument_bound_checkers)
stmt = ir_pass.CanonicalSimplify(stmt)
for f in lower_phase1:
stmt = f(stmt)

# Phase 2
if not simple_mode:
stmt = ir_pass.LoopPartition(stmt, cfg.partition_const_loop)
@@ -400,6 +410,7 @@
cfg.unroll_explicit)
for f in lower_phase2:
stmt = f(stmt)

# Phase 3
stmt = ir_pass.Simplify(stmt)
stmt = ir_pass.LowerStorageAccessInfo(stmt)
@@ -413,6 +424,7 @@
stmt = ir_pass.InstrumentBoundCheckers(stmt)
if simple_mode:
return stmt

return ir_pass.MakeAPI(stmt, name, arg_list, 0, cfg.restricted_func)


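A small sketch of the case this change targets: with fully symbolic shapes and no explicit binds, get_binds now declares "auto_broadcast" buffers unless VerifyCompactBuffer finds a buffer_bind_scope attribute in the lowered statement. The snippet below assumes the tvm/topi APIs of this release:

import tvm
import topi

n = tvm.var("n")
m = tvm.var("m")
A = tvm.placeholder((n, m), name="A")
B = tvm.placeholder((n, m), name="B")
C = topi.add(A, B)  # broadcast add with symbolic shapes
s = tvm.create_schedule(C.op)
# No user binds and no compact buffer_bind_scope, so the arguments are
# bound to "auto_broadcast" buffers during lowering.
print(tvm.lower(s, [A, B, C], simple_mode=True))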