Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revamp Hannk IR #6379

Merged
merged 22 commits into from
Nov 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 142 additions & 24 deletions apps/hannk/interpreter/interpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@

#include <map>
#include <set>
#include <unordered_set>

// TODO: apparently not part of the public Halide API. Should it be?
extern "C" int halide_malloc_alignment();

namespace hannk {

Interpreter::Interpreter(std::unique_ptr<OpGroup> m, InterpreterOptions options)
// Takes ownership of the model; the constructor does no work itself —
// transformations and tensor allocation happen later in prepare().
Interpreter::Interpreter(OpPtr m, InterpreterOptions options)
: model_(std::move(m)), options_(std::move(options)) {
}

Expand All @@ -28,10 +29,10 @@ class TensorVisitor : public OpVisitor {

virtual void visit_tensor(const TensorPtr &t) = 0;

void visit(OpGroup *g) override {
void visit(const OpGroup *g) override {
for (int i = 0; i < g->op_count(); i++) {
op_index_++;
Op *op = g->op(i);
const Op *op = g->op(i);
for (int j = 0; j < op->input_count(); j++) {
visit_tensor(op->input(j));
}
Expand Down Expand Up @@ -88,7 +89,7 @@ class FindAllocatableTensors : public TensorVisitor {
std::map<TensorStoragePtr, TensorAllocationInfo> tensor_info;
};

std::unique_ptr<char[]> allocate_tensors(OpGroup *root, const InterpreterOptions &options) {
std::unique_ptr<char[]> allocate_tensors(const Op *root, const InterpreterOptions &options) {
// Find the tensors that we want to allocate in an arena,
// along the needed storage size and lifetime for each.
FindAllocatableTensors find_tensors;
Expand Down Expand Up @@ -161,6 +162,62 @@ class VerifyAllAllocated : public TensorVisitor {
}
};

#ifndef NDEBUG

// Debug-only visitor that walks the model and flags any Op whose
// (non-constant) input Tensor has not yet been produced — i.e. is neither
// a model input nor an output of a previously-visited Op.
class Checker : public OpVisitor {
using OpVisitor::visit;

// Tensors known to be available at this point in the traversal:
// the root's inputs plus every output of an Op already checked.
std::unordered_set<Tensor *> valid_tensors_;

// Verify all of op's inputs are already produced, then record op's
// outputs as produced. Constant tensors need no producer and are skipped.
// On the first violation, logs the offending Op/Tensor and latches
// `correct` to false.
void check_tensors(const Op *op) {
for (int j = 0; j < op->input_count(); j++) {
Tensor *t = op->input(j).get();
if (!t->is_constant() && !valid_tensors_.count(t)) {
HLOG(ERROR) << "Op " << op->name() << " uses tensor " << op->input(j)->name() << " but it is not produced yet\n";
correct = false;
return;
}
}
for (int j = 0; j < op->output_count(); j++) {
valid_tensors_.insert(op->output(j).get());
}
}

void visit_leaf(const Op *op) override {
if (!correct) {
// Already failed; skip further work.
return;
}
check_tensors(op);
}

void visit(const OpGroup *op) override {
if (!correct) {
return;
}
check_tensors(op);
// Recurse into the group's children via the base visitor.
OpVisitor::visit(op);
}

public:
explicit Checker(const Op *root) {
// The model's own inputs are available before any Op runs.
for (int j = 0; j < root->input_count(); j++) {
valid_tensors_.insert(root->input(j).get());
}
}

// True iff no ordering violation has been found so far.
bool correct = true;
};

// Verify that no Op comes before any of its input Tensors are produced.
// Walk the model with a Checker and abort (via HCHECK) if any Op is
// ordered before one of its input Tensors is produced. Debug builds only.
void do_check_op_order(const Op *root) {
    Checker checker(root);
    root->accept(&checker);
    // Check the condition directly instead of `if (!ok) HCHECK(0)`:
    // identical failure behavior, one less branch to read.
    HCHECK(checker.correct) << "The model is not in the correct order.";
}
#endif

} // namespace

bool Interpreter::prepare() {
Expand All @@ -178,14 +235,46 @@ bool Interpreter::prepare() {
return false;
}

if (!pad_for_ops(model_.get())) {
const auto dump_model = [this](const char *msg, int min_verbosity) {
if (options_.verbosity >= min_verbosity) {
std::ostringstream os;
os << msg << "\n";
model_->dump(os);
HLOG(INFO) << os.str();
}
};

dump_model("Model after prepare():", 3);

model_ = pad_for_ops(std::move(model_));
if (!model_) {
HLOG(ERROR) << "pad_for_ops() failed.";
return false;
}
in_place(model_.get());
fold_constants(model_.get());
remove_dead_ops(model_.get());
dump_model("Model after pad_for_ops():", 3);

model_ = in_place(std::move(model_));
dump_model("Model after in_place():", 3);

model_ = fold_constants(std::move(model_));
dump_model("Model after fold_constants():", 3);

model_ = flatten_groups(std::move(model_));
dump_model("Model after flatten_groups:", 3);

model_ = fuse_pad_ops(std::move(model_));
if (!model_) {
HLOG(ERROR) << "fuse_pad_ops() failed.";
return false;
}
dump_model("Model after fuse_pad_ops:", 3);

model_ = remove_dead_ops(std::move(model_));
dump_model("Model after remove_dead_ops:", 3);

#ifndef NDEBUG
do_check_op_order(model_.get());
#endif
assert(tensor_storage_arena_ == nullptr);
tensor_storage_arena_ = allocate_tensors(model_.get(), options_);

Expand All @@ -194,12 +283,7 @@ bool Interpreter::prepare() {
model_->accept(&verify_all);
#endif

if (options_.verbosity >= 2) {
std::ostringstream os;
os << "Model after transformations:\n";
model_->dump(os);
HLOG(INFO) << os.str();
}
dump_model("Model after all transformations:", 2);

prepared_ = true;
return true;
Expand All @@ -215,20 +299,54 @@ void Interpreter::execute() {

TensorPtr Interpreter::get_tensor(const std::string &name) {
HCHECK(prepared_);
for (int i = 0; i < model_->op_count(); i++) {
Op *op = model_->op(i);
for (int j = 0; j < op->input_count(); j++) {
if (op->input(j)->name() == name) {
return op->input(j);

class Finder : public OpVisitor {
using OpVisitor::visit;

bool find_tensor(const Op *op) {
if (result) {
return true;
}
for (int j = 0; j < op->input_count(); j++) {
if (op->input(j)->name() == name_) {
result = op->input(j);
return true;
}
}
for (int j = 0; j < op->output_count(); j++) {
if (op->output(j)->name() == name_) {
result = op->output(j);
return true;
}
}
return false;
}
for (int j = 0; j < op->output_count(); j++) {
if (op->output(j)->name() == name) {
return op->output(j);

void visit_leaf(const Op *op) override {
if (find_tensor(op)) {
return;
}
}
}
return nullptr;

void visit(const OpGroup *op) override {
if (find_tensor(op)) {
return;
}
OpVisitor::visit(op);
}

const std::string &name_;

public:
explicit Finder(const std::string &name)
: name_(name) {
}
TensorPtr result = nullptr;
};

Finder finder(name);
model_->accept(&finder);
return finder.result;
}

std::vector<TensorPtr> Interpreter::inputs() {
Expand Down
4 changes: 2 additions & 2 deletions apps/hannk/interpreter/interpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ struct InterpreterOptions {
};

class Interpreter {
std::unique_ptr<OpGroup> model_;
OpPtr model_;
std::unique_ptr<char[]> tensor_storage_arena_;
InterpreterOptions options_;
bool prepared_ = false;

public:
explicit Interpreter(std::unique_ptr<OpGroup> m, InterpreterOptions options = InterpreterOptions());
explicit Interpreter(OpPtr m, InterpreterOptions options = InterpreterOptions());
~Interpreter();

// Return the Tensor in the current Model with the given name.
Expand Down
51 changes: 0 additions & 51 deletions apps/hannk/interpreter/model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,6 @@ void Op::set_input(int idx, TensorPtr t) {
}
}

void Op::set_output(int idx, TensorPtr t) {
if (outputs_[idx]) {
outputs_[idx]->remove_producer(this);
}
outputs_[idx] = std::move(t);
if (outputs_[idx]) {
outputs_[idx]->add_producer(this);
}
}

void Op::set_input(TensorPtr t) {
set_input(0, std::move(t));
}

void Op::set_output(TensorPtr t) {
set_output(0, std::move(t));
}

bool Op::is_input(const TensorPtr &t) const {
for (auto &i : inputs_) {
if (i == t) {
Expand Down Expand Up @@ -112,39 +94,6 @@ BoundsMap OpGroup::map_bounds(int input_idx, int output_idx) const {
return result;
}

void OpGroup::add(OpPtr to_add, const Op *before) {
for (auto i = ops_.begin(); i != ops_.end(); ++i) {
if (i->get() == before) {
ops_.insert(i, std::move(to_add));
return;
} else {
for (int j = 0; j < to_add->output_count(); ++j) {
for (int k = 0; k < (*i)->input_count(); ++k) {
if ((*i)->input(k) == to_add->output(j)) {
// i consumes an output of to_add.
ops_.insert(i, std::move(to_add));
return;
}
}
}
}
}
ops_.push_back(std::move(to_add));
}

void OpGroup::remove(const Op *op) {
for (auto i = ops_.begin(); i != ops_.end(); ++i) {
if (i->get() == op) {
ops_.erase(i);
return;
}
}
}

void OpGroup::accept(OpVisitor *v) {
v->visit(this);
}

void OpGroup::dump(std::ostream &os, int indent) const {
Op::dump(os, indent);
for (const auto &i : ops_) {
Expand Down
Loading