
Graph mode non contiguous tensor issue #8281

Merged
merged 19 commits on May 25, 2022
Changes from all commits (19 commits)
309ee41
make sure tensors are contiguous during graph mode's compilation
xiacijie May 24, 2022
e874e9e
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
xiacijie May 24, 2022
c3366e9
make sure tensors are contiguous inside self._run and add test case
xiacijie May 24, 2022
183c013
Merge branch 'graph_mode_non_contiguous_tensor_issue' of github.com:O…
xiacijie May 24, 2022
90d17df
add license
xiacijie May 24, 2022
805aedb
remove device
xiacijie May 24, 2022
9f2819d
rename
xiacijie May 24, 2022
60a07bd
handle free eager tensor
xiacijie May 25, 2022
c3bff28
handle free eager tensors
xiacijie May 25, 2022
8762f54
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
mergify[bot] May 25, 2022
efd9322
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
xiacijie May 25, 2022
d9ff0ed
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
mergify[bot] May 25, 2022
033ffe1
remove unused include
xiacijie May 25, 2022
d83361f
Merge branch 'graph_mode_non_contiguous_tensor_issue' of github.com:O…
xiacijie May 25, 2022
1ff2e13
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
mergify[bot] May 25, 2022
69be1e5
skip cuda test if cpu only
xiacijie May 25, 2022
0135a2d
Merge branch 'graph_mode_non_contiguous_tensor_issue' of github.com:O…
xiacijie May 25, 2022
6471429
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
xiacijie May 25, 2022
21beb6e
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
xiacijie May 25, 2022
10 changes: 8 additions & 2 deletions oneflow/core/framework/op_interpreter/lazy_op_interpreter.cpp
@@ -34,11 +34,12 @@ limitations under the License.
#include "oneflow/core/framework/tensor_tuple.h"
#include "oneflow/core/framework/user_op_registry.h"
#include "oneflow/core/framework/nd_sbp.h"
#include "oneflow/core/job/lazy_mode.h"
#include "oneflow/core/operator/operator.h"
#include "oneflow/core/job/sbp_parallel.h"
#include "oneflow/core/job/job_build_and_infer_ctx_mgr.h"
#include "oneflow/core/vm/vm_util.h"

#include "oneflow/core/functional/functional.h"
namespace oneflow {

namespace one {
@@ -412,6 +413,12 @@ Maybe<std::string> GradAccTryInsertRepeatAfterFreeVar(const OperatorConf& var_co
}

Maybe<void> AddFreeEagerTensorToVariableOp(const std::shared_ptr<Tensor>& input_tensor) {
if (!input_tensor->is_contiguous()) {
LazyMode::Guard lazy_mode_disabled_guard(false);
JUST(functional::InplaceToContiguous(input_tensor));
JUST(vm::CurrentRankSync());
}

CHECK_OR_RETURN(input_tensor->is_eager());
const std::string& empty_lbn = TensorNameScope::Global()->Lookup(input_tensor);
CHECK_OR_RETURN(empty_lbn.empty());
@@ -455,7 +462,6 @@ Maybe<void> AddFreeEagerTensorToVariableOp(const std::shared_ptr<Tensor>& input_
// create a new tensor for repeat op out. We just set repeat lbn as this free eager tensor's lbn.
TensorNameScope::Global()->Record(input_tensor,
*JUST(GradAccTryInsertRepeatAfterFreeVar(op_conf)));

return Maybe<void>::Ok();
}

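For context (an illustration, not part of the diff): a "free eager tensor" here is a tensor created outside the nn.Graph module but captured inside build(). A minimal Python sketch of the non-contiguous case this hunk now normalizes, using the same transpose trick as the new test added below:

import oneflow as flow

# transpose -> contiguous -> transpose leaves the strides mismatched with a
# contiguous layout, so is_contiguous() reports False
bias = flow.ones(3, 4).transpose(0, 1).contiguous().transpose(0, 1)
print(bias.is_contiguous())  # False

# If a tensor like this is captured as a free eager tensor in nn.Graph.build(),
# AddFreeEagerTensorToVariableOp now makes it contiguous in place (with lazy mode
# temporarily disabled and the current rank synced) before the variable op is created.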
2 changes: 1 addition & 1 deletion oneflow/core/framework/op_interpreter/op_interpreter.cpp
@@ -93,7 +93,7 @@ Maybe<void> AutogradInterpreter::Apply(const OpExpr& op_expr, const TensorTuple&
// NOTE: if this op not support stride, then need to tensor->contiguous()
#define HANDLE_NON_CONTIGUOUS_INPUT(tensor_tuple_ptr) \
TensorTuple tmp_inputs; \
if (!JUST(op_expr.SupportNonContiguous())) { \
if (!LazyMode::is_enabled() && !JUST(op_expr.SupportNonContiguous())) { \
tmp_inputs.resize(inputs.size()); \
for (size_t i = 0; i < inputs.size(); i++) { tmp_inputs[i] = inputs[i]->contiguous(); } \
tensor_tuple_ptr = &tmp_inputs; \
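A note on the guard added above (the reasoning is inferred from the diff, not stated in the PR): with the extra !LazyMode::is_enabled() check, the per-op contiguous conversion is skipped while a graph is being traced, since the graph-level hooks added in graph.py below make inputs and states contiguous before tracing. Eager-mode behaviour is unchanged; a hedged sketch (whether matmul itself needs the conversion is an assumption, the point is that any conversion happens inside the interpreter, invisible to the caller):

import oneflow as flow

x = flow.ones(2, 3).transpose(0, 1)  # non-contiguous eager input, shape (3, 2)
y = flow.matmul(x, flow.ones(2, 3))  # the eager interpreter makes inputs contiguous if the op requires it
print(y.shape)                       # oneflow.Size([3, 3])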
20 changes: 20 additions & 0 deletions python/oneflow/nn/graph/graph.py
@@ -597,6 +597,12 @@ def _state(self):
for bu in bu_gen:
yield bu

def __ensure_state_tensors_contiguous(self):
for state_block in self._state():
state_tensor = state_block.origin
if not state_tensor.is_contiguous():
state_tensor.contiguous_()

def _filter_states(self):
state_tensor_set = set()
state_tensors = []
@@ -708,6 +714,7 @@ def build(self, *args, **kwargs):
return a_graph

def _compile(self, *args, **kwargs):
self.__ensure_input_tensors_contiguous(*args, **kwargs)
_, eager_outputs = self.build_graph(*args, **kwargs)
self.finish_complie_and_init_runtime()
return eager_outputs
@@ -801,6 +808,8 @@ def finish_complie_and_init_runtime(self):
self._additional_variable_tobe_loaded.clear()

def __build_graph(self, *args, **kwargs):
self.__ensure_state_tensors_contiguous()

# Filter to get unique states in graph
state_op_names = self._filter_states()

@@ -1035,6 +1044,7 @@ def check_id_and_add(t, name):
)

def __run(self, *args, **kwargs):
self.__ensure_input_tensors_contiguous(*args, **kwargs)
try:
flattened_eager_args = self.__flatten_io("input", *args, **kwargs)
outputs_tensor_tuple = self._outputs_tensor_tuple_buffer[
@@ -1342,6 +1352,16 @@ def __del__(self):
return
oneflow._oneflow_internal.eager.Sync()

def __ensure_input_tensors_contiguous(self, *args, **kwargs):
io_node = IONode(None, 0, (args, kwargs), "_" + self.name + "_" + "input")

def leaf_node_fn(node):
if isinstance(node._value, Tensor) and not node._value.is_contiguous():
node._value.contiguous_()
Flowingsun007 (Contributor) commented on May 24, 2022:

For input tensors, do we even need the in-place contiguous here?

xiacijie (Contributor, Author) replied on May 24, 2022:

For input tensors, either the in-place or the non-in-place version works.

Contributor replied:

I'd still suggest the non-in-place version: it saves one op, so it's more efficient, and it also looks a bit cleaner.
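A minimal sketch of the two options being compared (illustration only, not part of the PR):

import oneflow as flow

t = flow.ones(3, 4).transpose(0, 1)  # a non-contiguous view
print(t.is_contiguous())             # False

t_copy = t.contiguous()              # non-in-place: returns a new contiguous tensor
print(t_copy.is_contiguous())        # True
print(t.is_contiguous())             # still False

t.contiguous_()                      # in-place (what the PR uses): makes t itself contiguous
print(t.is_contiguous())             # True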

return node

io_node.map_leaf(leaf_node_fn)


if __name__ == "__main__":
import doctest
104 changes: 104 additions & 0 deletions python/oneflow/test/graph/test_graph_non_contiguous_tensors.py
@@ -0,0 +1,104 @@
"""
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import unittest
import oneflow.unittest
import oneflow as flow
import numpy as np


class ModuleTest(flow.nn.Module):
def __init__(self, contiguous: bool, device):
super().__init__()
if contiguous:
self.weight = flow.nn.Parameter(flow.ones(4, 3, device=device))
else:
self.weight = flow.nn.Parameter(
flow.ones(3, 4, device=device).transpose(0, 1)
)

def forward(self, input):
res = flow.matmul(input, self.weight)
return res


def _test_graph_non_contiguous_tensors(test_case, device):
bias = flow.tensor(
[[1, 2, 3], [3, 4, 5], [7, 7, 7],], dtype=flow.float32, device=device
)

free_eager_bias_contiguous = bias
free_eager_bias_non_contiguous = bias.transpose(0, 1).contiguous().transpose(0, 1)
test_case.assertTrue(free_eager_bias_contiguous.is_contiguous())
test_case.assertFalse(free_eager_bias_non_contiguous.is_contiguous())

class GraphTestContiguousTensors(flow.nn.Graph):
def __init__(self):
super().__init__()
self.model = ModuleTest(True, device)

def build(self, input):
res = self.model(input) + free_eager_bias_contiguous
return res

class GraphTestNonContiguousTensors(flow.nn.Graph):
def __init__(self):
super().__init__()
self.model = ModuleTest(False, device)

def build(self, input):
res = self.model(input) + free_eager_bias_non_contiguous
return res

graph_contiguous_tensors = GraphTestContiguousTensors()
graph_non_contiguous_tensors = GraphTestNonContiguousTensors()

test_case.assertTrue(graph_contiguous_tensors.model.weight.origin.is_contiguous())
test_case.assertFalse(
graph_non_contiguous_tensors.model.weight.origin.is_contiguous()
)

inp = flow.tensor(
[[1, 2, 3], [4, 5, 6], [3, 3, 3], [7, 8, 8]], dtype=flow.float32, device=device
)

non_contiguous_input = inp.transpose(0, 1)
test_case.assertFalse(non_contiguous_input.is_contiguous())

contiguous_input = non_contiguous_input.contiguous()
test_case.assertTrue(contiguous_input.is_contiguous())

contiguous_graph_output = graph_contiguous_tensors(contiguous_input)
non_contiguous_graph_output = graph_non_contiguous_tensors(non_contiguous_input)
test_case.assertTrue(
np.array_equal(
contiguous_graph_output.numpy(), non_contiguous_graph_output.numpy()
)
)


@flow.unittest.skip_unless_1n1d()
class TestGraphNonContiguousTensor(oneflow.unittest.TestCase):
def test_graph_non_contiguous_tensors_cpu(test_case):
_test_graph_non_contiguous_tensors(test_case, flow.device("cpu"))

@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
def test_graph_non_contiguous_tensors_gpu(test_case):
_test_graph_non_contiguous_tensors(test_case, flow.device("cuda"))


if __name__ == "__main__":
unittest.main()