
Graph mode non contiguous tensor issue #8281

Merged
merged 19 commits on May 25, 2022
Changes from all commits (19 commits)
309ee41
make sure tensors are contiguous during graph mode's compilation
xiacijie May 24, 2022
e874e9e
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
xiacijie May 24, 2022
c3366e9
make sure tensors are contiguous inside self._run and add test case
xiacijie May 24, 2022
183c013
Merge branch 'graph_mode_non_contiguous_tensor_issue' of github.com:O…
xiacijie May 24, 2022
90d17df
add license
xiacijie May 24, 2022
805aedb
remove device
xiacijie May 24, 2022
9f2819d
rename
xiacijie May 24, 2022
60a07bd
handle free eager tensor
xiacijie May 25, 2022
c3bff28
handle free eager tensors
xiacijie May 25, 2022
8762f54
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
mergify[bot] May 25, 2022
efd9322
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
xiacijie May 25, 2022
d9ff0ed
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
mergify[bot] May 25, 2022
033ffe1
remove unused include
xiacijie May 25, 2022
d83361f
Merge branch 'graph_mode_non_contiguous_tensor_issue' of github.com:O…
xiacijie May 25, 2022
1ff2e13
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
mergify[bot] May 25, 2022
69be1e5
skip cuda test if cpu only
xiacijie May 25, 2022
0135a2d
Merge branch 'graph_mode_non_contiguous_tensor_issue' of github.com:O…
xiacijie May 25, 2022
6471429
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
xiacijie May 25, 2022
21beb6e
Merge branch 'master' into graph_mode_non_contiguous_tensor_issue
xiacijie May 25, 2022
10 changes: 8 additions & 2 deletions oneflow/core/framework/op_interpreter/lazy_op_interpreter.cpp
@@ -34,11 +34,12 @@ limitations under the License.
#include "oneflow/core/framework/tensor_tuple.h"
#include "oneflow/core/framework/user_op_registry.h"
#include "oneflow/core/framework/nd_sbp.h"
#include "oneflow/core/job/lazy_mode.h"
#include "oneflow/core/operator/operator.h"
#include "oneflow/core/job/sbp_parallel.h"
#include "oneflow/core/job/job_build_and_infer_ctx_mgr.h"
#include "oneflow/core/vm/vm_util.h"

#include "oneflow/core/functional/functional.h"
namespace oneflow {

namespace one {
@@ -412,6 +413,12 @@ Maybe<std::string> GradAccTryInsertRepeatAfterFreeVar(const OperatorConf& var_co
}

Maybe<void> AddFreeEagerTensorToVariableOp(const std::shared_ptr<Tensor>& input_tensor) {
if (!input_tensor->is_contiguous()) {
LazyMode::Guard lazy_mode_disabled_guard(false);
JUST(functional::InplaceToContiguous(input_tensor));
JUST(vm::CurrentRankSync());
}

CHECK_OR_RETURN(input_tensor->is_eager());
const std::string& empty_lbn = TensorNameScope::Global()->Lookup(input_tensor);
CHECK_OR_RETURN(empty_lbn.empty());
@@ -455,7 +462,6 @@ Maybe<void> AddFreeEagerTensorToVariableOp(const std::shared_ptr<Tensor>& input_
// create a new tensor for repeat op out. We just set repeat lbn as this free eager tensor's lbn.
TensorNameScope::Global()->Record(input_tensor,
*JUST(GradAccTryInsertRepeatAfterFreeVar(op_conf)));

return Maybe<void>::Ok();
}

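For context (an illustration, not part of the diff): a "free eager tensor" here is a tensor created outside the nn.Graph module but captured inside build(). A minimal Python sketch of the non-contiguous case this hunk now normalizes, using the same transpose trick as the new test added below:

import oneflow as flow

# transpose -> contiguous -> transpose leaves the strides mismatched with a
# contiguous layout, so is_contiguous() reports False
bias = flow.ones(3, 4).transpose(0, 1).contiguous().transpose(0, 1)
print(bias.is_contiguous())  # False

# If a tensor like this is captured as a free eager tensor in nn.Graph.build(),
# AddFreeEagerTensorToVariableOp now makes it contiguous in place (with lazy mode
# temporarily disabled and the current rank synced) before the variable op is created.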
2 changes: 1 addition & 1 deletion oneflow/core/framework/op_interpreter/op_interpreter.cpp
@@ -93,7 +93,7 @@ Maybe<void> AutogradInterpreter::Apply(const OpExpr& op_expr, const TensorTuple&
// NOTE: if this op not support stride, then need to tensor->contiguous()
#define HANDLE_NON_CONTIGUOUS_INPUT(tensor_tuple_ptr) \
TensorTuple tmp_inputs; \
if (!JUST(op_expr.SupportNonContiguous())) { \
if (!LazyMode::is_enabled() && !JUST(op_expr.SupportNonContiguous())) { \
tmp_inputs.resize(inputs.size()); \
for (size_t i = 0; i < inputs.size(); i++) { tmp_inputs[i] = inputs[i]->contiguous(); } \
tensor_tuple_ptr = &tmp_inputs; \
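A note on the guard added above (the reasoning is inferred from the diff, not stated in the PR): with the extra !LazyMode::is_enabled() check, the per-op contiguous conversion is skipped while a graph is being traced, since the graph-level hooks added in graph.py below make inputs and states contiguous before tracing. Eager-mode behaviour is unchanged; a hedged sketch (whether matmul itself needs the conversion is an assumption, the point is that any conversion happens inside the interpreter, invisible to the caller):

import oneflow as flow

x = flow.ones(2, 3).transpose(0, 1)  # non-contiguous eager input, shape (3, 2)
y = flow.matmul(x, flow.ones(2, 3))  # the eager interpreter makes inputs contiguous if the op requires it
print(y.shape)                       # oneflow.Size([3, 3])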
20 changes: 20 additions & 0 deletions python/oneflow/nn/graph/graph.py
@@ -597,6 +597,12 @@ def _state(self):
for bu in bu_gen:
yield bu

def __ensure_state_tensors_contiguous(self):
for state_block in self._state():
state_tensor = state_block.origin
if not state_tensor.is_contiguous():
state_tensor.contiguous_()

def _filter_states(self):
state_tensor_set = set()
state_tensors = []
@@ -708,6 +714,7 @@ def build(self, *args, **kwargs):
return a_graph

def _compile(self, *args, **kwargs):
self.__ensure_input_tensors_contiguous(*args, **kwargs)
_, eager_outputs = self.build_graph(*args, **kwargs)
self.finish_complie_and_init_runtime()
return eager_outputs
@@ -801,6 +808,8 @@ def finish_complie_and_init_runtime(self):
self._additional_variable_tobe_loaded.clear()

def __build_graph(self, *args, **kwargs):
self.__ensure_state_tensors_contiguous()

# Filter to get unique states in graph
state_op_names = self._filter_states()

@@ -1035,6 +1044,7 @@ def check_id_and_add(t, name):
)

def __run(self, *args, **kwargs):
self.__ensure_input_tensors_contiguous(*args, **kwargs)
try:
flattened_eager_args = self.__flatten_io("input", *args, **kwargs)
outputs_tensor_tuple = self._outputs_tensor_tuple_buffer[
@@ -1342,6 +1352,16 @@ def __del__(self):
return
oneflow._oneflow_internal.eager.Sync()

def __ensure_input_tensors_contiguous(self, *args, **kwargs):
io_node = IONode(None, 0, (args, kwargs), "_" + self.name + "_" + "input")

def leaf_node_fn(node):
if isinstance(node._value, Tensor) and not node._value.is_contiguous():
node._value.contiguous_()
Flowingsun007 (Contributor) commented on May 24, 2022:

For input tensors, do we even need the in-place contiguous here?

xiacijie (Contributor, Author) replied on May 24, 2022:

For input tensors, either the in-place or the non-in-place version works.

Contributor replied:

I'd still suggest the non-in-place version: it saves one op, so it's more efficient, and it also looks a bit cleaner.
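A minimal sketch of the two options being compared (illustration only, not part of the PR):

import oneflow as flow

t = flow.ones(3, 4).transpose(0, 1)  # a non-contiguous view
print(t.is_contiguous())             # False

t_copy = t.contiguous()              # non-in-place: returns a new contiguous tensor
print(t_copy.is_contiguous())        # True
print(t.is_contiguous())             # still False

t.contiguous_()                      # in-place (what the PR uses): makes t itself contiguous
print(t.is_contiguous())             # True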

return node

io_node.map_leaf(leaf_node_fn)


if __name__ == "__main__":
import doctest
104 changes: 104 additions & 0 deletions python/oneflow/test/graph/test_graph_non_contiguous_tensors.py
@@ -0,0 +1,104 @@
"""
Copyright 2020 The OneFlow Authors. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import unittest
import oneflow.unittest
import oneflow as flow
import numpy as np


class ModuleTest(flow.nn.Module):
def __init__(self, contiguous: bool, device):
super().__init__()
if contiguous:
self.weight = flow.nn.Parameter(flow.ones(4, 3, device=device))
else:
self.weight = flow.nn.Parameter(
flow.ones(3, 4, device=device).transpose(0, 1)
)

def forward(self, input):
res = flow.matmul(input, self.weight)
return res


def _test_graph_non_contiguous_tensors(test_case, device):
bias = flow.tensor(
[[1, 2, 3], [3, 4, 5], [7, 7, 7],], dtype=flow.float32, device=device
)

free_eager_bias_contiguous = bias
free_eager_bias_non_contiguous = bias.transpose(0, 1).contiguous().transpose(0, 1)
test_case.assertTrue(free_eager_bias_contiguous.is_contiguous())
test_case.assertFalse(free_eager_bias_non_contiguous.is_contiguous())

class GraphTestContiguousTensors(flow.nn.Graph):
def __init__(self):
super().__init__()
self.model = ModuleTest(True, device)

def build(self, input):
res = self.model(input) + free_eager_bias_contiguous
return res

class GraphTestNonContiguousTensors(flow.nn.Graph):
def __init__(self):
super().__init__()
self.model = ModuleTest(False, device)

def build(self, input):
res = self.model(input) + free_eager_bias_non_contiguous
return res

graph_contiguous_tensors = GraphTestContiguousTensors()
graph_non_contiguous_tensors = GraphTestNonContiguousTensors()

test_case.assertTrue(graph_contiguous_tensors.model.weight.origin.is_contiguous())
test_case.assertFalse(
graph_non_contiguous_tensors.model.weight.origin.is_contiguous()
)

inp = flow.tensor(
[[1, 2, 3], [4, 5, 6], [3, 3, 3], [7, 8, 8]], dtype=flow.float32, device=device
)

non_contiguous_input = inp.transpose(0, 1)
test_case.assertFalse(non_contiguous_input.is_contiguous())

contiguous_input = non_contiguous_input.contiguous()
test_case.assertTrue(contiguous_input.is_contiguous())

contiguous_graph_output = graph_contiguous_tensors(contiguous_input)
non_contiguous_graph_output = graph_non_contiguous_tensors(non_contiguous_input)
test_case.assertTrue(
np.array_equal(
contiguous_graph_output.numpy(), non_contiguous_graph_output.numpy()
)
)


@flow.unittest.skip_unless_1n1d()
class TestGraphNonContiguousTensor(oneflow.unittest.TestCase):
def test_graph_non_contiguous_tensors_cpu(test_case):
_test_graph_non_contiguous_tensors(test_case, flow.device("cpu"))

@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
def test_graph_non_contiguous_tensors_gpu(test_case):
_test_graph_non_contiguous_tensors(test_case, flow.device("cuda"))


if __name__ == "__main__":
unittest.main()