Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

disable large tensor in autotest #8177

Merged
merged 42 commits into from
May 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
1952317
disable large tensor in autotest
daquexian May 10, 2022
9e3de7f
rename
daquexian May 10, 2022
a396e03
Merge branch 'master' into disable_large_tensor_autotest
jackalcooper May 10, 2022
1c61fa2
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 10, 2022
626103f
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 10, 2022
4ee57dc
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 10, 2022
58c2ae2
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 10, 2022
0d14593
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 10, 2022
d8e669b
Merge branch 'master' into disable_large_tensor_autotest
jackalcooper May 11, 2022
f8bf948
update limit from 10 -> 20
daquexian May 11, 2022
d568a7e
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 11, 2022
f60305c
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 11, 2022
28bcdd8
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 11, 2022
f55b3d1
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 11, 2022
eb82cde
update test
daquexian May 12, 2022
4f82dfd
Merge branch 'disable_large_tensor_autotest' of github.com:Oneflow-In…
daquexian May 12, 2022
ee3ec87
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 12, 2022
104515e
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 12, 2022
742af33
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 12, 2022
9de5c2f
update maxpool test
daquexian May 13, 2022
e4bbc3d
Merge branch 'disable_large_tensor_autotest' of github.com:Oneflow-In…
daquexian May 13, 2022
5c7a102
Merge branch 'master' of https://github.com/Oneflow-Inc/oneflow into …
jackalcooper May 13, 2022
ee1d6a6
add
jackalcooper May 13, 2022
1d988d9
try fix
jackalcooper May 13, 2022
6e4c2fe
auto format by CI
oneflow-ci-bot May 13, 2022
36bfbe4
refine
jackalcooper May 13, 2022
0724059
Merge branch 'disable_large_tensor_autotest' of https://github.com/On…
jackalcooper May 13, 2022
1afba41
reset
jackalcooper May 13, 2022
bbd661e
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 13, 2022
aa5af6f
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 13, 2022
ae8822d
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 13, 2022
af7d4c5
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 13, 2022
abb90f3
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 14, 2022
46ad813
fix consistent addcmul
daquexian May 14, 2022
78766c6
Merge branch 'disable_large_tensor_autotest' of github.com:Oneflow-In…
daquexian May 14, 2022
7bec88b
limit 20 -> 32
daquexian May 14, 2022
54d8ff8
try fix
jackalcooper May 14, 2022
83e86f5
mv file
jackalcooper May 14, 2022
461d18c
Merge branch 'master' of https://github.com/Oneflow-Inc/oneflow into …
jackalcooper May 14, 2022
e9d4544
fix consistent normalize
daquexian May 14, 2022
c30678f
Merge branch 'master' into disable_large_tensor_autotest
jackalcooper May 14, 2022
8ed9747
Merge branch 'master' into disable_large_tensor_autotest
mergify[bot] May 14, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"llvm-vs-code-extensions.vscode-clangd",
"ms-vscode.cmake-tools"
"ms-vscode.cmake-tools",
"ms-python.python"
],
// Comment out connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "root",
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,7 @@ jobs:
if: ${{ !fromJson(matrix.cache-hit) && (matrix.test-type == 'speed-test' || (matrix.test-type == 'misc' && matrix.device == 'cpu')) && !fromJson(matrix.is-distributed) }}
run: |
docker exec \
-e ONEFLOW_TEST_TENSOR_SIZE_LIMIT_MB=1024 \
-e ONEFLOW_TEST_DIR=$PWD/python/oneflow/test/expensive \
${{ env.TEST_CONTAINER_NAME }} bash ci/test/expensive_generic_test_multi_client.sh
- name: Exception API test
Expand Down
24 changes: 12 additions & 12 deletions python/oneflow/test/expensive/test_permute.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,10 @@ def test_permute4d_tensor_with_random_data(test_case):
shuffle(permute_list)
x = random_tensor(
ndim=ndim,
dim0=random(1, 32).to(int),
dim1=random(1, 59).to(int),
dim2=random(1, 65).to(int),
dim3=random(1, 127).to(int),
dim0=random(1, 7).to(int),
dim1=random(1, 15).to(int),
dim2=random(1, 9).to(int),
dim3=random(1, 19).to(int),
).to(device)
y = x.permute(permute_list)
return y
Expand All @@ -134,10 +134,10 @@ def test_permute4d_tensor_with_stride(test_case):
shuffle(permute_list1)
x = random_tensor(
ndim=ndim,
dim0=random(1, 32).to(int),
dim1=random(1, 59).to(int),
dim2=random(1, 65).to(int),
dim3=random(1, 127).to(int),
dim0=random(1, 7).to(int),
dim1=random(1, 15).to(int),
dim2=random(1, 9).to(int),
dim3=random(1, 19).to(int),
).to(device)
y = x.permute(permute_list1)
permute_list2 = [0, 1, 2, 3]
Expand Down Expand Up @@ -168,10 +168,10 @@ def test_permute4d_tensor_bool_with_random_data(test_case):
shuffle(permute_list)
x = random_tensor(
ndim=ndim,
dim0=random(1, 32).to(int),
dim1=random(1, 59).to(int),
dim2=random(1, 65).to(int),
dim3=random(1, 127).to(int),
dim0=random(1, 7).to(int),
dim1=random(1, 15).to(int),
dim2=random(1, 9).to(int),
dim3=random(1, 19).to(int),
).to(device=device, dtype=torch.bool)
y = x.permute(permute_list)
return y
Expand Down
4 changes: 2 additions & 2 deletions python/oneflow/test/modules/test_consistent_addcmul.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

@autotest(n=1, check_graph=False)
def _test_addcmul(test_case, ndim, placement, sbp):
shape = [random(low=2, high=4) * 8 for i in range(ndim)]
shape = [random(low=2, high=3) * 8 for i in range(ndim)]

input = random_tensor(ndim, *shape).to_global(placement=placement, sbp=sbp)
tensor1 = random_tensor(len(shape), *shape).to_global(placement=placement, sbp=sbp)
Expand All @@ -35,7 +35,7 @@ def _test_addcmul(test_case, ndim, placement, sbp):
class TestModule(flow.unittest.TestCase):
@globaltest
def test_addcmul(test_case):
ndim = random(low=2).to(int).value()
ndim = random(low=2, high=5).to(int).value()
for placement in all_placement():
for sbp in all_sbp(placement, max_dim=ndim):
_test_addcmul(test_case, ndim, placement, sbp)
Expand Down
20 changes: 10 additions & 10 deletions python/oneflow/test/modules/test_consistent_maxpool.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,15 @@ def _test_maxpool2d_functional(test_case, placement, sbp):
@autotest(n=1, check_graph=False)
def _test_maxpool3d_functional(test_case, placement, sbp):
return_indices = random().to(bool).value()
dim0 = random().to(int).value() * 8
dim1 = random().to(int).value() * 8
dim0 = random(high=4).to(int).value() * 8
dim1 = random(high=4).to(int).value() * 8
x = random_tensor(
ndim=5,
dim0=dim0,
dim1=dim1,
dim2=random(20, 22),
dim3=random(20, 22),
dim4=random(20, 22),
dim2=random(10, 12),
dim3=random(10, 12),
dim4=random(10, 12),
).to_global(placement, sbp)
y = torch.nn.functional.max_pool3d(
x,
Expand Down Expand Up @@ -151,8 +151,8 @@ def _test_maxpool2d(test_case, placement, sbp):
@autotest(n=1, check_graph=False)
def _test_maxpool3d(test_case, placement, sbp):
return_indices = random().to(bool).value()
dim0 = random().to(int).value() * 8
dim1 = random().to(int).value() * 8
dim0 = random(high=4).to(int).value() * 8
dim1 = random(high=4).to(int).value() * 8
m = torch.nn.MaxPool3d(
kernel_size=random(4, 6).to(_size_3_t),
stride=random(1, 3).to(_size_3_t),
Expand All @@ -166,9 +166,9 @@ def _test_maxpool3d(test_case, placement, sbp):
ndim=5,
dim0=dim0,
dim1=dim1,
dim2=random(20, 22),
dim3=random(20, 22),
dim4=random(20, 22),
dim2=random(10, 12),
dim3=random(10, 12),
dim4=random(10, 12),
).to_global(placement, sbp)
y = m(x)

Expand Down
4 changes: 2 additions & 2 deletions python/oneflow/test/modules/test_consistent_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

@autotest(n=1, check_graph=False)
def _test_functional_normalize(test_case, placement, sbp):
ndim = random(low=2).to(int).value()
shape = [random(low=2, high=4) * 8 for i in range(ndim)]
ndim = random(low=2, high=5).to(int).value()
shape = [random(low=2, high=3) * 8 for i in range(ndim)]
x = random_tensor(len(shape), *shape).to_global(placement=placement, sbp=sbp)
dim = random(low=0, high=ndim).to(int).value()
y = torch.nn.functional.normalize(x, oneof(2, 3, 4), dim, 1e-12)
Expand Down
14 changes: 7 additions & 7 deletions python/oneflow/test/modules/test_contiguous.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,9 @@ def test_permute3d_tensor_with_random_data(test_case):
shuffle(permute_list)
x = random_tensor(
ndim=ndim,
dim0=random(1, 32).to(int),
dim1=random(1, 59).to(int),
dim2=random(1, 65).to(int),
dim0=random(1, 7).to(int),
dim1=random(1, 15).to(int),
dim2=random(1, 9).to(int),
).to(device)
y = x.permute(permute_list)
z = y.contiguous()
Expand All @@ -73,10 +73,10 @@ def test_permute4d_tensor_with_random_data(test_case):
shuffle(permute_list)
x = random_tensor(
ndim=ndim,
dim0=random(1, 32).to(int),
dim1=random(1, 59).to(int),
dim2=random(1, 65).to(int),
dim3=random(1, 127).to(int),
dim0=random(1, 7).to(int),
dim1=random(1, 15).to(int),
dim2=random(1, 9).to(int),
dim3=random(1, 19).to(int),
).to(device)
y = x.permute(permute_list)
z = y.contiguous()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -785,15 +785,7 @@ def clear_note_fake_program():
flow.set_printoptions(profile="full")


gc_interval = int(os.getenv("ONEFLOW_TEST_GC_INTERVAL", 10))
gc_counter = 0


def manual_gc_collect():
global gc_counter
gc_counter += 1
if gc_counter % gc_interval == 0:
gc.collect()
tensor_size_limit_mb = int(os.getenv("ONEFLOW_TEST_TENSOR_SIZE_LIMIT_MB", 32))


class DualObject:
Expand Down Expand Up @@ -828,6 +820,10 @@ def __init__(self, name, pytorch, oneflow):
if testing:
dual_modules_to_test.append(self)
if isinstance(pytorch, torch_original.Tensor):
tensor_size_mb = pytorch.nelement() * pytorch.element_size() / 1024 / 1024
assert (
tensor_size_mb < tensor_size_limit_mb
), f"Tensor memory in autotest cannot be larger than {tensor_size_limit_mb}MB, but got {tensor_size_mb}MB"
if testing:
dual_objects_to_test.append(self)
self.pytorch = pytorch
Expand Down Expand Up @@ -879,14 +875,6 @@ def __eq__(self, other):
else:
return self.pytorch == other

def __del__(self):
# force running gc to avoid the periodic gc related to metaclass
# 'gc' will be None if Python is shutting down
try:
manual_gc_collect()
except Exception:
pass


dual_modules_to_test = []
dual_objects_to_test = []
Expand Down