From ce2dd44f778501b50acb2b161378f8dd859bffc8 Mon Sep 17 00:00:00 2001
From: fabiocapsouza
Date: Sun, 15 Nov 2020 12:30:46 -0300
Subject: [PATCH] Revert "[s2s] test_distributed_eval (#8315)"

This reverts commit a21fe566937691433ebdcd8af939e975dbfcfad1.
---
 docs/source/testing.rst                        |  3 +-
 examples/seq2seq/test_finetune_trainer.py      |  9 +++--
 .../test_seq2seq_examples_multi_gpu.py         | 36 +------------------
 src/transformers/testing_utils.py              | 16 --------
 4 files changed, 8 insertions(+), 56 deletions(-)

diff --git a/docs/source/testing.rst b/docs/source/testing.rst
index 0a9d3d525bfa9e..aef3b7efc8126d 100644
--- a/docs/source/testing.rst
+++ b/docs/source/testing.rst
@@ -450,8 +450,7 @@ Inside tests:
 
 .. code-block:: bash
 
-    from transformers.testing_utils import get_gpu_count
-    n_gpu = get_gpu_count() # works with torch and tf
+    torch.cuda.device_count()
 
 
diff --git a/examples/seq2seq/test_finetune_trainer.py b/examples/seq2seq/test_finetune_trainer.py
index 6da0e240c41959..923ecf6d945831 100644
--- a/examples/seq2seq/test_finetune_trainer.py
+++ b/examples/seq2seq/test_finetune_trainer.py
@@ -2,9 +2,9 @@
 import sys
 from unittest.mock import patch
 
-from transformers import BertTokenizer, EncoderDecoderModel
+from transformers import BertTokenizer, EncoderDecoderModel, is_torch_available
 from transformers.file_utils import is_datasets_available
-from transformers.testing_utils import TestCasePlus, execute_subprocess_async, get_gpu_count, slow
+from transformers.testing_utils import TestCasePlus, execute_subprocess_async, slow
 from transformers.trainer_callback import TrainerState
 from transformers.trainer_utils import set_seed
 
@@ -13,6 +13,9 @@
 from .test_seq2seq_examples import MBART_TINY
 
 
+if is_torch_available():
+    import torch
+
 set_seed(42)
 MARIAN_MODEL = "sshleifer/student_marian_en_ro_6_1"
 
@@ -193,7 +196,7 @@ def run_trainer(self, eval_steps: int, max_len: str, model_name: str, num_train_
         """.split()
         # --eval_beams 2
 
-        n_gpu = get_gpu_count()
+        n_gpu = torch.cuda.device_count()
         if n_gpu > 1:
             distributed_args = f"""
                 -m torch.distributed.launch
diff --git a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
index efc23b5681e040..463ad1e7d9b8c4 100644
--- a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
+++ b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
@@ -3,14 +3,7 @@
 import os
 import sys
 
-from transformers.testing_utils import (
-    TestCasePlus,
-    execute_subprocess_async,
-    get_gpu_count,
-    require_torch_gpu,
-    require_torch_multigpu,
-    slow,
-)
+from transformers.testing_utils import TestCasePlus, execute_subprocess_async, require_torch_multigpu
 
 from .test_seq2seq_examples import CHEAP_ARGS, make_test_data_dir
 from .utils import load_json
@@ -87,30 +80,3 @@ def convert(k, v):
         self.assertEqual(len(metrics["test"]), 1)
         desired_n_evals = int(args_d["max_epochs"] * (1 / args_d["val_check_interval"]) / 2 + 1)
         self.assertEqual(len(metrics["val"]), desired_n_evals)
-
-    @slow
-    @require_torch_gpu
-    def test_distributed_eval(self):
-        output_dir = self.get_auto_remove_tmp_dir()
-        args = f"""
-            --model_name Helsinki-NLP/opus-mt-en-ro
-            --save_dir {output_dir}
-            --data_dir test_data/wmt_en_ro
-            --num_beams 2
-            --task translation
-        """.split()
-
-        # we want this test to run even if there is only one GPU, but if there are more we use them all
-        n_gpu = get_gpu_count()
-        distributed_args = f"""
-            -m torch.distributed.launch
-            --nproc_per_node={n_gpu}
-            {self.test_file_dir}/run_distributed_eval.py
-        """.split()
-        cmd = [sys.executable] + distributed_args + args
-        execute_subprocess_async(cmd, env=self.get_env())
-
-        metrics_save_path = os.path.join(output_dir, "test_bleu.json")
-        metrics = load_json(metrics_save_path)
-        # print(metrics)
-        self.assertGreaterEqual(metrics["bleu"], 25)
diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index 02998bcfd656b6..8eb41ac85f8817 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -297,22 +297,6 @@ def require_ray(test_case):
     return test_case
 
 
-def get_gpu_count():
-    """
-    Return the number of available gpus (regardless of whether torch or tf is used)
-    """
-    if _torch_available:
-        import torch
-
-        return torch.cuda.device_count()
-    elif _tf_available:
-        import tensorflow as tf
-
-        return len(tf.config.list_physical_devices("GPU"))
-    else:
-        return 0
-
-
 def get_tests_dir(append_path=None):
     """
     Args:
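
Note (not part of the patch): a minimal standalone sketch of the framework-agnostic
counting that the deleted get_gpu_count() performed. The internal
_torch_available/_tf_available flags are replaced here with plain import guards so
the snippet runs on its own; after this revert, callers use
torch.cuda.device_count() directly and therefore require torch.

    def get_gpu_count():
        """Return the number of available GPUs, whether torch or tf is installed."""
        try:
            import torch

            # counts CUDA devices visible to torch
            return torch.cuda.device_count()
        except ImportError:
            pass
        try:
            import tensorflow as tf

            # counts physical GPU devices visible to tensorflow
            return len(tf.config.list_physical_devices("GPU"))
        except ImportError:
            return 0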
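
Note (illustration only): the launch pattern that the reverted run_trainer and the
removed test_distributed_eval both build, sketched as a standalone helper. The
script path and argument list below are hypothetical placeholders; the real test
pointed at run_distributed_eval.py with translation arguments.

    import sys

    import torch

    def build_distributed_cmd(script: str, script_args: list) -> list:
        """Build a command that runs `script` under torch.distributed.launch,
        one process per visible GPU."""
        n_gpu = torch.cuda.device_count()
        distributed_args = f"""
            -m torch.distributed.launch
            --nproc_per_node={n_gpu}
            {script}
        """.split()
        return [sys.executable] + distributed_args + script_args

    # e.g. build_distributed_cmd("run_distributed_eval.py", ["--num_beams", "2"])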