From 7fa01237432b36913643d8921ae06b67759e88ae Mon Sep 17 00:00:00 2001 From: hirosassa Date: Wed, 12 Feb 2025 15:57:17 +0900 Subject: [PATCH] Fix flakiness on tests (#438) * fix _get_temporary_directory to make unique directory name * fix back -n auto option * add tearDown process --- pyproject.toml | 4 +- test/test_large_data_fram_processor.py | 14 +++---- test/test_s3_zip_client.py | 20 ++++++---- test/test_target.py | 51 ++++++++++++++------------ test/util.py | 8 ++++ 5 files changed, 55 insertions(+), 42 deletions(-) create mode 100644 test/util.py diff --git a/pyproject.toml b/pyproject.toml index 120507a9..8afd9235 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,6 +111,4 @@ check_untyped_defs = true [tool.pytest.ini_options] testpaths = ["test"] -# NOTE: `-n auto` makes pytest fast but unstable -# ref: https://github.com/m3dev/gokart/issues/436 -addopts = "-s -v --durations=0" +addopts = "-n auto -s -v --durations=0" diff --git a/test/test_large_data_fram_processor.py b/test/test_large_data_fram_processor.py index 6228200f..4aa05a39 100644 --- a/test/test_large_data_fram_processor.py +++ b/test/test_large_data_fram_processor.py @@ -6,18 +6,18 @@ import pandas as pd from gokart.target import LargeDataFrameProcessor - - -def _get_temporary_directory(): - return os.path.abspath(os.path.join(os.path.dirname(__name__), 'temporary')) +from test.util import _get_temporary_directory class LargeDataFrameProcessorTest(unittest.TestCase): + def setUp(self): + self.temporary_directory = _get_temporary_directory() + def tearDown(self): - shutil.rmtree(_get_temporary_directory(), ignore_errors=True) + shutil.rmtree(self.temporary_directory, ignore_errors=True) def test_save_and_load(self): - file_path = os.path.join(_get_temporary_directory(), 'test.zip') + file_path = os.path.join(self.temporary_directory, 'test.zip') df = pd.DataFrame(dict(data=np.random.uniform(0, 1, size=int(1e6)))) processor = LargeDataFrameProcessor(max_byte=int(1e6)) processor.save(df, file_path) @@ -26,7 +26,7 @@ def test_save_and_load(self): pd.testing.assert_frame_equal(loaded, df, check_like=True) def test_save_and_load_empty(self): - file_path = os.path.join(_get_temporary_directory(), 'test_with_empty.zip') + file_path = os.path.join(self.temporary_directory, 'test_with_empty.zip') df = pd.DataFrame() processor = LargeDataFrameProcessor(max_byte=int(1e6)) processor.save(df, file_path) diff --git a/test/test_s3_zip_client.py b/test/test_s3_zip_client.py index 8ab8bbb1..46f79fca 100644 --- a/test/test_s3_zip_client.py +++ b/test/test_s3_zip_client.py @@ -6,15 +6,19 @@ from moto import mock_aws from gokart.s3_zip_client import S3ZipClient - - -def _get_temporary_directory(): - return os.path.abspath(os.path.join(os.path.dirname(__name__), 'temporary')) +from test.util import _get_temporary_directory class TestS3ZipClient(unittest.TestCase): + def setUp(self): + self.temporary_directory = _get_temporary_directory() + def tearDown(self): - shutil.rmtree(_get_temporary_directory(), ignore_errors=True) + shutil.rmtree(self.temporary_directory, ignore_errors=True) + + # remove temporary zip archive if exists. + if os.path.exists(f'{self.temporary_directory}.zip'): + os.remove(f'{self.temporary_directory}.zip') @mock_aws def test_make_archive(self): @@ -22,7 +26,7 @@ def test_make_archive(self): conn.create_bucket(Bucket='test') file_path = os.path.join('s3://test/', 'test.zip') - temporary_directory = _get_temporary_directory() + temporary_directory = self.temporary_directory zip_client = S3ZipClient(file_path=file_path, temporary_directory=temporary_directory) # raise error if temporary directory does not exist. @@ -39,8 +43,8 @@ def test_unpack_archive(self): conn.create_bucket(Bucket='test') file_path = os.path.join('s3://test/', 'test.zip') - in_temporary_directory = os.path.join(_get_temporary_directory(), 'in', 'dummy') - out_temporary_directory = os.path.join(_get_temporary_directory(), 'out', 'dummy') + in_temporary_directory = os.path.join(self.temporary_directory, 'in', 'dummy') + out_temporary_directory = os.path.join(self.temporary_directory, 'out', 'dummy') # make dummy zip file. os.makedirs(in_temporary_directory, exist_ok=True) diff --git a/test/test_target.py b/test/test_target.py index 8090641f..2d82e76f 100644 --- a/test/test_target.py +++ b/test/test_target.py @@ -13,19 +13,19 @@ from gokart.file_processor import _ChunkedLargeFileReader from gokart.target import make_model_target, make_target - - -def _get_temporary_directory(): - return os.path.abspath(os.path.join(os.path.dirname(__name__), 'temporary')) +from test.util import _get_temporary_directory class LocalTargetTest(unittest.TestCase): + def setUp(self): + self.temporary_directory = _get_temporary_directory() + def tearDown(self): - shutil.rmtree(_get_temporary_directory(), ignore_errors=True) + shutil.rmtree(self.temporary_directory, ignore_errors=True) def test_save_and_load_pickle_file(self): obj = 1 - file_path = os.path.join(_get_temporary_directory(), 'test.pkl') + file_path = os.path.join(self.temporary_directory, 'test.pkl') target = make_target(file_path=file_path, unique_id=None) target.dump(obj) @@ -37,7 +37,7 @@ def test_save_and_load_pickle_file(self): def test_save_and_load_text_file(self): obj = 1 - file_path = os.path.join(_get_temporary_directory(), 'test.txt') + file_path = os.path.join(self.temporary_directory, 'test.txt') target = make_target(file_path=file_path, unique_id=None) target.dump(obj) @@ -47,7 +47,7 @@ def test_save_and_load_text_file(self): def test_save_and_load_gzip(self): obj = 1 - file_path = os.path.join(_get_temporary_directory(), 'test.gz') + file_path = os.path.join(self.temporary_directory, 'test.gz') target = make_target(file_path=file_path, unique_id=None) target.dump(obj) @@ -57,7 +57,7 @@ def test_save_and_load_gzip(self): def test_save_and_load_npz(self): obj = np.ones(shape=10, dtype=np.float32) - file_path = os.path.join(_get_temporary_directory(), 'test.npz') + file_path = os.path.join(self.temporary_directory, 'test.npz') target = make_target(file_path=file_path, unique_id=None) target.dump(obj) loaded = target.load() @@ -69,7 +69,7 @@ def test_save_and_load_figure(self): pd.DataFrame(dict(x=range(10), y=range(10))).plot.scatter(x='x', y='y') pyplot.savefig(figure_binary) figure_binary.seek(0) - file_path = os.path.join(_get_temporary_directory(), 'test.png') + file_path = os.path.join(self.temporary_directory, 'test.png') target = make_target(file_path=file_path, unique_id=None) target.dump(figure_binary.read()) @@ -78,7 +78,7 @@ def test_save_and_load_figure(self): def test_save_and_load_csv(self): obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) - file_path = os.path.join(_get_temporary_directory(), 'test.csv') + file_path = os.path.join(self.temporary_directory, 'test.csv') target = make_target(file_path=file_path, unique_id=None) target.dump(obj) @@ -88,7 +88,7 @@ def test_save_and_load_csv(self): def test_save_and_load_tsv(self): obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) - file_path = os.path.join(_get_temporary_directory(), 'test.tsv') + file_path = os.path.join(self.temporary_directory, 'test.tsv') target = make_target(file_path=file_path, unique_id=None) target.dump(obj) @@ -98,7 +98,7 @@ def test_save_and_load_tsv(self): def test_save_and_load_parquet(self): obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) - file_path = os.path.join(_get_temporary_directory(), 'test.parquet') + file_path = os.path.join(self.temporary_directory, 'test.parquet') target = make_target(file_path=file_path, unique_id=None) target.dump(obj) @@ -108,7 +108,7 @@ def test_save_and_load_parquet(self): def test_save_and_load_feather(self): obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=pd.Index([33, 44], name='object_index')) - file_path = os.path.join(_get_temporary_directory(), 'test.feather') + file_path = os.path.join(self.temporary_directory, 'test.feather') target = make_target(file_path=file_path, unique_id=None) target.dump(obj) @@ -118,7 +118,7 @@ def test_save_and_load_feather(self): def test_save_and_load_feather_without_store_index_in_feather(self): obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=pd.Index([33, 44], name='object_index')).reset_index() - file_path = os.path.join(_get_temporary_directory(), 'test.feather') + file_path = os.path.join(self.temporary_directory, 'test.feather') target = make_target(file_path=file_path, unique_id=None, store_index_in_feather=False) target.dump(obj) @@ -128,7 +128,7 @@ def test_save_and_load_feather_without_store_index_in_feather(self): def test_last_modified_time(self): obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4])) - file_path = os.path.join(_get_temporary_directory(), 'test.csv') + file_path = os.path.join(self.temporary_directory, 'test.csv') target = make_target(file_path=file_path, unique_id=None) target.dump(obj) @@ -136,14 +136,14 @@ def test_last_modified_time(self): self.assertIsInstance(t, datetime) def test_last_modified_time_without_file(self): - file_path = os.path.join(_get_temporary_directory(), 'test.csv') + file_path = os.path.join(self.temporary_directory, 'test.csv') target = make_target(file_path=file_path, unique_id=None) with self.assertRaises(FileNotFoundError): target.last_modification_time() def test_save_pandas_series(self): obj = pd.Series(data=[1, 2], name='column_name') - file_path = os.path.join(_get_temporary_directory(), 'test.csv') + file_path = os.path.join(self.temporary_directory, 'test.csv') target = make_target(file_path=file_path, unique_id=None) target.dump(obj) @@ -154,7 +154,7 @@ def test_save_pandas_series(self): def test_dump_with_lock(self): with patch('gokart.target.wrap_dump_with_lock') as wrap_with_lock_mock: obj = 1 - file_path = os.path.join(_get_temporary_directory(), 'test.pkl') + file_path = os.path.join(self.temporary_directory, 'test.pkl') target = make_target(file_path=file_path, unique_id=None) target.dump(obj, lock_at_dump=True) @@ -163,7 +163,7 @@ def test_dump_with_lock(self): def test_dump_without_lock(self): with patch('gokart.target.wrap_dump_with_lock') as wrap_with_lock_mock: obj = 1 - file_path = os.path.join(_get_temporary_directory(), 'test.pkl') + file_path = os.path.join(self.temporary_directory, 'test.pkl') target = make_target(file_path=file_path, unique_id=None) target.dump(obj, lock_at_dump=False) @@ -238,8 +238,11 @@ def test_save_on_s3_parquet(self): class ModelTargetTest(unittest.TestCase): + def setUp(self): + self.temporary_directory = _get_temporary_directory() + def tearDown(self): - shutil.rmtree(_get_temporary_directory(), ignore_errors=True) + shutil.rmtree(self.temporary_directory, ignore_errors=True) @staticmethod def _save_function(obj, path): @@ -251,10 +254,10 @@ def _load_function(path): def test_model_target_on_local(self): obj = 1 - file_path = os.path.join(_get_temporary_directory(), 'test.zip') + file_path = os.path.join(self.temporary_directory, 'test.zip') target = make_model_target( - file_path=file_path, temporary_directory=_get_temporary_directory(), save_function=self._save_function, load_function=self._load_function + file_path=file_path, temporary_directory=self.temporary_directory, save_function=self._save_function, load_function=self._load_function ) target.dump(obj) @@ -271,7 +274,7 @@ def test_model_target_on_s3(self): file_path = os.path.join('s3://test/', 'test.zip') target = make_model_target( - file_path=file_path, temporary_directory=_get_temporary_directory(), save_function=self._save_function, load_function=self._load_function + file_path=file_path, temporary_directory=self.temporary_directory, save_function=self._save_function, load_function=self._load_function ) target.dump(obj) diff --git a/test/util.py b/test/util.py new file mode 100644 index 00000000..102512f0 --- /dev/null +++ b/test/util.py @@ -0,0 +1,8 @@ +import os +import uuid + + +# TODO: use pytest.fixture to share this functionality with other tests +def _get_temporary_directory(): + _uuid = str(uuid.uuid4()) + return os.path.abspath(os.path.join(os.path.dirname(__name__), f'temporary-{_uuid}'))