Skip to content

Commit

Permalink
Fix flakiness on tests (#438)
Browse files Browse the repository at this point in the history
* fix _get_temporary_directory to make unique directory name

* fix back -n auto option

* add tearDown process
  • Loading branch information
hirosassa authored Feb 12, 2025
1 parent 4319ea5 commit 7fa0123
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 42 deletions.
4 changes: 1 addition & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,4 @@ check_untyped_defs = true

[tool.pytest.ini_options]
testpaths = ["test"]
# NOTE: `-n auto` makes pytest fast but unstable
# ref: https://github.com/m3dev/gokart/issues/436
addopts = "-s -v --durations=0"
addopts = "-n auto -s -v --durations=0"
14 changes: 7 additions & 7 deletions test/test_large_data_fram_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@
import pandas as pd

from gokart.target import LargeDataFrameProcessor


def _get_temporary_directory():
return os.path.abspath(os.path.join(os.path.dirname(__name__), 'temporary'))
from test.util import _get_temporary_directory


class LargeDataFrameProcessorTest(unittest.TestCase):
def setUp(self):
self.temporary_directory = _get_temporary_directory()

def tearDown(self):
shutil.rmtree(_get_temporary_directory(), ignore_errors=True)
shutil.rmtree(self.temporary_directory, ignore_errors=True)

def test_save_and_load(self):
file_path = os.path.join(_get_temporary_directory(), 'test.zip')
file_path = os.path.join(self.temporary_directory, 'test.zip')
df = pd.DataFrame(dict(data=np.random.uniform(0, 1, size=int(1e6))))
processor = LargeDataFrameProcessor(max_byte=int(1e6))
processor.save(df, file_path)
Expand All @@ -26,7 +26,7 @@ def test_save_and_load(self):
pd.testing.assert_frame_equal(loaded, df, check_like=True)

def test_save_and_load_empty(self):
file_path = os.path.join(_get_temporary_directory(), 'test_with_empty.zip')
file_path = os.path.join(self.temporary_directory, 'test_with_empty.zip')
df = pd.DataFrame()
processor = LargeDataFrameProcessor(max_byte=int(1e6))
processor.save(df, file_path)
Expand Down
20 changes: 12 additions & 8 deletions test/test_s3_zip_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,27 @@
from moto import mock_aws

from gokart.s3_zip_client import S3ZipClient


def _get_temporary_directory():
return os.path.abspath(os.path.join(os.path.dirname(__name__), 'temporary'))
from test.util import _get_temporary_directory


class TestS3ZipClient(unittest.TestCase):
def setUp(self):
self.temporary_directory = _get_temporary_directory()

def tearDown(self):
shutil.rmtree(_get_temporary_directory(), ignore_errors=True)
shutil.rmtree(self.temporary_directory, ignore_errors=True)

# remove temporary zip archive if exists.
if os.path.exists(f'{self.temporary_directory}.zip'):
os.remove(f'{self.temporary_directory}.zip')

@mock_aws
def test_make_archive(self):
conn = boto3.resource('s3', region_name='us-east-1')
conn.create_bucket(Bucket='test')

file_path = os.path.join('s3://test/', 'test.zip')
temporary_directory = _get_temporary_directory()
temporary_directory = self.temporary_directory

zip_client = S3ZipClient(file_path=file_path, temporary_directory=temporary_directory)
# raise error if temporary directory does not exist.
Expand All @@ -39,8 +43,8 @@ def test_unpack_archive(self):
conn.create_bucket(Bucket='test')

file_path = os.path.join('s3://test/', 'test.zip')
in_temporary_directory = os.path.join(_get_temporary_directory(), 'in', 'dummy')
out_temporary_directory = os.path.join(_get_temporary_directory(), 'out', 'dummy')
in_temporary_directory = os.path.join(self.temporary_directory, 'in', 'dummy')
out_temporary_directory = os.path.join(self.temporary_directory, 'out', 'dummy')

# make dummy zip file.
os.makedirs(in_temporary_directory, exist_ok=True)
Expand Down
51 changes: 27 additions & 24 deletions test/test_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@

from gokart.file_processor import _ChunkedLargeFileReader
from gokart.target import make_model_target, make_target


def _get_temporary_directory():
return os.path.abspath(os.path.join(os.path.dirname(__name__), 'temporary'))
from test.util import _get_temporary_directory


class LocalTargetTest(unittest.TestCase):
def setUp(self):
self.temporary_directory = _get_temporary_directory()

def tearDown(self):
shutil.rmtree(_get_temporary_directory(), ignore_errors=True)
shutil.rmtree(self.temporary_directory, ignore_errors=True)

def test_save_and_load_pickle_file(self):
obj = 1
file_path = os.path.join(_get_temporary_directory(), 'test.pkl')
file_path = os.path.join(self.temporary_directory, 'test.pkl')

target = make_target(file_path=file_path, unique_id=None)
target.dump(obj)
Expand All @@ -37,7 +37,7 @@ def test_save_and_load_pickle_file(self):

def test_save_and_load_text_file(self):
obj = 1
file_path = os.path.join(_get_temporary_directory(), 'test.txt')
file_path = os.path.join(self.temporary_directory, 'test.txt')

target = make_target(file_path=file_path, unique_id=None)
target.dump(obj)
Expand All @@ -47,7 +47,7 @@ def test_save_and_load_text_file(self):

def test_save_and_load_gzip(self):
obj = 1
file_path = os.path.join(_get_temporary_directory(), 'test.gz')
file_path = os.path.join(self.temporary_directory, 'test.gz')

target = make_target(file_path=file_path, unique_id=None)
target.dump(obj)
Expand All @@ -57,7 +57,7 @@ def test_save_and_load_gzip(self):

def test_save_and_load_npz(self):
obj = np.ones(shape=10, dtype=np.float32)
file_path = os.path.join(_get_temporary_directory(), 'test.npz')
file_path = os.path.join(self.temporary_directory, 'test.npz')
target = make_target(file_path=file_path, unique_id=None)
target.dump(obj)
loaded = target.load()
Expand All @@ -69,7 +69,7 @@ def test_save_and_load_figure(self):
pd.DataFrame(dict(x=range(10), y=range(10))).plot.scatter(x='x', y='y')
pyplot.savefig(figure_binary)
figure_binary.seek(0)
file_path = os.path.join(_get_temporary_directory(), 'test.png')
file_path = os.path.join(self.temporary_directory, 'test.png')
target = make_target(file_path=file_path, unique_id=None)
target.dump(figure_binary.read())

Expand All @@ -78,7 +78,7 @@ def test_save_and_load_figure(self):

def test_save_and_load_csv(self):
obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4]))
file_path = os.path.join(_get_temporary_directory(), 'test.csv')
file_path = os.path.join(self.temporary_directory, 'test.csv')

target = make_target(file_path=file_path, unique_id=None)
target.dump(obj)
Expand All @@ -88,7 +88,7 @@ def test_save_and_load_csv(self):

def test_save_and_load_tsv(self):
obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4]))
file_path = os.path.join(_get_temporary_directory(), 'test.tsv')
file_path = os.path.join(self.temporary_directory, 'test.tsv')

target = make_target(file_path=file_path, unique_id=None)
target.dump(obj)
Expand All @@ -98,7 +98,7 @@ def test_save_and_load_tsv(self):

def test_save_and_load_parquet(self):
obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4]))
file_path = os.path.join(_get_temporary_directory(), 'test.parquet')
file_path = os.path.join(self.temporary_directory, 'test.parquet')

target = make_target(file_path=file_path, unique_id=None)
target.dump(obj)
Expand All @@ -108,7 +108,7 @@ def test_save_and_load_parquet(self):

def test_save_and_load_feather(self):
obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=pd.Index([33, 44], name='object_index'))
file_path = os.path.join(_get_temporary_directory(), 'test.feather')
file_path = os.path.join(self.temporary_directory, 'test.feather')

target = make_target(file_path=file_path, unique_id=None)
target.dump(obj)
Expand All @@ -118,7 +118,7 @@ def test_save_and_load_feather(self):

def test_save_and_load_feather_without_store_index_in_feather(self):
obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4]), index=pd.Index([33, 44], name='object_index')).reset_index()
file_path = os.path.join(_get_temporary_directory(), 'test.feather')
file_path = os.path.join(self.temporary_directory, 'test.feather')

target = make_target(file_path=file_path, unique_id=None, store_index_in_feather=False)
target.dump(obj)
Expand All @@ -128,22 +128,22 @@ def test_save_and_load_feather_without_store_index_in_feather(self):

def test_last_modified_time(self):
obj = pd.DataFrame(dict(a=[1, 2], b=[3, 4]))
file_path = os.path.join(_get_temporary_directory(), 'test.csv')
file_path = os.path.join(self.temporary_directory, 'test.csv')

target = make_target(file_path=file_path, unique_id=None)
target.dump(obj)
t = target.last_modification_time()
self.assertIsInstance(t, datetime)

def test_last_modified_time_without_file(self):
file_path = os.path.join(_get_temporary_directory(), 'test.csv')
file_path = os.path.join(self.temporary_directory, 'test.csv')
target = make_target(file_path=file_path, unique_id=None)
with self.assertRaises(FileNotFoundError):
target.last_modification_time()

def test_save_pandas_series(self):
obj = pd.Series(data=[1, 2], name='column_name')
file_path = os.path.join(_get_temporary_directory(), 'test.csv')
file_path = os.path.join(self.temporary_directory, 'test.csv')

target = make_target(file_path=file_path, unique_id=None)
target.dump(obj)
Expand All @@ -154,7 +154,7 @@ def test_save_pandas_series(self):
def test_dump_with_lock(self):
with patch('gokart.target.wrap_dump_with_lock') as wrap_with_lock_mock:
obj = 1
file_path = os.path.join(_get_temporary_directory(), 'test.pkl')
file_path = os.path.join(self.temporary_directory, 'test.pkl')
target = make_target(file_path=file_path, unique_id=None)
target.dump(obj, lock_at_dump=True)

Expand All @@ -163,7 +163,7 @@ def test_dump_with_lock(self):
def test_dump_without_lock(self):
with patch('gokart.target.wrap_dump_with_lock') as wrap_with_lock_mock:
obj = 1
file_path = os.path.join(_get_temporary_directory(), 'test.pkl')
file_path = os.path.join(self.temporary_directory, 'test.pkl')
target = make_target(file_path=file_path, unique_id=None)
target.dump(obj, lock_at_dump=False)

Expand Down Expand Up @@ -238,8 +238,11 @@ def test_save_on_s3_parquet(self):


class ModelTargetTest(unittest.TestCase):
def setUp(self):
self.temporary_directory = _get_temporary_directory()

def tearDown(self):
shutil.rmtree(_get_temporary_directory(), ignore_errors=True)
shutil.rmtree(self.temporary_directory, ignore_errors=True)

@staticmethod
def _save_function(obj, path):
Expand All @@ -251,10 +254,10 @@ def _load_function(path):

def test_model_target_on_local(self):
obj = 1
file_path = os.path.join(_get_temporary_directory(), 'test.zip')
file_path = os.path.join(self.temporary_directory, 'test.zip')

target = make_model_target(
file_path=file_path, temporary_directory=_get_temporary_directory(), save_function=self._save_function, load_function=self._load_function
file_path=file_path, temporary_directory=self.temporary_directory, save_function=self._save_function, load_function=self._load_function
)

target.dump(obj)
Expand All @@ -271,7 +274,7 @@ def test_model_target_on_s3(self):
file_path = os.path.join('s3://test/', 'test.zip')

target = make_model_target(
file_path=file_path, temporary_directory=_get_temporary_directory(), save_function=self._save_function, load_function=self._load_function
file_path=file_path, temporary_directory=self.temporary_directory, save_function=self._save_function, load_function=self._load_function
)

target.dump(obj)
Expand Down
8 changes: 8 additions & 0 deletions test/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import os
import uuid


# TODO: use pytest.fixture to share this functionality with other tests
def _get_temporary_directory():
_uuid = str(uuid.uuid4())
return os.path.abspath(os.path.join(os.path.dirname(__name__), f'temporary-{_uuid}'))

0 comments on commit 7fa0123

Please sign in to comment.