From 034523a3bc9f5a6d1567b8b48bdb757303efd8b0 Mon Sep 17 00:00:00 2001 From: Matt Stone Date: Sat, 13 Apr 2024 20:39:12 -0400 Subject: [PATCH] feat: Permit appending (#9) * refactor: cleanup * wip: tests * feat: test appending * doc: update docstring --- dataclass_io/reader.py | 2 +- dataclass_io/writer.py | 63 ++++++++++++++++++++++++++++++------------ tests/test_writer.py | 55 ++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 18 deletions(-) diff --git a/dataclass_io/reader.py b/dataclass_io/reader.py index f5b3468..9b171c1 100644 --- a/dataclass_io/reader.py +++ b/dataclass_io/reader.py @@ -47,8 +47,8 @@ def __init__( comment=comment, ) - assert_file_is_readable(path) assert_dataclass_is_valid(dataclass_type) + assert_file_is_readable(path) assert_file_header_matches_dataclass(path, dataclass_type, file_format) self._dataclass_type = dataclass_type diff --git a/dataclass_io/writer.py b/dataclass_io/writer.py index 60deec6..c439568 100644 --- a/dataclass_io/writer.py +++ b/dataclass_io/writer.py @@ -8,10 +8,12 @@ from dataclass_io._lib.assertions import assert_dataclass_is_valid from dataclass_io._lib.assertions import assert_fieldnames_are_dataclass_attributes +from dataclass_io._lib.assertions import assert_file_header_matches_dataclass from dataclass_io._lib.assertions import assert_file_is_appendable from dataclass_io._lib.assertions import assert_file_is_writable from dataclass_io._lib.dataclass_extensions import DataclassInstance from dataclass_io._lib.dataclass_extensions import fieldnames +from dataclass_io._lib.file import FileFormat from dataclass_io._lib.file import WritableFileHandle from dataclass_io._lib.file import WriteMode @@ -65,30 +67,21 @@ def __init__( except ValueError: raise ValueError(f"`mode` must be either 'write' or 'append': {mode}") from None - assert_dataclass_is_valid(dataclass_type) - - self._fieldnames: list[str] - if include_fields is not None and exclude_fields is not None: - raise ValueError( - "Only one 
of `include_fields` and `exclude_fields` may be specified, not both." - ) - elif exclude_fields is not None: - assert_fieldnames_are_dataclass_attributes(exclude_fields, dataclass_type) - self._fieldnames = [f for f in fieldnames(dataclass_type) if f not in exclude_fields] - elif include_fields is not None: - assert_fieldnames_are_dataclass_attributes(include_fields, dataclass_type) - self._fieldnames = include_fields - else: - self._fieldnames = fieldnames(dataclass_type) + file_format = FileFormat(delimiter=delimiter) + assert_dataclass_is_valid(dataclass_type) if write_mode is WriteMode.WRITE: assert_file_is_writable(path, overwrite=overwrite) else: assert_file_is_appendable(path, dataclass_type=dataclass_type) - # TODO: check that header matches fieldnames - raise NotImplementedError + assert_file_header_matches_dataclass(path, dataclass_type, file_format) self._dataclass_type = dataclass_type + self._fieldnames = _validate_output_fieldnames( + dataclass_type=dataclass_type, + include_fields=include_fields, + exclude_fields=exclude_fields, + ) self._fout = path.open(write_mode.abbreviation) self._writer = DictWriter( f=self._fout, @@ -152,3 +145,39 @@ def writeall(self, dataclass_instances: Iterable[DataclassInstance]) -> None: """ for dataclass_instance in dataclass_instances: self.write(dataclass_instance) + + +def _validate_output_fieldnames( + dataclass_type: type[DataclassInstance], + include_fields: list[str] | None = None, + exclude_fields: list[str] | None = None, +) -> list[str]: + """ + Subset and/or re-order the dataclass's fieldnames based on the specified include/exclude lists. + + * Only one of `include_fields` and `exclude_fields` may be specified. + * All fieldnames specified in `include_fields` must be fields on `dataclass_type`. If this + argument is specified, fields will be returned in the order they appear in the list. + * All fieldnames specified in `exclude_fields` must be fields on `dataclass_type`. 
(This is + technically unnecessary, but is a safeguard against passing an incorrect list.) + * If neither `include_fields` nor `exclude_fields` is specified, return the `dataclass_type`'s + fieldnames. + + Raises: + ValueError: If both `include_fields` and `exclude_fields` are specified. + """ + + if include_fields is not None and exclude_fields is not None: + raise ValueError( + "Only one of `include_fields` and `exclude_fields` may be specified, not both." + ) + elif exclude_fields is not None: + assert_fieldnames_are_dataclass_attributes(exclude_fields, dataclass_type) + output_fieldnames = [f for f in fieldnames(dataclass_type) if f not in exclude_fields] + elif include_fields is not None: + assert_fieldnames_are_dataclass_attributes(include_fields, dataclass_type) + output_fieldnames = include_fields + else: + output_fieldnames = fieldnames(dataclass_type) + + return output_fieldnames diff --git a/tests/test_writer.py b/tests/test_writer.py index e75bf4b..7f5d265 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -45,6 +45,61 @@ def test_writer_writeall(tmp_path: Path) -> None: next(f) +def test_writer_append(tmp_path: Path) -> None: + """Test that we can append to a file.""" + fpath = tmp_path / "test.txt" + + with fpath.open("w") as fout: + fout.write("foo\tbar\n") + + with DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass) as writer: + writer.write(FakeDataclass(foo="abc", bar=1)) + writer.write(FakeDataclass(foo="def", bar=2)) + + with open(fpath, "r") as f: + assert next(f) == "foo\tbar\n" + assert next(f) == "abc\t1\n" + assert next(f) == "def\t2\n" + with pytest.raises(StopIteration): + next(f) + + +def test_writer_append_raises_if_empty(tmp_path: Path) -> None: + """Test that we raise an error if we try to append to an empty file.""" + fpath = tmp_path / "test.txt" + fpath.touch() + + with pytest.raises(ValueError, match="The specified output file is empty"): + with DataclassWriter(path=fpath, mode="append", 
dataclass_type=FakeDataclass) as writer: + writer.write(FakeDataclass(foo="abc", bar=1)) + + +def test_writer_append_raises_if_no_header(tmp_path: Path) -> None: + """Test that we raise an error if we try to append to a file with no header.""" + fpath = tmp_path / "test.txt" + with fpath.open("w") as fout: + fout.write("abc\t1\n") + + with pytest.raises(ValueError, match="The provided file does not have the same field names"): + with DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass) as writer: + writer.write(FakeDataclass(foo="abc", bar=1)) + + +def test_writer_append_raises_if_header_does_not_match(tmp_path: Path) -> None: + """ + Test that we raise an error if we try to append to a file whose header doesn't match our + dataclass. + """ + fpath = tmp_path / "test.txt" + + with fpath.open("w") as fout: + fout.write("foo\tbar\tbaz\n") + + with pytest.raises(ValueError, match="The provided file does not have the same field names"): + with DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass) as writer: + writer.write(FakeDataclass(foo="abc", bar=1)) + + def test_writer_include_fields(tmp_path: Path) -> None: """Test that we can include only a subset of fields.""" fpath = tmp_path / "test.txt"