Skip to content

Commit

Permalink
feat: Permit appending (#9)
Browse files Browse the repository at this point in the history
* refactor: cleanup

* wip: tests

* feat: test appending

* doc: update docstring
  • Loading branch information
msto authored Apr 14, 2024
1 parent c854fce commit 034523a
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 18 deletions.
2 changes: 1 addition & 1 deletion dataclass_io/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ def __init__(
comment=comment,
)

assert_file_is_readable(path)
assert_dataclass_is_valid(dataclass_type)
assert_file_is_readable(path)
assert_file_header_matches_dataclass(path, dataclass_type, file_format)

self._dataclass_type = dataclass_type
Expand Down
63 changes: 46 additions & 17 deletions dataclass_io/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@

from dataclass_io._lib.assertions import assert_dataclass_is_valid
from dataclass_io._lib.assertions import assert_fieldnames_are_dataclass_attributes
from dataclass_io._lib.assertions import assert_file_header_matches_dataclass
from dataclass_io._lib.assertions import assert_file_is_appendable
from dataclass_io._lib.assertions import assert_file_is_writable
from dataclass_io._lib.dataclass_extensions import DataclassInstance
from dataclass_io._lib.dataclass_extensions import fieldnames
from dataclass_io._lib.file import FileFormat
from dataclass_io._lib.file import WritableFileHandle
from dataclass_io._lib.file import WriteMode

Expand Down Expand Up @@ -65,30 +67,21 @@ def __init__(
except ValueError:
raise ValueError(f"`mode` must be either 'write' or 'append': {mode}") from None

assert_dataclass_is_valid(dataclass_type)

self._fieldnames: list[str]
if include_fields is not None and exclude_fields is not None:
raise ValueError(
"Only one of `include_fields` and `exclude_fields` may be specified, not both."
)
elif exclude_fields is not None:
assert_fieldnames_are_dataclass_attributes(exclude_fields, dataclass_type)
self._fieldnames = [f for f in fieldnames(dataclass_type) if f not in exclude_fields]
elif include_fields is not None:
assert_fieldnames_are_dataclass_attributes(include_fields, dataclass_type)
self._fieldnames = include_fields
else:
self._fieldnames = fieldnames(dataclass_type)
file_format = FileFormat(delimiter=delimiter)

assert_dataclass_is_valid(dataclass_type)
if write_mode is WriteMode.WRITE:
assert_file_is_writable(path, overwrite=overwrite)
else:
assert_file_is_appendable(path, dataclass_type=dataclass_type)
# TODO: check that header matches fieldnames
raise NotImplementedError
assert_file_header_matches_dataclass(path, dataclass_type, file_format)

self._dataclass_type = dataclass_type
self._fieldnames = _validate_output_fieldnames(
dataclass_type=dataclass_type,
include_fields=include_fields,
exclude_fields=exclude_fields,
)
self._fout = path.open(write_mode.abbreviation)
self._writer = DictWriter(
f=self._fout,
Expand Down Expand Up @@ -152,3 +145,39 @@ def writeall(self, dataclass_instances: Iterable[DataclassInstance]) -> None:
"""
for dataclass_instance in dataclass_instances:
self.write(dataclass_instance)


def _validate_output_fieldnames(
    dataclass_type: type[DataclassInstance],
    include_fields: list[str] | None = None,
    exclude_fields: list[str] | None = None,
) -> list[str]:
    """
    Subset and/or re-order the dataclass's fieldnames based on the specified include/exclude lists.

    * Only one of `include_fields` and `exclude_fields` may be specified.
    * All fieldnames specified in `include_fields` must be fields on `dataclass_type`. If this
      argument is specified, fields will be returned in the order they appear in the list.
    * All fieldnames specified in `exclude_fields` must be fields on `dataclass_type`. (This is
      technically unnecessary, but is a safeguard against passing an incorrect list.)
    * If neither `include_fields` nor `exclude_fields` is specified, return the `dataclass_type`'s
      fieldnames.

    Args:
        dataclass_type: The dataclass whose fieldnames are being selected.
        include_fields: If given, the fieldnames to output, in the desired output order.
        exclude_fields: If given, the fieldnames to drop from the dataclass's fieldnames.

    Returns:
        The list of fieldnames to write. When `include_fields` is given, the result is a new list
        (a copy), so later mutation of the caller's list does not affect the returned fieldnames.

    Raises:
        ValueError: If both `include_fields` and `exclude_fields` are specified.
            (The `assert_fieldnames_are_dataclass_attributes` helper is expected to raise when a
            name is not a field on `dataclass_type`; its exception type is defined elsewhere.)
    """
    if include_fields is not None and exclude_fields is not None:
        raise ValueError(
            "Only one of `include_fields` and `exclude_fields` may be specified, not both."
        )

    if exclude_fields is not None:
        assert_fieldnames_are_dataclass_attributes(exclude_fields, dataclass_type)
        return [f for f in fieldnames(dataclass_type) if f not in exclude_fields]

    if include_fields is not None:
        assert_fieldnames_are_dataclass_attributes(include_fields, dataclass_type)
        # Copy rather than alias the caller's list: the result is stored on the writer, and a
        # caller mutating their own list afterwards must not change the writer's columns.
        return list(include_fields)

    return fieldnames(dataclass_type)
55 changes: 55 additions & 0 deletions tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,61 @@ def test_writer_writeall(tmp_path: Path) -> None:
next(f)


def test_writer_append(tmp_path: Path) -> None:
    """Appending to a file that already has a matching header adds rows after it."""
    fpath = tmp_path / "test.txt"

    # Seed the file with only the header line.
    fpath.write_text("foo\tbar\n")

    records = [FakeDataclass(foo="abc", bar=1), FakeDataclass(foo="def", bar=2)]
    with DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass) as writer:
        for record in records:
            writer.write(record)

    # The file must contain exactly the header followed by the two appended rows.
    with open(fpath, "r") as f:
        lines = list(f)
    assert lines == ["foo\tbar\n", "abc\t1\n", "def\t2\n"]


def test_writer_append_raises_if_empty(tmp_path: Path) -> None:
    """Appending to an existing but empty file must raise a ValueError."""
    fpath = tmp_path / "test.txt"
    fpath.touch()

    expected_error = pytest.raises(ValueError, match="The specified output file is empty")
    with expected_error:
        writer_ctx = DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass)
        with writer_ctx as writer:
            writer.write(FakeDataclass(foo="abc", bar=1))


def test_writer_append_raises_if_no_header(tmp_path: Path) -> None:
    """Appending to a file whose first line is data rather than a header must raise."""
    fpath = tmp_path / "test.txt"
    # The file starts with a data row; there is no header line.
    fpath.write_text("abc\t1\n")

    expected_error = pytest.raises(
        ValueError, match="The provided file does not have the same field names"
    )
    with expected_error:
        writer_ctx = DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass)
        with writer_ctx as writer:
            writer.write(FakeDataclass(foo="abc", bar=1))


def test_writer_append_raises_if_header_does_not_match(tmp_path: Path) -> None:
    """
    Appending to a file whose header differs from the dataclass's fields must raise a ValueError.
    """
    fpath = tmp_path / "test.txt"

    # The existing header has an extra `baz` column.
    fpath.write_text("foo\tbar\tbaz\n")

    expected_error = pytest.raises(
        ValueError, match="The provided file does not have the same field names"
    )
    with expected_error:
        writer_ctx = DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass)
        with writer_ctx as writer:
            writer.write(FakeDataclass(foo="abc", bar=1))


def test_writer_include_fields(tmp_path: Path) -> None:
"""Test that we can include only a subset of fields."""
fpath = tmp_path / "test.txt"
Expand Down

0 comments on commit 034523a

Please sign in to comment.