feat: Permit appending (#9)

* refactor: cleanup
* wip: tests
* feat: test appending
* doc: update docstring
msto · Apr 14, 2024 · 034523a · 034523a
1 parent c854fce
commit 034523a
Show file tree

Hide file tree

Showing 3 changed files with 102 additions and 18 deletions.
diff --git a/dataclass_io/reader.py b/dataclass_io/reader.py
@@ -47,8 +47,8 @@ def __init__(
             comment=comment,
         )
 
-        assert_file_is_readable(path)
         assert_dataclass_is_valid(dataclass_type)
+        assert_file_is_readable(path)
         assert_file_header_matches_dataclass(path, dataclass_type, file_format)
 
         self._dataclass_type = dataclass_type

diff --git a/dataclass_io/writer.py b/dataclass_io/writer.py
@@ -8,10 +8,12 @@
 
 from dataclass_io._lib.assertions import assert_dataclass_is_valid
 from dataclass_io._lib.assertions import assert_fieldnames_are_dataclass_attributes
+from dataclass_io._lib.assertions import assert_file_header_matches_dataclass
 from dataclass_io._lib.assertions import assert_file_is_appendable
 from dataclass_io._lib.assertions import assert_file_is_writable
 from dataclass_io._lib.dataclass_extensions import DataclassInstance
 from dataclass_io._lib.dataclass_extensions import fieldnames
+from dataclass_io._lib.file import FileFormat
 from dataclass_io._lib.file import WritableFileHandle
 from dataclass_io._lib.file import WriteMode
 
@@ -65,30 +67,21 @@ def __init__(
         except ValueError:
             raise ValueError(f"`mode` must be either 'write' or 'append': {mode}") from None
 
-        assert_dataclass_is_valid(dataclass_type)
-
-        self._fieldnames: list[str]
-        if include_fields is not None and exclude_fields is not None:
-            raise ValueError(
-                "Only one of `include_fields` and `exclude_fields` may be specified, not both."
-            )
-        elif exclude_fields is not None:
-            assert_fieldnames_are_dataclass_attributes(exclude_fields, dataclass_type)
-            self._fieldnames = [f for f in fieldnames(dataclass_type) if f not in exclude_fields]
-        elif include_fields is not None:
-            assert_fieldnames_are_dataclass_attributes(include_fields, dataclass_type)
-            self._fieldnames = include_fields
-        else:
-            self._fieldnames = fieldnames(dataclass_type)
+        file_format = FileFormat(delimiter=delimiter)
 
+        assert_dataclass_is_valid(dataclass_type)
         if write_mode is WriteMode.WRITE:
             assert_file_is_writable(path, overwrite=overwrite)
         else:
             assert_file_is_appendable(path, dataclass_type=dataclass_type)
-            # TODO: check that header matches fieldnames
-            raise NotImplementedError
+            assert_file_header_matches_dataclass(path, dataclass_type, file_format)
 
         self._dataclass_type = dataclass_type
+        self._fieldnames = _validate_output_fieldnames(
+            dataclass_type=dataclass_type,
+            include_fields=include_fields,
+            exclude_fields=exclude_fields,
+        )
         self._fout = path.open(write_mode.abbreviation)
         self._writer = DictWriter(
             f=self._fout,
@@ -152,3 +145,39 @@ def writeall(self, dataclass_instances: Iterable[DataclassInstance]) -> None:
         """
         for dataclass_instance in dataclass_instances:
             self.write(dataclass_instance)
+
+
+def _validate_output_fieldnames(
+    dataclass_type: type[DataclassInstance],
+    include_fields: list[str] | None = None,
+    exclude_fields: list[str] | None = None,
+) -> list[str]:
+    """
+    Subset and/or re-order the dataclass's fieldnames based on the specified include/exclude lists.
+
+    * Only one of `include_fields` and `exclude_fields` may be specified.
+    * All fieldnames specified in `include_fields` must be fields on `dataclass_type`. If this
+      argument is specified, fields will be returned in the order they appear in the list.
+    * All fieldnames specified in `exclude_fields` must be fields on `dataclass_type`. (This is
+      technically unnecessary, but is a safeguard against passing an incorrect list.)
+    * If neither `include_fields` nor `exclude_fields` is specified, return the `dataclass_type`'s
+      fieldnames.
+
+    Returns:
+        The selected fieldnames. With `include_fields`, this is a copy of the caller's list.
+
+    Raises:
+        ValueError: If both `include_fields` and `exclude_fields` are specified.
+    """
+    if include_fields is not None and exclude_fields is not None:
+        raise ValueError(
+            "Only one of `include_fields` and `exclude_fields` may be specified, not both."
+        )
+    if exclude_fields is not None:
+        assert_fieldnames_are_dataclass_attributes(exclude_fields, dataclass_type)
+        return [f for f in fieldnames(dataclass_type) if f not in exclude_fields]
+    if include_fields is not None:
+        assert_fieldnames_are_dataclass_attributes(include_fields, dataclass_type)
+        # Copy so later mutation of the caller's list cannot change the returned fieldnames.
+        return list(include_fields)
+    return fieldnames(dataclass_type)
diff --git a/tests/test_writer.py b/tests/test_writer.py
@@ -45,6 +45,61 @@ def test_writer_writeall(tmp_path: Path) -> None:
             next(f)
 
 
+def test_writer_append(tmp_path: Path) -> None:
+    """Test that append mode adds rows after an existing header and leaves the header intact."""
+    fpath = tmp_path / "test.txt"
+    # Seed the file with a tab-delimited header naming the `foo` and `bar` fields used below.
+    with fpath.open("w") as fout:
+        fout.write("foo\tbar\n")
+    # Append two records to the pre-existing file.
+    with DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass) as writer:
+        writer.write(FakeDataclass(foo="abc", bar=1))
+        writer.write(FakeDataclass(foo="def", bar=2))
+    # The file must hold exactly the original header plus the two appended rows, nothing more.
+    with open(fpath, "r") as f:
+        assert next(f) == "foo\tbar\n"
+        assert next(f) == "abc\t1\n"
+        assert next(f) == "def\t2\n"
+        with pytest.raises(StopIteration):
+            next(f)
+
+
+def test_writer_append_raises_if_empty(tmp_path: Path) -> None:
+    """Test that appending to an existing but empty (zero-byte) file raises a ValueError."""
+    fpath = tmp_path / "test.txt"
+    fpath.touch()
+    # `touch()` leaves a file with no content, so there is no header line to append beneath.
+    with pytest.raises(ValueError, match="The specified output file is empty"):
+        with DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass) as writer:
+            writer.write(FakeDataclass(foo="abc", bar=1))
+
+
+def test_writer_append_raises_if_no_header(tmp_path: Path) -> None:
+    """Test that appending raises if the file's first line is a data row rather than a header."""
+    fpath = tmp_path / "test.txt"
+    with fpath.open("w") as fout:
+        fout.write("abc\t1\n")
+    # The only line is a data row, so it cannot match the dataclass's field names.
+    with pytest.raises(ValueError, match="The provided file does not have the same field names"):
+        with DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass) as writer:
+            writer.write(FakeDataclass(foo="abc", bar=1))
+
+
+def test_writer_append_raises_if_header_does_not_match(tmp_path: Path) -> None:
+    """
+    Test that appending raises a ValueError when the existing file's header names a column
+    (`baz`) in addition to the `foo` and `bar` fields supplied to `FakeDataclass` below.
+    """
+    fpath = tmp_path / "test.txt"
+    # Write a header with one extra column so the field-name comparison fails.
+    with fpath.open("w") as fout:
+        fout.write("foo\tbar\tbaz\n")
+    # The same "field names" message as the missing-header case is expected here.
+    with pytest.raises(ValueError, match="The provided file does not have the same field names"):
+        with DataclassWriter(path=fpath, mode="append", dataclass_type=FakeDataclass) as writer:
+            writer.write(FakeDataclass(foo="abc", bar=1))
+
+
 def test_writer_include_fields(tmp_path: Path) -> None:
     """Test that we can include only a subset of fields."""
     fpath = tmp_path / "test.txt"