From 50171db75433fd083163cea5cd9fd037e8bc3f29 Mon Sep 17 00:00:00 2001 From: joocer Date: Sun, 12 May 2024 12:00:44 +0100 Subject: [PATCH] 0.6.21 --- mabel/data/writers/internals/blob_writer.py | 2 ++ tests/test_writer_stream_writer_backout.py | 2 +- tests/test_writer_stream_writer_substitutions.py | 11 ++++++----- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/mabel/data/writers/internals/blob_writer.py b/mabel/data/writers/internals/blob_writer.py index 381393b..4adece5 100644 --- a/mabel/data/writers/internals/blob_writer.py +++ b/mabel/data/writers/internals/blob_writer.py @@ -60,6 +60,8 @@ def arrow_append(self, record: dict = {}): self.commit() self.open_buffer() + return self.records_in_buffer + def text_append(self, record: dict = {}): # serialize the record if self.format == "text": diff --git a/tests/test_writer_stream_writer_backout.py b/tests/test_writer_stream_writer_backout.py index 9f72d59..9e4585b 100644 --- a/tests/test_writer_stream_writer_backout.py +++ b/tests/test_writer_stream_writer_backout.py @@ -6,7 +6,6 @@ from mabel.adapters.disk import DiskWriter, DiskReader from mabel.data import StreamWriter from mabel.data import Reader -from mabel.data.validator import schema_loader import shutil from pathlib import Path from rich import traceback @@ -37,6 +36,7 @@ def test_writer_backout(): w = StreamWriter( dataset=TEST_FOLDER, inner_writer=DiskWriter, + format="zstd", schema=SCHEMA, idle_timeout_seconds=1, ) diff --git a/tests/test_writer_stream_writer_substitutions.py b/tests/test_writer_stream_writer_substitutions.py index 366c550..1e1f35d 100644 --- a/tests/test_writer_stream_writer_substitutions.py +++ b/tests/test_writer_stream_writer_substitutions.py @@ -14,17 +14,18 @@ DATA_SET = [ {"key": 6, "value": ["s", "i", "x"], "combinations": 3}, # (6,s),(6,i),(6,x) {"key": [1, 0], "value": ["t", "e", "n"], "combinations": 6}, - {"key": 0, "combinations": 1}, + {"key": 0, "value": None, "combinations": 1}, ] def test_writer_substitutions(): - w = StreamWriter(dataset="TEST/{key}/{value}", inner_writer=NullWriter, schema=["@"]) + w = StreamWriter( + dataset="TEST/{key}/{value}", + inner_writer=NullWriter, + schema=["key", "value", "combinations"], + ) for record in DATA_SET: - # convert to a simd object to test behavior - as_json = orjson.dumps(record) - combinations = w.append(record) assert combinations == record["combinations"], combinations