Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 06579be

Browse files
authored
Merge pull request #805 from datafold/test_diff_result_list
Yield a list vs. individual tuples
2 parents 60ac169 + a9cfd54 commit 06579be

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

data_diff/diff_tables.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from enum import Enum
77
from contextlib import contextmanager
88
from operator import methodcaller
9-
from typing import Dict, Set, Tuple, Iterator, Optional
9+
from typing import Dict, Set, List, Tuple, Iterator, Optional
1010
from concurrent.futures import ThreadPoolExecutor, as_completed
1111

1212
import attrs
@@ -28,6 +28,7 @@ class Algorithm(Enum):
2828

2929

3030
DiffResult = Iterator[Tuple[str, tuple]] # Iterator[Tuple[Literal["+", "-"], tuple]]
31+
DiffResultList = Iterator[List[Tuple[str, tuple]]]
3132

3233

3334
@attrs.define(frozen=False)
@@ -187,6 +188,7 @@ class TableDiffer(ThreadBase, ABC):
187188
ignored_columns1: Set[str] = attrs.field(factory=set)
188189
ignored_columns2: Set[str] = attrs.field(factory=set)
189190
_ignored_columns_lock: threading.Lock = attrs.field(factory=threading.Lock, init=False)
191+
yield_list: bool = False
190192

191193
def diff_tables(self, table1: TableSegment, table2: TableSegment, info_tree: InfoTree = None) -> DiffResultWrapper:
192194
"""Diff the given tables.
@@ -255,7 +257,9 @@ def _diff_tables_wrapper(self, table1: TableSegment, table2: TableSegment, info_
255257
def _validate_and_adjust_columns(self, table1: TableSegment, table2: TableSegment) -> None:
256258
pass
257259

258-
def _diff_tables_root(self, table1: TableSegment, table2: TableSegment, info_tree: InfoTree) -> DiffResult:
260+
def _diff_tables_root(
261+
self, table1: TableSegment, table2: TableSegment, info_tree: InfoTree
262+
) -> DiffResult | DiffResultList:
259263
return self._bisect_and_diff_tables(table1, table2, info_tree)
260264

261265
@abstractmethod
@@ -300,9 +304,9 @@ def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, in
300304
f"size: table1 <= {btable1.approximate_size()}, table2 <= {btable2.approximate_size()}"
301305
)
302306

303-
ti = ThreadedYielder(self.max_threadpool_size)
307+
ti = ThreadedYielder(self.max_threadpool_size, self.yield_list)
304308
# Bisect (split) the table into segments, and diff them recursively.
305-
ti.submit(self._bisect_and_diff_segments, ti, btable1, btable2, info_tree)
309+
ti.submit(self._bisect_and_diff_segments, ti, btable1, btable2, info_tree, priority=999)
306310

307311
# Now we check for the second min-max, to diff the portions we "missed".
308312
# This is achieved by subtracting the table ranges, and dividing the resulting space into aligned boxes.
@@ -326,7 +330,7 @@ def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, in
326330

327331
for p1, p2 in new_regions:
328332
extra_tables = [t.new_key_bounds(min_key=p1, max_key=p2) for t in (table1, table2)]
329-
ti.submit(self._bisect_and_diff_segments, ti, *extra_tables, info_tree)
333+
ti.submit(self._bisect_and_diff_segments, ti, *extra_tables, info_tree, priority=999)
330334

331335
return ti
332336

data_diff/thread_utils.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -56,19 +56,24 @@ class ThreadedYielder(Iterable):
5656
_futures: deque
5757
_yield: deque = attrs.field(alias="_yield") # Python keyword!
5858
_exception: Optional[None]
59+
yield_list: bool
5960

60-
def __init__(self, max_workers: Optional[int] = None):
61+
def __init__(self, max_workers: Optional[int] = None, yield_list: bool = False):
6162
super().__init__()
6263
self._pool = PriorityThreadPoolExecutor(max_workers)
6364
self._futures = deque()
6465
self._yield = deque()
6566
self._exception = None
67+
self.yield_list = yield_list
6668

6769
def _worker(self, fn, *args, **kwargs):
6870
try:
6971
res = fn(*args, **kwargs)
7072
if res is not None:
71-
self._yield += res
73+
if self.yield_list:
74+
self._yield.append(res)
75+
else:
76+
self._yield += res
7277
except Exception as e:
7378
self._exception = e
7479

0 commit comments

Comments
 (0)