Skip to content

Commit a7c4b6c

Browse files
committed
feat: change logic for tree diff aggregate
1 parent f3d0141 commit a7c4b6c

File tree

2 files changed

+37
-23
lines changed

2 files changed

+37
-23
lines changed

bigtree/tree/helper.py

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def prune_tree(
129129
130130
For pruning by `prune_path`,
131131
132-
- All siblings along the prune path will be removed.
132+
- All siblings along the prune path will be removed. All descendants will be kept by default.
133133
- If ``exact=True``, all descendants of the prune path will be removed.
134134
- Prune path can be string (only one path) or a list of strings (multiple paths).
135135
- Prune path name should be unique; it can be a full path, a partial path (trailing part of the path), or a node name.
@@ -156,7 +156,7 @@ def prune_tree(
156156
│ └── d
157157
└── e
158158
159-
Prune (default is keep descendants)
159+
# Prune tree
160160
161161
>>> root_pruned = prune_tree(root, "a/b")
162162
>>> root_pruned.show()
@@ -165,14 +165,14 @@ def prune_tree(
165165
├── c
166166
└── d
167167
168-
Prune exact path
168+
## Exact path
169169
170170
>>> root_pruned = prune_tree(root, "a/b", exact=True)
171171
>>> root_pruned.show()
172172
a
173173
└── b
174174
175-
Prune multiple paths
175+
## Multiple paths
176176
177177
>>> root_pruned = prune_tree(root, ["a/b/d", "a/e"])
178178
>>> root_pruned.show()
@@ -181,7 +181,7 @@ def prune_tree(
181181
│ └── d
182182
└── e
183183
184-
Prune by depth
184+
## By depth
185185
186186
>>> root_pruned = prune_tree(root, max_depth=2)
187187
>>> root_pruned.show()
@@ -265,11 +265,11 @@ def get_tree_diff(
265265
- For example: (+) refers to nodes that are in `other_tree` but not `tree`.
266266
- For example: (-) refers to nodes that are in `tree` but not `other_tree`.
267267
268-
If `detail=True`, (added) and (moved to) will be used instead of (+), (removed) and (moved from)
269-
will be used instead of (-).
268+
If `detail=True`, (added) and (moved to) will be used instead of (+), and (removed) and (moved from) will be used
269+
instead of (-).
270270
271271
If `aggregate=True`, differences (+)/(added)/(moved to) and (-)/(removed)/(moved from) will only be indicated at
272-
the parent-level. This is useful when a subtree is shifted and we want the differences to shown only at the top node.
272+
the parent-level. This is useful when a subtree is shifted, and we want the differences shown only at the top node.
273273
274274
!!! note
275275
@@ -313,7 +313,7 @@ def get_tree_diff(
313313
│ └── photo2.jpg (-)
314314
└── file2.doc (+)
315315
316-
>>> # Get tree differences - all differences
316+
## All differences
317317
>>> tree_diff = get_tree_diff(root, root_other, only_diff=False)
318318
>>> tree_diff.show()
319319
Downloads
@@ -326,8 +326,10 @@ def get_tree_diff(
326326
├── file1.doc
327327
└── file2.doc (+)
328328
329-
>>> # Get tree differences - all differences with details
330-
>>> tree_diff = get_tree_diff(root, root_other, only_diff=False, detail=True)
329+
## All differences with details
330+
>>> tree_diff = get_tree_diff(
331+
... root, root_other, only_diff=False, detail=True
332+
... )
331333
>>> tree_diff.show()
332334
Downloads
333335
├── Pictures
@@ -339,8 +341,10 @@ def get_tree_diff(
339341
├── file1.doc
340342
└── file2.doc (added)
341343
342-
>>> # Get tree differences - all differences with details on aggregated level
343-
>>> tree_diff = get_tree_diff(root, root_other, only_diff=False, detail=True, aggregate=True)
344+
## All differences with details on aggregated level
345+
>>> tree_diff = get_tree_diff(
346+
... root, root_other, only_diff=False, detail=True, aggregate=True
347+
... )
344348
>>> tree_diff.show()
345349
Downloads
346350
├── Pictures
@@ -352,6 +356,16 @@ def get_tree_diff(
352356
├── file1.doc
353357
└── file2.doc (added)
354358
359+
## Only differences with details on aggregated level
360+
>>> tree_diff = get_tree_diff(root, root_other, detail=True, aggregate=True)
361+
>>> tree_diff.show()
362+
Downloads
363+
├── Pictures
364+
│ └── Trip (moved to)
365+
│ └── photo2.jpg
366+
├── Trip (moved from)
367+
└── file2.doc (added)
368+
355369
# Comparing tree attributes
356370
357371
- (~) will be added to node name if there are differences in tree attributes defined in `attr_list`.
@@ -381,7 +395,7 @@ def get_tree_diff(
381395
│ └── photo2.jpg [tags=photo2-new]
382396
└── file1.doc [tags=file1]
383397
384-
>>> # Get tree differences
398+
>>> # Get tree attribute differences
385399
>>> tree_diff = get_tree_diff(root, root_other, attr_list=["tags"])
386400
>>> tree_diff.show(attr_list=["tags"])
387401
Downloads
@@ -444,6 +458,12 @@ def get_tree_diff(
444458
(data_both[indicator_col] == "left_only")
445459
| (data_both[indicator_col] == "right_only")
446460
].drop_duplicates(subset=[name_col, parent_col], keep=False)
461+
if only_diff:
462+
# If only_diff and aggregate, remove children under (moved from)
463+
data_both = data_both.sort_values(indicator_col, ascending=False)
464+
data_both = data_both[
465+
~data_both.duplicated(subset=[name_col, parent_col])
466+
] # keep right_only
447467
else:
448468
data_both_agg = data_both
449469

tests/tree/test_helper.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -372,9 +372,7 @@ def test_tree_diff_aggregate(tree_node, tree_node_diff):
372372
"a\n"
373373
"├── b (-)\n"
374374
"│ ├── d (-)\n"
375-
"│ └── e (-)\n"
376-
"│ ├── g\n" # no (-)
377-
"│ └── h\n" # no (-)
375+
"│ └── e (-)\n" # children removed
378376
"├── c\n"
379377
"│ └── e (+)\n"
380378
"│ ├── g\n" # no (+)
@@ -437,9 +435,7 @@ def test_tree_diff_detail_aggregate(tree_node, tree_node_diff):
437435
"a\n"
438436
"├── b (removed)\n"
439437
"│ ├── d (removed)\n"
440-
"│ └── e (moved from)\n"
441-
"│ ├── g\n" # no (moved from)
442-
"│ └── h\n" # no (moved from)
438+
"│ └── e (moved from)\n" # children removed
443439
"├── c\n"
444440
"│ └── e (moved to)\n"
445441
"│ ├── g\n" # no (moved to)
@@ -459,9 +455,7 @@ def test_tree_diff_detail_aggregate_clash_names(tree_node, tree_node_diff):
459455
"a\n"
460456
"├── b (removed)\n"
461457
"│ ├── d (removed)\n"
462-
"│ └── e (moved from)\n"
463-
"│ ├── g\n" # no (moved from)
464-
"│ └── h\n" # no (moved from)
458+
"│ └── e (moved from)\n" # children removed
465459
"├── c\n"
466460
"│ └── e (moved to)\n"
467461
"│ ├── g\n" # no (moved to)

0 commit comments

Comments
 (0)