Skip to content

Commit 8d866c5

Browse files
committed
feat: check for moved indicator via dataframe operations
1 parent b667b02 commit 8d866c5

File tree

1 file changed

+38
-32
lines changed

1 file changed

+38
-32
lines changed

bigtree/tree/helper.py

Lines changed: 38 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -439,7 +439,7 @@ def get_tree_diff(
439439
indicator_col = "Exists"
440440
old_suffix = "_old"
441441
new_suffix = "_new"
442-
tree_sep = tree.sep
442+
moved_ind = "moved_ind"
443443

444444
data, data_other = (
445445
export.tree_to_dataframe(
@@ -475,32 +475,38 @@ def get_tree_diff(
475475
data_path_diff = data_compare
476476

477477
# Handle tree structure difference
478-
paths_removed = list(
479-
data_path_diff[data_path_diff[indicator_col] == "left_only"][path_col]
480-
)[::-1]
481-
paths_added = list(
482-
data_path_diff[data_path_diff[indicator_col] == "right_only"][path_col]
483-
)[::-1]
484-
485-
moved_from_ind: List[bool] = [True for _ in range(len(paths_removed))]
486-
moved_to_ind: List[bool] = [True for _ in range(len(paths_added))]
487-
if detail:
488-
names_removed = [path.split(tree_sep)[-1] for path in paths_removed]
489-
names_added = [path.split(tree_sep)[-1] for path in paths_added]
490-
moved_from_ind = [name in names_added for name in names_removed]
491-
moved_to_ind = [name in names_removed for name in names_added]
492-
493-
path_removed_to_suffix = {
494-
path: "-" if not detail else ("moved from" if move_ind else "removed")
495-
for path, move_ind in zip(paths_removed, moved_from_ind)
478+
data_tree = data_path_diff[data_path_diff[indicator_col] == "left_only"]
479+
data_tree_other = data_path_diff[data_path_diff[indicator_col] == "right_only"]
480+
data_tree[moved_ind] = False
481+
data_tree_other[moved_ind] = False
482+
483+
if len(data_tree) and len(data_tree_other):
484+
# Check for moved from and moved to
485+
move_from_condition = data_tree[
486+
data_tree[name_col].isin(set(data_tree_other[name_col]))
487+
]
488+
data_tree.loc[move_from_condition.index, moved_ind] = True
489+
move_to_condition = data_tree_other[
490+
data_tree_other[name_col].isin(set(data_tree[name_col]))
491+
]
492+
data_tree_other.loc[move_to_condition.index, moved_ind] = True
493+
494+
path_move_from = data_tree.set_index(path_col)[[moved_ind]].to_dict(orient="index")
495+
path_move_to = data_tree_other.set_index(path_col)[[moved_ind]].to_dict(
496+
orient="index"
497+
)
498+
499+
path_move_from_suffix = {
500+
path: "-" if not detail else ("moved from" if v[moved_ind] else "removed")
501+
for path, v in path_move_from.items()
496502
}
497-
path_added_to_suffix = {
498-
path: "+" if not detail else ("moved to" if move_ind else "added")
499-
for path, move_ind in zip(paths_added, moved_to_ind)
503+
path_move_to_suffix = {
504+
path: "+" if not detail else ("moved to" if v[moved_ind] else "added")
505+
for path, v in path_move_to.items()
500506
}
501507

502508
# Check tree attribute difference
503-
dict_attr_diff: Dict[str, Dict[str, Any]] = {}
509+
path_attr_diff: Dict[str, Dict[str, Any]] = {}
504510
if attr_list:
505511
data_both = data_compare[data_compare[indicator_col] == "both"]
506512
condition_attr_diff = (
@@ -517,7 +523,7 @@ def get_tree_diff(
517523
data_attr_diff = data_both[eval(condition_attr_diff)]
518524
dict_attr_all = data_attr_diff.set_index(path_col).to_dict(orient="index")
519525
for path, node_attr in dict_attr_all.items():
520-
dict_attr_diff[path] = {
526+
path_attr_diff[path] = {
521527
attr: (
522528
node_attr[f"{attr}{old_suffix}"],
523529
node_attr[f"{attr}{new_suffix}"],
@@ -531,24 +537,24 @@ def get_tree_diff(
531537
if only_diff:
532538
data_compare = data_compare[
533539
(data_compare[indicator_col] != "both")
534-
| (data_compare[path_col].isin(dict_attr_diff.keys()))
540+
| (data_compare[path_col].isin(path_attr_diff.keys()))
535541
]
536542
data_compare = data_compare[[path_col]].sort_values(path_col)
537543
if len(data_compare):
538544
tree_diff = construct.dataframe_to_tree(
539545
data_compare, node_type=tree.__class__, sep=tree.sep
540546
)
541-
for path in sorted(path_removed_to_suffix, reverse=True):
547+
for path in sorted(path_move_from_suffix, reverse=True):
542548
_node = search.find_full_path(tree_diff, path)
543-
_node.name += f""" ({path_removed_to_suffix[path]})"""
544-
for path in sorted(path_added_to_suffix, reverse=True):
549+
_node.name += f""" ({path_move_from_suffix[path]})"""
550+
for path in sorted(path_move_to_suffix, reverse=True):
545551
_node = search.find_full_path(tree_diff, path)
546-
_node.name += f""" ({path_added_to_suffix[path]})"""
552+
_node.name += f""" ({path_move_to_suffix[path]})"""
547553

548554
# Handle tree attribute difference
549-
if dict_attr_diff:
550-
tree_diff = construct.add_dict_to_tree_by_path(tree_diff, dict_attr_diff)
551-
for path in sorted(dict_attr_diff, reverse=True):
555+
if path_attr_diff:
556+
tree_diff = construct.add_dict_to_tree_by_path(tree_diff, path_attr_diff)
557+
for path in sorted(path_attr_diff, reverse=True):
552558
_node = search.find_full_path(tree_diff, path)
553559
_node.name += " (~)"
554560
return tree_diff

0 commit comments

Comments
 (0)