Skip to content

Commit 2698a4a

Browse files
authored
Merge pull request #326 from kayjan/feature/tree-diff-naming
Rename variables in tree_diff
2 parents 923488f + 5bfbb1f commit 2698a4a

File tree

1 file changed

+30
-35
lines changed

1 file changed

+30
-35
lines changed

bigtree/tree/helper.py

Lines changed: 30 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -438,6 +438,8 @@ def get_tree_diff(
438438
path_col = "PATH"
439439
parent_col = "PARENT"
440440
indicator_col = "Exists"
441+
old_suffix = "_old"
442+
new_suffix = "_new"
441443
tree_sep = tree.sep
442444

443445
data, data_other = (
@@ -457,6 +459,7 @@ def get_tree_diff(
457459
how="outer",
458460
on=[path_col, name_col, parent_col],
459461
indicator=indicator_col,
462+
suffixes=(old_suffix, new_suffix),
460463
)
461464
if aggregate:
462465
data_both_agg = data_both[
@@ -473,30 +476,20 @@ def get_tree_diff(
473476
data_both_agg = data_both
474477

475478
# Handle tree structure difference
476-
nodes_removed = list(
479+
paths_removed = list(
477480
data_both_agg[data_both_agg[indicator_col] == "left_only"][path_col]
478481
)[::-1]
479-
nodes_added = list(
482+
paths_added = list(
480483
data_both_agg[data_both_agg[indicator_col] == "right_only"][path_col]
481484
)[::-1]
482485

483-
moved_from_indicator: List[bool] = [True for _ in range(len(nodes_removed))]
484-
moved_to_indicator: List[bool] = [True for _ in range(len(nodes_added))]
486+
moved_from_ind: List[bool] = [True for _ in range(len(paths_removed))]
487+
moved_to_ind: List[bool] = [True for _ in range(len(paths_added))]
485488
if detail:
486-
node_names_removed = [
487-
node_removed.split(tree_sep)[-1] for node_removed in nodes_removed
488-
]
489-
node_names_added = [
490-
node_added.split(tree_sep)[-1] for node_added in nodes_added
491-
]
492-
moved_from_indicator = [
493-
node_name_removed in node_names_added
494-
for node_name_removed in node_names_removed
495-
]
496-
moved_to_indicator = [
497-
node_name_added in node_names_removed
498-
for node_name_added in node_names_added
499-
]
489+
names_removed = [path.split(tree_sep)[-1] for path in paths_removed]
490+
names_added = [path.split(tree_sep)[-1] for path in paths_added]
491+
moved_from_ind = [name in names_added for name in names_removed]
492+
moved_to_ind = [name in names_removed for name in names_added]
500493

501494
def add_suffix_to_path(
502495
_data: pd.DataFrame, _condition: pd.Series, _original_name: str, _suffix: str
@@ -518,8 +511,8 @@ def add_suffix_to_path(
518511

519512
def add_suffix_to_data(
520513
_data: pd.DataFrame,
521-
nodes_diff: List[str],
522-
move_indicator: List[bool],
514+
paths_diff: List[str],
515+
move_ind: List[bool],
523516
suffix_general: str,
524517
suffix_move: str,
525518
suffix_not_moved: str,
@@ -528,45 +521,47 @@ def add_suffix_to_data(
528521
529522
Args:
530523
_data (pd.DataFrame): original data with path column
531-
nodes_diff (List[str]): list of paths that were modified (e.g., added/removed)
532-
move_indicator (List[bool]): move indicator to indicate path was moved instead of added/removed
524+
paths_diff (List[str]): list of paths that were modified (e.g., added/removed)
525+
move_ind (List[bool]): move indicator to indicate path was moved instead of added/removed
533526
suffix_general (str): path suffix for general case
534527
suffix_move (str): path suffix if path was moved
535528
suffix_not_moved (str): path suffix if path is not moved (e.g., added/removed)
536529
"""
537-
for _node_diff, _move_indicator in zip(nodes_diff, move_indicator):
530+
for _path_diff, _move_ind in zip(paths_diff, move_ind):
538531
if not detail:
539532
suffix = suffix_general
540533
else:
541-
suffix = suffix_move if _move_indicator else suffix_not_moved
534+
suffix = suffix_move if _move_ind else suffix_not_moved
542535
condition_node_modified = data_both[path_col].str.endswith(
543-
_node_diff
544-
) | data_both[path_col].str.contains(_node_diff + tree_sep)
545-
add_suffix_to_path(data_both, condition_node_modified, _node_diff, suffix)
536+
_path_diff
537+
) | data_both[path_col].str.contains(_path_diff + tree_sep)
538+
add_suffix_to_path(data_both, condition_node_modified, _path_diff, suffix)
546539

547540
add_suffix_to_data(
548-
data_both, nodes_removed, moved_from_indicator, "-", "moved from", "removed"
549-
)
550-
add_suffix_to_data(
551-
data_both, nodes_added, moved_to_indicator, "+", "moved to", "added"
541+
data_both, paths_removed, moved_from_ind, "-", "moved from", "removed"
552542
)
543+
add_suffix_to_data(data_both, paths_added, moved_to_ind, "+", "moved to", "added")
553544

554545
# Check tree attribute difference
555546
path_changes_list_of_dict: List[Dict[str, Dict[str, Any]]] = []
556547
path_changes_deque: Deque[str] = deque([])
557548
for attr_change in attr_list:
558549
condition_diff = (
559550
(
560-
~data_both[f"{attr_change}_x"].isnull()
561-
| ~data_both[f"{attr_change}_y"].isnull()
551+
~data_both[f"{attr_change}{old_suffix}"].isnull()
552+
| ~data_both[f"{attr_change}{new_suffix}"].isnull()
553+
)
554+
& (
555+
data_both[f"{attr_change}{old_suffix}"]
556+
!= data_both[f"{attr_change}{new_suffix}"]
562557
)
563-
& (data_both[f"{attr_change}_x"] != data_both[f"{attr_change}_y"])
564558
& (data_both[indicator_col] == "both")
565559
)
566560
data_diff = data_both[condition_diff]
567561
if len(data_diff):
568562
tuple_diff = zip(
569-
data_diff[f"{attr_change}_x"], data_diff[f"{attr_change}_y"]
563+
data_diff[f"{attr_change}{old_suffix}"],
564+
data_diff[f"{attr_change}{new_suffix}"],
570565
)
571566
dict_attr_diff = [{attr_change: v} for v in tuple_diff]
572567
dict_path_diff = dict(list(zip(data_diff[path_col], dict_attr_diff)))

0 commit comments

Comments
 (0)