@@ -438,6 +438,8 @@ def get_tree_diff(
438438 path_col = "PATH"
439439 parent_col = "PARENT"
440440 indicator_col = "Exists"
441+ old_suffix = "_old"
442+ new_suffix = "_new"
441443 tree_sep = tree .sep
442444
443445 data , data_other = (
@@ -457,6 +459,7 @@ def get_tree_diff(
457459 how = "outer" ,
458460 on = [path_col , name_col , parent_col ],
459461 indicator = indicator_col ,
462+ suffixes = (old_suffix , new_suffix ),
460463 )
461464 if aggregate :
462465 data_both_agg = data_both [
@@ -473,30 +476,20 @@ def get_tree_diff(
473476 data_both_agg = data_both
474477
475478 # Handle tree structure difference
476- nodes_removed = list (
479+ paths_removed = list (
477480 data_both_agg [data_both_agg [indicator_col ] == "left_only" ][path_col ]
478481 )[::- 1 ]
479- nodes_added = list (
482+ paths_added = list (
480483 data_both_agg [data_both_agg [indicator_col ] == "right_only" ][path_col ]
481484 )[::- 1 ]
482485
483- moved_from_indicator : List [bool ] = [True for _ in range (len (nodes_removed ))]
484- moved_to_indicator : List [bool ] = [True for _ in range (len (nodes_added ))]
486+ moved_from_ind : List [bool ] = [True for _ in range (len (paths_removed ))]
487+ moved_to_ind : List [bool ] = [True for _ in range (len (paths_added ))]
485488 if detail :
486- node_names_removed = [
487- node_removed .split (tree_sep )[- 1 ] for node_removed in nodes_removed
488- ]
489- node_names_added = [
490- node_added .split (tree_sep )[- 1 ] for node_added in nodes_added
491- ]
492- moved_from_indicator = [
493- node_name_removed in node_names_added
494- for node_name_removed in node_names_removed
495- ]
496- moved_to_indicator = [
497- node_name_added in node_names_removed
498- for node_name_added in node_names_added
499- ]
489+ names_removed = [path .split (tree_sep )[- 1 ] for path in paths_removed ]
490+ names_added = [path .split (tree_sep )[- 1 ] for path in paths_added ]
491+ moved_from_ind = [name in names_added for name in names_removed ]
492+ moved_to_ind = [name in names_removed for name in names_added ]
500493
501494 def add_suffix_to_path (
502495 _data : pd .DataFrame , _condition : pd .Series , _original_name : str , _suffix : str
@@ -518,8 +511,8 @@ def add_suffix_to_path(
518511
519512 def add_suffix_to_data (
520513 _data : pd .DataFrame ,
521- nodes_diff : List [str ],
522- move_indicator : List [bool ],
514+ paths_diff : List [str ],
515+ move_ind : List [bool ],
523516 suffix_general : str ,
524517 suffix_move : str ,
525518 suffix_not_moved : str ,
@@ -528,45 +521,47 @@ def add_suffix_to_data(
528521
529522 Args:
530523 _data (pd.DataFrame): original data with path column
531- nodes_diff (List[str]): list of paths that were modified (e.g., added/removed)
532- move_indicator (List[bool]): move indicator to indicate path was moved instead of added/removed
524+ paths_diff (List[str]): list of paths that were modified (e.g., added/removed)
525+ move_ind (List[bool]): move indicator to indicate path was moved instead of added/removed
533526 suffix_general (str): path suffix for general case
534527 suffix_move (str): path suffix if path was moved
535528 suffix_not_moved (str): path suffix if path is not moved (e.g., added/removed)
536529 """
537- for _node_diff , _move_indicator in zip (nodes_diff , move_indicator ):
530+ for _path_diff , _move_ind in zip (paths_diff , move_ind ):
538531 if not detail :
539532 suffix = suffix_general
540533 else :
541- suffix = suffix_move if _move_indicator else suffix_not_moved
534+ suffix = suffix_move if _move_ind else suffix_not_moved
542535 condition_node_modified = data_both [path_col ].str .endswith (
543- _node_diff
544- ) | data_both [path_col ].str .contains (_node_diff + tree_sep )
545- add_suffix_to_path (data_both , condition_node_modified , _node_diff , suffix )
536+ _path_diff
537+ ) | data_both [path_col ].str .contains (_path_diff + tree_sep )
538+ add_suffix_to_path (data_both , condition_node_modified , _path_diff , suffix )
546539
547540 add_suffix_to_data (
548- data_both , nodes_removed , moved_from_indicator , "-" , "moved from" , "removed"
549- )
550- add_suffix_to_data (
551- data_both , nodes_added , moved_to_indicator , "+" , "moved to" , "added"
541+ data_both , paths_removed , moved_from_ind , "-" , "moved from" , "removed"
552542 )
543+ add_suffix_to_data (data_both , paths_added , moved_to_ind , "+" , "moved to" , "added" )
553544
554545 # Check tree attribute difference
555546 path_changes_list_of_dict : List [Dict [str , Dict [str , Any ]]] = []
556547 path_changes_deque : Deque [str ] = deque ([])
557548 for attr_change in attr_list :
558549 condition_diff = (
559550 (
560- ~ data_both [f"{ attr_change } _x" ].isnull ()
561- | ~ data_both [f"{ attr_change } _y" ].isnull ()
551+ ~ data_both [f"{ attr_change } { old_suffix } " ].isnull ()
552+ | ~ data_both [f"{ attr_change } { new_suffix } " ].isnull ()
553+ )
554+ & (
555+ data_both [f"{ attr_change } { old_suffix } " ]
556+ != data_both [f"{ attr_change } { new_suffix } " ]
562557 )
563- & (data_both [f"{ attr_change } _x" ] != data_both [f"{ attr_change } _y" ])
564558 & (data_both [indicator_col ] == "both" )
565559 )
566560 data_diff = data_both [condition_diff ]
567561 if len (data_diff ):
568562 tuple_diff = zip (
569- data_diff [f"{ attr_change } _x" ], data_diff [f"{ attr_change } _y" ]
563+ data_diff [f"{ attr_change } { old_suffix } " ],
564+ data_diff [f"{ attr_change } { new_suffix } " ],
570565 )
571566 dict_attr_diff = [{attr_change : v } for v in tuple_diff ]
572567 dict_path_diff = dict (list (zip (data_diff [path_col ], dict_attr_diff )))
0 commit comments