@@ -439,7 +439,7 @@ def get_tree_diff(
439439 indicator_col = "Exists"
440440 old_suffix = "_old"
441441 new_suffix = "_new"
442- tree_sep = tree . sep
442+ moved_ind = "moved_ind"
443443
444444 data , data_other = (
445445 export .tree_to_dataframe (
@@ -475,32 +475,38 @@ def get_tree_diff(
475475 data_path_diff = data_compare
476476
477477 # Handle tree structure difference
478- paths_removed = list (
479- data_path_diff [data_path_diff [indicator_col ] == "left_only" ][path_col ]
480- )[::- 1 ]
481- paths_added = list (
482- data_path_diff [data_path_diff [indicator_col ] == "right_only" ][path_col ]
483- )[::- 1 ]
484-
485- moved_from_ind : List [bool ] = [True for _ in range (len (paths_removed ))]
486- moved_to_ind : List [bool ] = [True for _ in range (len (paths_added ))]
487- if detail :
488- names_removed = [path .split (tree_sep )[- 1 ] for path in paths_removed ]
489- names_added = [path .split (tree_sep )[- 1 ] for path in paths_added ]
490- moved_from_ind = [name in names_added for name in names_removed ]
491- moved_to_ind = [name in names_removed for name in names_added ]
492-
493- path_removed_to_suffix = {
494- path : "-" if not detail else ("moved from" if move_ind else "removed" )
495- for path , move_ind in zip (paths_removed , moved_from_ind )
478+ data_tree = data_path_diff [data_path_diff [indicator_col ] == "left_only" ]
479+ data_tree_other = data_path_diff [data_path_diff [indicator_col ] == "right_only" ]
480+ data_tree [moved_ind ] = False
481+ data_tree_other [moved_ind ] = False
482+
483+ if len (data_tree ) and len (data_tree_other ):
484+ # Check for moved from and moved to
485+ move_from_condition = data_tree [
486+ data_tree [name_col ].isin (set (data_tree_other [name_col ]))
487+ ]
488+ data_tree .loc [move_from_condition .index , moved_ind ] = True
489+ move_to_condition = data_tree_other [
490+ data_tree_other [name_col ].isin (set (data_tree [name_col ]))
491+ ]
492+ data_tree_other .loc [move_to_condition .index , moved_ind ] = True
493+
494+ path_move_from = data_tree .set_index (path_col )[[moved_ind ]].to_dict (orient = "index" )
495+ path_move_to = data_tree_other .set_index (path_col )[[moved_ind ]].to_dict (
496+ orient = "index"
497+ )
498+
499+ path_move_from_suffix = {
500+ path : "-" if not detail else ("moved from" if v [moved_ind ] else "removed" )
501+ for path , v in path_move_from .items ()
496502 }
497- path_added_to_suffix = {
498- path : "+" if not detail else ("moved to" if move_ind else "added" )
499- for path , move_ind in zip ( paths_added , moved_to_ind )
503+ path_move_to_suffix = {
504+ path : "+" if not detail else ("moved to" if v [ moved_ind ] else "added" )
505+ for path , v in path_move_to . items ( )
500506 }
501507
502508 # Check tree attribute difference
503- dict_attr_diff : Dict [str , Dict [str , Any ]] = {}
509+ path_attr_diff : Dict [str , Dict [str , Any ]] = {}
504510 if attr_list :
505511 data_both = data_compare [data_compare [indicator_col ] == "both" ]
506512 condition_attr_diff = (
@@ -517,7 +523,7 @@ def get_tree_diff(
517523 data_attr_diff = data_both [eval (condition_attr_diff )]
518524 dict_attr_all = data_attr_diff .set_index (path_col ).to_dict (orient = "index" )
519525 for path , node_attr in dict_attr_all .items ():
520- dict_attr_diff [path ] = {
526+ path_attr_diff [path ] = {
521527 attr : (
522528 node_attr [f"{ attr } { old_suffix } " ],
523529 node_attr [f"{ attr } { new_suffix } " ],
@@ -531,24 +537,24 @@ def get_tree_diff(
531537 if only_diff :
532538 data_compare = data_compare [
533539 (data_compare [indicator_col ] != "both" )
534- | (data_compare [path_col ].isin (dict_attr_diff .keys ()))
540+ | (data_compare [path_col ].isin (path_attr_diff .keys ()))
535541 ]
536542 data_compare = data_compare [[path_col ]].sort_values (path_col )
537543 if len (data_compare ):
538544 tree_diff = construct .dataframe_to_tree (
539545 data_compare , node_type = tree .__class__ , sep = tree .sep
540546 )
541- for path in sorted (path_removed_to_suffix , reverse = True ):
547+ for path in sorted (path_move_from_suffix , reverse = True ):
542548 _node = search .find_full_path (tree_diff , path )
543- _node .name += f""" ({ path_removed_to_suffix [path ]} )"""
544- for path in sorted (path_added_to_suffix , reverse = True ):
549+ _node .name += f""" ({ path_move_from_suffix [path ]} )"""
550+ for path in sorted (path_move_to_suffix , reverse = True ):
545551 _node = search .find_full_path (tree_diff , path )
546- _node .name += f""" ({ path_added_to_suffix [path ]} )"""
552+ _node .name += f""" ({ path_move_to_suffix [path ]} )"""
547553
548554 # Handle tree attribute difference
549- if dict_attr_diff :
550- tree_diff = construct .add_dict_to_tree_by_path (tree_diff , dict_attr_diff )
551- for path in sorted (dict_attr_diff , reverse = True ):
555+ if path_attr_diff :
556+ tree_diff = construct .add_dict_to_tree_by_path (tree_diff , path_attr_diff )
557+ for path in sorted (path_attr_diff , reverse = True ):
552558 _node = search .find_full_path (tree_diff , path )
553559 _node .name += " (~)"
554560 return tree_diff
0 commit comments