@@ -439,7 +439,7 @@ def get_tree_diff(
439439 indicator_col = "Exists"
440440 old_suffix = "_old"
441441 new_suffix = "_new"
442- tree_sep = tree . sep
442+ moved_ind = "moved_ind"
443443
444444 data , data_other = (
445445 export .tree_to_dataframe (
@@ -475,32 +475,46 @@ def get_tree_diff(
475475 data_path_diff = data_compare
476476
477477 # Handle tree structure difference
478- paths_removed = list (
479- data_path_diff [data_path_diff [indicator_col ] == "left_only" ][path_col ]
480- )[::- 1 ]
481- paths_added = list (
482- data_path_diff [data_path_diff [indicator_col ] == "right_only" ][path_col ]
483- )[::- 1 ]
484-
485- moved_from_ind : List [bool ] = [True for _ in range (len (paths_removed ))]
486- moved_to_ind : List [bool ] = [True for _ in range (len (paths_added ))]
478+ data_tree = data_path_diff [data_path_diff [indicator_col ] == "left_only" ]
479+ data_tree_other = data_path_diff [data_path_diff [indicator_col ] == "right_only" ]
480+
487481 if detail :
488- names_removed = [path .split (tree_sep )[- 1 ] for path in paths_removed ]
489- names_added = [path .split (tree_sep )[- 1 ] for path in paths_added ]
490- moved_from_ind = [name in names_added for name in names_removed ]
491- moved_to_ind = [name in names_removed for name in names_added ]
492-
493- path_removed_to_suffix = {
494- path : "-" if not detail else ("moved from" if move_ind else "removed" )
495- for path , move_ind in zip (paths_removed , moved_from_ind )
496- }
497- path_added_to_suffix = {
498- path : "+" if not detail else ("moved to" if move_ind else "added" )
499- for path , move_ind in zip (paths_added , moved_to_ind )
500- }
482+ data_tree [moved_ind ] = False
483+ data_tree_other [moved_ind ] = False
484+
485+ if len (data_tree ) and len (data_tree_other ):
486+ # Check for moved from and moved to
487+ move_from_condition = data_tree [
488+ data_tree [name_col ].isin (set (data_tree_other [name_col ]))
489+ ]
490+ data_tree .loc [move_from_condition .index , moved_ind ] = True
491+ move_to_condition = data_tree_other [
492+ data_tree_other [name_col ].isin (set (data_tree [name_col ]))
493+ ]
494+ data_tree_other .loc [move_to_condition .index , moved_ind ] = True
495+
496+ path_move_from = data_tree .set_index (path_col )[[moved_ind ]].to_dict (
497+ orient = "index"
498+ )
499+ path_move_to = data_tree_other .set_index (path_col )[[moved_ind ]].to_dict (
500+ orient = "index"
501+ )
502+ path_move_from_suffix = {
503+ path : "moved from" if v [moved_ind ] else "removed"
504+ for path , v in path_move_from .items ()
505+ }
506+ path_move_to_suffix = {
507+ path : "moved to" if v [moved_ind ] else "added"
508+ for path , v in path_move_to .items ()
509+ }
510+ else :
511+ path_move_from_suffix = dict (zip (data_tree [path_col ], "-" * len (data_tree )))
512+ path_move_to_suffix = dict (
513+ zip (data_tree_other [path_col ], "+" * len (data_tree_other ))
514+ )
501515
502516 # Check tree attribute difference
503- dict_attr_diff : Dict [str , Dict [str , Any ]] = {}
517+ path_attr_diff : Dict [str , Dict [str , Any ]] = {}
504518 if attr_list :
505519 data_both = data_compare [data_compare [indicator_col ] == "both" ]
506520 condition_attr_diff = (
@@ -517,7 +531,7 @@ def get_tree_diff(
517531 data_attr_diff = data_both [eval (condition_attr_diff )]
518532 dict_attr_all = data_attr_diff .set_index (path_col ).to_dict (orient = "index" )
519533 for path , node_attr in dict_attr_all .items ():
520- dict_attr_diff [path ] = {
534+ path_attr_diff [path ] = {
521535 attr : (
522536 node_attr [f"{ attr } { old_suffix } " ],
523537 node_attr [f"{ attr } { new_suffix } " ],
@@ -531,24 +545,24 @@ def get_tree_diff(
531545 if only_diff :
532546 data_compare = data_compare [
533547 (data_compare [indicator_col ] != "both" )
534- | (data_compare [path_col ].isin (dict_attr_diff .keys ()))
548+ | (data_compare [path_col ].isin (path_attr_diff .keys ()))
535549 ]
536550 data_compare = data_compare [[path_col ]].sort_values (path_col )
537551 if len (data_compare ):
538552 tree_diff = construct .dataframe_to_tree (
539553 data_compare , node_type = tree .__class__ , sep = tree .sep
540554 )
541- for path in sorted (path_removed_to_suffix , reverse = True ):
555+ for path in sorted (path_move_from_suffix , reverse = True ):
542556 _node = search .find_full_path (tree_diff , path )
543- _node .name += f""" ({ path_removed_to_suffix [path ]} )"""
544- for path in sorted (path_added_to_suffix , reverse = True ):
557+ _node .name += f""" ({ path_move_from_suffix [path ]} )"""
558+ for path in sorted (path_move_to_suffix , reverse = True ):
545559 _node = search .find_full_path (tree_diff , path )
546- _node .name += f""" ({ path_added_to_suffix [path ]} )"""
560+ _node .name += f""" ({ path_move_to_suffix [path ]} )"""
547561
548562 # Handle tree attribute difference
549- if dict_attr_diff :
550- tree_diff = construct .add_dict_to_tree_by_path (tree_diff , dict_attr_diff )
551- for path in sorted (dict_attr_diff , reverse = True ):
563+ if path_attr_diff :
564+ tree_diff = construct .add_dict_to_tree_by_path (tree_diff , path_attr_diff )
565+ for path in sorted (path_attr_diff , reverse = True ):
552566 _node = search .find_full_path (tree_diff , path )
553567 _node .name += " (~)"
554568 return tree_diff
0 commit comments