55from bigtree .tree import construct , export , search
66from bigtree .utils import assertions , exceptions , iterators
77
8+ try :
9+ import pandas as pd
10+ except ImportError : # pragma: no cover
11+ from unittest .mock import MagicMock
12+
13+ pd = MagicMock ()
14+
815__all__ = ["clone_tree" , "get_subtree" , "prune_tree" , "get_tree_diff" ]
916BaseNodeT = TypeVar ("BaseNodeT" , bound = basenode .BaseNode )
1017BinaryNodeT = TypeVar ("BinaryNodeT" , bound = binarynode .BinaryNode )
@@ -237,6 +244,7 @@ def prune_tree(
237244 return tree_copy
238245
239246
247+ @exceptions .optional_dependencies_pandas
240248def get_tree_diff (
241249 tree : node .Node ,
242250 other_tree : node .Node ,
@@ -376,6 +384,7 @@ def get_tree_diff(
376384 name_col = "name"
377385 path_col = "PATH"
378386 indicator_col = "Exists"
387+ tree_sep = tree .sep
379388
380389 data , data_other = (
381390 export .tree_to_dataframe (
@@ -406,11 +415,12 @@ def get_tree_diff(
406415 moved_from_indicator : List [bool ] = [True for _ in range (len (nodes_removed ))]
407416 moved_to_indicator : List [bool ] = [True for _ in range (len (nodes_added ))]
408417 if detail :
409- _sep = tree .sep
410418 node_names_removed = [
411- node_removed .split (_sep )[- 1 ] for node_removed in nodes_removed
419+ node_removed .split (tree_sep )[- 1 ] for node_removed in nodes_removed
420+ ]
421+ node_names_added = [
422+ node_added .split (tree_sep )[- 1 ] for node_added in nodes_added
412423 ]
413- node_names_added = [node_added .split (_sep )[- 1 ] for node_added in nodes_added ]
414424 moved_from_indicator = [
415425 node_name_removed in node_names_added
416426 for node_name_removed in node_names_removed
@@ -420,15 +430,39 @@ def get_tree_diff(
420430 for node_name_added in node_names_added
421431 ]
422432
def add_suffix_to_path(
    _data: pd.DataFrame, _condition: pd.Series, _original_name: str, _suffix: str
) -> pd.DataFrame:
    """Add suffix to path string

    For the rows selected by `_condition`, every occurrence of `_original_name`
    in the path column is replaced by `"{_original_name} ({_suffix})"`. Rows not
    selected are returned unchanged. The result is sorted by index.

    Args:
        _data (pd.DataFrame): original data with path column
        _condition (pd.Series): whether to add suffix, contains True/False values
        _original_name (str): path prefix to add suffix to
        _suffix (str): suffix to add to path column

    Returns:
        (pd.DataFrame)
    """
    # Work on an explicit copy: assigning a column on a boolean-indexed slice
    # is chained assignment and is not guaranteed to behave consistently.
    data_replace = _data[_condition].copy()
    # The path is a plain string, not a pattern: replace it literally so node
    # names containing regex metacharacters (".", "(", "+", ...) are handled.
    # Use the `_suffix` argument rather than a variable from the enclosing scope.
    data_replace[path_col] = data_replace[path_col].str.replace(
        _original_name, f"{_original_name} ({_suffix})", regex=False
    )
    data_not_replace = _data[~_condition]
    # Public equivalent of the private `DataFrame._append`; sorting by index
    # interleaves the two partitions back together.
    return pd.concat([data_replace, data_not_replace]).sort_index()
453+
423454 for node_removed , move_indicator in zip (nodes_removed , moved_from_indicator ):
424455 if not detail :
425456 suffix = "-"
426457 elif move_indicator :
427458 suffix = "moved from"
428459 else :
429460 suffix = "removed"
430- data_both [path_col ] = data_both [path_col ].str .replace (
431- node_removed , f"{ node_removed } ({ suffix } )" , regex = True
461+ condition_node_removed = data_both [path_col ].str .endswith (
462+ node_removed
463+ ) | data_both [path_col ].str .contains (node_removed + tree_sep )
464+ data_both = add_suffix_to_path (
465+ data_both , condition_node_removed , node_removed , suffix
432466 )
433467 for node_added , move_indicator in zip (nodes_added , moved_to_indicator ):
434468 if not detail :
@@ -437,8 +471,11 @@ def get_tree_diff(
437471 suffix = "moved to"
438472 else :
439473 suffix = "added"
440- data_both [path_col ] = data_both [path_col ].str .replace (
441- node_added , f"{ node_added } ({ suffix } )" , regex = True
474+ condition_node_added = data_both [path_col ].str .endswith (node_added ) | data_both [
475+ path_col
476+ ].str .contains (node_added + tree_sep )
477+ data_both = add_suffix_to_path (
478+ data_both , condition_node_added , node_added , suffix
442479 )
443480
444481 # Check tree attribute difference
0 commit comments