66from bigtree .utils .assertions import (
77 assert_dataframe_no_duplicate_attribute ,
88 assert_dataframe_not_empty ,
9+ assert_key_not_in_dict_or_df ,
910 assert_length_not_empty ,
1011 filter_attributes ,
1112 isnull ,
2021__all__ = ["list_to_dag" , "dict_to_dag" , "dataframe_to_dag" ]
2122
2223
23- @optional_dependencies_pandas
2424def list_to_dag (
2525 relations : List [Tuple [str , str ]],
2626 node_type : Type [DAGNode ] = DAGNode ,
@@ -44,13 +44,26 @@ def list_to_dag(
4444 """
4545 assert_length_not_empty (relations , "Input list" , "relations" )
4646
47- relation_data = pd .DataFrame (relations , columns = ["parent" , "child" ])
48- return dataframe_to_dag (
49- relation_data , child_col = "child" , parent_col = "parent" , node_type = node_type
50- )
47+ node_dict : Dict [str , DAGNode ] = dict ()
48+ parent_node = DAGNode ()
49+
50+ for parent_name , child_name in relations :
51+ if parent_name not in node_dict :
52+ parent_node = node_type (parent_name )
53+ node_dict [parent_name ] = parent_node
54+ else :
55+ parent_node = node_dict [parent_name ]
56+ if child_name not in node_dict :
57+ child_node = node_type (child_name )
58+ node_dict [child_name ] = child_node
59+ else :
60+ child_node = node_dict [child_name ]
61+
62+ child_node .parents = [parent_node ]
63+
64+ return parent_node
5165
5266
53- @optional_dependencies_pandas
5467def dict_to_dag (
5568 relation_attrs : Dict [str , Any ],
5669 parent_key : str = "parents" ,
@@ -84,20 +97,34 @@ def dict_to_dag(
8497 """
8598 assert_length_not_empty (relation_attrs , "Dictionary" , "relation_attrs" )
8699
87- # Convert dictionary to dataframe
88- data = pd .DataFrame (relation_attrs ).T .rename_axis ("_tmp_child" ).reset_index ()
89- if parent_key not in data :
100+ node_dict : Dict [str , DAGNode ] = dict ()
101+ parent_node : DAGNode | None = None
102+
103+ for child_name , node_attrs in relation_attrs .items ():
104+ node_attrs = node_attrs .copy ()
105+ parent_names : List [str ] = []
106+ if parent_key in node_attrs :
107+ parent_names = node_attrs .pop (parent_key )
108+ assert_key_not_in_dict_or_df (node_attrs , ["parent" , "parents" , "children" ])
109+
110+ if child_name in node_dict :
111+ child_node = node_dict [child_name ]
112+ child_node .set_attrs (node_attrs )
113+ else :
114+ child_node = node_type (child_name , ** node_attrs )
115+ node_dict [child_name ] = child_node
116+
117+ for parent_name in parent_names :
118+ parent_node = node_dict .get (parent_name , node_type (parent_name ))
119+ node_dict [parent_name ] = parent_node
120+ child_node .parents = [parent_node ]
121+
122+ if parent_node is None :
90123 raise ValueError (
91124 f"Parent key { parent_key } not in dictionary, check `relation_attrs` and `parent_key`"
92125 )
93126
94- data = data .explode (parent_key )
95- return dataframe_to_dag (
96- data ,
97- child_col = "_tmp_child" ,
98- parent_col = parent_key ,
99- node_type = node_type ,
100- )
127+ return parent_node
101128
102129
103130@optional_dependencies_pandas
@@ -163,6 +190,7 @@ def dataframe_to_dag(
163190 attribute_cols = list (data .columns )
164191 attribute_cols .remove (child_col )
165192 attribute_cols .remove (parent_col )
193+ assert_key_not_in_dict_or_df (attribute_cols , ["parent" , "parents" , "children" ])
166194
167195 data = data [[child_col , parent_col ] + attribute_cols ].copy ()
168196
0 commit comments