diff --git a/gpt_index/data_structs/data_structs_v2.py b/gpt_index/data_structs/data_structs_v2.py
index 2f128d9a5e8f6..a2664693d6653 100644
--- a/gpt_index/data_structs/data_structs_v2.py
+++ b/gpt_index/data_structs/data_structs_v2.py
@@ -101,6 +101,7 @@ def insert_under_parent(
         new_index = new_index or self.size
         if parent_node is None:
             self.root_nodes[new_index] = node.get_doc_id()
+            self.node_id_to_children_ids[node.get_doc_id()] = []
         else:
             if parent_node.doc_id not in self.node_id_to_children_ids:
                 self.node_id_to_children_ids[parent_node.get_doc_id()] = []
diff --git a/tests/indices/tree/test_base.py b/tests/indices/tree/test_base.py
index 49b3cd45f4728..165d9dd52148c 100644
--- a/tests/indices/tree/test_base.py
+++ b/tests/indices/tree/test_base.py
@@ -315,6 +315,26 @@ def test_insert(
     assert nodes[0].ref_doc_id == "new_doc_test"
 
 
+@patch_common
+def test_twice_insert_empty(
+    _mock_init: Any,
+    _mock_predict: Any,
+    _mock_total_tokens_used: Any,
+    _mock_split_text_overlap: Any,
+    _mock_split_text: Any,
+) -> None:
+    """Test inserting twice into an empty index (with explicit doc_id)."""
+    tree = GPTTreeIndex.from_documents([])
+
+    # test first insert
+    new_doc = Document("This is a new doc.", doc_id="new_doc")
+    tree.insert(new_doc)
+    # test second insert
+    new_doc_second = Document("This is a new doc2.", doc_id="new_doc_2")
+    tree.insert(new_doc_second)
+    assert len(tree.index_struct.all_nodes) == 2
+
+
 def _mock_tokenizer(text: str) -> int:
     """Mock tokenizer that splits by spaces."""
     return len(text.split(" "))
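
For context, the added line in `insert_under_parent` makes a node inserted as a root also get an entry in `node_id_to_children_ids`, so a later insert that walks the existing tree does not look up a missing key. Below is a minimal, self-contained sketch of that idea; `TinyGraph` is a hypothetical toy class, not the library's actual `IndexGraph`, and it deliberately ignores the index bookkeeping and node objects the real method handles.

```python
from typing import Dict, List, Optional


class TinyGraph:
    """Toy stand-in for the index graph: maps node ids to child ids."""

    def __init__(self) -> None:
        self.root_nodes: Dict[int, str] = {}
        self.node_id_to_children_ids: Dict[str, List[str]] = {}

    def insert_under_parent(self, node_id: str, parent_id: Optional[str]) -> None:
        if parent_id is None:
            # New root node: register it and (as in the fix above) give it an
            # empty children list so later child lookups do not raise KeyError.
            self.root_nodes[len(self.root_nodes)] = node_id
            self.node_id_to_children_ids[node_id] = []
        else:
            # Existing parent: ensure its children list exists, then append.
            self.node_id_to_children_ids.setdefault(parent_id, []).append(node_id)


graph = TinyGraph()
graph.insert_under_parent("new_doc", parent_id=None)         # first insert creates a root
graph.insert_under_parent("new_doc_2", parent_id="new_doc")  # second insert hangs off it
# Without the root initialization, code that reads the root's children list
# directly (rather than via setdefault) would hit a missing key here.
assert graph.node_id_to_children_ids["new_doc"] == ["new_doc_2"]
```

The new `test_twice_insert_empty` test exercises exactly this path: the first insert into an empty `GPTTreeIndex` creates a root, and the second insert must be able to read that root's (initially empty) children list.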