Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Light GraphRAG #4585

Merged
merged 17 commits into from
Jan 22, 2025
Prev Previous commit
Next Next commit
Add flat_uniq_list.
  • Loading branch information
KevinHuSh committed Jan 20, 2025
commit 7a8557c7938ead04fe77e8c5b24305ae945ec987
10 changes: 4 additions & 6 deletions graphrag/general/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from graphrag.general.graph_prompt import SUMMARIZE_DESCRIPTIONS_PROMPT
from graphrag.utils import get_llm_cache, set_llm_cache, handle_single_entity_extraction, \
handle_single_relationship_extraction, split_string_by_multi_markers
handle_single_relationship_extraction, split_string_by_multi_markers, flat_uniq_list
from rag.llm.chat_model import Base as CompletionLLM
from rag.utils import truncate

Expand Down Expand Up @@ -160,8 +160,7 @@ def _merge_nodes(self, entity_name: str, entities: list[dict]):
description = GRAPH_FIELD_SEP.join(
sorted(set([dp["description"] for dp in entities] + already_description))
)
already_source_ids.extend(set([dp["source_id"] for dp in entities]))
already_source_ids = list(set(already_source_ids))
already_source_ids = flat_uniq_list(entities, "source_id")
description = self._handle_entity_relation_summary(
entity_name, description
)
Expand Down Expand Up @@ -198,9 +197,8 @@ def _merge_edges(
description = GRAPH_FIELD_SEP.join(
sorted(set([dp["description"] for dp in edges_data] + already_description))
)
keywords = list(set([dp["keywords"] for dp in edges_data] + already_keywords))

source_id = list(set([dp["source_id"] for dp in edges_data] + already_source_ids))
keywords = flat_uniq_list(edges_data, "keywords") + already_keywords
source_id = flat_uniq_list(edges_data, "source_id") + already_source_ids

for need_insert_id in [src_id, tgt_id]:
if self._get_entity_(need_insert_id):
Expand Down
13 changes: 12 additions & 1 deletion graphrag/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,4 +369,15 @@ def set_graph(tenant_id, kb_id, graph):
settings.docStoreConn.update({"knowledge_graph_kwd": "graph"}, chunk,
search.index_name(tenant_id), kb_id)
else:
settings.docStoreConn.insert([{"id": chunk_id(chunk), **chunk}], search.index_name(tenant_id))
settings.docStoreConn.insert([{"id": chunk_id(chunk), **chunk}], search.index_name(tenant_id))


def flat_uniq_list(arr, key):
    """Collect the distinct values stored under ``key`` across ``arr``.

    Each element of ``arr`` is indexed with ``key``. When the value found is
    a ``list`` its items are flattened into the result; any other value is
    taken as a single item. Duplicates are dropped.

    Args:
        arr: Iterable of mappings, each of which must contain ``key``.
        key: The key to look up in every element of ``arr``.

    Returns:
        A list of the unique values in first-seen order. An empty ``arr``
        yields ``[]``.

    Raises:
        KeyError: If an element of ``arr`` does not contain ``key``.
        TypeError: If a collected value is unhashable.
    """
    # A dict is used as an insertion-ordered set: unlike ``list(set(...))``,
    # whose ordering of strings changes between interpreter runs, this keeps
    # the output stable and reproducible.
    seen = {}
    for item in arr:
        value = item[key]
        if isinstance(value, list):
            for element in value:
                seen.setdefault(element, None)
        else:
            seen.setdefault(value, None)
    return list(seen)