diff --git a/Dockerfile b/Dockerfile
index d467fccb..207ffe9b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -46,7 +46,7 @@ RUN --mount=type=ssh \
 
 RUN --mount=type=ssh \
     --mount=type=cache,target=/root/.cache/pip \
-    if [ "$TARGETARCH" = "amd64" ]; then pip install graphrag future; fi
+    if [ "$TARGETARCH" = "amd64" ]; then pip install "graphrag<=0.3.6" future; fi
 
 # Clean up
 RUN apt-get autoremove \
diff --git a/README.md b/README.md
index 0d1e4ab8..adcb05ae 100644
--- a/README.md
+++ b/README.md
@@ -204,7 +204,7 @@ documents and developers who want to build their own RAG pipeline.
 - **Non-Docker Installation**: If you are not using Docker, install GraphRAG with the following command:
 
   ```shell
-  pip install graphrag future
+  pip install "graphrag<=0.3.6" future
   ```
 
 - **Setting Up API KEY**: To use the GraphRAG retriever feature, ensure you set the `GRAPHRAG_API_KEY` environment variable. You can do this directly in your environment or by adding it to a `.env` file.
diff --git a/libs/ktem/ktem/index/file/graph/lightrag_pipelines.py b/libs/ktem/ktem/index/file/graph/lightrag_pipelines.py
index 5144828d..98ee2126 100644
--- a/libs/ktem/ktem/index/file/graph/lightrag_pipelines.py
+++ b/libs/ktem/ktem/index/file/graph/lightrag_pipelines.py
@@ -150,16 +150,26 @@ async def lightrag_build_local_query_context(
         for k, n, d in zip(results, node_datas, node_degrees)
         if n is not None
     ]
-    use_text_units = await _find_most_related_text_unit_from_entities(
-        node_datas, query_param, text_chunks_db, knowledge_graph_inst
-    )
-    use_relations = await _find_most_related_edges_from_entities(
-        node_datas, query_param, knowledge_graph_inst
-    )
+
+    try:
+        use_text_units = await _find_most_related_text_unit_from_entities(
+            node_datas, query_param, text_chunks_db, knowledge_graph_inst
+        )
+    except Exception:
+        use_text_units = []
+
+    try:
+        use_relations = await _find_most_related_edges_from_entities(
+            node_datas, query_param, knowledge_graph_inst
+        )
+    except Exception:
+        use_relations = []
+
     logging.info(
         f"Local query uses {len(node_datas)} entities, "
         f"{len(use_relations)} relations, {len(use_text_units)} text units"
     )
+
     entites_section_list = [["id", "entity", "type", "description", "rank"]]
     for i, n in enumerate(node_datas):
         entites_section_list.append(
@@ -226,7 +236,9 @@ def call_graphrag_index(self, graph_id: str, docs: list[Document]):
         )
 
         all_docs = [
-            doc.text for doc in docs if doc.metadata.get("type", "text") == "text"
+            doc.text
+            for doc in docs
+            if doc.metadata.get("type", "text") == "text" and len(doc.text.strip()) > 0
         ]
 
         yield Document(
diff --git a/libs/ktem/ktem/index/file/graph/nano_pipelines.py b/libs/ktem/ktem/index/file/graph/nano_pipelines.py
index 332edcd1..9b300938 100644
--- a/libs/ktem/ktem/index/file/graph/nano_pipelines.py
+++ b/libs/ktem/ktem/index/file/graph/nano_pipelines.py
@@ -232,7 +232,9 @@ def call_graphrag_index(self, graph_id: str, docs: list[Document]):
         )
 
         all_docs = [
-            doc.text for doc in docs if doc.metadata.get("type", "text") == "text"
+            doc.text
+            for doc in docs
+            if doc.metadata.get("type", "text") == "text" and len(doc.text.strip()) > 0
         ]
 
         yield Document(
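
For context, the behavioral change this patch makes in both pipelines can be illustrated outside the diff. The sketch below is a minimal, self-contained approximation, not code from the kotaemon codebase: the helper `fetch_related` and the sample data are hypothetical stand-ins. It shows the same two ideas as the patch, assuming this reading is correct: a failing helper now degrades to an empty result instead of aborting the query, and empty or whitespace-only documents are dropped before indexing.

```python
import asyncio
import logging

# Hypothetical stand-in for a retrieval helper such as
# _find_most_related_text_unit_from_entities; here it always fails
# so the fallback path is exercised.
async def fetch_related(query: str) -> list[str]:
    raise RuntimeError("backend unavailable")

async def build_context(query: str, docs: list[str]) -> dict:
    # Degrade gracefully: an exception in the helper yields an empty list
    # instead of propagating, mirroring the try/except blocks in the patch.
    try:
        related = await fetch_related(query)
    except Exception:
        related = []

    # Skip empty or whitespace-only documents, mirroring the
    # len(doc.text.strip()) > 0 condition added to all_docs.
    non_empty = [d for d in docs if len(d.strip()) > 0]

    logging.info(
        "context uses %d related items, %d documents", len(related), len(non_empty)
    )
    return {"related": related, "docs": non_empty}

if __name__ == "__main__":
    # Prints {'related': [], 'docs': ['some text']}: the failure is absorbed
    # and only the non-empty document survives filtering.
    print(asyncio.run(build_context("demo", ["", "  ", "some text"])))
```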