[GraphRAG][v2] Implementation of Agentic Retrieval and Performance enhancement of KG construction & Semantic Beam Search #350
Conversation
Make entity deduplication thread safe
| if (token[i] == ':') | ||
| ++colonCount; | ||
| } | ||
| util_logger.info("ColonCount: " + std::to_string(colonCount)); |
There was a problem hiding this comment.
| util_logger.info("ColonCount: " + std::to_string(colonCount)); | |
| util_logger.debug("ColonCount: " + std::to_string(colonCount)); |
There was a problem hiding this comment.
Shouldn't this be a debug message?
| // Make sure this colon is AFTER the scheme (not the one in https://) | ||
| if (lastColon != std::string::npos && lastColon > token.find("://") + 2) { | ||
| token = token.substr(0, lastColon); | ||
| util_logger.info("HTTPS rule applied, stripped to: " + token); |
There was a problem hiding this comment.
| util_logger.info("HTTPS rule applied, stripped to: " + token); | |
| util_logger.debug("HTTPS rule applied, stripped to: " + token); |
| if (lastColon != std::string::npos) { | ||
| token = token.substr(0, lastColon); | ||
| } | ||
| util_logger.info("ColonCount 3"); |
There was a problem hiding this comment.
| util_logger.info("ColonCount 3"); |
| // if (index->ntotal % 1000 == 0) { | ||
| // lock.unlock(); | ||
| // faiss_index_logger.info("saving faiss index periodically"); | ||
| // save(filePath); | ||
| // faiss_index_logger.info("saved faiss index periodically"); | ||
| // | ||
| // } |
There was a problem hiding this comment.
| // if (index->ntotal % 1000 == 0) { | |
| // lock.unlock(); | |
| // faiss_index_logger.info("saving faiss index periodically"); | |
| // save(filePath); | |
| // faiss_index_logger.info("saved faiss index periodically"); | |
| // | |
| // } |
| // faiss_index_logger.error(std::string("Failed to reconstruct embedding for ID ") + nodeId + ": " + | ||
| // e.what()); |
There was a problem hiding this comment.
| // faiss_index_logger.error(std::string("Failed to reconstruct embedding for ID ") + nodeId + ": " + | |
| // e.what()); |
| // int nlist = 100000; // number of clusters (IVF) | ||
| // int m = 64; // PQ number of sub-vectors | ||
| // int nbits = 8; // 8-bit quantization | ||
| // | ||
| // faiss::IndexFlatL2 quantizer(dim); | ||
| // | ||
| // faiss::IndexIVFPQ* index = new faiss::IndexIVFPQ( | ||
| // &quantizer, | ||
| // dim, | ||
| // nlist, // IVF clusters | ||
| // m, // number of PQ subvectors | ||
| // nbits // bits per subvector | ||
| // ); | ||
| // index->use_precomputed_table = 1; | ||
| // index->train(num_train_vectors, train_data); |
There was a problem hiding this comment.
| // int nlist = 100000; // number of clusters (IVF) | |
| // int m = 64; // PQ number of sub-vectors | |
| // int nbits = 8; // 8-bit quantization | |
| // | |
| // faiss::IndexFlatL2 quantizer(dim); | |
| // | |
| // faiss::IndexIVFPQ* index = new faiss::IndexIVFPQ( | |
| // &quantizer, | |
| // dim, | |
| // nlist, // IVF clusters | |
| // m, // number of PQ subvectors | |
| // nbits // bits per subvector | |
| // ); | |
| // index->use_precomputed_table = 1; | |
| // index->train(num_train_vectors, train_data); |
| static FaissIndex* getInstance(int embeddingDim, const std::string& filepath); | ||
|
|
||
| ~FaissIndex(); | ||
| // ~FaissIndex(); |
There was a problem hiding this comment.
| // ~FaissIndex(); |
| @@ -1,2 +1,2 @@ | |||
| hdfs.host=192.168.1.19 | |||
| hdfs.host=10.8.100.22 | |||
There was a problem hiding this comment.
Why did you change the IP address to this new value?
| RESULT = subprocess.check_output(["hostname", "-I"]).decode().strip() | ||
| SERVER_IP = RESULT.split()[0] | ||
| HOST = "127.0.0.1" | ||
| HOST = "10.8.100.248" |
There was a problem hiding this comment.
Why did we change the IP address here?
| @@ -0,0 +1,19 @@ | |||
| """Copyright 2024 JasmineGraph Team | |||
There was a problem hiding this comment.
| """Copyright 2024 JasmineGraph Team | |
| """Copyright 2026 JasmineGraph Team |
| @@ -0,0 +1,204 @@ | |||
| """Copyright 2025 JasmineGraph Team | |||
There was a problem hiding this comment.
| """Copyright 2025 JasmineGraph Team | |
| """Copyright 2026 JasmineGraph Team |
| def test(host, port): | ||
| """Test the JasmineGraph server by sending a series of commands and checking the responses.""" | ||
|
|
||
| # subprocess.run(['bash', OLLAMA_SETUP_SCRIPT], check=True) |
There was a problem hiding this comment.
| # subprocess.run(['bash', OLLAMA_SETUP_SCRIPT], check=True) |
miyurud
left a comment
There was a problem hiding this comment.
I have left some comments on the PR. Please address all of them and explain how you addressed each one by replying to the corresponding comment.
|
# Conflicts: # src/frontend/JasmineGraphFrontEnd.cpp # src/frontend/JasmineGraphFrontEnd.h # src/frontend/core/executor/impl/SemanticBeamSearchExecutor.cpp # src/knowledgegraph/construction/Pipeline.cpp # src/query/processor/nlp/semanticbeamsearch/SemanticBeamSearch.cpp # src/server/JasmineGraphInstanceProtocol.cpp # src/server/JasmineGraphInstanceProtocol.h # src/server/JasmineGraphInstanceService.cpp # src/util/Utils.cpp # src/util/Utils.h # src/vectorstore/FaissIndex.cpp # test-docker.sh # test-k8s.sh
Saji master 16 5
|




Summary
This PR introduces the new Agentic Retrieval and improvements in Knowledge Graph (KG) construction and semantic search, focusing on performance, scalability, and reliability.
1. Agentic Retrieval
2. Knowledge Graph (KG) Construction
3. Semantic Beam Search & Graph / Query Optimizations
Impact
Notes for Reviewers