neo4j-labs · kartikpersistent · Aug 21, 2024 · Aug 21, 2024 · Aug 21, 2024 · Aug 21, 2024
diff --git a/backend/requirements.txt b/backend/requirements.txt
@@ -69,18 +69,18 @@ jsonpath-python==1.0.6
 jsonpointer==2.4
 json-repair==0.25.2
 kiwisolver==1.4.5
-langchain==0.2.8
-langchain-aws==0.1.9
-langchain-anthropic==0.1.19
-langchain-fireworks==0.1.4
-langchain-google-genai==1.0.7
-langchain-community==0.2.7
-langchain-core==0.2.19
-langchain-experimental==0.0.62
-langchain-google-vertexai==1.0.6
-langchain-groq==0.1.6
-langchain-openai==0.1.14
-langchain-text-splitters==0.2.2
+langchain
+langchain-aws
+langchain-anthropic
+langchain-fireworks
+langchain-google-genai
+langchain-community
+langchain-core
+langchain-experimental
+langchain-google-vertexai
+langchain-groq
+langchain-openai
+langchain-text-splitters
 langdetect==1.0.9
 langsmith==0.1.83
 layoutparser==0.3.4

diff --git a/backend/score.py b/backend/score.py
@@ -246,10 +246,12 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database
             json_obj = {'api_name': 'post_processing/update_similarity_graph', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
             logger.log_struct(json_obj)
             logging.info(f'Updated KNN Graph')
-        if "create_fulltext_index" in tasks:
-            await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database)
-            json_obj = {'api_name': 'post_processing/create_fulltext_index', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
-            logger.log_struct(json_obj)
+
+        if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks:
+            await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="entities")
+            # await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword")
+            josn_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+            logger.log_struct(josn_obj)
             logging.info(f'Full Text index created')
         if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "materialize_entity_similarities" in tasks:
             await asyncio.to_thread(create_entity_embedding, graph)
@@ -467,8 +469,8 @@ async def delete_document_and_entities(uri=Form(),
         graph = create_graph_database_connection(uri, userName, password, database)
         graphDb_data_Access = graphDBdataAccess(graph)
         result, files_list_size = await asyncio.to_thread(graphDb_data_Access.delete_file_from_graph, filenames, source_types, deleteEntities, MERGED_DIR, uri)
-        entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0
-        message = f"Deleted {files_list_size} documents with {entities_count} entities from database"
+        # entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0
+        message = f"Deleted {files_list_size} documents with entities from database"
         json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
         logger.log_struct(json_obj)
         return create_api_response('Success',message=message)

diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py
@@ -41,26 +41,26 @@
 
 def get_neo4j_retriever(graph, retrieval_query,document_names,mode,index_name="vector",keyword_index="keyword", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD):
     try:
-        if mode == "hybrid":
-            # neo_db = Neo4jVector.from_existing_graph(
-            #     embedding=EMBEDDING_FUNCTION,
-            #     index_name=index_name,
-            #     retrieval_query=retrieval_query,
-            #     graph=graph,
-            #     search_type="hybrid",
-            #     node_label="Chunk",
-            #     embedding_node_property="embedding",
-            #     text_node_properties=["text"]
-            #     # keyword_index_name=keyword_index
-            # )
-            neo_db = Neo4jVector.from_existing_index(
+        if mode == "fulltext" or mode == "fulltext+graph":
+            neo_db = Neo4jVector.from_existing_graph(
                 embedding=EMBEDDING_FUNCTION,
                 index_name=index_name,
                 retrieval_query=retrieval_query,
                 graph=graph,
                 search_type="hybrid",
+                node_label="Chunk",
+                embedding_node_property="embedding",
+                text_node_properties=["text"],
                 keyword_index_name=keyword_index
             )
+            # neo_db = Neo4jVector.from_existing_index(
+            #     embedding=EMBEDDING_FUNCTION,
+            #     index_name=index_name,
+            #     retrieval_query=retrieval_query,
+            #     graph=graph,
+            #     search_type="hybrid",
+            #     keyword_index_name=keyword_index
+            # )
             logging.info(f"Successfully retrieved Neo4jVector index '{index_name}' and keyword index '{keyword_index}'")
         else:
             neo_db = Neo4jVector.from_existing_index(
@@ -374,7 +374,7 @@ def QA_RAG(graph, model, question, document_names,session_id, mode):
                 "user": "chatbot"
             } 
             return result
-        elif mode == "vector" or mode == "hybrid":
+        elif mode == "vector" or mode == "fulltext":
             retrieval_query = VECTOR_SEARCH_QUERY
         else:
             retrieval_query = VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT)