Skip to content

Dev to Staging #709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
271e8d5
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
prakriti-solankey Aug 20, 2024
e078e21
connection _check
prakriti-solankey Aug 20, 2024
7c66bf2
Fix typo: correct 'josn_obj' to 'json_obj' (#697)
destiny966113 Aug 20, 2024
477fda0
lint fixes
kartikpersistent Aug 20, 2024
bacbcfc
connection _check
prakriti-solankey Aug 20, 2024
885c345
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
prakriti-solankey Aug 20, 2024
2fc3e13
Chatbot changes (#700)
vasanthasaikalluri Aug 21, 2024
c0cca99
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
prakriti-solankey Aug 21, 2024
279d820
fixed issue delete entities return count
praveshkumar1988 Aug 21, 2024
30f92cd
removed specified version due to dependency clashes between versions
praveshkumar1988 Aug 21, 2024
832afd9
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
praveshkumar1988 Aug 21, 2024
edcff3b
updated script "integration test cases"
abhishekkumar-27 Aug 21, 2024
71ed29a
decreased the delay for polling API
kartikpersistent Aug 21, 2024
7eb7605
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
kartikpersistent Aug 21, 2024
1af5877
Graph enhancements (#696)
prakriti-solankey Aug 21, 2024
4575198
changed chat mode names (#702)
vasanthasaikalluri Aug 21, 2024
2d76462
Merge branch 'STAGING' into DEV
prakriti-solankey Aug 21, 2024
a6ee345
env changes
kartikpersistent Aug 21, 2024
dc351a0
used axios instance for network calls
kartikpersistent Aug 22, 2024
a241c6b
disabled the tooltip when the dropdown is in open state
kartikpersistent Aug 22, 2024
426906b
format fixes + chat mode naming changes
kartikpersistent Aug 22, 2024
78ec3fd
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
prakriti-solankey Aug 22, 2024
7d0b431
mode added to info model for entities
prakriti-solankey Aug 22, 2024
e60a6e3
Merge branch 'STAGING' into DEV
prakriti-solankey Aug 22, 2024
f2b1e17
Issue fixed, list index out of range while getting status of document node
praveshkumar1988 Aug 26, 2024
94c493e
processing count updated on cancel
kartikpersistent Aug 26, 2024
489b5ae
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
kartikpersistent Aug 26, 2024
3ef88b6
format fixes
kartikpersistent Aug 27, 2024
4e2f909
Merge branch 'STAGING' into DEV
kartikpersistent Aug 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 83 additions & 77 deletions backend/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,91 +277,97 @@ def processing_source(uri, userName, password, database, model, file_name, pages
create_chunks_obj = CreateChunksofDocument(pages, graph)
chunks = create_chunks_obj.split_file_into_chunks()
chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks)
if result[0]['Status'] != 'Processing':
obj_source_node = sourceNode()
status = "Processing"
obj_source_node.file_name = file_name
obj_source_node.status = status
obj_source_node.total_chunks = len(chunks)
obj_source_node.total_pages = len(pages)
obj_source_node.model = model
logging.info(file_name)
logging.info(obj_source_node)
graphDb_data_Access.update_source_node(obj_source_node)

logging.info('Update the status as Processing')
update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED'))
# selected_chunks = []
is_cancelled_status = False
job_status = "Completed"
node_count = 0
rel_count = 0
for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed):
select_chunks_upto = i+update_graph_chunk_processed
logging.info(f'Selected Chunks upto: {select_chunks_upto}')
if len(chunkId_chunkDoc_list) <= select_chunks_upto:
select_chunks_upto = len(chunkId_chunkDoc_list)
selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto]

if len(result) > 0:
if result[0]['Status'] != 'Processing':
obj_source_node = sourceNode()
status = "Processing"
obj_source_node.file_name = file_name
obj_source_node.status = status
obj_source_node.total_chunks = len(chunks)
obj_source_node.total_pages = len(pages)
obj_source_node.model = model
logging.info(file_name)
logging.info(obj_source_node)
graphDb_data_Access.update_source_node(obj_source_node)

logging.info('Update the status as Processing')
update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED'))
# selected_chunks = []
is_cancelled_status = False
job_status = "Completed"
node_count = 0
rel_count = 0
for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed):
select_chunks_upto = i+update_graph_chunk_processed
logging.info(f'Selected Chunks upto: {select_chunks_upto}')
if len(chunkId_chunkDoc_list) <= select_chunks_upto:
select_chunks_upto = len(chunkId_chunkDoc_list)
selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto]
result = graphDb_data_Access.get_current_status_document_node(file_name)
is_cancelled_status = result[0]['is_cancelled']
logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}")
if bool(is_cancelled_status) == True:
job_status = "Cancelled"
logging.info('Exit from running loop of processing file')
exit
else:
node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count)
end_time = datetime.now()
processed_time = end_time - start_time

obj_source_node = sourceNode()
obj_source_node.file_name = file_name
obj_source_node.updated_at = end_time
obj_source_node.processing_time = processed_time
obj_source_node.node_count = node_count
obj_source_node.processed_chunk = select_chunks_upto
obj_source_node.relationship_count = rel_count
graphDb_data_Access.update_source_node(obj_source_node)

result = graphDb_data_Access.get_current_status_document_node(file_name)
is_cancelled_status = result[0]['is_cancelled']
logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}")
if bool(is_cancelled_status) == True:
job_status = "Cancelled"
logging.info('Exit from running loop of processing file')
exit
else:
node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count)
end_time = datetime.now()
processed_time = end_time - start_time

obj_source_node = sourceNode()
obj_source_node.file_name = file_name
obj_source_node.updated_at = end_time
obj_source_node.processing_time = processed_time
obj_source_node.node_count = node_count
obj_source_node.processed_chunk = select_chunks_upto
obj_source_node.relationship_count = rel_count
graphDb_data_Access.update_source_node(obj_source_node)

result = graphDb_data_Access.get_current_status_document_node(file_name)
is_cancelled_status = result[0]['is_cancelled']
if bool(is_cancelled_status) == True:
logging.info(f'Is_cancelled True at the end extraction')
job_status = 'Cancelled'
logging.info(f'Job Status at the end : {job_status}')
end_time = datetime.now()
processed_time = end_time - start_time
obj_source_node = sourceNode()
obj_source_node.file_name = file_name
obj_source_node.status = job_status
obj_source_node.processing_time = processed_time
logging.info(f'Is_cancelled True at the end extraction')
job_status = 'Cancelled'
logging.info(f'Job Status at the end : {job_status}')
end_time = datetime.now()
processed_time = end_time - start_time
obj_source_node = sourceNode()
obj_source_node.file_name = file_name
obj_source_node.status = job_status
obj_source_node.processing_time = processed_time

graphDb_data_Access.update_source_node(obj_source_node)
logging.info('Updated the nodeCount and relCount properties in Document node')
logging.info(f'file:{file_name} extraction has been completed')
graphDb_data_Access.update_source_node(obj_source_node)
logging.info('Updated the nodeCount and relCount properties in Document node')
logging.info(f'file:{file_name} extraction has been completed')


# merged_file_path have value only when file uploaded from local

if is_uploaded_from_local:
gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
if gcs_file_cache == 'True':
folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name)
else:
delete_uploaded_local_file(merged_file_path, file_name)
# merged_file_path have value only when file uploaded from local

return {
"fileName": file_name,
"nodeCount": node_count,
"relationshipCount": rel_count,
"processingTime": round(processed_time.total_seconds(),2),
"status" : job_status,
"model" : model,
"success_count" : 1
}
if is_uploaded_from_local:
gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
if gcs_file_cache == 'True':
folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name)
else:
delete_uploaded_local_file(merged_file_path, file_name)

return {
"fileName": file_name,
"nodeCount": node_count,
"relationshipCount": rel_count,
"processingTime": round(processed_time.total_seconds(),2),
"status" : job_status,
"model" : model,
"success_count" : 1
}
else:
logging.info('File does not process because it\'s already in Processing status')
else:
logging.info('File does not process because it\'s already in Processing status')
error_message = "Unable to get the status of docuemnt node."
logging.error(error_message)
raise Exception(error_message)

def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count):
#create vector index and update chunk node with embedding
Expand Down
6 changes: 5 additions & 1 deletion frontend/src/components/ChatBot/ChatInfoModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,11 @@ const ChatInfoModal: React.FC<chatInfoMessage> = ({
) : (
<Tabs size='large' fill='underline' onChange={onChangeTabs} value={activeTab}>
{mode != 'graph' ? <Tabs.Tab tabId={3}>Sources used</Tabs.Tab> : <></>}
{mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? <Tabs.Tab tabId={4}>Top Entities used</Tabs.Tab> : <></>}
{mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? (
<Tabs.Tab tabId={4}>Top Entities used</Tabs.Tab>
) : (
<></>
)}
{mode === 'graph' && cypher_query?.trim().length ? (
<Tabs.Tab tabId={6}>Generated Cypher Query</Tabs.Tab>
) : (
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/components/Content.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ const Content: React.FC<ContentProps> = ({
setextractLoading(false);
await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
});
} else if (queueFiles && !queue.isEmpty()&&processingFilesCount<batchSize) {
} else if (queueFiles && !queue.isEmpty() && processingFilesCount < batchSize) {
data = scheduleBatchWiseProcess(queue.items, true);
Promise.allSettled(data).then(async (_) => {
setextractLoading(false);
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/components/FileTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ const FileTable = forwardRef<ChildRef, FileTableProps>((props, ref) => {
}
return prev + 1;
});
queue.remove(fileName)
queue.remove(fileName);
} else {
let errorobj = { error: res.data.error, message: res.data.message, fileName };
throw new Error(JSON.stringify(errorobj));
Expand Down
16 changes: 8 additions & 8 deletions frontend/src/components/Graph/GraphViewModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,10 @@ const GraphViewModal: React.FunctionComponent<GraphViewModalProps> = ({
graphType.includes('DocumentChunk') && graphType.includes('Entities')
? queryMap.DocChunkEntities
: graphType.includes('DocumentChunk')
? queryMap.DocChunks
: graphType.includes('Entities')
? queryMap.Entities
: '';
? queryMap.DocChunks
: graphType.includes('Entities')
? queryMap.Entities
: '';

// fit graph to original position
const handleZoomToFit = () => {
Expand Down Expand Up @@ -135,10 +135,10 @@ const GraphViewModal: React.FunctionComponent<GraphViewModalProps> = ({
const nodeRelationshipData =
viewPoint === graphLabels.showGraphView
? await graphQueryAPI(
userCredentials as UserCredentials,
graphQuery,
selectedRows?.map((f) => f.name)
)
userCredentials as UserCredentials,
graphQuery,
selectedRows?.map((f) => f.name)
)
: await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']);
return nodeRelationshipData;
} catch (error: any) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,12 @@ export default function DeduplicationTab() {
const onRemove = (nodeid: string, similarNodeId: string) => {
setDuplicateNodes((prev) => {
return prev.map((d) =>
d.e.elementId === nodeid
(d.e.elementId === nodeid
? {
...d,
similar: d.similar.filter((n) => n.elementId != similarNodeId),
}
: d
: d)
);
});
};
Expand Down