Search nodes on graph Viz #699

Merged (21 commits) on Aug 21, 2024
32 changes: 16 additions & 16 deletions README.md
@@ -40,13 +40,13 @@ DIFFBOT_API_KEY="your-diffbot-key"

If you only want OpenAI:
```env
LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o"
VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o"
OPENAI_API_KEY="your-openai-key"
```

If you only want Diffbot:
```env
LLM_MODELS="diffbot"
VITE_LLM_MODELS="diffbot"
DIFFBOT_API_KEY="your-diffbot-key"
```

@@ -59,13 +59,13 @@ docker-compose up --build

By default, the input sources will be: Local files, YouTube, Wikipedia, AWS S3 and Webpages, as this default config is applied:
```env
REACT_APP_SOURCES="local,youtube,wiki,s3,web"
VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web"
```

If however you want the Google GCS integration, add `gcs` and your Google client ID:
```env
REACT_APP_SOURCES="local,youtube,wiki,s3,gcs,web"
GOOGLE_CLIENT_ID="xxxx"
VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,gcs,web"
VITE_GOOGLE_CLIENT_ID="xxxx"
```

You can of course combine all (local, youtube, wikipedia, s3 and gcs) or remove any you don't want/need.
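
For instance, a trimmed-down setup that exposes only local file uploads and web pages would just shorten that list (a sketch; keep whichever of the source values from the default config you actually need):
```env
VITE_REACT_APP_SOURCES="local,web"
```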
@@ -75,12 +75,12 @@ You can of course combine all (local, youtube, wikipedia, s3 and gcs) or remove
By default, all of the chat modes will be available: vector, graph+vector and graph.
If no mode is mentioned in the chat modes variable, all modes will be available:
```env
CHAT_MODES=""
VITE_CHAT_MODES=""
```

If however you want to expose only certain modes, you can do that by listing them in the env:
```env
CHAT_MODES="vector,graph+vector"
VITE_CHAT_MODES="vector,graph+vector"
```

#### Running Backend and Frontend separately (dev environment)
@@ -143,15 +143,15 @@ Allow unauthenticated request : Yes
| LANGCHAIN_PROJECT | Optional | | Project for Langchain |
| LANGCHAIN_TRACING_V2 | Optional | true | Flag to enable Langchain tracing |
| LANGCHAIN_ENDPOINT | Optional | https://api.smith.langchain.com | Endpoint for Langchain API |
| BACKEND_API_URL | Optional | http://localhost:8000 | URL for backend API |
| BLOOM_URL | Optional | https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true | URL for Bloom visualization |
| REACT_APP_SOURCES | Mandatory | local,youtube,wiki,s3 | List of input sources that will be available |
| LLM_MODELS | Mandatory | diffbot,openai-gpt-3.5,openai-gpt-4o | Models available for selection on the frontend, used for entities extraction and Q&A
| CHAT_MODES | Mandatory | vector,graph+vector,graph,hybrid | Chat modes available for Q&A
| ENV | Mandatory | DEV or PROD | Environment variable for the app |
| TIME_PER_CHUNK | Optional | 4 | Time per chunk for processing |
| CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload |
| GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication |
| VITE_BACKEND_API_URL | Optional | http://localhost:8000 | URL for backend API |
| VITE_BLOOM_URL | Optional | https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true | URL for Bloom visualization |
| VITE_REACT_APP_SOURCES | Mandatory | local,youtube,wiki,s3 | List of input sources that will be available |
| VITE_LLM_MODELS | Mandatory | diffbot,openai-gpt-3.5,openai-gpt-4o | Models available for selection on the frontend, used for entity extraction and Q&A |
| VITE_CHAT_MODES | Mandatory | vector,graph+vector,graph,hybrid | Chat modes available for Q&A |
| VITE_ENV | Mandatory | DEV or PROD | Environment variable for the app |
| VITE_TIME_PER_CHUNK | Optional | 4 | Time per chunk for processing |
| VITE_CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload |
| VITE_GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication |
| GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. If set to False, will save the files locally |
| ENTITY_EMBEDDING | Optional | False | If set to True, it will add embeddings for each entity in the database |
| LLM_MODEL_CONFIG_ollama_<model_name> | Optional | | Set ollama config as model_name,model_local_url for local deployments (see the sketch below) |
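
For example, a local Ollama model could be wired up through that last pattern like this (a sketch; `llama3` and the URL are placeholders for your own model name and local Ollama endpoint):
```env
LLM_MODEL_CONFIG_ollama_llama3="llama3,http://localhost:11434"
VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o,ollama_llama3"
```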
22 changes: 11 additions & 11 deletions docker-compose.yml
@@ -51,17 +51,17 @@ services:
context: ./frontend
dockerfile: Dockerfile
args:
- BACKEND_API_URL=${BACKEND_API_URL-http://localhost:8000}
- REACT_APP_SOURCES=${REACT_APP_SOURCES-local,youtube,wiki,s3}
- LLM_MODELS=${LLM_MODELS-diffbot,openai-gpt-3.5,openai-gpt-4o}
- GOOGLE_CLIENT_ID=${GOOGLE_CLIENT_ID-""}
- BLOOM_URL=${BLOOM_URL-https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true}
- TIME_PER_CHUNK=${TIME_PER_CHUNK-4}
- TIME_PER_PAGE=${TIME_PER_PAGE-50}
- CHUNK_SIZE=${CHUNK_SIZE-5242880}
- ENV=${ENV-DEV}
- CHAT_MODES=${CHAT_MODES-""}
- BATCH_SIZE=${BATCH_SIZE-2}
- VITE_BACKEND_API_URL=${VITE_BACKEND_API_URL-http://localhost:8000}
- VITE_REACT_APP_SOURCES=${VITE_REACT_APP_SOURCES-local,youtube,wiki,s3}
- VITE_LLM_MODELS=${VITE_LLM_MODELS-diffbot,openai-gpt-3.5,openai-gpt-4o}
- VITE_GOOGLE_CLIENT_ID=${VITE_GOOGLE_CLIENT_ID-""}
- VITE_BLOOM_URL=${VITE_BLOOM_URL-https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true}
- VITE_TIME_PER_PAGE=${VITE_TIME_PER_PAGE-50}
- VITE_CHUNK_SIZE=${VITE_CHUNK_SIZE-5242880}
- VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE-5242880}
- VITE_ENV=${VITE_ENV-DEV}
- VITE_CHAT_MODES=${VITE_CHAT_MODES-""}
- VITE_BATCH_SIZE=${VITE_BATCH_SIZE-2}
volumes:
- ./frontend:/app
- /app/node_modules
21 changes: 11 additions & 10 deletions example.env
@@ -25,13 +25,14 @@ GCS_FILE_CACHE = False
ENTITY_EMBEDDING=True

# Optional Frontend
BACKEND_API_URL="http://localhost:8000"
BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true"
REACT_APP_SOURCES="local,youtube,wiki,s3,web"
LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" # ",ollama_llama3"
ENV="DEV"
TIME_PER_CHUNK=4
TIME_PER_PAGE=50
CHUNK_SIZE=5242880
GOOGLE_CLIENT_ID=""
CHAT_MODES=""
VITE_BACKEND_API_URL="http://localhost:8000"
VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true"
VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web"
VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" # ",ollama_llama3"
VITE_ENV="DEV"
VITE_TIME_PER_CHUNK=4
VITE_TIME_PER_PAGE=50
VITE_CHUNK_SIZE=5242880
VITE_GOOGLE_CLIENT_ID=""
VITE_CHAT_MODES=""
VITE_BATCH_SIZE=2
2 changes: 1 addition & 1 deletion frontend/Dockerfile
@@ -28,7 +28,7 @@ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \
VITE_ENV=$VITE_ENV \
VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE} \
VITE_CHAT_MODES=$VITE_CHAT_MODES \
VITE_BATCH_SIZE=$VITE_BATCH_SIZE
VITE_BATCH_SIZE=$VITE_BATCH_SIZE \
yarn run build

# Step 2: Serve the application using Nginx
24 changes: 12 additions & 12 deletions frontend/example.env
@@ -1,12 +1,12 @@
BACKEND_API_URL="http://localhost:8000"
BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true"
REACT_APP_SOURCES="local,youtube,wiki,s3,web"
LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o"
ENV="DEV"
TIME_PER_CHUNK=4
TIME_PER_PAGE=50
CHUNK_SIZE=5242880
LARGE_FILE_SIZE=5242880
GOOGLE_CLIENT_ID=""
CHAT_MODES=""
BATCH_SIZE=2
VITE_BACKEND_API_URL="http://localhost:8000"
VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true"
VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web"
VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o"
VITE_ENV="DEV"
VITE_TIME_PER_CHUNK=4
VITE_TIME_PER_PAGE=50
VITE_CHUNK_SIZE=5242880
VITE_LARGE_FILE_SIZE=5242880
VITE_GOOGLE_CLIENT_ID=""
VITE_CHAT_MODES=""
VITE_BATCH_SIZE=2
10 changes: 9 additions & 1 deletion frontend/src/App.css
@@ -233,7 +233,6 @@
letter-spacing: 0;
line-height: 1.25rem;
width: max-content;
height: 30px;
text-overflow: ellipsis;
white-space: nowrap;
overflow: hidden;
@@ -365,4 +364,13 @@
.widthunset{
width: initial !important;
height: initial !important;
}

.text-input-container {
transition: width 1.5s ease;
/* width: 100dvh; */
}

.text-input-container.search-initiated {
width: 60dvh;
}
125 changes: 42 additions & 83 deletions frontend/src/components/Content.tsx
@@ -146,7 +146,7 @@ const Content: React.FC<ContentProps> = ({
localStorage.setItem('processedCount', JSON.stringify({ db: userCredentials?.uri, count: processedCount }));
}
if (processedCount == batchSize) {
handleGenerateGraph([]);
handleGenerateGraph([], true);
}
}, [processedCount, userCredentials]);

@@ -209,15 +209,7 @@ const Content: React.FC<ContentProps> = ({
userCredentials?.userName,
userCredentials?.password,
userCredentials?.database,
updateStatusForLargeFiles,
() => {
setProcessedCount((prev) => {
if (prev == 2) {
return 1;
}
return prev + 1;
});
}
updateStatusForLargeFiles
);
}

@@ -327,17 +319,13 @@ const Content: React.FC<ContentProps> = ({
});
};

const scheduleBatchWiseProcess = (
selectedRows: CustomFile[],
selectedNewFiles: CustomFile[],
isSelectedFiles: boolean
) => {
const scheduleBatchWiseProcess = (selectedRows: CustomFile[], isSelectedFiles: boolean) => {
let data = [];
if (queue.size() > batchSize) {
const batch = queue.items.slice(0, batchSize);
data = triggerBatchProcessing(batch, selectedRows as CustomFile[], isSelectedFiles, false);
} else {
let mergedfiles = [...queue.items, ...(selectedNewFiles as CustomFile[])];
let mergedfiles = [...selectedRows];
let filesToProcess: CustomFile[] = [];
if (mergedfiles.length > batchSize) {
filesToProcess = mergedfiles.slice(0, batchSize);
@@ -351,86 +339,56 @@ const Content: React.FC<ContentProps> = ({
return data;
};

function getFilesToProcess(
processingFilesCount: number,
batchFiles: CustomFile[],
newFilesFromSelectedFiles: CustomFile[]
) {
let filesToProcess: CustomFile[] = [];
if (processingFilesCount + batchFiles.length > batchSize) {
filesToProcess = batchFiles.slice(0, 1);
const remainingFiles = [...(newFilesFromSelectedFiles as CustomFile[])]
.splice(batchSize)
.concat(batchFiles.splice(1));
addFilesToQueue(remainingFiles);
} else {
filesToProcess = batchFiles;
const remainingFiles = [...(newFilesFromSelectedFiles as CustomFile[])].splice(batchSize);
addFilesToQueue(remainingFiles);
}
return filesToProcess;
}

/**
*@param selectedFilesFromAllfiles iles to process in two ways one from selected files from table other way all new files from table.
*we will check whether queue is empty or not if queue is not empty we process queued files.
*if queue is empty we check whether selected files count is greater than batch size we slice the selected till batch size and process them remaining files are pushed to queue.
*if selectedfiles count is less than batch size we check whether the sum of selectedfiles count and processing files count is greater than the batch size.
*if it is greater than batch size we slice the selectedfiles to the substraction of batchsize and selectedfileslength we process them remaining files are pushed to queue.
*if sum of selectedfiles count and processing files count is smaller than the batch size we process those
* Processes files in batches, respecting a maximum batch size.
*
* This function prioritizes processing files from the queue if it's not empty.
* If the queue is empty, it processes the provided `filesTobeProcessed`:
* - If the number of files exceeds the batch size, it processes a batch and queues the rest.
* - If the number of files is within the batch size, it processes them all.
* - If there are already files being processed, it adjusts the batch size to avoid exceeding the limit.
*
* @param filesTobeProcessed - The files to be processed.
* @param queueFiles - Whether to prioritize processing files from the queue. Defaults to false.
*/
const handleGenerateGraph = (selectedFilesFromAllfiles: CustomFile[]) => {
const handleGenerateGraph = (filesTobeProcessed: CustomFile[], queueFiles: boolean = false) => {
let data = [];
const processingFilesCount = filesData.filter((f) => f.status === 'Processing').length;
const newfiles = childRef.current?.getSelectedRows().filter((f) => f.status === 'New');
if (selectedfileslength && processingFilesCount < batchSize) {
const selectedRows = childRef.current?.getSelectedRows();
const selectedNewFiles = newfiles;
if (filesTobeProcessed.length && !queueFiles && processingFilesCount < batchSize) {
if (!queue.isEmpty()) {
data = scheduleBatchWiseProcess(selectedRows as CustomFile[], selectedNewFiles as CustomFile[], true);
} else if (selectedfileslength > batchSize) {
const filesToProcess = selectedNewFiles?.slice(0, batchSize) as CustomFile[];
data = triggerBatchProcessing(filesToProcess, selectedRows as CustomFile[], true, false);
const remainingFiles = [...(selectedNewFiles as CustomFile[])].splice(batchSize);
data = scheduleBatchWiseProcess(filesTobeProcessed as CustomFile[], true);
} else if (filesTobeProcessed.length > batchSize) {
const filesToProcess = filesTobeProcessed?.slice(0, batchSize) as CustomFile[];
data = triggerBatchProcessing(filesToProcess, filesTobeProcessed as CustomFile[], true, false);
const remainingFiles = [...(filesTobeProcessed as CustomFile[])].splice(batchSize);
addFilesToQueue(remainingFiles);
} else {
let filesTobeProcess = childRef.current?.getSelectedRows() as CustomFile[];
if (selectedfileslength + processingFilesCount > batchSize) {
filesTobeProcess = childRef.current
?.getSelectedRows()
.slice(0, batchSize - selectedfileslength) as CustomFile[];
const remainingFiles = [...(childRef.current?.getSelectedRows() as CustomFile[])].splice(1);
let filesTobeSchedule: CustomFile[] = filesTobeProcessed;
if (filesTobeProcessed.length + processingFilesCount > batchSize) {
filesTobeSchedule = filesTobeProcessed.slice(
0,
filesTobeProcessed.length + processingFilesCount - batchSize
) as CustomFile[];
const idstoexclude = new Set(filesTobeSchedule.map((f) => f.id));
const remainingFiles = [...(childRef.current?.getSelectedRows() as CustomFile[])].filter(
(f) => !idstoexclude.has(f.id)
);
addFilesToQueue(remainingFiles);
}
data = triggerBatchProcessing(filesTobeProcess, selectedRows as CustomFile[], true, true);
data = triggerBatchProcessing(filesTobeSchedule, filesTobeProcessed, true, true);
}
Promise.allSettled(data).then(async (_) => {
setextractLoading(false);
await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
});
} else if (selectedFilesFromAllfiles.length && processingFilesCount < batchSize) {
const newFilesFromSelectedFiles = selectedFilesFromAllfiles.filter((f) => f.status === 'New');
if (!queue.isEmpty()) {
data = scheduleBatchWiseProcess(selectedFilesFromAllfiles, newFilesFromSelectedFiles, false);
} else if (selectedFilesFromAllfiles.length > batchSize) {
const batchFiles = newFilesFromSelectedFiles.slice(0, batchSize) as CustomFile[];
const filesToProcess = getFilesToProcess(processingFilesCount, batchFiles, newFilesFromSelectedFiles);
data = triggerBatchProcessing(filesToProcess, selectedFilesFromAllfiles as CustomFile[], false, false);
} else {
data = triggerBatchProcessing(
selectedFilesFromAllfiles,
selectedFilesFromAllfiles as CustomFile[],
false,
true
);
Promise.allSettled(data).then(async (_) => {
setextractLoading(false);
await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
});
}
} else if (queueFiles && !queue.isEmpty()) {
data = scheduleBatchWiseProcess(queue.items, true);
Promise.allSettled(data).then(async (_) => {
setextractLoading(false);
await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
});
} else {
const selectedNewFiles = newfiles;
addFilesToQueue(selectedNewFiles as CustomFile[]);
addFilesToQueue(filesTobeProcessed as CustomFile[]);
}
};

@@ -648,7 +606,7 @@ const Content: React.FC<ContentProps> = ({
if (selectedLargeFiles.length) {
setshowConfirmationModal(true);
} else {
handleGenerateGraph(filesData);
handleGenerateGraph(childRef.current?.getSelectedRows().filter((f) => f.status === 'New'));
}
} else if (filesData.length) {
const largefiles = filesData.filter((f) => {
@@ -668,7 +626,7 @@ const Content: React.FC<ContentProps> = ({
if (largefiles.length) {
setshowConfirmationModal(true);
} else {
handleGenerateGraph(filesData);
handleGenerateGraph(filesData.filter((f) => f.status === 'New'));
}
}
};
@@ -699,6 +657,7 @@ const Content: React.FC<ContentProps> = ({
extractHandler={handleGenerateGraph}
onClose={() => setshowConfirmationModal(false)}
loading={extractLoading}
selectedRows={childRef.current?.getSelectedRows() as CustomFile[]}
></ConfirmationDialog>
</Suspense>
)}