V0.7.1 documentation updates #1094

Merged · 9 commits · Feb 17, 2025
65 changes: 40 additions & 25 deletions README.md
@@ -118,23 +118,34 @@ Allow unauthenticated request : Yes
|-------------------------|--------------------|---------------|--------------------------------------------------------------------------------------------------|
| |
| **BACKEND ENV**
| OPENAI_API_KEY | Mandatory | | An OpenAI API key is required to use OpenAI LLM models and to authenticate and track requests |
| DIFFBOT_API_KEY | Mandatory | | API key required to use Diffbot's NLP service to extract entities and relationships from unstructured data |
| BUCKET | Mandatory | | Bucket name for storing uploaded files on GCS |
| NEO4J_USER_AGENT | Optional | llm-graph-builder | Name of the user agent used to track Neo4j database activity |
| ENABLE_USER_AGENT | Optional | true | Boolean flag to enable/disable the Neo4j user agent |
| DUPLICATE_TEXT_DISTANCE | Mandatory | 5 | Distance value used to compare all node pairs in the graph, calculated from node properties |
| DUPLICATE_SCORE_VALUE | Mandatory | 0.97 | Score threshold used to match duplicate nodes |
| EFFECTIVE_SEARCH_RATIO | Mandatory | 1 | |
| GRAPH_CLEANUP_MODEL | Optional | 0.97 | Model name used to clean up the graph in post-processing |
| MAX_TOKEN_CHUNK_SIZE | Optional | 10000 | Maximum token size for processing file content |
| YOUTUBE_TRANSCRIPT_PROXY | Optional | | Proxy used to fetch transcripts for YouTube videos |
| EMBEDDING_MODEL | Optional | all-MiniLM-L6-v2 | Model for generating the text embedding (all-MiniLM-L6-v2, openai, vertexai) |
| IS_EMBEDDING | Optional | true | Flag to enable text embedding |
| KNN_MIN_SCORE | Optional | 0.94 | Minimum score for the KNN algorithm |
| GEMINI_ENABLED | Optional | False | Flag to enable Gemini |
| GCP_LOG_METRICS_ENABLED | Optional | False | Flag to enable Google Cloud logs |
| NUMBER_OF_CHUNKS_TO_COMBINE | Optional | 5 | Number of chunks to combine when processing embeddings |
| UPDATE_GRAPH_CHUNKS_PROCESSED | Optional | 20 | Number of chunks processed before updating progress |
| NEO4J_URI | Optional | neo4j://database:7687 | URI for the Neo4j database |
| NEO4J_USERNAME | Optional | neo4j | Username for the Neo4j database |
| NEO4J_PASSWORD | Optional | password | Password for the Neo4j database |
| LANGCHAIN_API_KEY | Optional | | API key for LangChain |
| LANGCHAIN_PROJECT | Optional | | Project for LangChain |
| LANGCHAIN_TRACING_V2 | Optional | true | Flag to enable LangChain tracing |
| GCS_FILE_CACHE | Optional | False | If set to True, saves the files to process in GCS; if set to False, saves them locally |
| LANGCHAIN_ENDPOINT | Optional | https://api.smith.langchain.com | Endpoint for the LangChain API |
| ENTITY_EMBEDDING | Optional | False | If set to True, adds embeddings for each entity in the database |
| LLM_MODEL_CONFIG_ollama_<model_name> | Optional | | Ollama config for local deployments, in the format model_name,model_local_url |
| RAGAS_EMBEDDING_MODEL | Optional | openai | Embedding model used by the RAGAS evaluation framework |
| |
| **FRONTEND ENV**
@@ -151,18 +151,22 @@ Allow unauthenticated request : Yes
| VITE_AUTH0_CLIENT_ID | Mandatory if authentication is enabled, otherwise optional | | Okta OAuth client ID for authentication
| VITE_AUTH0_DOMAIN | Mandatory if authentication is enabled, otherwise optional | | Okta OAuth client domain
| VITE_SKIP_AUTH | Optional | true | Flag to skip authentication
| VITE_CHUNK_OVERLAP | Optional | 20 | Variable to configure chunk overlap
| VITE_TOKENS_PER_CHUNK | Optional | 100 | Variable to configure the token count per chunk. This gives flexibility for users who require different chunk sizes for various tokenization tasks, especially when working with large datasets or specific language models.
| VITE_CHUNK_TO_COMBINE | Optional | 1 | Variable to configure the number of chunks to combine for parallel processing.
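For reference, a minimal `.env` sketch combining some of the variables above. All values here are illustrative assumptions, not required defaults:

```bash
# Backend (illustrative values — substitute your own credentials)
OPENAI_API_KEY="sk-..."              # mandatory: OpenAI key for LLM calls
DIFFBOT_API_KEY="your-diffbot-key"   # mandatory: Diffbot NLP service
BUCKET="my-gcs-bucket"               # mandatory: GCS bucket for uploaded files
EMBEDDING_MODEL="all-MiniLM-L6-v2"
NEO4J_URI="neo4j://database:7687"
NEO4J_USERNAME="neo4j"
NEO4J_PASSWORD="password"
# Ollama entry uses the documented model_name,model_local_url format;
# the model name and URL here are assumptions for a local setup
LLM_MODEL_CONFIG_ollama_llama3="llama3,http://localhost:11434"

# Frontend chunking controls (illustrative values)
VITE_TOKENS_PER_CHUNK=200
VITE_CHUNK_OVERLAP=20
VITE_CHUNK_TO_COMBINE=1
```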

## LLMs Supported
1. OpenAI
2. Gemini
3. Diffbot
4. Azure OpenAI (dev deployed version)
5. Anthropic (dev deployed version)
6. Fireworks (dev deployed version)
7. Groq (dev deployed version)
8. Amazon Bedrock (dev deployed version)
9. Ollama (dev deployed version)
10. Deepseek (dev deployed version)
11. Other OpenAI-compatible base-URL models (dev deployed version)

## For local LLMs (Ollama)
1. Pull the Docker image of Ollama (a minimal sketch of the full setup follows below)
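A hedged sketch of the local Ollama flow, assuming the official `ollama/ollama` image and a `llama3` model — adjust the model name and URL to your deployment:

```bash
# Pull and start the Ollama container, exposing its default port 11434
docker pull ollama/ollama
docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama

# Pull a model inside the running container
docker exec -it ollama ollama pull llama3

# Point the backend at it using the documented model_name,model_local_url format
export LLM_MODEL_CONFIG_ollama_llama3="llama3,http://localhost:11434"
```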
3 changes: 3 additions & 0 deletions docker-compose.yml
@@ -66,6 +66,9 @@ services:
- VITE_AUTH0_DOMAIN=${VITE_AUTH0_DOMAIN-}
- VITE_AUTH0_CLIENT_ID=${VITE_AUTH0_CLIENT_ID-}
- VITE_SKIP_AUTH=${VITE_SKIP_AUTH-true}
- VITE_CHUNK_OVERLAP=${VITE_CHUNK_OVERLAP-}
- VITE_TOKENS_PER_CHUNK=${VITE_TOKENS_PER_CHUNK-}
- VITE_CHUNK_TO_COMBINE=${VITE_CHUNK_TO_COMBINE-}
- DEPLOYMENT_ENV=local
volumes:
- ./frontend:/app
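Compose's `${VAR-}` syntax substitutes an empty string when a variable is unset, so the new chunking options are opt-in. A usage sketch, assuming the service is named `frontend` (values are illustrative):

```bash
# Override the new chunking variables at compose time
VITE_TOKENS_PER_CHUNK=200 VITE_CHUNK_OVERLAP=20 VITE_CHUNK_TO_COMBINE=1 \
  docker compose up --build frontend
```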
4 changes: 4 additions & 0 deletions docs/frontend/frontend_docs.adoc
@@ -321,6 +321,10 @@ image::images/Schema.jpg[PredefinedSchema, 600]

image::images/UserDefinedSchema.jpg[UserDefinedSchema, 600]

* ***Additional Instructions:***

image::images/AddtionalInstructions.jpg[AddtionalInstructions, 600]

* ***/delete_unconnected_nodes:***
** to remove unconnected (lonely) entities.

Binary file added docs/frontend/images/AddtionalInstructions.jpg
22 changes: 11 additions & 11 deletions frontend/src/components/ChatBot/ChatInfoModal.tsx
@@ -81,10 +81,10 @@ const ChatInfoModal: React.FC<chatInfoMessage> = ({
error?.length
? 10
: mode === chatModeLables['global search+vector+fulltext']
? 7
: mode === chatModeLables.graph
? 4
: 3
? 7
: mode === chatModeLables.graph
? 4
: 3
);
const [, copy] = useCopyToClipboard();
const [copiedText, setcopiedText] = useState<boolean>(false);
@@ -97,15 +97,15 @@ const ChatInfoModal: React.FC<chatInfoMessage> = ({
multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length > 4
? true
: multiModelMetrics.length > 0 && Object.keys(multiModelMetrics[0]).length <= 4
? false
: null
? false
: null
);
const [isAdditionalMetricsWithSingleMode, setIsAdditionalMetricsWithSingleMode] = useState<boolean | null>(
metricDetails != undefined && Object.keys(metricDetails).length > 3
? true
: metricDetails != undefined && Object.keys(metricDetails).length <= 3
? false
: null
? false
: null
);
const actions: React.ComponentProps<typeof IconButton<'button'>>[] = useMemo(
() => [
@@ -349,9 +349,9 @@ const ChatInfoModal: React.FC<chatInfoMessage> = ({
{mode != chatModeLables.graph ? <Tabs.Tab tabId={3}>Sources used</Tabs.Tab> : <></>}
{mode != chatModeLables.graph ? <Tabs.Tab tabId={5}>Chunks</Tabs.Tab> : <></>}
{mode === chatModeLables['graph+vector'] ||
mode === chatModeLables.graph ||
mode === chatModeLables['graph+vector+fulltext'] ||
mode === chatModeLables['entity search+vector'] ? (
mode === chatModeLables.graph ||
mode === chatModeLables['graph+vector+fulltext'] ||
mode === chatModeLables['entity search+vector'] ? (
<Tabs.Tab tabId={4}>Top Entities used</Tabs.Tab>
) : (
<></>
16 changes: 8 additions & 8 deletions frontend/src/components/Content.tsx
@@ -154,10 +154,10 @@ const Content: React.FC<ContentProps> = ({
? postProcessingTasks.filter((task) => task !== 'graph_schema_consolidation')
: postProcessingTasks
: hasSelections
? postProcessingTasks.filter(
? postProcessingTasks.filter(
(task) => task !== 'graph_schema_consolidation' && task !== 'enable_communities'
)
: postProcessingTasks.filter((task) => task !== 'enable_communities');
: postProcessingTasks.filter((task) => task !== 'enable_communities');
if (payload.length) {
const response = await postProcessing(payload);
if (response.data.status === 'Success') {
@@ -611,12 +611,12 @@ const Content: React.FC<ContentProps> = ({
return prev.map((f) => {
return f.name === filename
? {
...f,
status: 'Ready to Reprocess',
processingProgress: isStartFromBegining ? 0 : f.processingProgress,
nodesCount: isStartFromBegining ? 0 : f.nodesCount,
relationshipsCount: isStartFromBegining ? 0 : f.relationshipsCount,
}
...f,
status: 'Ready to Reprocess',
processingProgress: isStartFromBegining ? 0 : f.processingProgress,
nodesCount: isStartFromBegining ? 0 : f.nodesCount,
relationshipsCount: isStartFromBegining ? 0 : f.relationshipsCount,
}
: f;
});
});
12 changes: 6 additions & 6 deletions frontend/src/components/Graph/GraphViewModal.tsx
@@ -64,10 +64,10 @@ const GraphViewModal: React.FunctionComponent<GraphViewModalProps> = ({
graphType.includes('DocumentChunk') && graphType.includes('Entities')
? queryMap.DocChunkEntities
: graphType.includes('DocumentChunk')
? queryMap.DocChunks
: graphType.includes('Entities')
? queryMap.Entities
: '';
? queryMap.DocChunks
: graphType.includes('Entities')
? queryMap.Entities
: '';

// fit graph to original position
const handleZoomToFit = () => {
@@ -266,8 +266,8 @@ const GraphViewModal: React.FunctionComponent<GraphViewModalProps> = ({
viewPoint === graphLabels.showGraphView || viewPoint === graphLabels.chatInfoView
? graphLabels.generateGraph
: viewPoint === graphLabels.showSchemaView
? graphLabels.renderSchemaGraph
: `${graphLabels.inspectGeneratedGraphFrom} ${inspectedName}`;
? graphLabels.renderSchemaGraph
: `${graphLabels.inspectGeneratedGraphFrom} ${inspectedName}`;

const checkBoxView = viewPoint !== graphLabels.chatInfoView;

5 changes: 3 additions & 2 deletions frontend/src/components/Layout/PageLayout.tsx
@@ -213,8 +213,9 @@ const PageLayout: React.FC = () => {
></SchemaFromTextDialog>
{isLargeDesktop ? (
<div
className={`layout-wrapper ${!isLeftExpanded ? 'drawerdropzoneclosed' : ''} ${!isRightExpanded ? 'drawerchatbotclosed' : ''
} ${!isRightExpanded && !isLeftExpanded ? 'drawerclosed' : ''}`}
className={`layout-wrapper ${!isLeftExpanded ? 'drawerdropzoneclosed' : ''} ${
!isRightExpanded ? 'drawerchatbotclosed' : ''
} ${!isRightExpanded && !isLeftExpanded ? 'drawerclosed' : ''}`}
>
<SideNav
toggles3Modal={toggleS3Modal}
@@ -113,7 +113,7 @@ export default function AdditionalInstructionsText({
<h5>{appLabels.chunkingConfiguration}</h5>
</div>
<Select
label='Chunk Size'
label='Token Count Per Chunk'
size={!tablet ? 'large' : 'medium'}
selectProps={{
options: defaultTokenChunkSizeOptions.map((value) => ({
@@ -134,6 +134,7 @@
`,
}}
type='creatable'
helpText='The maximum token limit is 10,000 for LLM processing. The total number of chunks will be calculated as 10,000 divided by the tokens per chunk you select. For example, selecting 500 tokens per chunk results in 20 chunks (10,000 / 500).'
/>
<Select
label='Chunk Overlap'
2 changes: 1 addition & 1 deletion frontend/src/services/GetFiles.ts
@@ -24,4 +24,4 @@ export const getSourceNodes = async (userCredentials: UserCredentials) => {
console.log(error);
throw error;
}
};
};
2 changes: 1 addition & 1 deletion frontend/src/services/PollingAPI.ts
@@ -41,4 +41,4 @@ export default async function subscribe(
}
}
throw new Error(`Polling for ${fileName} timed out after ${MAX_POLLING_ATTEMPTS} attempts.`);
}
}
2 changes: 1 addition & 1 deletion frontend/src/services/ServerSideStatusUpdateAPI.ts
@@ -36,4 +36,4 @@ export function triggerStatusUpdateAPI(
datahandler(eventResponse);
}
};
}
}
30 changes: 15 additions & 15 deletions frontend/src/types.ts
@@ -255,7 +255,7 @@ export type ChatbotProps = {
isChatOnly?: boolean;
isDeleteChatLoading: boolean;
};
export interface WikipediaModalTypes extends Omit<S3ModalProps, ''> { }
export interface WikipediaModalTypes extends Omit<S3ModalProps, ''> {}

export interface GraphViewModalProps {
open: boolean;
@@ -396,12 +396,12 @@ export interface commonserverresponse {
message?: string | orphanTotalNodes;
file_name?: string;
data?:
| labelsAndTypes
| labelsAndTypes[]
| uploadData
| orphanNodeProps[]
| dupNodes[]
| { pageitems: chunkdata[]; total_pages: number };
| labelsAndTypes
| labelsAndTypes[]
| uploadData
| orphanNodeProps[]
| dupNodes[]
| { pageitems: chunkdata[]; total_pages: number };
}
export interface dupNodeProps {
id: string;
@@ -486,20 +486,20 @@ export interface chatInfoMessage extends Partial<Messages> {
relationships: ExtendedRelationship[];
chunks: Chunk[];
metricDetails:
| {
[key: string]: number | string;
}
| undefined;
| {
[key: string]: number | string;
}
| undefined;
metricError: string;
infoEntities: Entity[];
communities: Community[];
infoLoading: boolean;
metricsLoading: boolean;
activeChatmodes:
| {
[key: string]: ResponseMode;
}
| undefined;
| {
[key: string]: ResponseMode;
}
| undefined;
multiModelMetrics: multimodelmetric[];
saveInfoEntitites: (entities: Entity[]) => void;
saveNodes: (chatNodes: ExtendedNode[]) => void;