Skip to content

Commit 709770f

Browse files
User flow changes for recreating supported vector index (#682)
* Removed the if check * Added one more check for creating the vector index when chunks exist without embeddings * Removed local files * Condition changes * Chunks-exist check * Chunks-exist-without-embeddings check * Vector index issue fixed * Vector index with a different dimension * Update graphDB_dataAccess.py --------- Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
1 parent 590f714 commit 709770f

15 files changed

+132
-67
lines changed

backend/files/About Amazon.pdf

-160 KB
Binary file not shown.
Binary file not shown.
-153 KB
Binary file not shown.

backend/files/Gitcheatsheet.pdf

-180 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
-378 KB
Binary file not shown.

backend/score.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -611,11 +611,11 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da
611611
gc.collect()
612612

613613
@app.post("/drop_create_vector_index")
614-
async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(), is_vector_index_recreate=Form()):
614+
async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), database=Form(), isVectorIndexExist=Form()):
615615
try:
616616
graph = create_graph_database_connection(uri, userName, password, database)
617617
graphDb_data_Access = graphDBdataAccess(graph)
618-
result = graphDb_data_Access.drop_create_vector_index(is_vector_index_recreate)
618+
result = graphDb_data_Access.drop_create_vector_index(isVectorIndexExist)
619619
return create_api_response('Success',message=result)
620620
except Exception as e:
621621
job_status = "Failed"

backend/src/graphDB_dataAccess.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -158,23 +158,34 @@ def connection_check_and_get_vector_dimensions(self):
158158
WHERE type = 'VECTOR' AND name = 'vector'
159159
RETURN options.indexConfig['vector.dimensions'] AS vector_dimensions
160160
""")
161+
162+
result_chunks = self.graph.query("""match (c:Chunk) return size(c.embedding) as embeddingSize, count(*) as chunks,
163+
count(c.embedding) as hasEmbedding
164+
""")
165+
161166
embedding_model = os.getenv('EMBEDDING_MODEL')
162167
embeddings, application_dimension = load_embedding_model(embedding_model)
163168
logging.info(f'embedding model:{embeddings} and dimesion:{application_dimension}')
169+
# print(chunks_exists)
164170

165171
if self.graph:
166172
if len(db_vector_dimension) > 0:
167173
return {'db_vector_dimension': db_vector_dimension[0]['vector_dimensions'], 'application_dimension':application_dimension, 'message':"Connection Successful"}
168174
else:
169-
logging.info("Vector index does not exist in database")
170-
return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful"}
175+
if len(db_vector_dimension) == 0 and len(result_chunks) == 0:
176+
logging.info("Chunks and vector index does not exists in database")
177+
return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":False}
178+
elif len(db_vector_dimension) == 0 and result_chunks[0]['hasEmbedding']==0 and result_chunks[0]['chunks'] > 0:
179+
return {'db_vector_dimension': 0, 'application_dimension':application_dimension, 'message':"Connection Successful","chunks_exists":True}
180+
else:
181+
return {'message':"Connection Successful"}
171182

172183
def execute_query(self, query, param=None):
173184
return self.graph.query(query, param)
174185

175186
def get_current_status_document_node(self, file_name):
176187
query = """
177-
MATCH(d:Document {fileName : $file_name}) RETURN d.status AS Status , d.processingTime AS processingTime,
188+
MATCH(d:Document {fileName : $file_name}) RETURN d.stats AS Status , d.processingTime AS processingTime,
178189
d.nodeCount AS nodeCount, d.model as model, d.relationshipCount as relationshipCount,
179190
d.total_pages AS total_pages, d.total_chunks AS total_chunks , d.fileSize as fileSize,
180191
d.is_cancelled as is_cancelled, d.processed_chunk as processed_chunk, d.fileSource as fileSource
@@ -322,15 +333,16 @@ def merge_duplicate_nodes(self,duplicate_nodes_list):
322333
param = {"rows":nodes_list}
323334
return self.execute_query(query,param)
324335

325-
def drop_create_vector_index(self, is_vector_index_recreate):
336+
def drop_create_vector_index(self, isVectorIndexExist):
326337
"""
327338
drop and create the vector index when vector index dimesion are different.
328339
"""
329340
embedding_model = os.getenv('EMBEDDING_MODEL')
330341
embeddings, dimension = load_embedding_model(embedding_model)
331-
if is_vector_index_recreate == 'true':
332-
self.graph.query("""drop index vector""")
333342

343+
if isVectorIndexExist == 'true':
344+
self.graph.query("""drop index vector""")
345+
# self.graph.query("""drop index vector""")
334346
self.graph.query("""CREATE VECTOR INDEX `vector` if not exists for (c:Chunk) on (c.embedding)
335347
OPTIONS {indexConfig: {
336348
`vector.dimensions`: $dimensions,
@@ -341,4 +353,4 @@ def drop_create_vector_index(self, is_vector_index_recreate):
341353
"dimensions" : dimension
342354
}
343355
)
344-
return "Drop and Re-Create vector index succesfully"
356+
return "Drop and Re-Create vector index succesfully"

frontend/src/components/Content.tsx

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,10 @@ const Content: React.FC<ContentProps> = ({
4747
const isTablet = useMediaQuery(`(min-width:${breakpoints.xs}) and (max-width: ${breakpoints.lg})`);
4848
const [init, setInit] = useState<boolean>(false);
4949
const [openConnection, setOpenConnection] = useState<connectionState>({
50-
isvectorIndexMatch: true,
5150
openPopUp: false,
52-
novectorindexInDB: true,
51+
chunksExists: false,
52+
vectorIndexMisMatch: false,
53+
chunksExistsWithDifferentDimension: false,
5354
});
5455
const [openGraphView, setOpenGraphView] = useState<boolean>(false);
5556
const [inspectedName, setInspectedName] = useState<string>('');
@@ -418,14 +419,24 @@ const Content: React.FC<ContentProps> = ({
418419
userDbVectorIndex: response.data.data.db_vector_dimension,
419420
})
420421
);
421-
if (response.data.data.application_dimension === response.data.data.db_vector_dimension) {
422+
if (
423+
(response.data.data.application_dimension === response.data.data.db_vector_dimension ||
424+
response.data.data.db_vector_dimension == 0) &&
425+
!response.data.data.chunks_exists
426+
) {
422427
setConnectionStatus(true);
423428
setOpenConnection((prev) => ({ ...prev, openPopUp: false }));
424429
} else {
425430
setOpenConnection({
426-
isvectorIndexMatch: false,
427431
openPopUp: true,
428-
novectorindexInDB: response.data.data.db_vector_dimension === 0,
432+
chunksExists: response.data.data.chunks_exists as boolean,
433+
vectorIndexMisMatch:
434+
response.data.data.db_vector_dimension > 0 &&
435+
response.data.data.db_vector_dimension != response.data.data.application_dimension,
436+
chunksExistsWithDifferentDimension:
437+
response.data.data.db_vector_dimension > 0 &&
438+
response.data.data.db_vector_dimension != response.data.data.application_dimension &&
439+
(response.data.data.chunks_exists ?? true),
429440
});
430441
setConnectionStatus(false);
431442
}
@@ -539,8 +550,9 @@ const Content: React.FC<ContentProps> = ({
539550
open={openConnection.openPopUp}
540551
setOpenConnection={setOpenConnection}
541552
setConnectionStatus={setConnectionStatus}
542-
isVectorIndexMatch={openConnection.isvectorIndexMatch}
543-
noVectorIndexFound={openConnection.novectorindexInDB}
553+
isVectorIndexMatch={openConnection.vectorIndexMisMatch}
554+
chunksExistsWithoutEmbedding={openConnection.chunksExists}
555+
chunksExistsWithDifferentEmbedding={openConnection.chunksExistsWithDifferentDimension}
544556
/>
545557
</Suspense>
546558

0 commit comments

Comments
 (0)