Skip to content

Dev to Staging #709

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 29 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
271e8d5
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
prakriti-solankey Aug 20, 2024
e078e21
connection _check
prakriti-solankey Aug 20, 2024
7c66bf2
Fix typo: correct 'josn_obj' to 'json_obj' (#697)
destiny966113 Aug 20, 2024
477fda0
lint fixes
kartikpersistent Aug 20, 2024
bacbcfc
connection _check
prakriti-solankey Aug 20, 2024
885c345
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
prakriti-solankey Aug 20, 2024
2fc3e13
Chatbot changes (#700)
vasanthasaikalluri Aug 21, 2024
c0cca99
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
prakriti-solankey Aug 21, 2024
279d820
fixed issue delete entities return count
praveshkumar1988 Aug 21, 2024
30f92cd
removed specified version due to dependency clashes between versions
praveshkumar1988 Aug 21, 2024
832afd9
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
praveshkumar1988 Aug 21, 2024
edcff3b
updated script "integration test cases"
abhishekkumar-27 Aug 21, 2024
71ed29a
decreased the delay for polling API
kartikpersistent Aug 21, 2024
7eb7605
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
kartikpersistent Aug 21, 2024
1af5877
Graph enhancements (#696)
prakriti-solankey Aug 21, 2024
4575198
changed chat mode names (#702)
vasanthasaikalluri Aug 21, 2024
2d76462
Merge branch 'STAGING' into DEV
prakriti-solankey Aug 21, 2024
a6ee345
env changes
kartikpersistent Aug 21, 2024
dc351a0
used axios instance for network calls
kartikpersistent Aug 22, 2024
a241c6b
disabled the tooltip when the dropdown is in open state
kartikpersistent Aug 22, 2024
426906b
format fixes + chat mode naming changes
kartikpersistent Aug 22, 2024
78ec3fd
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
prakriti-solankey Aug 22, 2024
7d0b431
mode added to info model for entities
prakriti-solankey Aug 22, 2024
e60a6e3
Merge branch 'STAGING' into DEV
prakriti-solankey Aug 22, 2024
f2b1e17
Issue fixed, list index out of range while getting status of document node
praveshkumar1988 Aug 26, 2024
94c493e
processing count updated on cancel
kartikpersistent Aug 26, 2024
489b5ae
Merge branch 'DEV' of https://github.com/neo4j-labs/llm-graph-builder…
kartikpersistent Aug 26, 2024
3ef88b6
format fixes
kartikpersistent Aug 27, 2024
4e2f909
Merge branch 'STAGING' into DEV
kartikpersistent Aug 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 83 additions & 77 deletions backend/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,91 +277,97 @@ def processing_source(uri, userName, password, database, model, file_name, pages
create_chunks_obj = CreateChunksofDocument(pages, graph)
chunks = create_chunks_obj.split_file_into_chunks()
chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks)
if result[0]['Status'] != 'Processing':
obj_source_node = sourceNode()
status = "Processing"
obj_source_node.file_name = file_name
obj_source_node.status = status
obj_source_node.total_chunks = len(chunks)
obj_source_node.total_pages = len(pages)
obj_source_node.model = model
logging.info(file_name)
logging.info(obj_source_node)
graphDb_data_Access.update_source_node(obj_source_node)

logging.info('Update the status as Processing')
update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED'))
# selected_chunks = []
is_cancelled_status = False
job_status = "Completed"
node_count = 0
rel_count = 0
for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed):
select_chunks_upto = i+update_graph_chunk_processed
logging.info(f'Selected Chunks upto: {select_chunks_upto}')
if len(chunkId_chunkDoc_list) <= select_chunks_upto:
select_chunks_upto = len(chunkId_chunkDoc_list)
selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto]

if len(result) > 0:
if result[0]['Status'] != 'Processing':
obj_source_node = sourceNode()
status = "Processing"
obj_source_node.file_name = file_name
obj_source_node.status = status
obj_source_node.total_chunks = len(chunks)
obj_source_node.total_pages = len(pages)
obj_source_node.model = model
logging.info(file_name)
logging.info(obj_source_node)
graphDb_data_Access.update_source_node(obj_source_node)

logging.info('Update the status as Processing')
update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED'))
# selected_chunks = []
is_cancelled_status = False
job_status = "Completed"
node_count = 0
rel_count = 0
for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed):
select_chunks_upto = i+update_graph_chunk_processed
logging.info(f'Selected Chunks upto: {select_chunks_upto}')
if len(chunkId_chunkDoc_list) <= select_chunks_upto:
select_chunks_upto = len(chunkId_chunkDoc_list)
selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto]
result = graphDb_data_Access.get_current_status_document_node(file_name)
is_cancelled_status = result[0]['is_cancelled']
logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}")
if bool(is_cancelled_status) == True:
job_status = "Cancelled"
logging.info('Exit from running loop of processing file')
exit
else:
node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count)
end_time = datetime.now()
processed_time = end_time - start_time

obj_source_node = sourceNode()
obj_source_node.file_name = file_name
obj_source_node.updated_at = end_time
obj_source_node.processing_time = processed_time
obj_source_node.node_count = node_count
obj_source_node.processed_chunk = select_chunks_upto
obj_source_node.relationship_count = rel_count
graphDb_data_Access.update_source_node(obj_source_node)

result = graphDb_data_Access.get_current_status_document_node(file_name)
is_cancelled_status = result[0]['is_cancelled']
logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}")
if bool(is_cancelled_status) == True:
job_status = "Cancelled"
logging.info('Exit from running loop of processing file')
exit
else:
node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count)
end_time = datetime.now()
processed_time = end_time - start_time

obj_source_node = sourceNode()
obj_source_node.file_name = file_name
obj_source_node.updated_at = end_time
obj_source_node.processing_time = processed_time
obj_source_node.node_count = node_count
obj_source_node.processed_chunk = select_chunks_upto
obj_source_node.relationship_count = rel_count
graphDb_data_Access.update_source_node(obj_source_node)

result = graphDb_data_Access.get_current_status_document_node(file_name)
is_cancelled_status = result[0]['is_cancelled']
if bool(is_cancelled_status) == True:
logging.info(f'Is_cancelled True at the end extraction')
job_status = 'Cancelled'
logging.info(f'Job Status at the end : {job_status}')
end_time = datetime.now()
processed_time = end_time - start_time
obj_source_node = sourceNode()
obj_source_node.file_name = file_name
obj_source_node.status = job_status
obj_source_node.processing_time = processed_time
logging.info(f'Is_cancelled True at the end extraction')
job_status = 'Cancelled'
logging.info(f'Job Status at the end : {job_status}')
end_time = datetime.now()
processed_time = end_time - start_time
obj_source_node = sourceNode()
obj_source_node.file_name = file_name
obj_source_node.status = job_status
obj_source_node.processing_time = processed_time

graphDb_data_Access.update_source_node(obj_source_node)
logging.info('Updated the nodeCount and relCount properties in Document node')
logging.info(f'file:{file_name} extraction has been completed')
graphDb_data_Access.update_source_node(obj_source_node)
logging.info('Updated the nodeCount and relCount properties in Document node')
logging.info(f'file:{file_name} extraction has been completed')


# merged_file_path have value only when file uploaded from local

if is_uploaded_from_local:
gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
if gcs_file_cache == 'True':
folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name)
else:
delete_uploaded_local_file(merged_file_path, file_name)
# merged_file_path have value only when file uploaded from local

return {
"fileName": file_name,
"nodeCount": node_count,
"relationshipCount": rel_count,
"processingTime": round(processed_time.total_seconds(),2),
"status" : job_status,
"model" : model,
"success_count" : 1
}
if is_uploaded_from_local:
gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
if gcs_file_cache == 'True':
folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name)
else:
delete_uploaded_local_file(merged_file_path, file_name)

return {
"fileName": file_name,
"nodeCount": node_count,
"relationshipCount": rel_count,
"processingTime": round(processed_time.total_seconds(),2),
"status" : job_status,
"model" : model,
"success_count" : 1
}
else:
logging.info('File does not process because it\'s already in Processing status')
else:
logging.info('File does not process because it\'s already in Processing status')
error_message = "Unable to get the status of docuemnt node."
logging.error(error_message)
raise Exception(error_message)

def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count):
#create vector index and update chunk node with embedding
Expand Down
6 changes: 5 additions & 1 deletion frontend/src/components/ChatBot/ChatInfoModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,11 @@ const ChatInfoModal: React.FC<chatInfoMessage> = ({
) : (
<Tabs size='large' fill='underline' onChange={onChangeTabs} value={activeTab}>
{mode != 'graph' ? <Tabs.Tab tabId={3}>Sources used</Tabs.Tab> : <></>}
{mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? <Tabs.Tab tabId={4}>Top Entities used</Tabs.Tab> : <></>}
{mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? (
<Tabs.Tab tabId={4}>Top Entities used</Tabs.Tab>
) : (
<></>
)}
{mode === 'graph' && cypher_query?.trim().length ? (
<Tabs.Tab tabId={6}>Generated Cypher Query</Tabs.Tab>
) : (
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/components/Content.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ const Content: React.FC<ContentProps> = ({
setextractLoading(false);
await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
});
} else if (queueFiles && !queue.isEmpty()&&processingFilesCount<batchSize) {
} else if (queueFiles && !queue.isEmpty() && processingFilesCount < batchSize) {
data = scheduleBatchWiseProcess(queue.items, true);
Promise.allSettled(data).then(async (_) => {
setextractLoading(false);
Expand Down
2 changes: 1 addition & 1 deletion frontend/src/components/FileTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ const FileTable = forwardRef<ChildRef, FileTableProps>((props, ref) => {
}
return prev + 1;
});
queue.remove(fileName)
queue.remove(fileName);
} else {
let errorobj = { error: res.data.error, message: res.data.message, fileName };
throw new Error(JSON.stringify(errorobj));
Expand Down
16 changes: 8 additions & 8 deletions frontend/src/components/Graph/GraphViewModal.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,10 @@ const GraphViewModal: React.FunctionComponent<GraphViewModalProps> = ({
graphType.includes('DocumentChunk') && graphType.includes('Entities')
? queryMap.DocChunkEntities
: graphType.includes('DocumentChunk')
? queryMap.DocChunks
: graphType.includes('Entities')
? queryMap.Entities
: '';
? queryMap.DocChunks
: graphType.includes('Entities')
? queryMap.Entities
: '';

// fit graph to original position
const handleZoomToFit = () => {
Expand Down Expand Up @@ -135,10 +135,10 @@ const GraphViewModal: React.FunctionComponent<GraphViewModalProps> = ({
const nodeRelationshipData =
viewPoint === graphLabels.showGraphView
? await graphQueryAPI(
userCredentials as UserCredentials,
graphQuery,
selectedRows?.map((f) => f.name)
)
userCredentials as UserCredentials,
graphQuery,
selectedRows?.map((f) => f.name)
)
: await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']);
return nodeRelationshipData;
} catch (error: any) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,12 @@ export default function DeduplicationTab() {
const onRemove = (nodeid: string, similarNodeId: string) => {
setDuplicateNodes((prev) => {
return prev.map((d) =>
d.e.elementId === nodeid
(d.e.elementId === nodeid
? {
...d,
similar: d.similar.filter((n) => n.elementId != similarNodeId),
}
: d
: d)
);
});
};
Expand Down