Skip to content

Commit 358d5a6

Browse files
Chunk Text Details (#850)
* Community title added
* Added API for fetching chunk text details
* Output format changed for chunk text
* Integrated the service layer for chunk data
* Added the chunks
* Formatting output of LLM call for title generation
* Formatting LLM output for title generation
* Added flex row
* Changes related to pagination of the fetch chunk API
* Integrated the pagination
* Page change error resolved for the fetch chunk API
* For the get neighbours API, community title added in properties
* Moving community-title-related changes to a separate branch
* Removed Query module from the fastapi import statement
* Icon changes

---------

Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
1 parent d8af5a5 commit 358d5a6

File tree

9 files changed

+281
-12
lines changed

9 files changed

+281
-12
lines changed

backend/score.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from langchain_google_vertexai import ChatVertexAI
1313
from src.api_response import create_api_response
1414
from src.graphDB_dataAccess import graphDBdataAccess
15-
from src.graph_query import get_graph_results
15+
from src.graph_query import get_graph_results,get_chunktext_results
1616
from src.chunkid_entities import get_entities_from_chunkids
1717
from src.post_processing import create_vector_fulltext_indexes, create_entity_embedding
1818
from sse_starlette.sse import EventSourceResponse
@@ -818,5 +818,57 @@ async def calculate_metric(question: str = Form(),
818818
finally:
819819
gc.collect()
820820

821+
@app.post("/fetch_chunktext")
async def fetch_chunktext(
    uri: str = Form(),
    database: str = Form(),
    userName: str = Form(),
    password: str = Form(),
    document_name: str = Form(),
    page_no: int = Form(1)
):
    """Serve one page of chunk text for a document.

    All parameters arrive as form fields. ``uri``, ``userName``, ``password``
    and ``database`` are forwarded to ``get_chunktext_results`` together with
    ``document_name`` and ``page_no`` (defaults to 1).

    The request and its completion are both written to the structured logger;
    the password is deliberately left out of both log entries.

    Returns:
        The value built by ``create_api_response``: a 'Success' response
        carrying the query result and the elapsed time, or a 'Failed'
        response carrying the error text when anything raises.
    """
    try:
        # Structured log entry for the incoming request (no password).
        request_log = dict(
            api_name='fetch_chunktext',
            db_url=uri,
            userName=userName,
            database=database,
            document_name=document_name,
            page_no=page_no,
            logging_time=formatted_time(datetime.now(timezone.utc)),
        )
        logger.log_struct(request_log, "INFO")

        started_at = time.time()
        # The graph query is blocking, so it runs in a worker thread.
        query_kwargs = dict(
            uri=uri,
            username=userName,
            password=password,
            database=database,
            document_name=document_name,
            page_no=page_no,
        )
        chunk_page = await asyncio.to_thread(get_chunktext_results, **query_kwargs)
        elapsed_time = time.time() - started_at

        # Structured log entry for the completed request.
        completion_log = dict(
            api_name='fetch_chunktext',
            db_url=uri,
            document_name=document_name,
            page_no=page_no,
            logging_time=formatted_time(datetime.now(timezone.utc)),
            elapsed_api_time=f'{elapsed_time:.2f}',
        )
        logger.log_struct(completion_log, "INFO")
        return create_api_response(
            'Success',
            data=chunk_page,
            message=f"Total elapsed API time {elapsed_time:.2f}",
        )
    except Exception as e:
        error_message = str(e)
        logging.exception(f'Exception in fetch_chunktext: {error_message}')
        return create_api_response(
            "Failed",
            message="Unable to get chunk text response",
            error=error_message,
        )
    finally:
        gc.collect()
871+
872+
821873
if __name__ == "__main__":
822874
uvicorn.run(app)

backend/src/communities.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -487,4 +487,3 @@ def create_communities(uri, username, password, database,model=COMMUNITY_CREATIO
487487

488488

489489

490-

backend/src/graph_query.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from neo4j import GraphDatabase
44
import os
55
import json
6-
from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY
6+
from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY
77
# from neo4j.debug import watch
88

99
# watch("neo4j")
@@ -226,3 +226,34 @@ def get_graph_results(uri, username, password,database,document_names):
226226
driver.close()
227227

228228

229+
def get_chunktext_results(uri, username, password, database, document_name, page_no):
    """Retrieve one page of chunk text, position and page number for a document.

    Runs ``COUNT_CHUNKS_QUERY`` to get the document's chunk count, then
    ``CHUNK_TEXT_QUERY`` with ``skip``/``limit`` derived from ``page_no``.
    Pages hold 10 chunks each and ``page_no`` is treated as 1-based
    (page 1 maps to ``skip = 0``).

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        username: User name for driver authentication.
        password: Password for driver authentication.
        database: Database name passed to ``driver.session``.
        document_name: Bound to the ``$file_name`` query parameter.
        page_no: Page to fetch.

    Returns:
        dict with ``"pageitems"`` (a list of dicts with ``"text"``,
        ``"position"`` and ``"pagenumber"``) and ``"total_pages"``
        (chunk count divided by the page size, rounded up).

    Raises:
        Exception: A generic error chained to the underlying cause; the
            underlying message is written to the log.
    """
    page_size = 10
    # Bind the name before the try block: if driver construction itself fails,
    # the finally clause must not raise UnboundLocalError and hide the real error.
    driver = None
    try:
        logging.info("Starting chunk text query process")
        skip = (page_no - 1) * page_size
        driver = GraphDatabase.driver(uri, auth=(username, password))
        with driver.session(database=database) as session:
            total_chunks_result = session.run(COUNT_CHUNKS_QUERY, file_name=document_name)
            total_chunks = total_chunks_result.single()["total_chunks"]
            # Ceiling division without floats.
            total_pages = (total_chunks + page_size - 1) // page_size
            records = session.run(CHUNK_TEXT_QUERY, file_name=document_name, skip=skip, limit=page_size)
            # Consume the records while the session is still open.
            pageitems = [
                {
                    "text": record["chunk_text"],
                    "position": record["chunk_position"],
                    "pagenumber": record["page_number"],
                }
                for record in records
            ]
            logging.info(f"Query process completed with {len(pageitems)} chunks retrieved")
            return {
                "pageitems": pageitems,
                "total_pages": total_pages,
            }
    except Exception as e:
        logging.error(f"An error occurred in get_chunktext_results. Error: {str(e)}")
        raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e
    finally:
        if driver is not None:
            driver.close()

backend/src/shared/constants.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,19 @@
161161
] AS entities
162162
"""
163163

164+
# Cypher: counts the Chunk nodes that have a PART_OF relationship to the
# Document whose fileName equals $file_name; the count is returned as
# `total_chunks`.
COUNT_CHUNKS_QUERY = """
165+
MATCH (d:Document {fileName: $file_name})<-[:PART_OF]-(c:Chunk)
166+
RETURN count(c) AS total_chunks
167+
"""
168+
169+
# Cypher: returns text, position and page_number of the Chunk nodes that are
# PART_OF the Document whose fileName equals $file_name, ordered by position.
# $skip and $limit select the window of rows to return.
CHUNK_TEXT_QUERY = """
170+
MATCH (d:Document {fileName: $file_name})<-[:PART_OF]-(c:Chunk)
171+
RETURN c.text AS chunk_text, c.position AS chunk_position, c.page_number AS page_number
172+
ORDER BY c.position
173+
SKIP $skip
174+
LIMIT $limit
175+
"""
176+
164177
## CHAT SETUP
165178
CHAT_MAX_TOKENS = 1000
166179
CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5

frontend/src/components/Content.tsx

Lines changed: 57 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import {
1111
CustomFile,
1212
OptionType,
1313
UserCredentials,
14+
chunkdata,
1415
connectionState,
1516
} from '../types';
1617
import deleteAPI from '../services/DeleteFiles';
@@ -44,6 +45,8 @@ import retry from '../services/retry';
4445
import { showErrorToast, showNormalToast, showSuccessToast } from '../utils/toasts';
4546
import { useMessageContext } from '../context/UserMessages';
4647
import PostProcessingToast from './Popups/GraphEnhancementDialog/PostProcessingCheckList/PostProcessingToast';
48+
import { getChunkText } from '../services/getChunkText';
49+
import ChunkPopUp from './Popups/ChunkPopUp';
4750

4851
const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal'));
4952
const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog'));
@@ -70,6 +73,7 @@ const Content: React.FC<ContentProps> = ({
7073
});
7174
const [openGraphView, setOpenGraphView] = useState<boolean>(false);
7275
const [inspectedName, setInspectedName] = useState<string>('');
76+
const [documentName, setDocumentName] = useState<string>('');
7377
const {
7478
setUserCredentials,
7579
userCredentials,
@@ -85,6 +89,12 @@ const Content: React.FC<ContentProps> = ({
8589
const [retryFile, setRetryFile] = useState<string>('');
8690
const [retryLoading, setRetryLoading] = useState<boolean>(false);
8791
const [showRetryPopup, toggleRetryPopup] = useReducer((state) => !state, false);
92+
const [showChunkPopup, toggleChunkPopup] = useReducer((state) => !state, false);
93+
const [chunksLoading, toggleChunksLoading] = useReducer((state) => !state, false);
94+
const [currentPage, setCurrentPage] = useState<number>(0);
95+
const [totalPageCount, setTotalPageCount] = useState<number | null>(null);
96+
const [textChunks, setTextChunks] = useState<chunkdata[]>([]);
97+
8898
const [alertStateForRetry, setAlertStateForRetry] = useState<BannerAlertProps>({
8999
showAlert: false,
90100
alertType: 'neutral',
@@ -122,7 +132,12 @@ const Content: React.FC<ContentProps> = ({
122132
}
123133
);
124134
const childRef = useRef<ChildRef>(null);
125-
135+
// Move the chunk viewer forward by one page.
const incrementPage = () => setCurrentPage((page) => page + 1);
138+
// Move the chunk viewer back by one page.
const decrementPage = () => setCurrentPage((page) => page - 1);
126141
useEffect(() => {
127142
if (!init && !searchParams.has('connectURL')) {
128143
let session = localStorage.getItem('neo4j.connection');
@@ -149,7 +164,13 @@ const Content: React.FC<ContentProps> = ({
149164
setOpenConnection((prev) => ({ ...prev, openPopUp: true }));
150165
}
151166
}, []);
152-
167+
useEffect(() => {
168+
if (currentPage >= 1) {
169+
(async () => {
170+
await getChunks(documentName, currentPage);
171+
})();
172+
}
173+
}, [currentPage, documentName]);
153174
useEffect(() => {
154175
setFilesData((prevfiles) => {
155176
return prevfiles.map((curfile) => {
@@ -251,7 +272,15 @@ const Content: React.FC<ContentProps> = ({
251272
setModel(selectedOption?.value);
252273
}
253274
};
254-
275+
/**
 * Fetches one page of chunks for the document `name` and stores it in state.
 *
 * The total page count is stored only when none is currently set.
 * Errors from the request still propagate to the caller.
 */
const getChunks = async (name: string, pageNo: number) => {
  toggleChunksLoading();
  try {
    const response = await getChunkText(userCredentials as UserCredentials, name, pageNo);
    setTextChunks(response.data.data.pageitems);
    if (!totalPageCount) {
      setTotalPageCount(response.data.data.total_pages);
    }
  } finally {
    // The loading flag is a toggle, so it must be flipped back on failure as
    // well; otherwise it stays inverted for every later request.
    toggleChunksLoading();
  }
};
255284
const extractData = async (uid: string, isselectedRows = false, filesTobeProcess: CustomFile[]) => {
256285
if (!isselectedRows) {
257286
const fileItem = filesData.find((f) => f.id == uid);
@@ -497,7 +526,7 @@ const Content: React.FC<ContentProps> = ({
497526
}
498527
};
499528

500-
function processWaitingFilesOnRefresh() {
529+
const processWaitingFilesOnRefresh = () => {
501530
let data = [];
502531
const processingFilesCount = filesData.filter((f) => f.status === 'Processing').length;
503532

@@ -517,7 +546,7 @@ const Content: React.FC<ContentProps> = ({
517546
.filter((f) => f.status === 'New' || f.status == 'Reprocess');
518547
addFilesToQueue(selectedNewFiles as CustomFile[]);
519548
}
520-
}
549+
};
521550

522551
const handleOpenGraphClick = () => {
523552
const bloomUrl = process.env.VITE_BLOOM_URL;
@@ -771,6 +800,18 @@ const Content: React.FC<ContentProps> = ({
771800
view='contentView'
772801
></DeletePopUp>
773802
)}
803+
{showChunkPopup && (
804+
<ChunkPopUp
805+
chunksLoading={chunksLoading}
806+
onClose={() => toggleChunkPopup()}
807+
showChunkPopup={showChunkPopup}
808+
chunks={textChunks}
809+
incrementPage={incrementPage}
810+
decrementPage={decrementPage}
811+
currentPage={currentPage}
812+
totalPageCount={totalPageCount}
813+
></ChunkPopUp>
814+
)}
774815
{showEnhancementDialog && (
775816
<GraphEnhancementDialog
776817
open={showEnhancementDialog}
@@ -859,6 +900,17 @@ const Content: React.FC<ContentProps> = ({
859900
setRetryFile(id);
860901
toggleRetryPopup();
861902
}}
903+
onChunkView={async (name) => {
904+
setDocumentName(name);
905+
if (name != documentName) {
906+
toggleChunkPopup();
907+
if (totalPageCount) {
908+
setTotalPageCount(null);
909+
}
910+
setCurrentPage(1);
911+
// await getChunks(name, 1);
912+
}
913+
}}
862914
ref={childRef}
863915
handleGenerateGraph={processWaitingFilesOnRefresh}
864916
></FileTable>

frontend/src/components/FileTable.tsx

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate,
3838
import { useCredentials } from '../context/UserCredentials';
3939
import {
4040
ArrowPathIconSolid,
41-
ClipboardDocumentIconOutline,
41+
ClipboardDocumentIconSolid,
4242
MagnifyingGlassCircleIconSolid,
43+
DocumentTextIconSolid,
4344
} from '@neo4j-ndl/react/icons';
4445
import CustomProgressBar from './UI/CustomProgressBar';
4546
import subscribe from '../services/PollingAPI';
@@ -56,7 +57,7 @@ import { ThemeWrapperContext } from '../context/ThemeWrapper';
5657
let onlyfortheFirstRender = true;
5758

5859
const FileTable = forwardRef<ChildRef, FileTableProps>((props, ref) => {
59-
const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props;
60+
const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry, onChunkView } = props;
6061
const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } =
6162
useFileContext();
6263
const { userCredentials, isReadOnlyUser } = useCredentials();
@@ -527,10 +528,25 @@ const FileTable = forwardRef<ChildRef, FileTableProps>((props, ref) => {
527528
handleCopy(copied);
528529
}}
529530
>
530-
<ClipboardDocumentIconOutline className={`${copyRow} ? 'cursor-wait': 'cursor`} />
531+
<ClipboardDocumentIconSolid className={`${copyRow} ? 'cursor-wait': 'cursor`} />
532+
</IconButtonWithToolTip>
533+
<IconButtonWithToolTip
534+
onClick={() => {
535+
onChunkView(info?.row?.original?.name as string);
536+
}}
537+
clean
538+
placement='left'
539+
label='chunktextaction'
540+
text='View Chunks'
541+
size='large'
542+
disabled={info.getValue() === 'Uploading'}
543+
>
544+
<DocumentTextIconSolid />
531545
</IconButtonWithToolTip>
532546
</>
533547
),
548+
size: 300,
549+
minSize: 180,
534550
header: () => <span>Actions</span>,
535551
footer: (info) => info.column.id,
536552
}),
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import { Dialog, Typography, Flex, IconButton } from '@neo4j-ndl/react';
2+
import { ArrowLeftIconOutline, ArrowRightIconOutline } from '@neo4j-ndl/react/icons';
3+
import { chunkdata } from '../../../types';
4+
import Loader from '../../../utils/Loader';
5+
import { useMemo } from 'react';
6+
7+
const ChunkPopUp = ({
8+
showChunkPopup,
9+
chunks,
10+
onClose,
11+
chunksLoading,
12+
incrementPage,
13+
decrementPage,
14+
currentPage,
15+
totalPageCount,
16+
}: {
17+
showChunkPopup: boolean;
18+
chunks: chunkdata[];
19+
onClose: () => void;
20+
chunksLoading: boolean;
21+
incrementPage: () => void;
22+
decrementPage: () => void;
23+
currentPage: number | null;
24+
totalPageCount: number | null;
25+
}) => {
26+
const sortedChunksData = useMemo(() => {
27+
return chunks.sort((a, b) => a.position - b.position);
28+
}, [chunks]);
29+
return (
30+
<Dialog open={showChunkPopup} onClose={onClose}>
31+
<Dialog.Header>Text Chunks</Dialog.Header>
32+
<Dialog.Content>
33+
{chunksLoading ? (
34+
<Loader title='loading...'></Loader>
35+
) : (
36+
<ol className='max-h-80 overflow-y-auto'>
37+
{sortedChunksData.map((c, idx) => (
38+
<li key={`${idx}${c.position}`} className='flex flex-row gap-2'>
39+
<Flex flexDirection='column' gap='1'>
40+
<Flex flexDirection='row'>
41+
<Typography variant='label'>Position :</Typography>
42+
<Typography variant='subheading-medium'>{c.position}</Typography>
43+
</Flex>
44+
{c.pagenumber ? (
45+
<Flex flexDirection='row'>
46+
<Typography variant='label'>Page No :</Typography>{' '}
47+
<Typography variant='subheading-small'>{c.pagenumber}</Typography>
48+
</Flex>
49+
) : null}
50+
<Typography variant='body-medium'>{c.text}</Typography>
51+
</Flex>
52+
</li>
53+
))}
54+
</ol>
55+
)}
56+
</Dialog.Content>
57+
{totalPageCount != null && totalPageCount > 1 && (
58+
<Dialog.Actions className='flex !justify-center items-center'>
59+
<Flex flexDirection='row'>
60+
<IconButton disabled={currentPage === 1} onClick={decrementPage}>
61+
<ArrowLeftIconOutline />
62+
</IconButton>
63+
<IconButton disabled={currentPage === totalPageCount} onClick={incrementPage}>
64+
<ArrowRightIconOutline />
65+
</IconButton>
66+
</Flex>
67+
</Dialog.Actions>
68+
)}
69+
</Dialog>
70+
);
71+
};
72+
export default ChunkPopUp;

0 commit comments

Comments
 (0)