Skip to content

Chunk Text Details #850

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 53 additions & 1 deletion backend/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from langchain_google_vertexai import ChatVertexAI
from src.api_response import create_api_response
from src.graphDB_dataAccess import graphDBdataAccess
from src.graph_query import get_graph_results
from src.graph_query import get_graph_results,get_chunktext_results
from src.chunkid_entities import get_entities_from_chunkids
from src.post_processing import create_vector_fulltext_indexes, create_entity_embedding
from sse_starlette.sse import EventSourceResponse
Expand Down Expand Up @@ -818,5 +818,57 @@ async def calculate_metric(question: str = Form(),
finally:
gc.collect()

@app.post("/fetch_chunktext")
async def fetch_chunktext(
    uri: str = Form(),
    database: str = Form(),
    userName: str = Form(),
    password: str = Form(),
    document_name: str = Form(),
    page_no: int = Form(1)
):
    """Return one page of chunk text for a document.

    The request is logged (without the password), then
    ``get_chunktext_results`` is executed in a worker thread via
    ``asyncio.to_thread`` so the blocking database call does not run on the
    event loop. On success the query result is wrapped in a 'Success' API
    response together with the elapsed time; on any exception a 'Failed'
    API response carrying the error text is returned instead of raising.
    """
    try:
        # Structured log entry describing the incoming request.
        request_log = dict(
            api_name='fetch_chunktext',
            db_url=uri,
            userName=userName,
            database=database,
            document_name=document_name,
            page_no=page_no,
            logging_time=formatted_time(datetime.now(timezone.utc)),
        )
        logger.log_struct(request_log, "INFO")

        started_at = time.time()
        result = await asyncio.to_thread(
            get_chunktext_results,
            uri=uri,
            username=userName,
            password=password,
            database=database,
            document_name=document_name,
            page_no=page_no,
        )
        elapsed_time = time.time() - started_at

        # Structured log entry describing the completed call and its duration.
        completion_log = dict(
            api_name='fetch_chunktext',
            db_url=uri,
            document_name=document_name,
            page_no=page_no,
            logging_time=formatted_time(datetime.now(timezone.utc)),
            elapsed_api_time=f'{elapsed_time:.2f}',
        )
        logger.log_struct(completion_log, "INFO")
        return create_api_response(
            'Success',
            data=result,
            message=f"Total elapsed API time {elapsed_time:.2f}",
        )
    except Exception as e:
        error_message = str(e)
        logging.exception(f'Exception in fetch_chunktext: {error_message}')
        return create_api_response(
            "Failed",
            message="Unable to get chunk text response",
            error=error_message,
        )
    finally:
        gc.collect()


# Start the ASGI app with uvicorn when this module is executed as a script.
if __name__ == "__main__":
    uvicorn.run(app)
1 change: 0 additions & 1 deletion backend/src/communities.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,4 +487,3 @@ def create_communities(uri, username, password, database,model=COMMUNITY_CREATIO




33 changes: 32 additions & 1 deletion backend/src/graph_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from neo4j import GraphDatabase
import os
import json
from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY
from src.shared.constants import GRAPH_CHUNK_LIMIT,GRAPH_QUERY,CHUNK_TEXT_QUERY,COUNT_CHUNKS_QUERY
# from neo4j.debug import watch

# watch("neo4j")
Expand Down Expand Up @@ -226,3 +226,34 @@ def get_graph_results(uri, username, password,database,document_names):
driver.close()


def get_chunktext_results(uri, username, password, database, document_name, page_no):
    """Retrieve one page of chunk text, position and page number for a document.

    Args:
        uri: Neo4j connection URI.
        username: Neo4j user name.
        password: Neo4j password.
        database: Name of the database to open the session on.
        document_name: Value bound to ``$file_name`` in the chunk queries.
        page_no: 1-based page index; each page holds up to 10 chunks.

    Returns:
        A dict with ``"pageitems"`` (list of dicts with ``"text"``,
        ``"position"`` and ``"pagenumber"``) and ``"total_pages"`` (ceiling of
        total chunk count divided by the page size).

    Raises:
        Exception: For any failure, including an invalid ``page_no``; the
            original error is chained as ``__cause__``.
    """
    page_size = 10
    # Bound before the try block so the finally clause never hits an unbound
    # name when the driver could not be created.
    driver = None
    try:
        logging.info("Starting chunk text query process")
        # Reject bad pages up front: a page below 1 would produce a negative
        # SKIP value, which is only rejected after a connection is opened.
        if page_no < 1:
            raise ValueError(f"page_no must be >= 1, got {page_no}")
        skip = (page_no - 1) * page_size
        driver = GraphDatabase.driver(uri, auth=(username, password))
        with driver.session(database=database) as session:
            total_chunks_result = session.run(COUNT_CHUNKS_QUERY, file_name=document_name)
            total_chunks = total_chunks_result.single()["total_chunks"]
            total_pages = (total_chunks + page_size - 1) // page_size  # Ceiling division
            records = session.run(CHUNK_TEXT_QUERY, file_name=document_name, skip=skip, limit=page_size)
            # Materialise the records while the session is still open.
            pageitems = [
                {
                    "text": record["chunk_text"],
                    "position": record["chunk_position"],
                    "pagenumber": record["page_number"]
                }
                for record in records
            ]
            logging.info(f"Query process completed with {len(pageitems)} chunks retrieved")
            return {
                "pageitems": pageitems,
                "total_pages": total_pages
            }
    except Exception as e:
        logging.error(f"An error occurred in get_chunktext_results. Error: {str(e)}")
        raise Exception("An error occurred in get_chunktext_results. Please check the logs for more details.") from e
    finally:
        if driver is not None:
            driver.close()
13 changes: 13 additions & 0 deletions backend/src/shared/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,19 @@
] AS entities
"""

# Counts the Chunk nodes attached through PART_OF to the Document whose
# fileName equals $file_name; the count is returned as `total_chunks`.
COUNT_CHUNKS_QUERY = """
MATCH (d:Document {fileName: $file_name})<-[:PART_OF]-(c:Chunk)
RETURN count(c) AS total_chunks
"""

# Returns text, position and page_number of the same document's chunks,
# ordered by position so SKIP/LIMIT paging sees a stable order.
# Parameters: $file_name, $skip (rows to skip), $limit (maximum rows returned).
CHUNK_TEXT_QUERY = """
MATCH (d:Document {fileName: $file_name})<-[:PART_OF]-(c:Chunk)
RETURN c.text AS chunk_text, c.position AS chunk_position, c.page_number AS page_number
ORDER BY c.position
SKIP $skip
LIMIT $limit
"""

## CHAT SETUP
CHAT_MAX_TOKENS = 1000
CHAT_SEARCH_KWARG_SCORE_THRESHOLD = 0.5
Expand Down
62 changes: 57 additions & 5 deletions frontend/src/components/Content.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
CustomFile,
OptionType,
UserCredentials,
chunkdata,
connectionState,
} from '../types';
import deleteAPI from '../services/DeleteFiles';
Expand Down Expand Up @@ -44,6 +45,8 @@ import retry from '../services/retry';
import { showErrorToast, showNormalToast, showSuccessToast } from '../utils/toasts';
import { useMessageContext } from '../context/UserMessages';
import PostProcessingToast from './Popups/GraphEnhancementDialog/PostProcessingCheckList/PostProcessingToast';
import { getChunkText } from '../services/getChunkText';
import ChunkPopUp from './Popups/ChunkPopUp';

const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal'));
const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog'));
Expand All @@ -70,6 +73,7 @@ const Content: React.FC<ContentProps> = ({
});
const [openGraphView, setOpenGraphView] = useState<boolean>(false);
const [inspectedName, setInspectedName] = useState<string>('');
const [documentName, setDocumentName] = useState<string>('');
const {
setUserCredentials,
userCredentials,
Expand All @@ -85,6 +89,12 @@ const Content: React.FC<ContentProps> = ({
const [retryFile, setRetryFile] = useState<string>('');
const [retryLoading, setRetryLoading] = useState<boolean>(false);
const [showRetryPopup, toggleRetryPopup] = useReducer((state) => !state, false);
// Visibility of the chunk-text dialog; the reducer flips the flag on every dispatch.
const [showChunkPopup, toggleChunkPopup] = useReducer((state) => !state, false);
// Loading flag for chunk requests; also a toggle, so dispatches must come in pairs.
const [chunksLoading, toggleChunksLoading] = useReducer((state) => !state, false);
// Page of chunks currently requested. Starts at 0; the chunk-fetch effect only runs once it is >= 1.
const [currentPage, setCurrentPage] = useState<number>(0);
// Total number of pages for the current document; null until a response has provided it.
const [totalPageCount, setTotalPageCount] = useState<number | null>(null);
// Chunks of the page most recently returned by the backend.
const [textChunks, setTextChunks] = useState<chunkdata[]>([]);

const [alertStateForRetry, setAlertStateForRetry] = useState<BannerAlertProps>({
showAlert: false,
alertType: 'neutral',
Expand Down Expand Up @@ -122,7 +132,12 @@ const Content: React.FC<ContentProps> = ({
}
);
const childRef = useRef<ChildRef>(null);

// Pagination callbacks handed to the chunk dialog: each moves the current
// page by one using a functional state update.
const incrementPage = () => setCurrentPage((page) => page + 1);
const decrementPage = () => setCurrentPage((page) => page - 1);
useEffect(() => {
if (!init && !searchParams.has('connectURL')) {
let session = localStorage.getItem('neo4j.connection');
Expand All @@ -149,7 +164,13 @@ const Content: React.FC<ContentProps> = ({
setOpenConnection((prev) => ({ ...prev, openPopUp: true }));
}
}, []);

// Fetch the chunks for the selected document whenever the page or the
// document changes; nothing is fetched until a page >= 1 has been set.
useEffect(() => {
  if (!(currentPage >= 1)) {
    return;
  }
  const loadCurrentPage = async () => {
    await getChunks(documentName, currentPage);
  };
  loadCurrentPage();
}, [currentPage, documentName]);
useEffect(() => {
setFilesData((prevfiles) => {
return prevfiles.map((curfile) => {
Expand Down Expand Up @@ -251,7 +272,15 @@ const Content: React.FC<ContentProps> = ({
setModel(selectedOption?.value);
}
};

/**
 * Loads one page of chunks for the given document and stores it in state.
 *
 * @param name   document name sent to the chunk-text service
 * @param pageNo page number to request
 */
const getChunks = async (name: string, pageNo: number) => {
  toggleChunksLoading();
  try {
    const response = await getChunkText(userCredentials as UserCredentials, name, pageNo);
    setTextChunks(response.data.data.pageitems);
    // The page count is only stored once per document (it is reset to null
    // when another document is opened).
    if (!totalPageCount) {
      setTotalPageCount(response.data.data.total_pages);
    }
  } finally {
    // The loading flag is a toggle, so it must be flipped back even when the
    // request fails; otherwise the dialog would stay on the loader.
    toggleChunksLoading();
  }
};
const extractData = async (uid: string, isselectedRows = false, filesTobeProcess: CustomFile[]) => {
if (!isselectedRows) {
const fileItem = filesData.find((f) => f.id == uid);
Expand Down Expand Up @@ -497,7 +526,7 @@ const Content: React.FC<ContentProps> = ({
}
};

function processWaitingFilesOnRefresh() {
const processWaitingFilesOnRefresh = () => {
let data = [];
const processingFilesCount = filesData.filter((f) => f.status === 'Processing').length;

Expand All @@ -517,7 +546,7 @@ const Content: React.FC<ContentProps> = ({
.filter((f) => f.status === 'New' || f.status == 'Reprocess');
addFilesToQueue(selectedNewFiles as CustomFile[]);
}
}
};

const handleOpenGraphClick = () => {
const bloomUrl = process.env.VITE_BLOOM_URL;
Expand Down Expand Up @@ -771,6 +800,18 @@ const Content: React.FC<ContentProps> = ({
view='contentView'
></DeletePopUp>
)}
{showChunkPopup && (
<ChunkPopUp
chunksLoading={chunksLoading}
onClose={() => toggleChunkPopup()}
showChunkPopup={showChunkPopup}
chunks={textChunks}
incrementPage={incrementPage}
decrementPage={decrementPage}
currentPage={currentPage}
totalPageCount={totalPageCount}
></ChunkPopUp>
)}
{showEnhancementDialog && (
<GraphEnhancementDialog
open={showEnhancementDialog}
Expand Down Expand Up @@ -859,6 +900,17 @@ const Content: React.FC<ContentProps> = ({
setRetryFile(id);
toggleRetryPopup();
}}
onChunkView={async (name) => {
  // Always open the dialog. Previously it only opened when the document
  // differed from the last one viewed, so re-opening the same document
  // after closing the dialog did nothing.
  toggleChunkPopup();
  if (name != documentName) {
    // A different document: reset pagination so the chunk-fetch effect
    // requests page 1 and the page count is re-read from the response.
    setDocumentName(name);
    setTotalPageCount(null);
    setCurrentPage(1);
  }
}}
ref={childRef}
handleGenerateGraph={processWaitingFilesOnRefresh}
></FileTable>
Expand Down
22 changes: 19 additions & 3 deletions frontend/src/components/FileTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ import { SourceNode, CustomFile, FileTableProps, UserCredentials, statusupdate,
import { useCredentials } from '../context/UserCredentials';
import {
ArrowPathIconSolid,
ClipboardDocumentIconOutline,
ClipboardDocumentIconSolid,
MagnifyingGlassCircleIconSolid,
DocumentTextIconSolid,
} from '@neo4j-ndl/react/icons';
import CustomProgressBar from './UI/CustomProgressBar';
import subscribe from '../services/PollingAPI';
Expand All @@ -56,7 +57,7 @@ import { ThemeWrapperContext } from '../context/ThemeWrapper';
let onlyfortheFirstRender = true;

const FileTable = forwardRef<ChildRef, FileTableProps>((props, ref) => {
const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry } = props;
const { isExpanded, connectionStatus, setConnectionStatus, onInspect, onRetry, onChunkView } = props;
const { filesData, setFilesData, model, rowSelection, setRowSelection, setSelectedRows, setProcessedCount, queue } =
useFileContext();
const { userCredentials, isReadOnlyUser } = useCredentials();
Expand Down Expand Up @@ -527,10 +528,25 @@ const FileTable = forwardRef<ChildRef, FileTableProps>((props, ref) => {
handleCopy(copied);
}}
>
<ClipboardDocumentIconOutline className={`${copyRow} ? 'cursor-wait': 'cursor`} />
<ClipboardDocumentIconSolid className={`${copyRow} ? 'cursor-wait': 'cursor`} />
</IconButtonWithToolTip>
<IconButtonWithToolTip
onClick={() => {
onChunkView(info?.row?.original?.name as string);
}}
clean
placement='left'
label='chunktextaction'
text='View Chunks'
size='large'
disabled={info.getValue() === 'Uploading'}
>
<DocumentTextIconSolid />
</IconButtonWithToolTip>
</>
),
size: 300,
minSize: 180,
header: () => <span>Actions</span>,
footer: (info) => info.column.id,
}),
Expand Down
72 changes: 72 additions & 0 deletions frontend/src/components/Popups/ChunkPopUp/index.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import { Dialog, Typography, Flex, IconButton } from '@neo4j-ndl/react';
import { ArrowLeftIconOutline, ArrowRightIconOutline } from '@neo4j-ndl/react/icons';
import { chunkdata } from '../../../types';
import Loader from '../../../utils/Loader';
import { useMemo } from 'react';

/**
 * Dialog that lists the text chunks of a document one page at a time.
 *
 * Props:
 * - showChunkPopup: whether the dialog is open
 * - chunks: chunks of the current page
 * - onClose: called when the dialog is dismissed
 * - chunksLoading: shows a loader instead of the list while true
 * - incrementPage / decrementPage: move to the next / previous page
 * - currentPage: page currently shown
 * - totalPageCount: number of pages; the pager is hidden unless it is > 1
 */
const ChunkPopUp = ({
  showChunkPopup,
  chunks,
  onClose,
  chunksLoading,
  incrementPage,
  decrementPage,
  currentPage,
  totalPageCount,
}: {
  showChunkPopup: boolean;
  chunks: chunkdata[];
  onClose: () => void;
  chunksLoading: boolean;
  incrementPage: () => void;
  decrementPage: () => void;
  currentPage: number | null;
  totalPageCount: number | null;
}) => {
  const sortedChunksData = useMemo(() => {
    // Sort a copy: Array.prototype.sort works in place and `chunks` is the
    // parent's state array, which must not be mutated.
    return [...chunks].sort((a, b) => a.position - b.position);
  }, [chunks]);
  return (
    <Dialog open={showChunkPopup} onClose={onClose}>
      <Dialog.Header>Text Chunks</Dialog.Header>
      <Dialog.Content>
        {chunksLoading ? (
          <Loader title='loading...'></Loader>
        ) : (
          <ol className='max-h-80 overflow-y-auto'>
            {sortedChunksData.map((c, idx) => (
              <li key={`${idx}${c.position}`} className='flex flex-row gap-2'>
                <Flex flexDirection='column' gap='1'>
                  <Flex flexDirection='row'>
                    <Typography variant='label'>Position :</Typography>
                    <Typography variant='subheading-medium'>{c.position}</Typography>
                  </Flex>
                  {c.pagenumber ? (
                    <Flex flexDirection='row'>
                      <Typography variant='label'>Page No :</Typography>{' '}
                      <Typography variant='subheading-small'>{c.pagenumber}</Typography>
                    </Flex>
                  ) : null}
                  <Typography variant='body-medium'>{c.text}</Typography>
                </Flex>
              </li>
            ))}
          </ol>
        )}
      </Dialog.Content>
      {totalPageCount != null && totalPageCount > 1 && (
        <Dialog.Actions className='flex !justify-center items-center'>
          <Flex flexDirection='row'>
            {/* Paging is disabled while a page is loading so requests cannot overlap. */}
            <IconButton disabled={chunksLoading || currentPage === 1} onClick={decrementPage}>
              <ArrowLeftIconOutline />
            </IconButton>
            <IconButton disabled={chunksLoading || currentPage === totalPageCount} onClick={incrementPage}>
              <ArrowRightIconOutline />
            </IconButton>
          </Flex>
        </Dialog.Actions>
      )}
    </Dialog>
  );
};
export default ChunkPopUp;
Loading