Skip to content

label and checkboxes placement changes #675

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,5 @@ google-cloud-cli-469.0.0-linux-x86_64.tar.gz
/backend/src/chunks
/backend/merged_files
/backend/chunks
google-cloud-cli-479.0.0-linux-x86_64.tar.gz
google-cloud-cli-linux-x86_64.tar.gz
.vennv
12 changes: 6 additions & 6 deletions backend/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,20 +241,20 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database
graph = create_graph_database_connection(uri, userName, password, database)
tasks = set(map(str.strip, json.loads(tasks)))

if "update_similarity_graph" in tasks:
if "materialize_text_chunk_similarities" in tasks:
await asyncio.to_thread(update_graph, graph)
josn_obj = {'api_name': 'post_processing/update_similarity_graph', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
josn_obj = {'api_name': 'post_processing/materialize_text_chunk_similarities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
logger.log_struct(josn_obj)
logging.info(f'Updated KNN Graph')
if "create_fulltext_index" in tasks:
if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks:
await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="entities")
await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword")
josn_obj = {'api_name': 'post_processing/create_fulltext_index', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
josn_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
logger.log_struct(josn_obj)
logging.info(f'Full Text index created')
if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "create_entity_embedding" in tasks:
if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "materialize_entity_similarities" in tasks:
await asyncio.to_thread(create_entity_embedding, graph)
josn_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
josn_obj = {'api_name': 'post_processing/materialize_entity_similarities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
logger.log_struct(josn_obj)
logging.info(f'Entity Embeddings created')
return create_api_response('Success', message='All tasks completed successfully')
Expand Down
24 changes: 13 additions & 11 deletions frontend/src/components/FileTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,9 @@ const FileTable = forwardRef<ChildRef, FileTableProps>((props, ref) => {
...Array.from(new Set(filesData.map((f) => f.fileSource))).map((t) => {
return {
title: (
<span className={`${t === fileSourceFilter ? 'n-bg-palette-primary-bg-selected' : ''} p-2`}>{t}</span>
<span className={`${t === fileSourceFilter ? 'n-bg-palette-primary-bg-selected' : ''} p-2`}>
{t}
</span>
),
onClick: () => {
setFileSourceFilter(t as string);
Expand Down Expand Up @@ -579,14 +581,14 @@ const FileTable = forwardRef<ChildRef, FileTableProps>((props, ref) => {
item?.fileSource === 's3 bucket' && localStorage.getItem('accesskey') === item?.awsAccessKeyId
? item?.status
: item?.fileSource === 'local file'
? item?.status
: item?.status === 'Completed' || item.status === 'Failed'
? item?.status
: item?.fileSource == 'Wikipedia' ||
item?.fileSource == 'youtube' ||
item?.fileSource == 'gcs bucket'
? item?.status
: 'N/A',
? item?.status
: item?.status === 'Completed' || item.status === 'Failed'
? item?.status
: item?.fileSource == 'Wikipedia' ||
item?.fileSource == 'youtube' ||
item?.fileSource == 'gcs bucket'
? item?.status
: 'N/A',
model: item?.model ?? model,
id: uuidv4(),
source_url: item?.url != 'None' && item?.url != '' ? item.url : '',
Expand All @@ -599,8 +601,8 @@ const FileTable = forwardRef<ChildRef, FileTableProps>((props, ref) => {
language: item?.language ?? '',
processingProgress:
item?.processed_chunk != undefined &&
item?.total_chunks != undefined &&
!isNaN(Math.floor((item?.processed_chunk / item?.total_chunks) * 100))
item?.total_chunks != undefined &&
!isNaN(Math.floor((item?.processed_chunk / item?.total_chunks) * 100))
? Math.floor((item?.processed_chunk / item?.total_chunks) * 100)
: undefined,
// total_pages: item?.total_pages ?? 0,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { Box, Checkbox, Flex, Typography, useMediaQuery } from '@neo4j-ndl/react';
import { taskParam } from '../../../../utils/Constants';
import { POST_PROCESSING_JOBS } from '../../../../utils/Constants';
import { capitalize } from '../../../../utils/Utils';
import { useFileContext } from '../../../../context/UsersFiles';
import { tokens } from '@neo4j-ndl/base';
Expand All @@ -17,67 +17,33 @@ export default function PostProcessingCheckList() {
These options allow you to fine-tune your knowledge graph for improved performance and deeper analysis
</Typography>
</Flex>
<Flex justifyContent='space-between' flexDirection='column'>
<Flex>
<Typography variant={tablet ? 'subheading-small' : 'subheading-medium'}>
Update Similarity Graph :
</Typography>
<Typography variant={tablet ? 'body-small' : 'body-medium'}>
This option refines the connections between different pieces of information (chunks) within your
knowledge graph. By leveraging a k-nearest neighbor algorithm with a similarity threshold (KNN_MIN_SCORE
of 0.8), this process identifies and links chunks with high semantic similarity. This results in a more
interconnected and insightful knowledge representation, enabling more accurate and relevant search
results.
</Typography>
</Flex>
<Flex>
<Typography variant={tablet ? 'subheading-small' : 'subheading-medium'}>
Create Fulltext Index :
</Typography>
<Typography variant={tablet ? 'body-small' : 'body-medium'}>
This option optimizes search capabilities within your knowledge graph. It rebuilds the full-text index
on database labels, ensuring faster and more efficient retrieval of information. This is particularly
beneficial for large knowledge graphs, as it significantly speeds up keyword-based searches and improves
overall query performance..
</Typography>
</Flex>
<Flex>
<Typography variant={tablet ? 'subheading-small' : 'subheading-medium'}>
Create Entity Embeddings :
</Typography>
<Typography variant={tablet ? 'body-small' : 'body-medium'}>
Enhances entity analysis by generating numerical representations (embeddings) that capture their
semantic meaning. This facilitates tasks like clustering similar entities, identifying duplicates, and
performing similarity-based searches.
</Typography>
</Flex>
<Flex justifyContent='space-between' flexDirection='column' gap='6'>
{POST_PROCESSING_JOBS.map((job, idx) => (
<Flex key={`${job.title}${idx}`}>
<Checkbox
label={
<Typography variant='label'>
{job.title
.split('_')
.map((s) => capitalize(s))
.join(' ')}
</Typography>
}
checked={postProcessingTasks.includes(job.title)}
onChange={(e) => {
if (e.target.checked) {
setPostProcessingTasks((prev) => [...prev, job.title]);
} else {
setPostProcessingTasks((prev) => prev.filter((s) => s !== job.title));
}
}}
></Checkbox>
<Typography variant={tablet ? 'body-small' : 'body-medium'}>{job.description}</Typography>
</Flex>
))}
</Flex>
</Flex>
</div>
<Flex flexDirection={tablet ? 'row' : 'column'}>
{taskParam.map((task, index) => (
<Box key={index}>
<Checkbox
label={
<Typography variant={tablet ? 'subheading-medium' : 'subheading-large'}>
{task
.split('_')
.map((s) => capitalize(s))
.join(' ')}
</Typography>
}
checked={postProcessingTasks.includes(task)}
onChange={(e) => {
if (e.target.checked) {
setPostProcessingTasks((prev) => [...prev, task]);
} else {
setPostProcessingTasks((prev) => prev.filter((s) => s !== task));
}
}}
></Checkbox>
</Box>
))}
</Flex>
</Flex>
);
}
31 changes: 29 additions & 2 deletions frontend/src/utils/Constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ export const defaultLLM = llms?.includes('openai-gpt-4o-mini')
/**
 * Chat modes made available to the application.
 *
 * Taken from the comma-separated `CHAT_MODES` environment value. Entries are trimmed and blank
 * entries are dropped. The built-in list is used when the value is missing, blank, or contains
 * no usable entries.
 *
 * The earlier form compared `process.env?.CHAT_MODES?.trim() != ''`; with the variable unset
 * that comparison is `undefined != ''` (true), so the result was `undefined` instead of the
 * default list.
 */
export const chatModes: string[] = (() => {
  const fallbackModes = ['vector', 'graph', 'graph+vector', 'hybrid', 'hybrid+graph'];
  // NOTE(review): the literal `process.env.CHAT_MODES` access is kept on purpose, in case the
  // build substitutes environment values by static text replacement — confirm against the bundler config.
  const configuredModes = (process.env.CHAT_MODES ?? '')
    .split(',')
    .map((mode) => mode.trim())
    .filter((mode) => mode !== '');
  return configuredModes.length > 0 ? configuredModes : fallbackModes;
})();
/**
 * Parses an integer setting from its raw string form.
 *
 * @param rawValue - The raw setting value; may be missing or empty.
 * @param fallback - Value returned when `rawValue` is missing, empty, or does not start with a
 *   base-10 integer.
 * @returns The parsed integer, or `fallback`.
 */
function parseIntegerSetting(rawValue: string | undefined, fallback: number): number {
  if (!rawValue) {
    return fallback;
  }
  // Explicit radix so values are always read as decimal.
  const parsed = Number.parseInt(rawValue, 10);
  // A non-numeric value would otherwise leak NaN into every consumer of the constant.
  return Number.isNaN(parsed) ? fallback : parsed;
}

/** Value of `CHUNK_SIZE`, defaulting to `1 * 1024 * 1024` when unset or unparsable. */
export const chunkSize = parseIntegerSetting(process.env.CHUNK_SIZE, 1 * 1024 * 1024);
/** Value of `TIME_PER_PAGE`, defaulting to `50` when unset or unparsable. */
export const timeperpage = parseIntegerSetting(process.env.TIME_PER_PAGE, 50);
export const timePerByte = 0.2;
Expand Down Expand Up @@ -160,7 +160,34 @@ export const buttonCaptions = {
ask: 'Ask',
};

/**
 * Post-processing jobs that can be selected by the user.
 *
 * `title` is the task identifier: the checklist UI builds the checkbox label from it (splitting
 * on `_` and capitalizing each word) and stores it in the selected-task list. `description` is
 * the explanatory text shown next to the checkbox.
 */
export const POST_PROCESSING_JOBS: { title: string; description: string }[] = [
  {
    title: 'materialize_text_chunk_similarities',
    description: `This option refines the connections between different pieces of information (chunks) within your
knowledge graph. By leveraging a k-nearest neighbor algorithm with a similarity threshold (KNN_MIN_SCORE
of 0.8), this process identifies and links chunks with high semantic similarity. This results in a more
interconnected and insightful knowledge representation, enabling more accurate and relevant search
results.`,
  },
  {
    title: 'enable_hybrid_search_and_fulltext_search_in_bloom',
    description: `This option optimizes search capabilities within your knowledge graph. It rebuilds the full-text index
on database labels, ensuring faster and more efficient retrieval of information. This is particularly
beneficial for large knowledge graphs, as it significantly speeds up keyword-based searches and improves
overall query performance.`,
  },
  {
    title: 'materialize_entity_similarities',
    description: `Enhances entity analysis by generating numerical representations (embeddings) that capture their
semantic meaning. This facilitates tasks like clustering similar entities, identifying duplicates, and
performing similarity-based searches.`,
  },
];

/**
 * Task identifiers of all post-processing jobs, in display order.
 *
 * Derived from `POST_PROCESSING_JOBS` so the identifier list cannot drift from the job
 * definitions; it must be declared after that constant.
 */
export const taskParam: string[] = POST_PROCESSING_JOBS.map((job) => job.title);

export const nvlOptions: NvlOptions = {
allowDynamicMinZoom: true,
Expand Down
12 changes: 4 additions & 8 deletions frontend/src/utils/Utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,16 +170,14 @@ export const filterData = (
allNodes: ExtendedNode[],
allRelationships: Relationship[],
scheme: Scheme
) => {
) => {
let filteredNodes: ExtendedNode[] = [];
let filteredRelations: Relationship[] = [];
let filteredScheme: Scheme = {};
const entityTypes = Object.keys(scheme).filter((type) => type !== 'Document' && type !== 'Chunk');
if (graphType.includes('DocumentChunk') && !graphType.includes('Entities')) {
// Document + Chunk
filteredNodes = allNodes.filter(
(node) => node.labels.includes('Document') || node.labels.includes('Chunk')
);
filteredNodes = allNodes.filter((node) => node.labels.includes('Document') || node.labels.includes('Chunk'));
const nodeIds = new Set(filteredNodes.map((node) => node.id));
filteredRelations = allRelationships.filter(
(rel) =>
Expand All @@ -190,9 +188,7 @@ export const filterData = (
filteredScheme = { Document: scheme.Document, Chunk: scheme.Chunk };
} else if (graphType.includes('Entities') && !graphType.includes('DocumentChunk')) {
// Only Entity
const entityNodes = allNodes.filter(
(node) => !node.labels.includes('Document') && !node.labels.includes('Chunk')
);
const entityNodes = allNodes.filter((node) => !node.labels.includes('Document') && !node.labels.includes('Chunk'));
filteredNodes = entityNodes ? entityNodes : [];
const nodeIds = new Set(filteredNodes.map((node) => node.id));
filteredRelations = allRelationships.filter(
Expand All @@ -209,7 +205,7 @@ export const filterData = (
filteredScheme = scheme;
}
return { filteredNodes, filteredRelations, filteredScheme };
};
};

export const getDateTime = () => {
const date = new Date();
Expand Down