Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Add tag_kwd parameter to chunk configuration modal #4368 #4414

Merged
merged 2 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions web/src/components/parse-configuration/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ export const showRaptorParseConfiguration = (parserId: string) => {
return !excludedParseMethods.includes(parserId);
};

/** Chunk methods (parser ids) for which the tag items are not shown. */
export const excludedTagParseMethods = ['table', 'knowledge_graph', 'tag'];

/**
 * Decides whether the tag items should be shown for a chunk method.
 *
 * @param parserId - Identifier of the selected chunk method.
 * @returns `true` unless `parserId` is listed in `excludedTagParseMethods`.
 */
export const showTagItems = (parserId: string) => {
  // Visible only when none of the excluded methods matches the given id.
  const isAllowed = excludedTagParseMethods.every(
    (excluded) => excluded !== parserId,
  );
  return isAllowed;
};

// The three types "table", "resume" and "one" do not display this configuration.
const ParseConfiguration = () => {
const form = Form.useFormInstance();
Expand Down
13 changes: 13 additions & 0 deletions web/src/locales/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,19 @@ The above is the content you need to summarize.`,
searchTags: 'Search tags',
tagCloud: 'Cloud',
tagTable: 'Table',
tagSet: 'Tag set',
tagSetTip: `
<p> Selecting 'Tag' knowledge bases helps to tag every chunk. </p>
<p>Queries against those chunks will also be tagged.</p>
<p>This procedure improves the precision of retrieval by adding more information to the dataset, especially when there is a large set of chunks.</p>
<p>Difference between tags and keywords:</p>
<ul>
<li>Tags form a closed set that is defined and managed by the user, while keywords form an open set.</li>
<li>You need to upload tag sets with samples prior to use.</li>
<li>Keywords are generated by an LLM, which is expensive and time-consuming.</li>
</ul>
`,
topnTags: 'Top-N Tags',
},
chunk: {
chunk: 'Chunk',
Expand Down
13 changes: 13 additions & 0 deletions web/src/locales/zh-traditional.ts
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,19 @@ export default {
searchTags: '搜尋標籤',
tagCloud: '雲端',
tagTable: '表',
tagSet: '標籤庫',
topnTags: 'Top-N 標籤',
tagSetTip: `
<p> 選擇「標籤」知識庫有助於標記每個區塊。 </p>
<p>對這些區塊的查詢也將帶有標籤。 </p>
此過程將透過向資料集添加更多資訊來提高檢索精度,特別是當存在大量區塊時。
<p>標籤和關鍵字的差異:</p>
<ul>
<li>標籤是一個閉集,由使用者定義和操作,而關鍵字是一個開集。 </li>
<li>您需要在使用前上傳包含範例的標籤集。 </li>
<li>關鍵字由 LLM 生成,既昂貴又耗時。 </li>
</ul>
`,
},
chunk: {
chunk: '解析塊',
Expand Down
13 changes: 13 additions & 0 deletions web/src/locales/zh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,19 @@ export default {
searchTags: '搜索标签',
tagCloud: '云',
tagTable: '表',
tagSet: '标签库',
topnTags: 'Top-N 标签',
tagSetTip: `
<p> 选择“标签”知识库有助于标记每个块。 </p>
<p>对这些块的查询也将带有标签。 </p>
此过程将通过向数据集添加更多信息来提高检索的准确性,尤其是在存在大量块的情况下。
<p>标签和关键字之间的区别:</p>
<ul>
<li>标签是一个由用户定义和操作的封闭集,而关键字是一个开放集。 </li>
<li>您需要在使用前上传带有样本的标签集。 </li>
<li>关键字由 LLM 生成,这既昂贵又耗时。 </li>
</ul>
`,
},
chunk: {
chunk: '解析块',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ type FieldType = {
/** Props that ChunkCreatingModal accepts in addition to the generic modal props. */
interface kFProps {
// NOTE(review): presumably the id of the document the chunk belongs to; it is not read in the visible part of the component — confirm against the caller.
doc_id: string;
// Id of the chunk to fetch for editing; when it is falsy the component resets the form and keyword/question/tag lists.
chunkId: string | undefined;
// Chunk method of the document; the tag editor section is rendered only when this equals 'tag'.
parserId: string;
}

const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
Expand All @@ -21,32 +22,39 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
hideModal,
onOk,
loading,
parserId,
}) => {
const [form] = Form.useForm();
const [checked, setChecked] = useState(false);
const [keywords, setKeywords] = useState<string[]>([]);
const [question, setQuestion] = useState<string[]>([]);
const [tagKeyWords, setTagKeyWords] = useState<string[]>([]);
const { removeChunk } = useDeleteChunkByIds();
const { data } = useFetchChunk(chunkId);
const { t } = useTranslation();

const isTagParser = parserId === 'tag';

useEffect(() => {
if (data?.code === 0) {
const {
content_with_weight,
important_kwd = [],
available_int,
question_kwd = [],
tag_kwd = [],
} = data.data;
form.setFieldsValue({ content: content_with_weight });
setKeywords(important_kwd);
setQuestion(question_kwd);
setTagKeyWords(tag_kwd);
setChecked(available_int !== 0);
}

if (!chunkId) {
setKeywords([]);
setQuestion([]);
setTagKeyWords([]);
form.setFieldsValue({ content: undefined });
}
}, [data, form, chunkId]);
Expand All @@ -58,6 +66,7 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
content: values.content,
keywords, // keywords
question_kwd: question,
tag_kwd: tagKeyWords,
available_int: checked ? 1 : 0, // available_int
});
} catch (errorInfo) {
Expand Down Expand Up @@ -105,6 +114,12 @@ const ChunkCreatingModal: React.FC<IModalProps<any> & kFProps> = ({
</div>
<EditTag tags={question} setTags={setQuestion} />
</section>
{isTagParser && (
<section className="mt-4">
<p className="mb-2">{t('knowledgeConfiguration.tagName')} </p>
<EditTag tags={tagKeyWords} setTags={setTagKeyWords} />
</section>
)}
{chunkId && (
<section>
<Divider></Divider>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,13 @@ export const useUpdateChunk = () => {
keywords,
available_int,
question_kwd,
tag_kwd,
}: {
content: string;
keywords: string;
available_int: number;
question_kwd: string;
tag_kwd: string;
}) => {
const code = await createChunk({
content_with_weight: content,
Expand All @@ -113,6 +115,7 @@ export const useUpdateChunk = () => {
important_kwd: keywords, // keywords
available_int,
question_kwd,
tag_kwd,
});

if (code === 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ const Chunk = () => {
visible={chunkUpdatingVisible}
loading={chunkUpdatingLoading}
onOk={onChunkUpdatingOk}
parserId={documentInfo.parser_id}
/>
)}
<KnowledgeGraphModal></KnowledgeGraphModal>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ const ParsingActionCell = ({
<Dropdown
menu={{ items: chunkItems }}
trigger={['click']}
disabled={isRunning}
disabled={isRunning || record.parser_id === 'tag'}
>
<Button type="text" className={styles.iconButton}>
<ToolOutlined size={20} />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import styles from './index.less';
import { TagTabs } from './tag-tabs';
import { ImageMap } from './utils';

const { Title, Text } = Typography;
const { Text } = Typography;

const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
const parserList = useSelectParserList();
Expand Down Expand Up @@ -37,15 +37,15 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
<section className={styles.categoryPanelWrapper}>
{imageList.length > 0 ? (
<>
<Title level={5} className={styles.topTitle}>
<h5 className="font-semibold text-base mt-0 mb-1">
{`"${item.title}" ${t('methodTitle')}`}
</Title>
</h5>
<p
dangerouslySetInnerHTML={{
__html: DOMPurify.sanitize(item.description),
}}
></p>
<Title level={5}>{`"${item.title}" ${t('methodExamples')}`}</Title>
<h5 className="font-semibold text-base mt-4 mb-1">{`"${item.title}" ${t('methodExamples')}`}</h5>
<Text>{t('methodExamplesDescription')}</Text>
<Row gutter={[10, 10]} className={styles.imageRow}>
{imageList.map((x) => (
Expand All @@ -58,9 +58,9 @@ const CategoryPanel = ({ chunkMethod }: { chunkMethod: string }) => {
</Col>
))}
</Row>
<Title level={5}>
<h5 className="font-semibold text-base mt-4 mb-1">
{item.title} {t('dialogueExamplesTitle')}
</Title>
</h5>
<Divider></Divider>
</>
) : (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import MaxTokenNumber from '@/components/max-token-number';
import PageRank from '@/components/page-rank';
import ParseConfiguration, {
showRaptorParseConfiguration,
showTagItems,
} from '@/components/parse-configuration';
import { useTranslate } from '@/hooks/common-hooks';
import { useHandleChunkMethodSelectChange } from '@/hooks/logic-hooks';
Expand All @@ -23,6 +24,7 @@ import {
useSubmitKnowledgeConfiguration,
} from './hooks';
import styles from './index.less';
import { TagItems } from './tag-item';

const { Option } = Select;

Expand Down Expand Up @@ -146,6 +148,8 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
{showRaptorParseConfiguration(parserId) && (
<ParseConfiguration></ParseConfiguration>
)}

{showTagItems(parserId) && <TagItems></TagItems>}
</>
);
}}
Expand Down
101 changes: 101 additions & 0 deletions web/src/pages/add-knowledge/components/knowledge-setting/tag-item.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import { useFetchKnowledgeList } from '@/hooks/knowledge-hooks';
import { UserOutlined } from '@ant-design/icons';
import {
Avatar,
Divider,
Flex,
Form,
InputNumber,
Select,
Slider,
Space,
} from 'antd';
import DOMPurify from 'dompurify';
import { useTranslation } from 'react-i18next';

/**
 * Form item that lets the user pick the tag sets stored under
 * `parser_config.tag_kb_ids`.
 *
 * Only knowledge bases whose `parser_id` is `'tag'` are offered as options.
 * The tooltip is rendered from the sanitized, translated `tagSetTip` HTML.
 */
export const TagSetItem = () => {
  const { t } = useTranslation();
  const { list: knowledgeList } = useFetchKnowledgeList(true);

  // Narrow the list down to tag-type knowledge bases first ...
  const tagKnowledgeBases = knowledgeList.filter(
    ({ parser_id }) => parser_id === 'tag',
  );

  // ... then turn each of them into an option for the multi-select.
  const knowledgeOptions = tagKnowledgeBases.map(({ id, name, avatar }) => ({
    label: (
      <Space>
        <Avatar size={20} icon={<UserOutlined />} src={avatar} />
        {name}
      </Space>
    ),
    value: id,
  }));

  // The same translated text serves as validation message and placeholder.
  const selectPrompt = t('chat.knowledgeBasesMessage');

  // The tip is translated HTML, so it is sanitized before being injected.
  const tipMarkup = {
    __html: DOMPurify.sanitize(t('knowledgeConfiguration.tagSetTip')),
  };

  return (
    <Form.Item
      label={t('knowledgeConfiguration.tagSet')}
      name={['parser_config', 'tag_kb_ids']}
      tooltip={<div dangerouslySetInnerHTML={tipMarkup} />}
      rules={[{ message: selectPrompt, type: 'array' }]}
    >
      <Select
        mode="multiple"
        options={knowledgeOptions}
        placeholder={selectPrompt}
      />
    </Form.Item>
  );
};

/**
 * Form item for `parser_config.topn_tags`.
 *
 * A slider and a number input are both bound to the same form field, each
 * limited to the range 1–10; the slider's form item supplies the initial
 * value of 3.
 */
export const TopNTagsItem = () => {
  const { t } = useTranslation();

  // Both controls are registered under the same field path.
  const fieldPath = ['parser_config', 'topn_tags'];
  // Shared bounds for the slider and the number input.
  const bounds = { min: 1, max: 10 };

  return (
    <Form.Item label={t('knowledgeConfiguration.topnTags')}>
      <Flex gap={20} align="center">
        <Flex flex={1}>
          <Form.Item name={fieldPath} noStyle initialValue={3}>
            <Slider {...bounds} style={{ width: '100%' }} />
          </Form.Item>
        </Flex>
        <Form.Item name={fieldPath} noStyle>
          <InputNumber {...bounds} />
        </Form.Item>
      </Flex>
    </Form.Item>
  );
};

/**
 * Tag section of the form: the tag-set selector framed by two dividers,
 * followed by the Top-N tags control, which is rendered only while
 * `parser_config.tag_kb_ids` holds a non-empty array.
 */
export function TagItems() {
  const tagSetField = ['parser_config', 'tag_kb_ids'];

  return (
    <>
      <Divider />
      <TagSetItem />
      {/* Re-evaluated whenever the tag-set field changes. */}
      <Form.Item noStyle dependencies={[tagSetField]}>
        {({ getFieldValue }) => {
          const selected: unknown = getFieldValue(tagSetField);
          const hasTagSets = Array.isArray(selected) && selected.length > 0;

          return hasTagSets && <TopNTagsItem />;
        }}
      </Form.Item>
      <Divider />
    </>
  );
}
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ export function TagTable() {
variant="ghost"
onClick={() => column.toggleSorting(column.getIsSorted() === 'asc')}
>
{t('knowledgeConfiguration.tag')}
{t('knowledgeConfiguration.tagName')}
<ArrowUpDown />
</Button>
);
Expand Down
Loading