From 0946893fe904cab1e89de2a38c4421e38d508608 Mon Sep 17 00:00:00 2001 From: bnodir <145948028+bnodir@users.noreply.github.com> Date: Thu, 24 Oct 2024 03:47:22 +0900 Subject: [PATCH] Feature: include category (#2021) * Add include category feature - Modified approach.py to include category logic - Updated models.ts with category types - Added translations for category("All") in en, es, fr, and ja locales - Updated Ask.tsx and Chat.tsx to handle category - Updated data ingestion documentation * Run pre-commit hooks * Applied changes after review of doc * Update app/frontend/src/locales/es/translation.json Co-authored-by: Alfredo Deza * Update app/frontend/src/locales/es/translation.json Co-authored-by: Alfredo Deza * Update app/frontend/src/locales/fr/translation.json Co-authored-by: Wassim Chegham * Strings for pt-br * Accepting 'Todas' for 'All' in Spanish --------- Co-authored-by: Pamela Fox Co-authored-by: Alfredo Deza Co-authored-by: Wassim Chegham --- .vscode/tasks.json | 2 +- SECURITY.md | 2 +- app/backend/approaches/approach.py | 3 ++ app/frontend/src/api/models.ts | 1 + app/frontend/src/locales/en/translation.json | 6 +++ app/frontend/src/locales/es/translation.json | 8 +++- app/frontend/src/locales/fr/translation.json | 6 +++ app/frontend/src/locales/ja/translation.json | 5 +++ .../src/locales/ptBR/translation.json | 8 +++- app/frontend/src/pages/ask/Ask.tsx | 39 ++++++++++++++++++- app/frontend/src/pages/chat/Chat.tsx | 34 +++++++++++++++- docs/data_ingestion.md | 7 ++++ infra/core/host/container-app.bicep | 4 +- infra/core/networking/private-dns-zones.bicep | 2 +- infra/core/networking/private-endpoint.bicep | 2 +- infra/core/networking/vnet.bicep | 2 +- infra/core/search/search-services.bicep | 2 +- infra/private-endpoints.bicep | 4 +- 18 files changed, 123 insertions(+), 14 deletions(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 1ca7d896d4..bc4a0691c9 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -32,4 +32,4 @@ "command": "azure-dev.commands.getDotEnvFilePath" } ] -} \ No newline at end of file +} diff --git a/SECURITY.md b/SECURITY.md index 8d61b620f5..388e9ad471 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -38,4 +38,4 @@ We prefer all communications to be in English. Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/msrc/cvd). - \ No newline at end of file + diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py index ad81a0a7b9..ab445738b1 100644 --- a/app/backend/approaches/approach.py +++ b/app/backend/approaches/approach.py @@ -123,9 +123,12 @@ def __init__( self.vision_token_provider = vision_token_provider def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Optional[str]: + include_category = overrides.get("include_category") exclude_category = overrides.get("exclude_category") security_filter = self.auth_helper.build_security_filters(overrides, auth_claims) filters = [] + if include_category: + filters.append("category eq '{}'".format(include_category.replace("'", "''"))) if exclude_category: filters.append("category ne '{}'".format(exclude_category.replace("'", "''"))) if security_filter: diff --git a/app/frontend/src/api/models.ts b/app/frontend/src/api/models.ts index 1829ab5312..633af8bd3f 100644 --- a/app/frontend/src/api/models.ts +++ b/app/frontend/src/api/models.ts @@ -20,6 +20,7 @@ export type ChatAppRequestOverrides = { retrieval_mode?: RetrievalMode; semantic_ranker?: boolean; semantic_captions?: boolean; + include_category?: string; exclude_category?: string; seed?: number; top?: number; diff --git a/app/frontend/src/locales/en/translation.json b/app/frontend/src/locales/en/translation.json index 55b7bd372b..4369556006 100644 --- a/app/frontend/src/locales/en/translation.json +++ b/app/frontend/src/locales/en/translation.json @@ -81,6 +81,10 @@ "minimumSearchScore": "Minimum search score", "minimumRerankerScore": "Minimum reranker score", "retrieveCount": "Retrieve this many search results:", + "includeCategory": "Include category", + "includeCategoryOptions": { + "all": "All" + }, "excludeCategory": "Exclude category", "useSemanticRanker": "Use semantic ranker for retrieval", "useSemanticCaptions": "Use semantic captions", @@ -127,6 +131,8 @@ "Sets a minimum score for search results coming back from the semantic reranker. The score always ranges between 0-4. The higher the score, the more semantically relevant the result is to the question.", "retrieveNumber": "Sets the number of search results to retrieve from Azure AI search. More results may increase the likelihood of finding the correct answer, but may lead to the model getting 'lost in the middle'.", + "includeCategory": + "Specifies a category to include in the search results. There are no categories used in the default data set.", "excludeCategory": "Specifies a category to exclude from the search results. There are no categories used in the default data set.", "useSemanticReranker": diff --git a/app/frontend/src/locales/es/translation.json b/app/frontend/src/locales/es/translation.json index 382876c9fa..303b9d32a5 100644 --- a/app/frontend/src/locales/es/translation.json +++ b/app/frontend/src/locales/es/translation.json @@ -80,7 +80,11 @@ "seed": "Seed", "minimumSearchScore": "Puntaje mínimo de búsqueda", "minimumRerankerScore": "Puntaje mínimo de re-clasificación", - "retrieveCount": "Obtén éste número resultados de búsqueda:", + "retrieveCount": "Obtén éste número de resultados de la búsqueda:", + "includeCategory": "Incluir categoría", + "includeCategoryOptions": { + "all": "Todas" + }, "excludeCategory": "Excluir categoría", "useSemanticRanker": "Usar clasificador semántico para la recuperación", "useSemanticCaptions": "Usar subtítulos semánticos", @@ -128,6 +132,8 @@ "Establece una puntuación mínima para los resultados de búsqueda que vuelven del re-clasificador semántico. La puntuación siempre varía entre 0-4. Cuanto mayor es la puntuación, más relevante es semánticamente el resultado a la pregunta.", "retrieveNumber": "Establece el número de resultados de búsqueda para recuperar de Azure AI search. Más resultados pueden aumentar la probabilidad de encontrar la respuesta correcta, pero pueden provocar que el modelo se 'pierda en el medio'.", + "includeCategory": + "Especifica una categoría para incluir en los resultados de la búsqueda. No se utilizan categorías en el conjunto de datos predeterminado.", "excludeCategory": "Especifica una categoría para excluir de los resultados de búsqueda. No se utilizan categorías en el conjunto de datos predeterminado.", "useSemanticReranker": diff --git a/app/frontend/src/locales/fr/translation.json b/app/frontend/src/locales/fr/translation.json index 9bd579e239..28363da398 100644 --- a/app/frontend/src/locales/fr/translation.json +++ b/app/frontend/src/locales/fr/translation.json @@ -81,6 +81,10 @@ "minimumSearchScore": "Score de recherche minimum", "minimumRerankerScore": "Score minimum du reclasseur sémantique", "retrieveCount": "Récupérer ce nombre de résultats de recherche :", + "includeCategory": "Inclure la catégorie", + "includeCategoryOptions": { + "all": "Toutes" + }, "excludeCategory": "Exclure la catégorie", "useSemanticRanker": "Utiliser le reclasseur sémantique", "useSemanticCaptions": "Utiliser les titres sémantiques", @@ -128,6 +132,8 @@ "Définit un score minimum pour les résultats de recherche provenant du reranker sémantique. Le score varie toujours entre 0 et 4. Plus le score est élevé, plus le résultat est sémantiquement pertinent par rapport à la question.", "retrieveNumber": "Définit le nombre de résultats de recherche à récupérer d'Azure AI Search. Plus de résultats peuvent augmenter la probabilité de trouver la bonne réponse, mais peuvent amener le modèle à se 'perdre au milieu'.", + "includeCategory": + "Spécifie une catégorie à inclure dans les résultats de recherche. Il n'y a aucune catégorie utilisée dans l'ensemble de données par défaut.", "excludeCategory": "Spécifie une catégorie à exclure des résultats de recherche. Il n'y a aucune catégorie utilisée dans l'ensemble de données par défaut.", "useSemanticReranker": diff --git a/app/frontend/src/locales/ja/translation.json b/app/frontend/src/locales/ja/translation.json index 73a5599f37..b6b82fa8bd 100644 --- a/app/frontend/src/locales/ja/translation.json +++ b/app/frontend/src/locales/ja/translation.json @@ -81,6 +81,10 @@ "minimumSearchScore": "最小検索スコア", "minimumRerankerScore": "最小リランキング・スコア", "retrieveCount": "ここで指定する検索結果数を取得:", + "includeCategory": "カテゴリを指定", + "includeCategoryOptions": { + "all": "全て" + }, "excludeCategory": "カテゴリを除外", "useSemanticRanker": "取得にセマンティック・ランカーを使用", "useSemanticCaptions": "セマンティック・キャプションを使用", @@ -127,6 +131,7 @@ "セマンティック・リランカーから返される検索結果の最小スコアを設定します。スコアの値は0から4の範囲で変更できます。スコアの値が大きいほど、質問に対する結果の意味的な関連性が高まります。", "retrieveNumber": "Azure AI Searchの検索結果から取得する数を設定します。結果が多ければ多いほど、正しい答えを見つける可能性は高まるかもしれませんが、モデルが「途中で迷子になる」可能性もあります。", + "includeCategory": "検索結果に含めるカテゴリを指定します。デフォルトのデータセットはカテゴリを使用していません。", "excludeCategory": "検索結果から除外するカテゴリを指定します。デフォルトのデータセットはカテゴリを使用していません。", "useSemanticReranker": "Azure AI Searchのセマンティック・ランカーを有効にします(ユーザーのクエリに対するセマンティック類似性に基づいて検索結果をリランク付けするモデル)。", diff --git a/app/frontend/src/locales/ptBR/translation.json b/app/frontend/src/locales/ptBR/translation.json index 30746ab614..8f4180d767 100644 --- a/app/frontend/src/locales/ptBR/translation.json +++ b/app/frontend/src/locales/ptBR/translation.json @@ -81,6 +81,10 @@ "minimumSearchScore": "Pontuação mínima de pesquisa", "minimumRerankerScore": "Pontuação mínima de reclassificação", "retrieveCount": "Recuperar esta quantidade de resultados de pesquisa:", + "includeCategory": "Incluir categoria", + "includeCategoryOptions": { + "all": "Todos" + }, "excludeCategory": "Excluir categoria", "useSemanticRanker": "Usar rankeador semântico para recuperação", "useSemanticCaptions": "Usar legendas semânticas", @@ -90,7 +94,7 @@ "label": "Entradas do modelo de visão GPT", "options": { "textAndImages": "Imagens e texto", - "images": "", + "images": "Imagens", "texts": "Texto" } }, @@ -127,6 +131,8 @@ "Define uma pontuação mínima para os resultados da pesquisa retornados pelo ranker semântico. A pontuação varia de 0 a 4. Quanto maior a pontuação, mais relevante é o resultado em relação à pergunta.", "retrieveNumber": "Define o número de resultados de pesquisa a serem recuperados na pesquisa do Azure AI. Mais resultados podem aumentar a chance de encontrar a resposta correta, mas podem fazer com que o modelo se 'perca'.", + "includeCategory": + "Especifica uma categoria para incluir nos resultados da pesquisa. Não há categorias usadas no conjunto de dados padrão.", "excludeCategory": "Especifica uma categoria para excluir dos resultados da pesquisa. Não há categorias usadas no conjunto de dados padrão.", "useSemanticReranker": diff --git a/app/frontend/src/pages/ask/Ask.tsx b/app/frontend/src/pages/ask/Ask.tsx index d91293267d..d336a4566c 100644 --- a/app/frontend/src/pages/ask/Ask.tsx +++ b/app/frontend/src/pages/ask/Ask.tsx @@ -1,7 +1,18 @@ import { useContext, useEffect, useRef, useState } from "react"; import { useTranslation } from "react-i18next"; import { Helmet } from "react-helmet-async"; -import { Checkbox, Panel, DefaultButton, Spinner, TextField, ICheckboxProps, ITextFieldProps } from "@fluentui/react"; +import { + Checkbox, + Panel, + DefaultButton, + Spinner, + TextField, + ICheckboxProps, + ITextFieldProps, + Dropdown, + IDropdownOption, + IDropdownProps +} from "@fluentui/react"; import { useId } from "@fluentui/react-hooks"; import styles from "./Ask.module.css"; @@ -38,6 +49,7 @@ export function Component(): JSX.Element { const [useSemanticCaptions, setUseSemanticCaptions] = useState(false); const [useGPT4V, setUseGPT4V] = useState(false); const [gpt4vInput, setGPT4VInput] = useState(GPT4VInput.TextAndImages); + const [includeCategory, setIncludeCategory] = useState(""); const [excludeCategory, setExcludeCategory] = useState(""); const [question, setQuestion] = useState(""); const [vectorFieldList, setVectorFieldList] = useState([VectorFieldOptions.Embedding, VectorFieldOptions.ImageEmbedding]); @@ -120,6 +132,7 @@ export function Component(): JSX.Element { prompt_template: promptTemplate.length === 0 ? undefined : promptTemplate, prompt_template_prefix: promptTemplatePrefix.length === 0 ? undefined : promptTemplatePrefix, prompt_template_suffix: promptTemplateSuffix.length === 0 ? undefined : promptTemplateSuffix, + include_category: includeCategory.length === 0 ? undefined : includeCategory, exclude_category: excludeCategory.length === 0 ? undefined : excludeCategory, top: retrieveCount, temperature: temperature, @@ -181,6 +194,10 @@ export function Component(): JSX.Element { setUseSemanticCaptions(!!checked); }; + const onIncludeCategoryChanged = (_ev?: React.FormEvent, option?: IDropdownOption) => { + setIncludeCategory((option?.key as string) || ""); + }; + const onExcludeCategoryChanged = (_ev?: React.FormEvent, newValue?: string) => { setExcludeCategory(newValue || ""); }; @@ -228,6 +245,8 @@ export function Component(): JSX.Element { const rerankerScoreFieldId = useId("rerankerScoreField"); const retrieveCountId = useId("retrieveCount"); const retrieveCountFieldId = useId("retrieveCountField"); + const includeCategoryId = useId("includeCategory"); + const includeCategoryFieldId = useId("includeCategoryField"); const excludeCategoryId = useId("excludeCategory"); const excludeCategoryFieldId = useId("excludeCategoryField"); const semanticRankerId = useId("semanticRanker"); @@ -407,6 +426,24 @@ export function Component(): JSX.Element { )} /> + ( + + )} + /> + { const [useSemanticRanker, setUseSemanticRanker] = useState(true); const [shouldStream, setShouldStream] = useState(true); const [useSemanticCaptions, setUseSemanticCaptions] = useState(false); + const [includeCategory, setIncludeCategory] = useState(""); const [excludeCategory, setExcludeCategory] = useState(""); const [useSuggestFollowupQuestions, setUseSuggestFollowupQuestions] = useState(false); const [vectorFieldList, setVectorFieldList] = useState([VectorFieldOptions.Embedding]); @@ -184,6 +185,7 @@ const Chat = () => { context: { overrides: { prompt_template: promptTemplate.length === 0 ? undefined : promptTemplate, + include_category: includeCategory.length === 0 ? undefined : includeCategory, exclude_category: excludeCategory.length === 0 ? undefined : excludeCategory, top: retrieveCount, temperature: temperature, @@ -291,6 +293,10 @@ const Chat = () => { setShouldStream(!!checked); }; + const onIncludeCategoryChanged = (_ev?: React.FormEvent, option?: IDropdownOption) => { + setIncludeCategory((option?.key as string) || ""); + }; + const onExcludeCategoryChanged = (_ev?: React.FormEvent, newValue?: string) => { setExcludeCategory(newValue || ""); }; @@ -345,6 +351,8 @@ const Chat = () => { const rerankerScoreFieldId = useId("rerankerScoreField"); const retrieveCountId = useId("retrieveCount"); const retrieveCountFieldId = useId("retrieveCountField"); + const includeCategoryId = useId("includeCategory"); + const includeCategoryFieldId = useId("includeCategoryField"); const excludeCategoryId = useId("excludeCategory"); const excludeCategoryFieldId = useId("excludeCategoryField"); const semanticRankerId = useId("semanticRanker"); @@ -607,6 +615,30 @@ const Chat = () => { )} /> + ( + + )} + /> +