From 0e400d0927df32ecf3aec5fb59c9ccbdab46e35b Mon Sep 17 00:00:00 2001 From: kdestin <101366538+kdestin@users.noreply.github.com> Date: Tue, 22 Oct 2024 17:11:46 -0400 Subject: [PATCH] [evaluation] fix: Remove "complex" type hints for AsyncEvaluators (#37964) * fix: Remove "complex" type hints from AsyncEvaluators This introduced a regression where promptflow rejected the evaluators because they were of types promptflow doesn't support. * style: Run isort --- .../azure/ai/evaluation/_evaluate/_evaluate.py | 2 +- .../evaluation/_evaluators/_common/_base_rai_svc_eval.py | 2 +- .../_evaluators/_content_safety/_content_safety.py | 6 +++--- .../_evaluators/_content_safety/_hate_unfairness.py | 2 +- .../evaluation/_evaluators/_content_safety/_self_harm.py | 2 +- .../ai/evaluation/_evaluators/_content_safety/_sexual.py | 2 +- .../evaluation/_evaluators/_content_safety/_violence.py | 2 +- .../azure/ai/evaluation/_evaluators/_eci/_eci.py | 2 +- .../_protected_material/_protected_material.py | 2 +- .../ai/evaluation/_evaluators/_retrieval/_retrieval.py | 9 +++------ .../azure/ai/evaluation/_evaluators/_rouge/_rouge.py | 2 +- .../ai/evaluation/_evaluators/_similarity/_similarity.py | 8 +++----- .../azure/ai/evaluation/_evaluators/_xpia/xpia.py | 2 +- .../ai/evaluation/simulator/_adversarial_simulator.py | 2 +- .../ai/evaluation/simulator/_direct_attack_simulator.py | 2 +- .../evaluation/simulator/_indirect_attack_simulator.py | 2 +- .../azure/ai/evaluation/simulator/_simulator.py | 2 +- 17 files changed, 23 insertions(+), 28 deletions(-) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py index d1b629094ed0..efc434148182 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py @@ -14,8 +14,8 @@ from promptflow.entities import Run from azure.ai.evaluation._common.math import list_sum -from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException from azure.ai.evaluation._common.utils import validate_azure_ai_project +from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException from .._constants import ( CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT, diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py index 6fc46bdc13b1..23c753523f8c 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_common/_base_rai_svc_eval.py @@ -1,7 +1,7 @@ # --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- -from typing import Dict, Optional, Union, TypeVar +from typing import Dict, Optional, TypeVar, Union from typing_extensions import override diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py index 841dd918a8c0..3f56a7488684 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_content_safety.py @@ -2,13 +2,13 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- from concurrent.futures import as_completed -from typing import Callable, Dict, List, Union, Optional -from typing_extensions import override +from typing import Callable, Dict, List, Optional, Union from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor +from typing_extensions import override -from azure.ai.evaluation._evaluators._common import EvaluatorBase from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._evaluators._common import EvaluatorBase from ._hate_unfairness import HateUnfairnessEvaluator from ._self_harm import SelfHarmEvaluator diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py index f38e66daad5f..ae078563dbbd 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_hate_unfairness.py @@ -5,8 +5,8 @@ from typing_extensions import override -from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py index 51fc547a2ee7..ea1f057e2fcf 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_self_harm.py @@ -5,8 +5,8 @@ from typing_extensions import override -from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py index d8cd569c4415..151874535a5e 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_sexual.py @@ -5,8 +5,8 @@ from typing_extensions import override -from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_violence.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_violence.py index 0f1266a36b5a..44a9b3abf0bf 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_violence.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_content_safety/_violence.py @@ -5,8 +5,8 @@ from typing_extensions import override -from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_eci/_eci.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_eci/_eci.py index 6761ccfaedaf..cd2e6853ac29 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_eci/_eci.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_eci/_eci.py @@ -3,8 +3,8 @@ # --------------------------------------------------------- from typing_extensions import override -from azure.ai.evaluation._common.constants import _InternalEvaluationMetrics from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.constants import _InternalEvaluationMetrics from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py index dfb682a04173..0ead00125c3d 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_protected_material/_protected_material.py @@ -6,8 +6,8 @@ from typing_extensions import override -from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py index 7694af92431b..9bfbf4137fa1 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_retrieval/_retrieval.py @@ -7,13 +7,10 @@ import math import os import re -from typing import Union from promptflow._utils.async_utils import async_run_allowing_running_loop from promptflow.core import AsyncPrompty -from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration - from ..._common.math import list_mean_nan_safe from ..._common.utils import construct_prompty_model_config, validate_model_config @@ -31,9 +28,9 @@ class _AsyncRetrievalScoreEvaluator: _LLM_CALL_TIMEOUT = 600 _DEFAULT_OPEN_API_VERSION = "2024-02-15-preview" - def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]): + def __init__(self, model_config: dict): prompty_model_config = construct_prompty_model_config( - model_config, + validate_model_config(model_config), self._DEFAULT_OPEN_API_VERSION, USER_AGENT, ) @@ -138,7 +135,7 @@ class RetrievalEvaluator: """ def __init__(self, model_config): - self._async_evaluator = _AsyncRetrievalScoreEvaluator(validate_model_config(model_config)) + self._async_evaluator = _AsyncRetrievalScoreEvaluator(model_config) def __call__(self, *, conversation, **kwargs): """Evaluates retrieval score chat scenario. diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_rouge/_rouge.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_rouge/_rouge.py index 51ad94ee4897..6b7a84d5db92 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_rouge/_rouge.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_rouge/_rouge.py @@ -4,8 +4,8 @@ from enum import Enum from promptflow._utils.async_utils import async_run_allowing_running_loop -from azure.ai.evaluation._vendor.rouge_score import rouge_scorer +from azure.ai.evaluation._vendor.rouge_score import rouge_scorer from azure.core import CaseInsensitiveEnumMeta diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_similarity/_similarity.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_similarity/_similarity.py index d3b1646908b6..33a06e971969 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_similarity/_similarity.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_similarity/_similarity.py @@ -5,13 +5,11 @@ import math import os import re -from typing import Union from promptflow._utils.async_utils import async_run_allowing_running_loop from promptflow.core import AsyncPrompty from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException -from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration from ..._common.utils import construct_prompty_model_config, validate_model_config @@ -27,9 +25,9 @@ class _AsyncSimilarityEvaluator: _LLM_CALL_TIMEOUT = 600 _DEFAULT_OPEN_API_VERSION = "2024-02-15-preview" - def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]): + def __init__(self, model_config: dict): prompty_model_config = construct_prompty_model_config( - model_config, + validate_model_config(model_config), self._DEFAULT_OPEN_API_VERSION, USER_AGENT, ) @@ -108,7 +106,7 @@ class SimilarityEvaluator: """ def __init__(self, model_config): - self._async_evaluator = _AsyncSimilarityEvaluator(validate_model_config(model_config)) + self._async_evaluator = _AsyncSimilarityEvaluator(model_config) def __call__(self, *, query: str, response: str, ground_truth: str, **kwargs): """ diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_xpia/xpia.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_xpia/xpia.py index 234bf7099ffa..8db3b777f23c 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_xpia/xpia.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluators/_xpia/xpia.py @@ -6,8 +6,8 @@ from typing_extensions import override -from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.constants import EvaluationMetrics from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase logger = logging.getLogger(__name__) diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py index 132d0b146f43..d96cb4df5cd3 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_adversarial_simulator.py @@ -10,8 +10,8 @@ from tqdm import tqdm -from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException from azure.ai.evaluation._http_utils import get_async_http_client from azure.ai.evaluation.simulator import AdversarialScenario diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py index 11ab4b72566b..6f2369ed3539 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_direct_attack_simulator.py @@ -7,8 +7,8 @@ from random import randint from typing import Callable, Optional, cast -from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException from azure.ai.evaluation.simulator import AdversarialScenario from azure.core.credentials import TokenCredential diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py index 98eebd0b3aa6..231659fb15a8 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_indirect_attack_simulator.py @@ -6,8 +6,8 @@ import logging from typing import Callable, cast -from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.utils import validate_azure_ai_project from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException from azure.ai.evaluation.simulator import AdversarialScenario from azure.core.credentials import TokenCredential diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py index f92e371983f4..82244a274fd6 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/simulator/_simulator.py @@ -14,8 +14,8 @@ from promptflow.core import AsyncPrompty from tqdm import tqdm -from azure.ai.evaluation._common.utils import construct_prompty_model_config from azure.ai.evaluation._common._experimental import experimental +from azure.ai.evaluation._common.utils import construct_prompty_model_config from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration from .._exceptions import ErrorBlame, ErrorCategory, EvaluationException