
Commit

[evaluation] fix: Remove "complex" type hints for AsyncEvaluators (#37964)

* fix: Remove "complex" type hints from AsyncEvaluators

    These type hints introduced a regression: promptflow rejected the evaluators
    because their signatures used types that promptflow doesn't support. A minimal
    sketch of the resulting pattern is shown below, just before the diff.

* style: Run isort
kdestin authored Oct 22, 2024
1 parent 3a43719 commit 0e400d0
Showing 17 changed files with 23 additions and 28 deletions.
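To make the change concrete, here is a minimal sketch of the pattern the diff below converges on, under stated assumptions: _AsyncExampleEvaluator and ExampleEvaluator are hypothetical stand-ins for the real evaluator pairs (such as _AsyncSimilarityEvaluator / SimilarityEvaluator), and validate_model_config is reduced to a placeholder. The point is only that the inner async evaluator now accepts a plain dict rather than Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration], and validation moves inside it, so promptflow never inspects a public signature with types it doesn't support.

# Hypothetical sketch of the pattern after this commit; the names below are
# illustrative and not copied from the SDK.


def validate_model_config(model_config: dict) -> dict:
    # Placeholder for azure.ai.evaluation._common.utils.validate_model_config,
    # which checks the dict against the supported configuration shapes.
    return dict(model_config)


class _AsyncExampleEvaluator:
    # Before the fix, the signature was roughly:
    #   def __init__(self, model_config: Union[AzureOpenAIModelConfiguration,
    #                                          OpenAIModelConfiguration]): ...
    # and promptflow rejected evaluators whose signatures used such types.
    def __init__(self, model_config: dict):
        # Validation now happens here rather than in the public wrapper.
        self._model_config = validate_model_config(model_config)

    async def __call__(self, *, query: str, response: str, **kwargs) -> dict:
        return {"score": 1.0}  # placeholder result


class ExampleEvaluator:
    """Public wrapper whose signature keeps only promptflow-friendly types."""

    def __init__(self, model_config):
        # Pass the raw dict straight through; the async evaluator validates it.
        self._async_evaluator = _AsyncExampleEvaluator(model_config)

From the caller's point of view nothing changes: the public evaluator is still constructed with a plain configuration dict, exactly as the wrappers in the diff below do.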
@@ -14,8 +14,8 @@
 from promptflow.entities import Run
 
 from azure.ai.evaluation._common.math import list_sum
-from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._common.utils import validate_azure_ai_project
+from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 
 from .._constants import (
     CONTENT_SAFETY_DEFECT_RATE_THRESHOLD_DEFAULT,
@@ -1,7 +1,7 @@
 # ---------------------------------------------------------
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
-from typing import Dict, Optional, Union, TypeVar
+from typing import Dict, Optional, TypeVar, Union
 
 from typing_extensions import override
 
@@ -2,13 +2,13 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # ---------------------------------------------------------
 from concurrent.futures import as_completed
-from typing import Callable, Dict, List, Union, Optional
-from typing_extensions import override
+from typing import Callable, Dict, List, Optional, Union
 
 from promptflow.tracing import ThreadPoolExecutorWithContext as ThreadPoolExecutor
+from typing_extensions import override
 
-from azure.ai.evaluation._evaluators._common import EvaluatorBase
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._evaluators._common import EvaluatorBase
 
 from ._hate_unfairness import HateUnfairnessEvaluator
 from ._self_harm import SelfHarmEvaluator
@@ -5,8 +5,8 @@
 
 from typing_extensions import override
 
-from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 
 
@@ -5,8 +5,8 @@
 
 from typing_extensions import override
 
-from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 
 
@@ -5,8 +5,8 @@
 
 from typing_extensions import override
 
-from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 
 
@@ -5,8 +5,8 @@
 
 from typing_extensions import override
 
-from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 
 
@@ -3,8 +3,8 @@
 # ---------------------------------------------------------
 from typing_extensions import override
 
-from azure.ai.evaluation._common.constants import _InternalEvaluationMetrics
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.constants import _InternalEvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 
 
@@ -6,8 +6,8 @@
 
 from typing_extensions import override
 
-from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 
 
@@ -7,13 +7,10 @@
 import math
 import os
 import re
-from typing import Union
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
 from promptflow.core import AsyncPrompty
 
-from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
-
 from ..._common.math import list_mean_nan_safe
 from ..._common.utils import construct_prompty_model_config, validate_model_config
 
@@ -31,9 +28,9 @@ class _AsyncRetrievalScoreEvaluator:
     _LLM_CALL_TIMEOUT = 600
     _DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
 
-    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
+    def __init__(self, model_config: dict):
         prompty_model_config = construct_prompty_model_config(
-            model_config,
+            validate_model_config(model_config),
             self._DEFAULT_OPEN_API_VERSION,
             USER_AGENT,
         )
@@ -138,7 +135,7 @@ class RetrievalEvaluator:
     """
 
     def __init__(self, model_config):
-        self._async_evaluator = _AsyncRetrievalScoreEvaluator(validate_model_config(model_config))
+        self._async_evaluator = _AsyncRetrievalScoreEvaluator(model_config)
 
     def __call__(self, *, conversation, **kwargs):
         """Evaluates retrieval score chat scenario.
@@ -4,8 +4,8 @@
 from enum import Enum
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
-from azure.ai.evaluation._vendor.rouge_score import rouge_scorer
 
+from azure.ai.evaluation._vendor.rouge_score import rouge_scorer
 from azure.core import CaseInsensitiveEnumMeta
 
 
@@ -5,13 +5,11 @@
 import math
 import os
 import re
-from typing import Union
 
 from promptflow._utils.async_utils import async_run_allowing_running_loop
 from promptflow.core import AsyncPrompty
 
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
-from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 from ..._common.utils import construct_prompty_model_config, validate_model_config
 
@@ -27,9 +25,9 @@ class _AsyncSimilarityEvaluator:
     _LLM_CALL_TIMEOUT = 600
     _DEFAULT_OPEN_API_VERSION = "2024-02-15-preview"
 
-    def __init__(self, model_config: Union[AzureOpenAIModelConfiguration, OpenAIModelConfiguration]):
+    def __init__(self, model_config: dict):
         prompty_model_config = construct_prompty_model_config(
-            model_config,
+            validate_model_config(model_config),
             self._DEFAULT_OPEN_API_VERSION,
             USER_AGENT,
         )
@@ -108,7 +106,7 @@ class SimilarityEvaluator:
     """
 
     def __init__(self, model_config):
-        self._async_evaluator = _AsyncSimilarityEvaluator(validate_model_config(model_config))
+        self._async_evaluator = _AsyncSimilarityEvaluator(model_config)
 
     def __call__(self, *, query: str, response: str, ground_truth: str, **kwargs):
         """
@@ -6,8 +6,8 @@
 
 from typing_extensions import override
 
-from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.constants import EvaluationMetrics
 from azure.ai.evaluation._evaluators._common import RaiServiceEvaluatorBase
 
 logger = logging.getLogger(__name__)
@@ -10,8 +10,8 @@
 
 from tqdm import tqdm
 
-from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation._http_utils import get_async_http_client
 from azure.ai.evaluation.simulator import AdversarialScenario
@@ -7,8 +7,8 @@
 from random import randint
 from typing import Callable, Optional, cast
 
-from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation.simulator import AdversarialScenario
 from azure.core.credentials import TokenCredential
@@ -6,8 +6,8 @@
 import logging
 from typing import Callable, cast
 
-from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.utils import validate_azure_ai_project
 from azure.ai.evaluation._exceptions import ErrorBlame, ErrorCategory, ErrorTarget, EvaluationException
 from azure.ai.evaluation.simulator import AdversarialScenario
 from azure.core.credentials import TokenCredential
@@ -14,8 +14,8 @@
 from promptflow.core import AsyncPrompty
 from tqdm import tqdm
 
-from azure.ai.evaluation._common.utils import construct_prompty_model_config
 from azure.ai.evaluation._common._experimental import experimental
+from azure.ai.evaluation._common.utils import construct_prompty_model_config
 from azure.ai.evaluation._model_configurations import AzureOpenAIModelConfiguration, OpenAIModelConfiguration
 
 from .._exceptions import ErrorBlame, ErrorCategory, EvaluationException
