Users/singankit/model config optional (#34812)
* Adding tests to capture groundedness with expected values

* Changes to make model config optional

* Adding test for missing model config

* Fixing tests

* Fixing pylint error
singankit authored Mar 18, 2024
1 parent 6272970 commit f9c0eda
Showing 4 changed files with 122 additions and 23 deletions.
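In short, this change makes model_config optional when every requested metric is a content-safety metric. Below is a minimal sketch of the resulting call, modeled on test_evaluate_built_in_content_safety_metrics later in this diff; the import path, sample data, and tracking URI are assumed placeholders, not part of the commit itself.

```python
# Minimal sketch, assuming evaluate() is importable from azure.ai.generative.evaluate
# as the tests below suggest; data and tracking_uri are placeholder values.
from azure.ai.generative.evaluate import evaluate

result = evaluate(  # logs metrics/artifacts via mlflow, as in the tests below
    evaluation_name="rag-chat-1",
    data=[{
        "question": "How is weather in Seattle?",
        "context": "Some are reported as not having been wanted at all.",
        "answer": "All are reported as being completely and fully wanted.",
    }],
    task_type="qa",
    metrics_list=["violence"],  # content-safety only, so no model_config is required
    data_mapping={
        "question": "question",
        "context": "context",
        "y_pred": "answer",
        "y_test": "truth",
    },
    output_path="./evaluation_output",
    tracking_uri="<ai-project-tracking-uri>",  # placeholder
)
print(result.metrics_summary)  # e.g. contains "violence_defect_rate" per the test below
```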
@@ -15,6 +15,17 @@
}


class ContentSafetyMetrics:
"""
Content safety metrics supported by evaluation in azure-ai-generative sdk
"""

HATE_FAIRNESS = "hate_fairness"
VIOLENCE = "violence"
SELF_HARM = "self_harm"
SEXUAL = "sexual"


class EvaluationMetrics:
"""
Evaluation metrics supported by evaluation in azure-ai-generative sdk
@@ -31,10 +42,6 @@ class EvaluationMetrics:
RETRIEVAL_SCORE = "retrieval_score"
GROUNDING_SCORE = "grounding_score"
GENERATION_SCORE = "generation_score"
HATE_FAIRNESS = "hate_fairness"
VIOLENCE = "violence"
SELF_HARM = "self_harm"
SEXUAL = "sexual"


class QaMetrics:
@@ -50,10 +57,10 @@ class QaMetrics:
EvaluationMetrics.GPT_FLUENCY,
EvaluationMetrics.GPT_SIMILARITY,
EvaluationMetrics.F1_SCORE,
EvaluationMetrics.HATE_FAIRNESS,
EvaluationMetrics.VIOLENCE,
EvaluationMetrics.SELF_HARM,
EvaluationMetrics.SEXUAL
ContentSafetyMetrics.HATE_FAIRNESS,
ContentSafetyMetrics.VIOLENCE,
ContentSafetyMetrics.SELF_HARM,
ContentSafetyMetrics.SEXUAL
]


@@ -69,10 +76,10 @@ class ChatMetrics:
EvaluationMetrics.GPT_GROUNDEDNESS,
EvaluationMetrics.GPT_RELEVANCE,
EvaluationMetrics.GPT_RETRIEVAL_SCORE,
EvaluationMetrics.HATE_FAIRNESS,
EvaluationMetrics.VIOLENCE,
EvaluationMetrics.SELF_HARM,
EvaluationMetrics.SEXUAL
ContentSafetyMetrics.HATE_FAIRNESS,
ContentSafetyMetrics.VIOLENCE,
ContentSafetyMetrics.SELF_HARM,
ContentSafetyMetrics.SEXUAL
]


@@ -85,3 +92,10 @@ class ChatMetrics:
QA: QaMetrics,
CHAT: ChatMetrics
}

CONTENT_SAFETY_METRICS_LIST = [
ContentSafetyMetrics.SEXUAL,
ContentSafetyMetrics.SELF_HARM,
ContentSafetyMetrics.VIOLENCE,
ContentSafetyMetrics.HATE_FAIRNESS
]
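The new ContentSafetyMetrics class and CONTENT_SAFETY_METRICS_LIST group the four safety metrics so that other modules can build metric lists and test membership against them. A short usage sketch, assuming these constants live in the azure.ai.generative.evaluate._constants module that the metric handler imports from in the next file:

```python
# Sketch only; the import path is taken from the metric handler import shown below.
from azure.ai.generative.evaluate._constants import (
    CONTENT_SAFETY_METRICS_LIST,
    ContentSafetyMetrics,
)

requested = [ContentSafetyMetrics.VIOLENCE, ContentSafetyMetrics.SELF_HARM]

# The same membership test the metric handler uses to decide whether a
# model config can be skipped.
print(all(m in CONTENT_SAFETY_METRICS_LIST for m in requested))  # True
```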
@@ -8,7 +8,7 @@

import pandas as pd

from azure.ai.generative.evaluate._constants import TASK_TYPE_TO_METRICS_MAPPING, CHAT
from azure.ai.generative.evaluate._constants import TASK_TYPE_TO_METRICS_MAPPING, CHAT, CONTENT_SAFETY_METRICS_LIST
from ._user_agent import USER_AGENT

from ._utils import run_pf_flow_with_dict_list, df_to_dict_list, wait_for_pf_run_to_complete
@@ -23,14 +23,14 @@

class MetricHandler(object):
def __init__(
self,
task_type,
prediction_data: pd.DataFrame,
input_output_data: pd.DataFrame,
test_data,
metrics_mapping=None,
metrics=None,
data_mapping: Optional[Dict] = None,
):
self.task_type = task_type
self.prediction_data = prediction_data
@@ -72,7 +72,20 @@ def calculate_metrics(self) -> Dict:

pf_client = PFClient(user_agent=USER_AGENT)

openai_config = self.metrics_mapping["openai_params"]
openai_config = self.metrics_mapping.get("openai_params")

if openai_config is None:
if all(m in CONTENT_SAFETY_METRICS_LIST for m in metrics):
openai_config = {
"api_key": "api_key",
"api_base": "api_base",
"api_version": "api_version",
"api_type": "azure",
"deployment_id": "deployment_id"
}
else:
raise Exception("model_config is required for metrics other than content safety metrics")

conn_name = "openai_connection"
deployment_id = openai_config["deployment_id"]
if not openai_config["api_type"] or openai_config["api_type"] == "azure":
@@ -98,7 +111,8 @@ def calculate_metrics(self) -> Dict:
nodes_list = NODE_LIST_BY_TASK[self.task_type]

if self.task_type == CHAT:
pf_run = run_pf_flow_with_dict_list(flow_path, dict_list)
pf_run = run_pf_flow_with_dict_list(flow_path, dict_list, flow_params={
"connections": {node: {"connection": conn_name} for node in nodes_list}})
else:
pf_run = run_pf_flow_with_dict_list(
flow_path, dict_list, flow_params={"connections": {node: connection_override for node in nodes_list}}
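The behavioral core of the change is the openai_params fallback above: when it is missing and every requested metric is a content-safety metric, a placeholder config is substituted; otherwise an exception is raised. Below is a self-contained sketch of that branch, with resolve_openai_config as a hypothetical wrapper around the same logic.

```python
# Standalone sketch; resolve_openai_config is hypothetical, but the membership
# check and the placeholder config mirror the handler change above.
from typing import Dict, List, Optional

CONTENT_SAFETY_METRICS_LIST = ["sexual", "self_harm", "violence", "hate_fairness"]


def resolve_openai_config(metrics: List[str], metrics_mapping: Dict) -> Dict:
    openai_config: Optional[Dict] = metrics_mapping.get("openai_params")
    if openai_config is None:
        if all(m in CONTENT_SAFETY_METRICS_LIST for m in metrics):
            # Content-safety metrics do not call an OpenAI deployment, so a
            # placeholder config is enough to satisfy the downstream plumbing.
            openai_config = {
                "api_key": "api_key",
                "api_base": "api_base",
                "api_version": "api_version",
                "api_type": "azure",
                "deployment_id": "deployment_id",
            }
        else:
            raise Exception(
                "model_config is required for metrics other than content safety metrics"
            )
    return openai_config


print(resolve_openai_config(["violence"], {})["api_type"])  # -> azure
# resolve_openai_config(["gpt_groundedness"], {})  # raises Exception
```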
2 changes: 2 additions & 0 deletions sdk/ai/azure-ai-generative/tests/conftest.py
@@ -137,6 +137,8 @@ def sanitized_environment_variables(
"AI_SUBSCRIPTION_ID": "00000000-0000-0000-0000-000000000",
"AI_RESOURCE_GROUP": "00000",
"AI_WORKSPACE_NAME": "00000",
"AI_PROJECT_NAME": "00000",
"AI_TEAM_NAME": "00000",
"AI_FEATURE_STORE_NAME": "00000",
"AI_TEST_STORAGE_ACCOUNT_NAME": "teststorageaccount",
"AI_TEST_STORAGE_ACCOUNT_PRIMARY_KEY": fake_datastore_key,
@@ -65,6 +65,75 @@ def test_evaluate_built_in_metrics(self, e2e_openai_api_base, e2e_openai_api_key
assert tabular_result["gpt_groundedness"][0] in [1,2]
assert tabular_result["gpt_groundedness"][1] in [5, 4]

def test_evaluate_built_in_metrics_missing_model_config(self, tmpdir):
test_data = [
{"context": "Some are reported as not having been wanted at all.",
"question": "",
"answer": "All are reported as being completely and fully wanted."
},
{"question": "How do you log a model?",
"context": "There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = '.\/outputs\/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs\/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.",
"answer": "There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = '.\/outputs\/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs\/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`."
},
]

with tmpdir.as_cwd():
output_path = tmpdir + "/evaluation_output"

with pytest.raises(Exception) as ex:
result = evaluate( # This will log metric/artifacts using mlflow
evaluation_name="rag-chat-1",
data=test_data,
task_type="qa",
metrics_list=["gpt_groundedness"],
data_mapping={
"question": "question",
"context": "context",
"y_pred": "answer",
"y_test": "truth",
},
output_path=output_path
)

def test_evaluate_built_in_content_safety_metrics(self, ai_client, tmpdir):
test_data = [
{"context": "Some are reported as not having been wanted at all.",
"question": "How is weather in Seattle?",
"answer": "All are reported as being completely and fully wanted."
},
{"question": "How do you log a model?",
"context": "There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = '.\/outputs\/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs\/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`.",
"answer": "There are a few ways to log models in Azure Machine Learning. \n\nOne way is to use the `register_model()` method of the `Run` object. The `register_model()` method logs a model file in the Azure Machine Learning service workspace and makes it available for deployment. Here's an example:\n\n```python\nfrom azureml.core import Model\n\nmodel_path = '.\/outputs\/my_model.pkl'\nmodel = Model.register(workspace=ws, model_path=model_path, model_name='my_model')\n```\n\nThis code registers the model file located at `model_path` to the Azure Machine Learning service workspace with the name `my_model`. \n\nAnother way to log a model is to save it as an output of a `Run`. If your model generation code is part of a script or Jupyter notebook that runs as an Azure Machine Learning experiment, you can save the model file as an output of the `Run` object. Here's an example:\n\n```python\nfrom sklearn.linear_model import LogisticRegression\nfrom azureml.core.run import Run\n\n# Initialize a run object\nrun = Run.get_context()\n\n# Train your model\nX_train, y_train = ...\nclf = LogisticRegression().fit(X_train, y_train)\n\n# Save the model to the Run object's outputs directory\nmodel_path = 'outputs\/model.pkl'\njoblib.dump(value=clf, filename=model_path)\n\n# Log the model as a run artifact\nrun.upload_file(name=model_path, path_or_stream=model_path)\n```\n\nIn this code, `Run.get_context()` retrieves the current run context object, which you can use to track metadata and metrics for the run. After training your model, you can use `joblib.dump()` to save the model to a file, and then log the file as an artifact of the run using `run.upload_file()`."
},
]

with tmpdir.as_cwd():
output_path = tmpdir + "/evaluation_output"
tracking_uri = ai_client.tracking_uri

result = evaluate( # This will log metric/artifacts using mlflow
evaluation_name="rag-chat-1",
data=test_data,
task_type="qa",
metrics_list=["violence"],
data_mapping={
"question": "question",
"context": "context",
"y_pred": "answer",
"y_test": "truth",
},
output_path=output_path,
tracking_uri=tracking_uri
)

metrics_summary = result.metrics_summary
tabular_result = pd.read_json(os.path.join(output_path, "eval_results.jsonl"), lines=True)

assert "violence_defect_rate" in metrics_summary.keys()
assert tabular_result["violence"][0] in ["Safe"]
assert tabular_result["violence_score"][0] in [0.0]
assert tabular_result["violence_reasoning"] is not None


def test_duplicate_metrics_name(self, e2e_openai_api_base, e2e_openai_api_key, e2e_openai_completion_deployment_name, tmpdir):
test_data = [