Skip to content

Commit d3829f4

Browse files
thecsw and pdhotems authored
Converter from AI Service threads/runs to evaluator-compatible schema (#40047)
* WIP AIAgentConverter * Added the v1 of the converter * Updated the AIAgentConverter with different output schemas. * ruff format * Update the top schema to have: query, response, tool_definitions * "agentic" is not a recognized word, change the wording. * System message always comes first in query with multiple runs. * Add support for getting inputs from local files with run_ids. * Export AIAgentConverter through azure.ai.evaluation, local read updates * Use from ._models import * Ruff format again. * For ComputeInstance and AmlCompute update disableLocalAuth property based on ssh_public_access (#39934) * add disableLocalAuth for computeInstance * fix disableLocalAuthAuth issue for amlCompute * update compute instance * update recordings * temp changes * Revert "temp changes" This reverts commit 64e3c38. * update recordings * fix tests * Simplify the API by rolling up the static methods and hiding internals. * Lock the ._converters._ai_services behind an import error. * Print to install azure-ai-projects if we can't import AIAgentConverter * By default, include all previous runs' tool calls and results. * Don't crash if there is no content in historical thread messages. * Parallelize the calls to get step_details for each run_id. * Addressing PR comments. * Use a single underscore to hide internal static members. --------- Co-authored-by: Prashant Dhote <168401122+pdhotems@users.noreply.github.com>
1 parent ce7eced commit d3829f4

File tree

13 files changed

+819
-21
lines changed

13 files changed

+819
-21
lines changed

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,16 @@
3838
OpenAIModelConfiguration,
3939
)
4040

41+
# The converter from the AI service to the evaluator schema requires a dependency on
# ai.projects, but we also don't want to force users installing ai.evaluations to pull
# in ai.projects. So we only import it if it's available and the user has ai.projects.
try:
    from ._converters._ai_services import AIAgentConverter

    # Successfully imported: expose the converter through this package's public surface.
    _patch_all = ["AIAgentConverter"]
except ImportError:
    # NOTE(review): printing at import time is unusual for a library; a `warnings.warn`
    # or logging call may be preferable — confirm against the SDK guidelines.
    print("Could not import AIAgentConverter. Please install the dependency with `pip install azure-ai-projects`.")
    _patch_all = []
50+
4151
__all__ = [
4252
"evaluate",
4353
"CoherenceEvaluator",
@@ -72,3 +82,5 @@
7282
"ISAEvaluator",
7383
"ToolCallAccuracyEvaluator",
7484
]
85+
86+
__all__.extend([p for p in _patch_all if p not in __all__])
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# ---------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# ---------------------------------------------------------

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_converters/_ai_services.py

Lines changed: 490 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 259 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,259 @@
1+
import datetime
2+
import json
3+
4+
from pydantic import BaseModel
5+
6+
from azure.ai.projects.models import RunStepFunctionToolCall
7+
8+
from typing import List, Optional, Union
9+
10+
# Message role constants (mirroring the standard chat-completion roles).
_SYSTEM = "system"
_USER = "user"
_AGENT = "assistant"
_TOOL = "tool"

# Constant definitions for what tool details include.
_TOOL_CALL = "tool_call"
_TOOL_RESULT = "tool_result"
_FUNCTION = "function"

# This is returned by AI services in the API to filter against tool invocations.
_TOOL_CALLS = "tool_calls"
23+
24+
25+
class Message(BaseModel):
    """Represents a message in a conversation with agents, assistants, and tools. We need to export these structures
    to JSON for evaluators and we have custom fields such as createdAt, run_id, and tool_call_id, so we cannot use
    the standard pydantic models provided by OpenAI.

    :param createdAt: The timestamp when the message was created.
    :type createdAt: datetime.datetime
    :param run_id: The ID of the run associated with the message. Optional.
    :type run_id: Optional[str]
    :param role: The role of the message sender (e.g., system, user, tool, assistant).
    :type role: str
    :param content: The content of the message, which can be a string or a list of dictionaries.
    :type content: Union[str, List[dict]]
    """

    # Optional because a SystemMessage carries no timestamp; int presumably covers
    # epoch-seconds values from the service — TODO confirm against the caller.
    createdAt: Optional[Union[datetime.datetime, int]] = None  # SystemMessage wouldn't have this
    run_id: Optional[str] = None
    tool_call_id: Optional[str] = None  # see ToolMessage
    role: str
    content: Union[str, List[dict]]
45+
46+
47+
class SystemMessage(Message):
    """Represents a system message in a conversation with agents, assistants, and tools.

    :param role: The role of the message sender, which is always 'system'.
    :type role: str
    """

    role: str = _SYSTEM
55+
56+
57+
class UserMessage(Message):
    """Represents a user message in a conversation with agents, assistants, and tools.

    :param role: The role of the message sender, which is always 'user'.
    :type role: str
    """

    role: str = _USER
65+
66+
67+
class ToolMessage(Message):
    """Represents a tool message in a conversation with agents, assistants, and tools.

    Unlike the base class, ``run_id`` is required here: a tool result always belongs
    to a specific run.

    :param run_id: The ID of the run associated with the message.
    :type run_id: str
    :param role: The role of the message sender, which is always 'tool'.
    :type role: str
    :param tool_call_id: The ID of the tool call associated with the message. Optional.
    :type tool_call_id: Optional[str]
    """

    run_id: str
    role: str = _TOOL
    tool_call_id: Optional[str] = None
81+
82+
83+
class AssistantMessage(Message):
    """Represents an assistant message.

    ``run_id`` is required: assistant output is always attributed to a run.

    :param run_id: The ID of the run associated with the message.
    :type run_id: str
    :param role: The role of the message sender, which is always 'assistant'.
    :type role: str
    """

    run_id: str
    role: str = _AGENT
94+
95+
96+
class ToolDefinition(BaseModel):
    """Represents a tool definition that will be used in the agent.

    :param name: The name of the tool.
    :type name: str
    :param description: A description of the tool. Optional.
    :type description: Optional[str]
    :param parameters: The parameters required by the tool.
    :type parameters: dict
    """

    name: str
    description: Optional[str] = None
    parameters: dict
110+
111+
112+
class ToolCall:
    """Represents a tool call, used as an intermediate step in the conversion process.

    :param created: The timestamp when the tool call was created.
    :type created: datetime.datetime
    :param completed: The timestamp when the tool call was completed.
    :type completed: datetime.datetime
    :param details: The details of the tool call.
    :type details: RunStepFunctionToolCall
    """

    def __init__(self, created: datetime.datetime, completed: datetime.datetime, details: RunStepFunctionToolCall):
        # `created`/`completed` bracket the invocation; `details` is the
        # service-reported call (id, function name, arguments, output).
        self.created = created
        self.completed = completed
        self.details = details
127+
128+
129+
class EvaluatorData(BaseModel):
    """Represents the result of a conversion.

    :param query: A list of messages representing the system message, chat history, and user query.
    :type query: List[Message]
    :param response: A list of messages representing the assistant's response, including tool calls and results.
    :type response: List[Message]
    :param tool_definitions: A list of tool definitions used in the agent.
    :type tool_definitions: List[ToolDefinition]
    """

    query: List[Message]
    response: List[Message]
    tool_definitions: List[ToolDefinition]

    def to_json(self) -> str:
        """Converts the result to a JSON string.

        Fields that are ``None`` are omitted so the exported payload stays compact.

        :return: The JSON representation of the result.
        :rtype: str
        """
        # The original passed `exclude={}`, an empty (and therefore no-op) exclusion
        # set; dropping it removes the misleading argument without changing output.
        return self.model_dump_json(exclude_none=True)
151+
152+
153+
def break_tool_call_into_messages(tool_call: ToolCall, run_id: str) -> List[Message]:
    """
    Breaks a tool call into a list of messages, including the tool call and its result.

    :param tool_call: The tool call to be broken into messages.
    :type tool_call: ToolCall
    :param run_id: The ID of the run associated with the messages.
    :type run_id: str
    :return: A list of messages representing the tool call and its result.
    :rtype: List[Message]
    """
    # As of March 17th, 2025, only custom functions are supported: built-in code
    # interpreter and Bing grounding steps do not report their calls in a convertible
    # form (code interpreter usually omits the call entirely; Bing grounding only
    # exposes "requesturl", the API path with query, without arguments or results).
    # TODO: Work with AI Services to add converter support for BingGrounding and CodeInterpreter.
    if not hasattr(tool_call.details, _FUNCTION):
        return []

    details = tool_call.details
    call_id = details.id

    # Payload describing the function invocation, nested per the evaluator schema.
    invocation_payload = {
        "type": _TOOL_CALL,
        _TOOL_CALL: {
            "id": call_id,
            "type": _FUNCTION,
            _FUNCTION: {
                "name": details.function.name,
                "arguments": safe_loads(details.function.arguments),
            },
        },
    }

    # The call is attributed to the assistant (its action of invoking the tool),
    # even though conceptually it concerns the tool — the target schema wants it so.
    call_message = AssistantMessage(
        run_id=run_id,
        content=[to_dict(invocation_payload)],
        createdAt=tool_call.created,
    )

    # The result carries only the function output, attributed to the tool itself.
    result_payload = {"type": _TOOL_RESULT, _TOOL_RESULT: safe_loads(details.function.output)}
    result_message = ToolMessage(
        run_id=run_id,
        tool_call_id=call_id,
        content=[to_dict(result_payload)],
        createdAt=tool_call.completed,
    )

    return [call_message, result_message]
207+
208+
209+
def to_dict(obj) -> dict:
    """
    Converts an object to a dictionary.

    Round-trips the object through the JSON serializer, so ``obj`` must be
    JSON-serializable; nested structures come back as plain dicts and lists.

    :param obj: The object to be converted.
    :type obj: Any
    :return: The dictionary representation of the object.
    :rtype: dict
    """
    serialized = json.dumps(obj)
    return json.loads(serialized)
219+
220+
221+
def safe_loads(data: str) -> Union[dict, str]:
    """
    Safely loads a JSON string into a Python dictionary or returns the original string if loading fails.

    :param data: The JSON string to be loaded.
    :type data: str
    :return: The loaded dictionary or the original string.
    :rtype: Union[dict, str]
    """
    try:
        return json.loads(data)
    except (json.JSONDecodeError, TypeError):
        # TypeError: json.loads(None) raises TypeError, not JSONDecodeError, and tool
        # arguments/outputs can be absent — return the input unchanged instead of
        # crashing mid-conversion.
        return data
233+
234+
235+
def convert_message(msg: dict) -> Message:
    """
    Converts a dictionary to the appropriate Message subclass.

    :param msg: The message dictionary.
    :type msg: dict
    :return: The Message object.
    :rtype: Message
    :raises ValueError: If the role is not one of system, user, assistant, or tool.
    """
    role = msg["role"]
    # Dispatch on the module-level role constants instead of repeating the string
    # literals, keeping this function consistent with the Message subclasses above.
    if role == _SYSTEM:
        return SystemMessage(content=str(msg["content"]))
    elif role == _USER:
        return UserMessage(content=msg["content"], createdAt=msg["createdAt"])
    elif role == _AGENT:
        return AssistantMessage(run_id=str(msg["run_id"]), content=msg["content"], createdAt=msg["createdAt"])
    elif role == _TOOL:
        return ToolMessage(
            run_id=str(msg["run_id"]),
            tool_call_id=str(msg["tool_call_id"]),
            content=msg["content"],
            createdAt=msg["createdAt"],
        )
    else:
        raise ValueError(f"Unknown role: {role}")

sdk/ml/azure-ai-ml/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
### Features Added
44

55
### Bugs Fixed
6+
- Fix for ComputeInstance: the disableLocalAuth property should depend on whether SSH public access is enabled.
67

78
## 1.26.0 (2025-03-11)
89

sdk/ml/azure-ai-ml/assets.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
"AssetsRepo": "Azure/azure-sdk-assets",
33
"AssetsRepoPrefixPath": "python",
44
"TagPrefix": "python/ml/azure-ai-ml",
5-
"Tag": "python/ml/azure-ai-ml_a2c955e6e2"
5+
"Tag": "python/ml/azure-ai-ml_305b890d5b"
66
}

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/aml_compute.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ def _to_rest_object(self) -> ComputeResource:
251251
),
252252
)
253253
remote_login_public_access = "Enabled"
254-
disableLocalAuth = not (self.ssh_public_access_enabled and self.ssh_settings is not None)
254+
disableLocalAuth = not (self.ssh_settings)
255255
if self.ssh_public_access_enabled is not None:
256256
remote_login_public_access = "Enabled" if self.ssh_public_access_enabled else "Disabled"
257257

sdk/ml/azure-ai-ml/azure/ai/ml/entities/_compute/compute_instance.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,12 +280,14 @@ def _to_rest_object(self) -> ComputeResource:
280280
subnet_resource = None
281281

282282
ssh_settings = None
283+
disable_local_auth = True
283284
if self.ssh_public_access_enabled is not None or self.ssh_settings is not None:
284285
ssh_settings = CiSShSettings()
285286
ssh_settings.ssh_public_access = "Enabled" if self.ssh_public_access_enabled else "Disabled"
286287
ssh_settings.admin_public_key = (
287288
self.ssh_settings.ssh_key_value if self.ssh_settings and self.ssh_settings.ssh_key_value else None
288289
)
290+
disable_local_auth = not self.ssh_public_access_enabled
289291

290292
personal_compute_instance_settings = None
291293
if self.create_on_behalf_of:
@@ -330,6 +332,7 @@ def _to_rest_object(self) -> ComputeResource:
330332
description=self.description,
331333
compute_type=self.type,
332334
properties=compute_instance_prop,
335+
disable_local_auth=disable_local_auth,
333336
)
334337
return ComputeResource(
335338
location=self.location,

sdk/ml/azure-ai-ml/tests/compute/e2etests/test_compute.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def test_aml_compute_create_and_delete(self, client: MLClient, rand_compute_name
3030
assert compute_resource_get.name == compute_name
3131
assert compute_resource_get.tier == "dedicated"
3232
assert compute_resource_get.location == compute.location
33+
assert compute_resource_get.ssh_public_access_enabled == True
34+
assert compute_resource_get.ssh_settings.admin_username == "azureuser"
3335

3436
compute_resource_get.idle_time_before_scale_down = 200
3537
compute_update_poller = client.compute.begin_update(compute_resource_get)
@@ -46,7 +48,6 @@ def test_aml_compute_create_and_delete(self, client: MLClient, rand_compute_name
4648
# so this is a preferred approach to assert
4749
assert isinstance(outcome, LROPoller)
4850

49-
@pytest.mark.skip(reason="not enough capacity")
5051
def test_compute_instance_create_and_delete(
5152
self, client: MLClient, rand_compute_name: Callable[[str], str]
5253
) -> None:
@@ -65,20 +66,11 @@ def test_compute_instance_create_and_delete(
6566
assert isinstance(compute_resource_list, ItemPaged)
6667
compute_resource_get = client.compute.get(name=compute_name)
6768
assert compute_resource_get.name == compute_name
68-
assert compute_resource_get.identity.type == "system_assigned"
6969
outcome = client.compute.begin_delete(name=compute_name)
7070
# the compute is getting deleted , but not waiting on the poller! so immediately returning
7171
# so this is a preferred approach to assert
7272
assert isinstance(outcome, LROPoller)
7373

74-
@pytest.mark.skipif(
75-
condition=not is_live(),
76-
reason=(
77-
"Test takes 5 minutes in automation. "
78-
"Already have unit tests verifying correct _restclient method is called. "
79-
"Can be validated in live build only."
80-
),
81-
)
8274
def test_compute_instance_stop_start_restart(
8375
self, client: MLClient, rand_compute_name: Callable[[str], str]
8476
) -> None:

0 commit comments

Comments
 (0)