Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions verifiers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .parsers.xml_parser import XMLParser
from .rubrics.judge_rubric import JudgeRubric
from .rubrics.rubric_group import RubricGroup
from .rubrics.tool_rubric import ToolRubric
from .utils.data_utils import (
extract_boxed_answer,
extract_hash_answer,
Expand All @@ -51,6 +52,7 @@
"Rubric",
"JudgeRubric",
"RubricGroup",
"ToolRubric",
"MathRubric",
"TextArenaEnv",
"ReasoningGymEnv",
Expand Down Expand Up @@ -143,3 +145,4 @@ def __getattr__(name: str):
get_model_and_tokenizer,
)
from .rubrics.math_rubric import MathRubric # noqa: F401
from .rubrics.tool_rubric import ToolRubric # noqa: F401
63 changes: 63 additions & 0 deletions verifiers/rubrics/tool_rubric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from typing import Callable

from verifiers.rubrics.rubric import Rubric
from verifiers.types import Messages
from verifiers.utils.tool_utils import convert_func_to_oai_tool


class ToolRubric(Rubric):
    """Rubric that counts tool calls in completion messages.

    Registers one reward function for the total number of tool calls plus
    one per configured tool (named ``<tool>_calls``). All weights are 0.0,
    so the counts are reported as metrics without contributing to reward.
    """

    def __init__(self, tools: list[Callable] | None = None):
        """Initialize the rubric.

        Args:
            tools: Callables whose invocations should be counted. Their
                ``__name__`` is matched against tool-call function names.
        """
        self.tools = tools or []
        self.oai_tools = [convert_func_to_oai_tool(tool) for tool in self.tools]
        self.tool_names = [tool.__name__ for tool in self.tools]

        # One aggregate counter plus one counter per named tool; every
        # weight is 0.0 so these behave as metrics, not rewards.
        reward_funcs = [self.total_tool_calls]
        reward_weights = [0.0]
        for tool_name in self.tool_names:
            reward_funcs.append(self.get_tool_call_count_func(tool_name))
            reward_weights.append(0.0)

        # Pass them to parent class
        super().__init__(funcs=reward_funcs, weights=reward_weights)

    def total_tool_calls(self, completion: Messages, **kwargs) -> float:
        """Count the total number of tool calls across all assistant messages."""
        total = 0
        for msg in completion:
            if msg.get("role") == "assistant" and "tool_calls" in msg:
                tool_calls = msg.get("tool_calls", [])
                if isinstance(tool_calls, list):
                    total += len(tool_calls)
        return float(total)

    def get_tool_call_count_func(self, tool_name: str) -> Callable:
        """Create a reward function that counts calls to a specific tool.

        Args:
            tool_name: Function name to match against each tool call.

        Returns:
            A reward function (named ``<tool_name>_calls``) that returns the
            number of calls to that tool in a completion, as a float.
        """

        def tool_call_count_func(completion: Messages, **kwargs) -> float:
            """Count calls to {tool_name} tool."""
            count = 0

            # Find tool calls in assistant messages
            for msg in completion:
                if msg.get("role") == "assistant" and "tool_calls" in msg:
                    tool_calls = msg.get("tool_calls", [])
                    if not isinstance(tool_calls, list):
                        continue

                    for tool_call in tool_calls:
                        # Completion messages are plain dicts, so the entries
                        # must be read with dict access, not attribute access
                        # (hasattr on a dict is always False, which made the
                        # per-tool counts permanently 0.0).
                        if isinstance(tool_call, dict):
                            name = tool_call.get("function", {}).get("name")
                        else:
                            # Tolerate OpenAI response objects as well.
                            name = getattr(
                                getattr(tool_call, "function", None), "name", None
                            )
                        if name == tool_name:
                            count += 1

            return float(count)

        tool_call_count_func.__name__ = f"{tool_name}_calls"
        return tool_call_count_func