Skip to content

Commit 59ef7cb

Browse files
committed
fix: convert scroll_at direction/magnitude to scroll_x/scroll_y in GoogleCUAClient
Previously, `GoogleCUAClient._process_provider_response` passed the raw `direction` and `magnitude` fields from Google CUA's `scroll_at` function call directly into the `ScrollAction` payload. This caused Pydantic validation errors, because `ScrollAction` expects `scroll_x` and `scroll_y` fields instead.

This change:
- Safely extracts `direction` and `magnitude`, with defaults (`"down"`, `800`)
- Converts `direction`/`magnitude` to `scroll_x`/`scroll_y` values
- Handles all four directions (up, down, left, right) with the proper sign
- Gracefully handles missing or non-numeric `magnitude` values
- Removes the "unsupported direction" error path (an unknown direction now falls back to scrolling down)

With this fix, the client produces `ScrollAction` payloads that validate and execute properly when scrolling in the browser.
1 parent 9823ad3 commit 59ef7cb

File tree

1 file changed

+26
-19
lines changed

1 file changed

+26
-19
lines changed

stagehand/agent/google_cua.py

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -274,32 +274,39 @@ def _process_provider_response(
274274
elif action_name == "scroll_at":
275275
action_type_str = "scroll"
276276
x, y = self._normalize_coordinates(action_args["x"], action_args["y"])
277-
direction = action_args["direction"].lower()
278-
magnitude = action_args.get("magnitude", 800)
279277

280-
# Denormalize magnitude based on direction
281-
if direction in ("up", "down"):
282-
magnitude = self._normalize_coordinates(0, magnitude)[1]
283-
elif direction in ("left", "right"):
284-
magnitude = self._normalize_coordinates(magnitude, 0)[0]
278+
# Match the TypeScript GoogleCUAClient scroll_at behavior:
279+
# - direction defaults to "down" if missing
280+
# - magnitude defaults to 800 if missing / not a number
281+
direction_raw = action_args.get("direction", "down")
282+
direction = str(direction_raw or "down").lower()
283+
raw_magnitude = action_args.get("magnitude", 800)
284+
magnitude: int
285+
if isinstance(raw_magnitude, (int, float)):
286+
magnitude = int(raw_magnitude)
285287
else:
286-
self.logger.error(
287-
f"Unsupported scroll direction: {direction}", category="agent"
288-
)
289-
return (
290-
[],
291-
reasoning_text,
292-
True,
293-
f"Unsupported scroll direction: {direction}",
294-
invoked_function_info,
295-
)
288+
magnitude = 800
289+
290+
scroll_x = 0
291+
scroll_y = 0
292+
if direction == "up":
293+
scroll_y = -magnitude
294+
elif direction == "down":
295+
scroll_y = magnitude
296+
elif direction == "left":
297+
scroll_x = -magnitude
298+
elif direction == "right":
299+
scroll_x = magnitude
300+
else:
301+
# Default to scrolling down if the direction is unknown
302+
scroll_y = magnitude
296303

297304
action_payload_dict = {
298305
"type": "scroll",
299306
"x": x,
300307
"y": y,
301-
"direction": direction,
302-
"magnitude": magnitude,
308+
"scroll_x": scroll_x,
309+
"scroll_y": scroll_y,
303310
}
304311
elif action_name == "drag_and_drop":
305312
action_type_str = "function"

0 commit comments

Comments
 (0)