Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,7 @@ OpenManus is built by contributors from MetaGPT. Huge thanks to this agent commu
url = {https://doi.org/10.5281/zenodo.15186407},
}
```


## Planning JSON Repair
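See `docs/planning-json-repair.md` for the repair pipeline, how it is integrated into the planning flow, and the `OPENMANUS_PLANNING_REPAIR` environment variable that toggles it (set it to `0` to fall back to strict parsing).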
17 changes: 16 additions & 1 deletion app/flow/planning.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from app.flow.base import BaseFlow
from app.llm import LLM
from app.logger import logger
from app.planning.integrations import parse_plan_text
from app.planning.metrics import snapshot as planning_metrics_snapshot
from app.schema import AgentState, Message, ToolChoice
from app.tool import PlanningTool

Expand Down Expand Up @@ -183,7 +185,20 @@ async def _create_initial_plan(self, request: str) -> None:
args = tool_call.function.arguments
if isinstance(args, str):
try:
args = json.loads(args)
# robust parse to Plan; then dump to dict (compat)
_plan_obj = parse_plan_text(args, max_retries=2)
args = _plan_obj.model_dump()
logger.info(
"planning_metrics=%s", planning_metrics_snapshot()
)
except Exception:
# fallback: strict parse (legacy behavior)
import json as _json

args = _json.loads(args)
logger.info(
"planning_metrics=%s", planning_metrics_snapshot()
)
except json.JSONDecodeError:
logger.error(f"Failed to parse tool arguments: {args}")
continue
Expand Down
4 changes: 4 additions & 0 deletions app/planning/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .collector import parse_plan_payload, parse_with_retries
from .errors import JSONParseError, JSONRepairFailed, JSONSchemaError, PlanningError
from .integrations import parse_plan_text
from .models import Plan, Step # convenience re-exports
68 changes: 68 additions & 0 deletions app/planning/collector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from __future__ import annotations

import json
from typing import Tuple

from pydantic import ValidationError

from . import metrics
from .errors import JSONParseError, JSONRepairFailed, JSONSchemaError
from .json_repair import repair_json, strip_markdown_fences, trim_to_outermost_json
from .models import Plan


def extract_candidate_json(payload: str) -> str:
    """Return the part of ``payload`` most likely to be the JSON object.

    The content of a Markdown code fence is used when one is present. The
    result is then narrowed to the span between the first ``{`` and the last
    ``}`` so that prose around the object is ignored, and surrounding
    whitespace is removed.
    """
    return trim_to_outermost_json(strip_markdown_fences(payload)).strip()


def parse_plan_payload(payload: str) -> Tuple[Plan, dict]:
    """Turn raw LLM output into a validated ``Plan``.

    The payload goes through four stages: candidate extraction, textual
    repair, ``json.loads`` and schema validation. Each failing stage bumps
    its own failure counter and raises a dedicated planning error that is
    chained to the underlying exception.

    Returns:
        ``(plan, meta)`` where ``meta["notes"]`` lists the repair passes
        that changed the text.

    Raises:
        JSONRepairFailed: the repair stage raised.
        JSONParseError: the repaired text is still not loadable JSON.
        JSONSchemaError: the decoded data does not validate as a ``Plan``.

    Note:
        The ``planning.total`` timing is only recorded on success; a failing
        stage raises before the timer is stopped.
    """
    finish_timer = metrics.timer()
    candidate = extract_candidate_json(payload)

    # Stage 1: textual repair.
    try:
        fixed_text, applied_passes = repair_json(candidate)
    except Exception as exc:
        metrics.inc("planning.repair_fail")
        raise JSONRepairFailed(
            f"Failed to repair JSON: {exc!s}",
            hint="Check code fences, trailing commas, and brace balance",
        ) from exc

    # Stage 2: decode.
    try:
        decoded = json.loads(fixed_text)
    except Exception as exc:
        metrics.inc("planning.parse_fail")
        raise JSONParseError(
            f"json.loads failed: {exc!s}",
            hint="Review quotes, comments, and illegal characters",
        ) from exc

    # Stage 3: schema validation.
    try:
        validated = Plan.model_validate(decoded)
    except ValidationError as exc:
        metrics.inc("planning.schema_fail")
        raise JSONSchemaError(
            f"Plan schema failed: {exc!s}",
            hint="Extra fields are forbidden (extra='forbid')",
        ) from exc

    metrics.inc("planning.ok")
    finish_timer("planning.total")
    meta = {"notes": applied_passes}
    return validated, meta


def parse_with_retries(payload: str, max_retries: int = 2) -> Plan:
    """Parse ``payload`` into a ``Plan``, re-running the pipeline on failure.

    The pipeline is run once and, if it raises one of the planning errors,
    up to ``max_retries`` more times. ``planning.retry`` is incremented for
    every failed attempt, including the last one.

    Args:
        payload: Raw text expected to contain the plan JSON.
        max_retries: Number of additional attempts after the first. Values
            below zero are treated as zero, so at least one attempt is
            always made.

    Returns:
        The validated ``Plan`` from the first successful attempt.

    Raises:
        JSONRepairFailed, JSONParseError, JSONSchemaError: the error of the
            final attempt when every attempt failed.
    """
    # Clamp instead of asserting afterwards: an ``assert`` is stripped under
    # ``python -O`` and a negative value would otherwise end in ``raise None``.
    retries_left = max(0, max_retries)
    while True:
        try:
            plan, _meta = parse_plan_payload(payload)
        except (JSONRepairFailed, JSONParseError, JSONSchemaError):
            metrics.inc("planning.retry")
            if retries_left == 0:
                # Out of attempts: surface the most recent failure as-is.
                raise
            retries_left -= 1
        else:
            return plan
20 changes: 20 additions & 0 deletions app/planning/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
class PlanningError(Exception):
"""Base error for the planning pipeline."""

hint: str | None = None

def __init__(self, message: str, *, hint: str | None = None):
super().__init__(message)
self.hint = hint


class JSONParseError(PlanningError):
    """The repaired text could not be decoded by ``json.loads``."""


class JSONSchemaError(PlanningError):
    """The decoded JSON failed Pydantic validation against the Plan/Step schema."""


class JSONRepairFailed(PlanningError):
    """The repair stage gave up: the repair was judged unsafe or was not possible."""
22 changes: 22 additions & 0 deletions app/planning/integrations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from __future__ import annotations

import json
import os

from .collector import parse_with_retries
from .models import Plan


def parse_plan_text(
    text: str, *, max_retries: int = 2, env_var: str = "OPENMANUS_PLANNING_REPAIR"
) -> Plan:
    """
    Build a Plan from the JSON text produced by an LLM.

    The environment variable named by ``env_var`` selects the mode:

    * exactly "0": strict mode, i.e. ``json.loads`` followed by
      ``Plan.model_validate`` with no repair attempt;
    * unset or any other value: the repair + validate pipeline
      (``parse_with_retries``) with ``max_retries`` retries.
    """
    repair_disabled = os.getenv(env_var, "1") == "0"
    if repair_disabled:
        # strict mode
        return Plan.model_validate(json.loads(text))
    return parse_with_retries(text, max_retries=max_retries)
187 changes: 187 additions & 0 deletions app/planning/json_repair.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
from __future__ import annotations

import re


def strip_markdown_fences(s: str) -> str:
    """Return the body of the first triple-backtick fence found in ``s``.

    An optional ``json``/``JSON`` tag right after the opening fence and any
    whitespace following it are skipped. When ``s`` contains no complete
    fence (opening and closing), it is returned unchanged.
    """
    fenced = re.search(r"```(?:json|JSON)?\s*(.*?)```", s, flags=re.DOTALL)
    if fenced is None:
        return s
    return fenced.group(1)


def trim_to_outermost_json(s: str) -> str:
    """Cut ``s`` down to the span from its first '{' to its last '}'.

    Text before and after that span is dropped. If there is no '{', or no
    '}' located after the first '{', ``s`` is returned unchanged.
    """
    first_open = s.find("{")
    if first_open == -1:
        return s
    last_close = s.rfind("}")
    # A missing '}' gives -1, which is also covered by this comparison.
    if last_close <= first_open:
        return s
    return s[first_open : last_close + 1]


def normalize_unicode_quotes(s: str) -> str:
    """Replace typographic quote characters with their ASCII counterparts.

    Curly/low double quotes and guillemets become '"'; curly single quotes
    become "'". The replacement is unconditional: it is applied everywhere
    in ``s``, including inside string values.
    """
    replacements = (
        ("“", '"'),
        ("”", '"'),
        ("„", '"'),
        ("«", '"'),
        ("»", '"'),
        ("’", "'"),
        ("‘", "'"),
    )
    for fancy, plain in replacements:
        s = s.replace(fancy, plain)
    return s


def remove_json_comments(s: str) -> str:
    """Strip ``//`` line comments and ``/* ... */`` block comments from ``s``.

    Comment markers inside double-quoted strings are left alone; backslash
    escapes are tracked so an escaped quote does not end the string.

    * A line comment runs up to, but not including, the next CR or LF.
    * A block comment runs through its closing ``*/``. An unterminated block
      comment swallows the rest of the input.

    Returns:
        ``s`` with the comments removed and everything else unchanged.
    """
    out: list[str] = []
    i, n = 0, len(s)
    in_str = False
    esc = False
    while i < n:
        ch = s[i]
        if in_str:
            if esc:
                esc = False
            elif ch == "\\":
                esc = True
            elif ch == '"':
                in_str = False
            out.append(ch)
            i += 1
            continue
        if ch == '"':
            in_str = True
            out.append(ch)
            i += 1
            continue
        if s.startswith("//", i):
            i += 2
            # Keep the line terminator itself so line structure is preserved.
            while i < n and s[i] not in "\r\n":
                i += 1
            continue
        if s.startswith("/*", i):
            # Search after the opener so that "/*/" is not read as a
            # complete comment.
            close = s.find("*/", i + 2)
            # No closer: drop everything that is left instead of leaking the
            # tail of the comment into the output.
            i = n if close == -1 else close + 2
            continue
        out.append(ch)
        i += 1
    return "".join(out)


def remove_trailing_commas(s: str) -> str:
    """Drop every comma whose next non-whitespace character is ``}`` or ``]``.

    Only commas outside double-quoted strings are considered, and the
    whitespace between the comma and the closer is kept. "Whitespace" here
    means space, tab, CR and LF.
    """
    kept = []
    length = len(s)
    inside_string = False
    escaped = False
    for pos, char in enumerate(s):
        if inside_string:
            kept.append(char)
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                inside_string = False
            continue
        if char == '"':
            inside_string = True
        elif char == ",":
            # Peek past whitespace to see what the comma is followed by.
            peek = pos + 1
            while peek < length and s[peek] in " \t\r\n":
                peek += 1
            if peek < length and s[peek] in "}]":
                # Trailing comma: skip it; the closer is emitted on its own turn.
                continue
        kept.append(char)
    return "".join(kept)


def escape_illegal_string_chars(s: str) -> str:
    """Escape raw control characters that appear inside double-quoted strings.

    JSON does not allow unescaped characters below U+0020 inside a string.
    Within strings, LF, CR and TAB are rewritten to ``\\n``, ``\\r`` and
    ``\\t``; any other control character becomes a ``\\u00XX`` escape.
    Characters outside strings (for example the newlines used for layout)
    are copied unchanged.

    Returns:
        ``s`` with the in-string control characters replaced by escapes.
    """
    short_escapes = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}
    out: list[str] = []
    in_str = False
    esc = False
    for ch in s:
        if not in_str:
            if ch == '"':
                in_str = True
            out.append(ch)
            continue
        if ch < " ":
            out.append(short_escapes.get(ch) or f"\\u{ord(ch):04x}")
            # The emitted escape starts with its own backslash, which pairs
            # up with any pending one; clear the flag so that the next
            # quote is not mistaken for an escaped quote.
            esc = False
            continue
        if esc:
            esc = False
        elif ch == "\\":
            esc = True
        elif ch == '"':
            in_str = False
        out.append(ch)
    return "".join(out)


def balance_braces_brackets(s: str) -> str:
    """Append one missing ``}`` or ``]`` when exactly one closer is missing.

    Only structural braces and brackets are counted: characters inside
    double-quoted strings are ignored, so a value such as ``"x[0"`` does not
    make the text look unbalanced.

    The fix is deliberately conservative:

    * a ``}`` is appended when there is exactly one more ``{`` than ``}`` and
      the text (ignoring trailing whitespace) does not already end in ``}``;
    * otherwise the same rule is applied to ``[`` / ``]``;
    * at most one character is appended per call;
    * text that ends inside an unterminated string is returned unchanged,
      because a closer appended there would land inside the string.

    Returns:
        The right-stripped text plus the closer, or ``s`` unchanged.
    """
    counts = {"{": 0, "}": 0, "[": 0, "]": 0}
    in_str = False
    esc = False
    for ch in s:
        if in_str:
            if esc:
                esc = False
            elif ch == "\\":
                esc = True
            elif ch == '"':
                in_str = False
            continue
        if ch == '"':
            in_str = True
        elif ch in counts:
            counts[ch] += 1

    if in_str:
        return s

    stripped = s.rstrip()
    if counts["{"] == counts["}"] + 1 and not stripped.endswith("}"):
        return stripped + "}"
    if counts["["] == counts["]"] + 1 and not stripped.endswith("]"):
        return stripped + "]"
    return s


def sanity_checks(baseline: str, repaired: str) -> bool:
    """Tell whether ``repaired`` kept at least 70% of ``baseline``'s length.

    Returns ``False`` when the repair passes removed more than 30% of the
    characters, which callers treat as an unsafe repair.
    """
    minimum_length = 0.70 * len(baseline)
    return len(repaired) >= minimum_length


def repair_json(text: str) -> tuple[str, list[str]]:
    """Run the repair passes over ``text`` and report which ones changed it.

    Steps:

    1. Strip a Markdown fence and slice to the outermost ``{...}``. The
       result is the baseline used by the size sanity check.
    2. If the baseline already decodes as JSON it is returned as-is; the
       heuristic passes are only meant for broken input and, being purely
       textual, can alter text that needed no repair (for example
       ``normalize_unicode_quotes`` rewrites typographic quotes that appear
       inside string values).
    3. Otherwise apply, in order: quote normalisation, comment removal,
       trailing-comma removal, control-character escaping, brace balancing.

    Returns:
        ``(repaired_text, notes)`` where ``notes`` holds the names of the
        passes that modified the text, in the order they ran.

    Raises:
        ValueError: the passes removed more than the sanity check allows.
    """
    # Local import: this module's own import block only brings in ``re``.
    import json

    notes: list[str] = []
    for fn in (strip_markdown_fences, trim_to_outermost_json):
        candidate = fn(text)
        if candidate != text:
            notes.append(fn.__name__)
            text = candidate
    baseline = text

    try:
        json.loads(baseline)
    except ValueError:
        # Not valid JSON (JSONDecodeError is a ValueError): go on to repair.
        pass
    else:
        return baseline, notes

    for fn in (
        normalize_unicode_quotes,
        remove_json_comments,
        remove_trailing_commas,
        escape_illegal_string_chars,
        balance_braces_brackets,
    ):
        candidate = fn(text)
        if candidate != text:
            notes.append(fn.__name__)
            text = candidate

    if not sanity_checks(baseline, text):
        raise ValueError("Unsafe repair delta")
    return text, notes
27 changes: 27 additions & 0 deletions app/planning/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from __future__ import annotations

import time
from collections import Counter


# Module-level store shared by inc(), timer() and snapshot(); keys are metric names.
_counters = Counter()


def inc(name: str, n: int = 1) -> None:
    """Add ``n`` (default 1) to the counter called ``name``."""
    # A Counter reports 0 for a key it has not seen, so no setup is needed.
    _counters[name] = _counters[name] + n


def timer():
    """Start a stopwatch and return the function that stops it.

    Calling the returned function with a metric ``name`` adds the elapsed
    time, truncated to whole milliseconds, to ``<name>_ms_total``, adds one
    to ``<name>_count`` and returns the untruncated elapsed milliseconds.
    """
    started_at = time.perf_counter()

    def _stop(name: str):
        elapsed_ms = 1000.0 * (time.perf_counter() - started_at)
        _counters[f"{name}_ms_total"] += int(elapsed_ms)
        _counters[f"{name}_count"] += 1
        return elapsed_ms

    return _stop


def snapshot() -> dict[str, int]:
    """Return the current counters as a new plain ``dict``."""
    return {**_counters}
26 changes: 26 additions & 0 deletions app/planning/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from __future__ import annotations

from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field


class Step(BaseModel):
    # One entry of Plan.steps.
    # extra="forbid": keys that are not declared below fail validation.
    model_config = ConfigDict(extra="forbid")
    # Required string fields (no default).
    id: str
    title: str
    description: str
    # Optional; defaults to a new empty list.
    # NOTE(review): presumably the ids of other steps - confirm with the caller.
    depends_on: List[str] = Field(default_factory=list)
    tool: str  # e.g., "browser_use", "python", "code_interpreter"
    # Optional mapping with arbitrary values; defaults to a new empty dict.
    inputs: Dict[str, Any] = Field(default_factory=dict)
    # Required (no default).
    expected_output: str


class Plan(BaseModel):
    # Top-level model the planning JSON is validated against.
    # extra="forbid": keys that are not declared below fail validation.
    model_config = ConfigDict(extra="forbid")
    # Required string fields (no default).
    version: str
    objective: str
    # Optional lists of strings; each defaults to a new empty list.
    constraints: List[str] = Field(default_factory=list)
    success_criteria: List[str] = Field(default_factory=list)
    # Required; every item is validated as a Step.
    steps: List[Step]
    # Optional free text; None when absent.
    notes: Optional[str] = None
Loading