Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ All notable changes to vouch are documented here. Format follows
committed SVGs stay reproducible (#286).

### Added
- typed `Config` model for `.vouch/config.yaml`: the file is parsed once into a
validated pydantic `Config` (with nested `ReviewConfig` / `RetrievalConfig`)
exposed as `KBStore.config`, replacing the per-call-site untyped `dict` reads
in `proposals.py` and the retrieval backend selector. a malformed value
(e.g. `retrieval.default_limit: "ten"`) now fails fast with the offending key
path instead of silently falling back, and `vouch doctor` surfaces unknown
top-level keys as likely typos instead of dropping them. existing KBs with
partial or absent `retrieval:` / `review:` blocks load with the documented
defaults — no on-disk change (#243).
- GitHub PR auto-labeling: a pull-request metadata-only labeler workflow now
applies vouch surface labels from `.github/labeler.yml`, keeps those labels
in sync as files change, and adds OpenClaw-style `size: XS` through
Expand Down
28 changes: 3 additions & 25 deletions src/vouch/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
import sqlite3
from typing import Any, Literal, cast

import yaml

from . import graph, index_db
from .models import ClaimStatus, ContextItem, ContextPack, ContextQuality
from .scoping import (
Expand All @@ -42,35 +40,15 @@

ContextItemKind = Literal["claim", "page", "entity", "relation", "source"]

_VALID_BACKENDS = ("auto", "embedding", "fts5", "substring")


def _configured_backend(store: KBStore) -> str:
    """Resolve the retrieval backend for this KB, defaulting to "auto".

    Delegates to the typed config: `store.config.retrieval.resolved_backend()`.
    That method lets a recognised singular `retrieval.backend` win, otherwise
    takes the first recognised entry of a legacy `retrieval.backends` list,
    and otherwise returns "auto".

    Unlike the previous dict-based reader, a config file that cannot be read
    or validated is not silently mapped to "auto": accessing `store.config`
    raises `ConfigError` in that case. A missing config file still resolves
    to the defaults, and therefore to "auto".
    """
    # Parsing and validation live in the typed `Config` model (#243); this
    # function only picks the effective backend from the parsed result.
    return store.config.retrieval.resolved_backend()


def _retrieve(
Expand Down
25 changes: 23 additions & 2 deletions src/vouch/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,15 @@

from . import index_db
from .audit import count_events, verify_chain
from .models import Claim, ClaimStatus, Entity, Page, ProposalKind, ProposalStatus
from .models import (
Claim,
ClaimStatus,
ConfigError,
Entity,
Page,
ProposalKind,
ProposalStatus,
)
from .storage import KBStore, _yaml_load, sha256_hex
from .verify import verify_all

Expand Down Expand Up @@ -232,7 +240,9 @@ def doctor(store: KBStore) -> HealthReport:
)
)

# Config sanity.
# Config sanity — a missing file is an error; otherwise a malformed value
# is an error and an unknown key is a likely typo (silently ignored before
# #243, now surfaced).
if not store.config_path.exists():
report.findings.append(
Finding(
Expand All @@ -241,6 +251,17 @@ def doctor(store: KBStore) -> HealthReport:
"config.yaml is missing",
)
)
else:
try:
cfg = store.config
except ConfigError as e:
report.findings.append(Finding("error", "config_invalid", str(e)))
else:
for key in cfg.unknown_keys():
report.findings.append(Finding(
"warning", "config_unknown_key",
f"unknown config key {key!r} — possible typo, ignored",
))

# Index presence (warning only — the index is derivable).
if not (store.kb_dir / index_db.DB_FILENAME).exists():
Expand Down
113 changes: 112 additions & 1 deletion src/vouch/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from enum import StrEnum
from typing import Any, Literal

from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator


def utcnow() -> datetime:
Expand Down Expand Up @@ -456,3 +456,114 @@ class Capabilities(BaseModel):
default_factory=list,
description="OpenClaw context engines exposed (see openclaw.plugin.json)",
)


# --- config.yaml (issue #243) ---------------------------------------------
#
# `.vouch/config.yaml` used to be read as an untyped dict at each call site,
# with nested `.get()` guards and silent `except: {}` fallbacks. That spread
# the schema across the codebase and swallowed typos. These models are the
# single source of truth for a valid config; `KBStore.config` parses the file
# into a `Config` once, and malformed values fail fast with a per-field path.


class ConfigError(ValueError):
    """Raised when `.vouch/config.yaml` cannot be turned into a valid `Config`."""


class ReviewConfig(BaseModel):
    """Schema for the `review:` section of config.yaml (write-gate policy).

    Keys that are not declared below are retained on the instance instead of
    being rejected (`extra="allow"`).
    """

    model_config = ConfigDict(extra="allow")

    require_human_approval: bool = True
    # Threshold, in days, for pending proposals; negative values are rejected.
    expire_pending_after_days: int = Field(90, ge=0)
    # Only the value "trusted-agent" allows an agent to approve a proposal it
    # made itself. Every other value, including leaving this unset, keeps the
    # human-in-the-loop gate in place.
    approver_role: str | None = None


class RetrievalConfig(BaseModel):
    """Schema for the `retrieval:` section of config.yaml.

    Controls which backend kb.search / kb.context use. Keys that are not
    declared below are retained on the instance (`extra="allow"`).
    """

    model_config = ConfigDict(extra="allow")

    # None means the backend is not pinned. Resolution then falls through to
    # the legacy `backends` list and finally to "auto". Leaving this optional
    # is what allows a KB that only has the legacy list to keep resolving
    # through it. The starter config sets "auto" explicitly.
    backend: str | None = None
    default_limit: int = Field(10, ge=1)
    # Plural predecessor of `backend`, kept for KBs created before it existed.
    backends: list[str] | None = None

    def resolved_backend(self) -> str:
        """Return the backend that should actually be used.

        Precedence, matching the behaviour before #243:

        1. `backend`, when it is set to a recognised name;
        2. the first recognised name in the legacy `backends` list;
        3. "auto".

        Unrecognised names are skipped rather than reported.
        """
        recognised = ("auto", "embedding", "fts5", "substring")
        # An unset `backend` contributes None, which is never recognised, so
        # it is skipped exactly like an unknown name.
        candidates = [self.backend, *(self.backends or [])]
        return next((name for name in candidates if name in recognised), "auto")


class Config(BaseModel):
    """Typed representation of `.vouch/config.yaml`.

    `load()` builds an instance from a `yaml.safe_load` result, and
    `KBStore.config` caches one per store instance. Declared sections are
    validated. Undeclared top-level keys are retained (`extra="allow"`)
    rather than dropped, so `unknown_keys()` can report them as likely typos.
    `serve`, `volunteer` and `mcp` are declared as free-form mappings, which
    keeps them out of the unknown-key report without validating their
    contents here.
    """

    model_config = ConfigDict(extra="allow")

    version: int = 1
    review: ReviewConfig = Field(default_factory=ReviewConfig)
    retrieval: RetrievalConfig = Field(default_factory=RetrievalConfig)
    agents: dict[str, Any] = Field(default_factory=dict)
    page_kinds: dict[str, Any] = Field(default_factory=dict)
    serve: dict[str, Any] | None = None
    volunteer: dict[str, Any] | None = None
    mcp: dict[str, Any] | None = None

    @classmethod
    def load(cls, raw: Any) -> Config:
        """Build a Config from the object `yaml.safe_load` returned.

        Args:
            raw: Parsed YAML. `None` (an empty file) is treated like `{}`,
                so both produce a Config made entirely of defaults.

        Returns:
            The validated Config.

        Raises:
            ConfigError: If the top level is not a mapping, or if validation
                fails. For a validation failure the message carries the
                dotted path and message of the first reported error.
        """
        payload = {} if raw is None else raw
        if not isinstance(payload, dict):
            raise ConfigError(
                f"config.yaml must be a mapping at the top level, "
                f"got {type(payload).__name__}"
            )
        try:
            return cls.model_validate(payload)
        except ValidationError as exc:
            # Only the first error is surfaced. An empty location means the
            # error is not tied to a specific field.
            head = exc.errors()[0]
            path = ".".join(map(str, head["loc"])) or "<root>"
            raise ConfigError(f"config.yaml: {path}: {head['msg']}") from exc

    def unknown_keys(self) -> list[str]:
        """List keys that are not part of the declared schema, sorted.

        Covers the top level and the two validated nested sections. Nested
        keys are reported in dotted form (`review.expier_pending_after_days`),
        so a typo one level down is reported like a top-level one.
        """
        found = list(self.__pydantic_extra__ or {})
        sections = (("review", self.review), ("retrieval", self.retrieval))
        for prefix, section in sections:
            extras = section.__pydantic_extra__ or {}
            found.extend(f"{prefix}.{name}" for name in extras)
        return sorted(set(found))
39 changes: 8 additions & 31 deletions src/vouch/proposals.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from datetime import UTC, datetime, timedelta
from typing import Any

import yaml
from pydantic import ValidationError

from . import audit, index_db
Expand All @@ -35,7 +34,6 @@ class ProposalError(RuntimeError):

EXPIRE_REASON = "expired"
EXPIRE_ACTOR = "vouch-expire"
_DEFAULT_EXPIRE_PENDING_DAYS = 90


@dataclass
Expand Down Expand Up @@ -330,23 +328,14 @@ def _approval_block_reason(
"""
if proposal.status != ProposalStatus.PENDING:
return f"proposal {proposal.id} is {proposal.status.value}, not pending"
if approved_by == proposal.proposed_by:
cfg: dict[str, Any] = {}
try:
loaded = yaml.safe_load((store.kb_dir / "config.yaml").read_text())
if isinstance(loaded, dict):
cfg = loaded
except Exception:
pass
review_cfg = cfg.get("review")
approver_role = (
review_cfg.get("approver_role") if isinstance(review_cfg, dict) else None
if (
approved_by == proposal.proposed_by
and store.config.review.approver_role != "trusted-agent"
):
return (
f"forbidden_self_approval: {approved_by} cannot approve their own "
"proposal (set review.approver_role: trusted-agent in config.yaml to opt out)"
)
if approver_role != "trusted-agent":
return (
f"forbidden_self_approval: {approved_by} cannot approve their own "
"proposal (set review.approver_role: trusted-agent in config.yaml to opt out)"
)
return None


Expand Down Expand Up @@ -576,19 +565,7 @@ def expire_pending_after_days(store: KBStore, *, override: int | None = None) ->
"""Resolve GC threshold from config (`review.expire_pending_after_days`)."""
if override is not None:
return override
try:
loaded = yaml.safe_load(store.config_path.read_text())
except Exception:
return _DEFAULT_EXPIRE_PENDING_DAYS
if not isinstance(loaded, dict):
return _DEFAULT_EXPIRE_PENDING_DAYS
review_cfg = loaded.get("review")
if not isinstance(review_cfg, dict):
return _DEFAULT_EXPIRE_PENDING_DAYS
days = review_cfg.get("expire_pending_after_days")
if isinstance(days, int) and days >= 0:
return days
return _DEFAULT_EXPIRE_PENDING_DAYS
return store.config.review.expire_pending_after_days


def _utc(dt: datetime) -> datetime:
Expand Down
24 changes: 24 additions & 0 deletions src/vouch/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,16 @@
import re
import sqlite3
import stat
from functools import cached_property
from pathlib import Path
from typing import Any

import yaml

from .models import (
Claim,
Config,
ConfigError,
Entity,
Evidence,
Page,
Expand Down Expand Up @@ -231,6 +234,27 @@ def init(cls, root: Path) -> KBStore:
def config_path(self) -> Path:
return self.kb_dir / CONFIG_FILENAME

@cached_property
def config(self) -> Config:
    """Typed, validated view of config.yaml, parsed once. (#243)

    A missing file yields an all-defaults `Config` (a KB may predate
    `config.yaml`). Any other failure to read, decode, parse or validate
    the file raises `ConfigError`, so callers such as `doctor` only need
    to handle that one exception type.

    The value is cached per store instance on first successful access;
    construct a fresh `KBStore` to re-read after rewriting the file. A
    raised `ConfigError` is not cached, so a later access reads the file
    again.

    Raises:
        ConfigError: If the file exists but cannot be read, is not valid
            UTF-8, is not valid YAML, or fails `Config.load` validation.
    """
    try:
        text = self.config_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return Config()
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError. Without
        # listing it here, a non-UTF-8 file would escape as a raw decode
        # error instead of the documented ConfigError.
        raise ConfigError(f"cannot read {self.config_path}: {e}") from e
    try:
        raw = _yaml_load(text)
    except yaml.YAMLError as e:
        raise ConfigError(f"{self.config_path}: invalid YAML: {e}") from e
    return Config.load(raw)

Comment thread
coderabbitai[bot] marked this conversation as resolved.
def _yaml(self, sub: str, obj_id: str) -> Path:
return self.kb_dir / sub / f"{obj_id}.yaml"

Expand Down
39 changes: 38 additions & 1 deletion tests/test_health.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from vouch import health, index_db
from vouch.models import Claim, ClaimStatus, Proposal, ProposalKind, ProposalStatus
from vouch.storage import KBStore, _yaml_dump
from vouch.storage import KBStore, _starter_config, _yaml_dump


@pytest.fixture
Expand Down Expand Up @@ -333,3 +333,40 @@ def test_fsck_without_state_db_reports_info(store: KBStore) -> None:
assert "index_missing" in codes
# info finding alone shouldn't fail the report.
assert report.ok is True


# --- config diagnostics (#243) --------------------------------------------


def test_doctor_warns_on_unknown_config_key(store: KBStore) -> None:
    """A misspelt top-level key (`reveiw`) produces a warning, not a failure."""
    config_with_typo = {**_starter_config(), "reveiw": {}}
    store.config_path.write_text(_yaml_dump(config_with_typo))
    # Open a fresh store so the config is read from the rewritten file.
    reopened = KBStore(store.root)
    report = health.doctor(reopened)
    seen = {(f.severity, f.code) for f in report.findings}
    assert ("warning", "config_unknown_key") in seen
    # A warning on its own must leave the report ok.
    assert report.ok is True


def test_doctor_errors_on_invalid_config_value(store: KBStore) -> None:
    """A wrongly typed value (`default_limit: "ten"`) is reported as an error."""
    bad_config = {"retrieval": {"default_limit": "ten"}}
    store.config_path.write_text(_yaml_dump(bad_config))
    # Open a fresh store so the config is read from the rewritten file.
    reopened = KBStore(store.root)
    report = health.doctor(reopened)
    seen = {(f.severity, f.code) for f in report.findings}
    assert ("error", "config_invalid") in seen
    assert report.ok is False


def test_doctor_reports_malformed_yaml_without_crashing(store: KBStore) -> None:
    """Syntactically broken YAML becomes a `config_invalid` finding."""
    broken_yaml = "retrieval: {backend: [unterminated\n"
    store.config_path.write_text(broken_yaml)
    reopened = KBStore(store.root)
    # The call itself is part of the check: doctor must not raise here.
    report = health.doctor(reopened)
    seen = {(f.severity, f.code) for f in report.findings}
    assert ("error", "config_invalid") in seen
    assert report.ok is False


def test_doctor_surfaces_nested_unknown_config_key(store: KBStore) -> None:
    """A typo inside `review:` is reported with its dotted path."""
    config_with_typo = {**_starter_config(), "review": {"expier_after": 1}}
    store.config_path.write_text(_yaml_dump(config_with_typo))
    # Open a fresh store so the config is read from the rewritten file.
    reopened = KBStore(store.root)
    report = health.doctor(reopened)
    messages = [
        f.message for f in report.findings if f.code == "config_unknown_key"
    ]
    assert any("review.expier_after" in text for text in messages)
Loading