Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ All notable changes to vouch are documented here. Format follows
in sync as files change, and adds OpenClaw-style `size: XS` through
`size: XL` labels based on non-doc changed lines. Maintainers can also run
it manually to backfill labels on already-open PRs.
- `vouch consolidate` — retroactive batch cleanup of near-duplicate approved
claims. Clusters same-kind claims by embedding cosine similarity (reuses
`dedup.scan_all` vector machinery), picks a deterministic survivor per cluster
(highest confidence → most recent → lexicographic id), and emits supersede or
merge intents into the pending queue for human review. `--mode=supersede`
(default) proposes per-pair supersede relations; `--mode=merge` proposes a
single union claim per cluster. `--dry-run` reports clusters without writing
anything. Configurable via `consolidate.threshold`, `consolidate.mode`,
`consolidate.max_clusters` in `config.yaml` (#308).
- `vouch detect-themes` — cross-session pattern detection via deterministic
entity co-occurrence scoring. `kb.detect_themes` is read-only (returns
ranked clusters); `kb.propose_theme` routes synthesis pages through the
Expand Down
1 change: 1 addition & 0 deletions src/vouch/capabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
"kb.provenance_rebuild",
"kb.detect_themes",
"kb.propose_theme",
"kb.consolidate",
]


Expand Down
78 changes: 78 additions & 0 deletions src/vouch/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2020,6 +2020,84 @@ def detect_themes_cmd(
)


# --- consolidation --------------------------------------------------------


@cli.command(name="consolidate")
@click.option(
    "--threshold",
    default=None,
    type=float,
    help="Cosine similarity threshold (default 0.95 from config).",
)
@click.option(
    "--mode",
    default=None,
    type=click.Choice(["supersede", "merge"]),
    help="Consolidation mode (default: supersede).",
)
@click.option(
    "--max-clusters",
    default=None,
    type=int,
    help="Maximum clusters per pass.",
)
@click.option("--dry-run", is_flag=True, help="Report clusters without proposing.")
@click.option("--json", "as_json", is_flag=True, help="Emit JSON output.")
@click.option("--agent", default=None, help="Agent name for proposals.")
def consolidate_cmd(
    threshold: float | None,
    mode: str | None,
    max_clusters: int | None,
    dry_run: bool,
    as_json: bool,
    agent: str | None,
) -> None:
    """Cluster near-duplicate approved claims and propose supersede/merge intents."""
    # NOTE: the docstring above doubles as the click help text, so it is kept
    # short; the details live in these comments instead.
    #
    # Options left unset are forwarded as None to cons.consolidate unchanged.
    # Output is either a single JSON document (--json) or a numbered,
    # human-readable listing of clusters followed by a one-line summary.
    from . import consolidate as cons

    store = _load_store()
    # An explicit --agent takes precedence; otherwise use _whoami().
    proposer = agent or _whoami()
    outcome = cons.consolidate(
        store,
        threshold=threshold,
        mode=mode,
        max_clusters=max_clusters,
        dry_run=dry_run,
        actor=proposer,
    )

    if as_json:
        # Machine-readable path: one row per cluster, then the run metadata.
        cluster_rows = []
        for cluster in outcome.clusters:
            row = {"survivor": cluster.survivor_id}
            row["members"] = [
                {"claim_id": member.claim_id, "cosine": member.cosine}
                for member in cluster.members
            ]
            row["cosine_min"] = cluster.cosine_min
            row["cosine_max"] = cluster.cosine_max
            cluster_rows.append(row)
        _emit_json({
            "clusters": cluster_rows,
            "proposals": outcome.proposals,
            "config": outcome.config_used,
            "dry_run": outcome.dry_run,
        })
        return

    if not outcome.clusters:
        click.echo("no near-duplicate clusters found")
        return

    # Human-readable path: a header line per cluster, then one line per member.
    for rank, cluster in enumerate(outcome.clusters, start=1):
        header_parts = (
            f"{rank}. survivor={cluster.survivor_id}",
            f"members={len(cluster.members)}",
            f"cos=[{cluster.cosine_min:.4f}, {cluster.cosine_max:.4f}]",
        )
        click.echo(" ".join(header_parts))
        for member in cluster.members:
            click.echo(f" <- {member.claim_id} (cos={member.cosine})")

    # Closing summary: detected clusters on a dry run, filed proposals otherwise.
    summary = (
        f"\ndry run: {len(outcome.clusters)} cluster(s) detected"
        if dry_run
        else f"\n{len(outcome.proposals)} proposal(s) filed"
    )
    click.echo(summary)


# --- export / import ------------------------------------------------------


Expand Down
Loading
Loading