Graphify-Labs · TPAteeq · Jul 4, 2026 · Jul 4, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,7 @@ Full release notes with details on each version: [GitHub Releases](https://githu
 
 ## Unreleased
 
+- Feat: per-project `query` defaults for `--budget`/`--depth`, read from an optional `graphify-out/config.json` (#1654, thanks @Ns2384-star). Declare `{"query": {"default_budget": N, "default_depth": N}}` (flat `budget`/`depth` keys also accepted) to seed the query CLI's defaults before flag parsing, so a per-repo budget/depth becomes the norm without retyping it — and an explicit `--budget`/`--depth` flag still overrides. The `query` command also gains a `--depth` flag (it had none, forcing the old hardcoded depth 2), making the traversal depth tunable per invocation. A missing, unreadable, or malformed config — or an individual ill-typed or non-positive value in it — silently degrades to the built-in defaults (budget 2000, depth 2) so a bad file never crashes a query; a whole-valued float such as `4000.0` is accepted as an integer.
 - Fix: a malformed semantic chunk no longer crashes `extract` and discards every successful chunk (#1631, thanks @ssazy). When an LLM returned a well-formed object whose `edges` (or `nodes`/`hyperedges`) array carried a stray non-dict entry — a nested list where an edge object belongs — the AST+semantic merge and the semantic-cache write both called `.get()` per entry and raised `AttributeError: 'list' object has no attribute 'get'`. On a 34-chunk run where 33 succeeded, that meant no `graph.json` was written and the cache write failed too, so a re-run re-extracted everything. `_parse_llm_json` now sanitizes each fragment at the single parse chokepoint (keeping only dict entries and coercing a non-list value to `[]`), so the cache writer, the adaptive-retry merge, and the CLI merge are all protected in one place.
 - Fix: an unresolved bare npm import no longer aliases onto an unrelated same-named local file (#1638, thanks @EveX1). `import colors from "tailwindcss/colors"` in a `.tsx` file emitted an `imports_from` edge to the bare id `colors`, and build.py's pre-migration alias index (which registers every local file's bare stem) then remapped it onto an unrelated `backend/utils/colors.py` — a confident (`EXTRACTED`) cross-language phantom edge, and one per `.tsx` file sharing the import. In a real monorepo eight unrelated `.tsx` files all landed on a single Python module. Common package subpaths (`colors`, `utils`, `types`, `config`, `client`) collide this way constantly. The external-import fallback now namespaces its target with the `ref` prefix (the same J-4 convention used for tsconfig `extends`/`$ref` externals), so it can never collapse to a local file/symbol id; the ref-namespaced target has no node, so build drops it as an external reference — the correct outcome for a third-party import.
 - Fix: `graph.json` node/edge ordering is now stable run-to-run for document/semantic corpora (#1632, thanks @umeshpsatwe). With a parallel LLM backend, `extract_corpus_parallel` merged chunk results in completion order, so which network call happened to return first reordered the nodes and edges even when the model returned identical content — churning `graph.json` between otherwise-identical runs. Chunks are now merged in deterministic submission order after the pool drains (matching the serial path); the progress callback still fires in completion order so long local runs aren't silent. Note: the semantic content the LLM extracts is itself nondeterministic run-to-run — this fix removes the pipeline's own ordering churn, not the model's variance.

diff --git a/graphify/__main__.py b/graphify/__main__.py
@@ -22,6 +22,7 @@
 # Defined once in graphify.paths so the security/callflow path guards honour the
 # same override (#1423).
 from graphify.paths import GRAPHIFY_OUT as _GRAPHIFY_OUT
+from graphify.paths import query_config_defaults as _query_config_defaults
 
 
 @functools.lru_cache(maxsize=None)
@@ -2305,7 +2306,10 @@ def main() -> None:
         print("    --dfs                   use depth-first instead of breadth-first")
         print("    --context C             explicit edge-context filter (repeatable)")
         print("    --budget N              cap output at N tokens (default 2000)")
+        print("    --depth N               traversal depth (default 2)")
         print("    --graph <path>          path to graph.json (default graphify-out/graph.json)")
+        print("    (defaults for --budget/--depth can be set per-project in")
+        print("     graphify-out/config.json: {\"query\": {\"default_budget\": N, \"default_depth\": N}}; CLI flags override)")
         print("  affected \"X\"             reverse traversal to find nodes impacted by X")
         print("    --relation R            edge relation to traverse in reverse (repeatable)")
         print("    --depth N               reverse traversal depth (default 2)")
@@ -2842,7 +2846,7 @@ def main() -> None:
             sys.exit(1)
     elif cmd == "query":
         if len(sys.argv) < 3:
-            print("Usage: graphify query \"<question>\" [--dfs] [--context C] [--budget N] [--graph path]", file=sys.stderr)
+            print("Usage: graphify query \"<question>\" [--dfs] [--context C] [--budget N] [--depth N] [--graph path]", file=sys.stderr)
             sys.exit(1)
         from graphify.serve import _query_graph_text
         from graphify.security import sanitize_label
@@ -2851,7 +2855,15 @@ def main() -> None:
 
         question = sys.argv[2]
         use_dfs = "--dfs" in sys.argv
+        # Built-in defaults, optionally seeded from graphify-out/config.json;
+        # CLI flags below still override the config (#1654).
         budget = 2000
+        depth = 2
+        _cfg_defaults = _query_config_defaults()
+        if "budget" in _cfg_defaults:
+            budget = _cfg_defaults["budget"]
+        if "depth" in _cfg_defaults:
+            depth = _cfg_defaults["depth"]
         graph_path = _default_graph_path()
         context_filters: list[str] = []
         args = sys.argv[3:]
@@ -2871,6 +2883,26 @@ def main() -> None:
                     print(f"error: --budget must be an integer", file=sys.stderr)
                     sys.exit(1)
                 i += 1
+            elif args[i] == "--depth" and i + 1 < len(args):
+                try:
+                    depth = int(args[i + 1])
+                except ValueError:
+                    print(f"error: --depth must be an integer", file=sys.stderr)
+                    sys.exit(1)
+                if depth <= 0:
+                    print("error: --depth must be a positive integer", file=sys.stderr)
+                    sys.exit(1)
+                i += 2
+            elif args[i].startswith("--depth="):
+                try:
+                    depth = int(args[i].split("=", 1)[1])
+                except ValueError:
+                    print(f"error: --depth must be an integer", file=sys.stderr)
+                    sys.exit(1)
+                if depth <= 0:
+                    print("error: --depth must be a positive integer", file=sys.stderr)
+                    sys.exit(1)
+                i += 1
             elif args[i] == "--context" and i + 1 < len(args):
                 context_filters.append(args[i + 1])
                 i += 2
@@ -2922,7 +2954,7 @@ def main() -> None:
             G,
             question,
             mode=_mode,
-            depth=2,
+            depth=depth,
             token_budget=budget,
             context_filters=context_filters,
         )
@@ -2932,7 +2964,7 @@ def main() -> None:
             corpus=str(gp),
             result=_result,
             mode=_mode,
-            depth=2,
+            depth=depth,
             token_budget=budget,
             duration_ms=(_time.perf_counter() - _t0) * 1000,
         )

diff --git a/graphify/paths.py b/graphify/paths.py
@@ -16,6 +16,7 @@
 
 from __future__ import annotations
 
+import json
 import os
 import re
 from pathlib import Path, PurePosixPath
@@ -232,3 +233,61 @@ def default_graph_json() -> str:
     the path is passed explicitly (#1423).
     """
     return str(out_path("graph.json"))
+
+
+def query_config_defaults(config_path: Path | None = None) -> dict[str, int]:
+    """Per-project ``query`` defaults read from ``graphify-out/config.json``.
+
+    Returns any ``budget``/``depth`` overrides the sidecar declares, as a dict
+    that may contain either, both, or neither key. The values seed the CLI's
+    built-in defaults before flag parsing, so a CLI flag still wins (#1654).
+
+    The file may nest the settings under a ``"query"`` object (the documented
+    shape) or place them at the top level, and either the
+    ``default_budget``/``default_depth`` or bare ``budget``/``depth`` spelling
+    is accepted::
+
+        {"query": {"default_budget": 4000, "default_depth": 3}}
+
+    A missing or unreadable file, malformed or too deeply nested JSON, or a
+    wrong top-level type yields an empty dict, and a non-positive or
+    non-integer value is skipped, so a bad config never crashes a query. A
+    whole-valued float (``4000.0``) is coerced to ``int``; a fractional float
+    (``4000.5``), bool, string, or null is rejected. When both a nested and a
+    flat value are present the nested ``query`` object wins.
+    """
+    defaults: dict[str, int] = {}
+    target = config_path if config_path is not None else out_path("config.json")
+    try:
+        raw = json.loads(Path(target).read_text(encoding="utf-8"))
+    except (OSError, ValueError, RecursionError):
+        # RecursionError is not a ValueError: very deep nesting overflows the decoder.
+        return defaults
+    if not isinstance(raw, dict):
+        return defaults
+    section = raw.get("query")
+    if not isinstance(section, dict):
+        section = {}
+
+    def _pick(*keys: str) -> int | None:
+        # First valid value wins: the nested section is searched before the top
+        # level, and within each the keys are tried in the order given.
+        for source in (section, raw):
+            for key in keys:
+                value = source.get(key)
+                # bool is an int subclass; reject it up front so True/False can
+                # never read as 1/0.
+                if isinstance(value, bool):
+                    continue
+                if isinstance(value, int) and value > 0:
+                    return value
+                # Whole-valued float (4000.0 -> 4000) coerces; fractional does not.
+                if isinstance(value, float) and value.is_integer() and value > 0:
+                    return int(value)
+        return None
+
+    for name in ("budget", "depth"):
+        picked = _pick(f"default_{name}", name)
+        if picked is not None:
+            defaults[name] = picked
+    return defaults
diff --git a/tests/test_paths.py b/tests/test_paths.py
@@ -2,11 +2,14 @@
 
 from __future__ import annotations
 
+import json
+
 import pytest
 
 from graphify.paths import (
     _is_test_path,
     disambiguate_ambiguous_candidates,
+    query_config_defaults,
 )
 
 
@@ -97,3 +100,104 @@ def test_disambiguate_path_proximity_same_dir() -> None:
         "pkg/a/caller.py",
     )
     assert winner == "near"
+
+
+# --- query_config_defaults (per-project config.json, #1654) -----------------
+
+
+def _write_config(tmp_path, data) -> None:
+    """Serialize *data* as JSON into ``tmp_path / "config.json"``."""
+    (tmp_path / "config.json").write_text(json.dumps(data), encoding="utf-8")
+
+
+def test_query_config_defaults_nested_query_object(tmp_path) -> None:
+    _write_config(tmp_path, {"query": {"default_budget": 4000, "default_depth": 3}})
+    assert query_config_defaults(tmp_path / "config.json") == {"budget": 4000, "depth": 3}
+
+
+def test_query_config_defaults_flat_keys(tmp_path) -> None:
+    _write_config(tmp_path, {"budget": 1234, "depth": 5})
+    assert query_config_defaults(tmp_path / "config.json") == {"budget": 1234, "depth": 5}
+
+
+def test_query_config_defaults_partial(tmp_path) -> None:
+    _write_config(tmp_path, {"query": {"default_depth": 4}})
+    assert query_config_defaults(tmp_path / "config.json") == {"depth": 4}
+
+
+def test_query_config_defaults_nested_wins_over_flat(tmp_path) -> None:
+    _write_config(tmp_path, {"query": {"default_budget": 4000}, "budget": 9999})
+    assert query_config_defaults(tmp_path / "config.json") == {"budget": 4000}
+
+
+def test_query_config_defaults_missing_file(tmp_path) -> None:
+    assert query_config_defaults(tmp_path / "does-not-exist.json") == {}
+
+
+def test_query_config_defaults_malformed_json(tmp_path) -> None:
+    (tmp_path / "config.json").write_text("{not valid json", encoding="utf-8")
+    assert query_config_defaults(tmp_path / "config.json") == {}
+
+
+def test_query_config_defaults_unreadable_path(tmp_path) -> None:
+    # A directory where the file should be raises OSError on read; it degrades.
+    assert query_config_defaults(tmp_path) == {}
+
+
+def test_query_config_defaults_invalid_utf8(tmp_path) -> None:
+    # Undecodable bytes raise UnicodeDecodeError (a ValueError); it degrades.
+    (tmp_path / "config.json").write_bytes(b"\xff\xfe{")
+    assert query_config_defaults(tmp_path / "config.json") == {}
+
+
+def test_query_config_defaults_rejects_bad_values(tmp_path) -> None:
+    # non-int, bool, zero, and negative values are all ignored.
+    _write_config(
+        tmp_path,
+        {"query": {"default_budget": "lots", "default_depth": -1}, "budget": True, "depth": 0},
+    )
+    assert query_config_defaults(tmp_path / "config.json") == {}
+
+
+def test_query_config_defaults_null_values_rejected(tmp_path) -> None:
+    # JSON null is neither int nor float, so both keys are skipped.
+    _write_config(tmp_path, {"query": {"default_budget": None, "default_depth": None}})
+    assert query_config_defaults(tmp_path / "config.json") == {}
+
+
+def test_query_config_defaults_non_dict_top_level(tmp_path) -> None:
+    (tmp_path / "config.json").write_text(json.dumps([1, 2, 3]), encoding="utf-8")
+    assert query_config_defaults(tmp_path / "config.json") == {}
+
+
+def test_query_config_defaults_non_dict_query_section(tmp_path) -> None:
+    # A non-object "query" value is ignored; flat top-level keys still apply.
+    _write_config(tmp_path, {"query": "oops", "budget": 500, "depth": 3})
+    assert query_config_defaults(tmp_path / "config.json") == {"budget": 500, "depth": 3}
+
+
+def test_query_config_defaults_whole_valued_float_accepted(tmp_path) -> None:
+    # A hand-written config often carries floats; a whole-valued one coerces.
+    _write_config(tmp_path, {"query": {"default_budget": 4000.0, "default_depth": 3.0}})
+    assert query_config_defaults(tmp_path / "config.json") == {"budget": 4000, "depth": 3}
+
+
+def test_query_config_defaults_fractional_float_rejected(tmp_path) -> None:
+    # A fractional float can't be an integer depth/budget, so it degrades.
+    _write_config(tmp_path, {"query": {"default_budget": 4000.5, "default_depth": 2.5}})
+    assert query_config_defaults(tmp_path / "config.json") == {}
+
+
+def test_query_config_defaults_absolute_graphify_out(tmp_path, monkeypatch) -> None:
+    # With no explicit path, the reader resolves via out_path(), which honours
+    # an absolute GRAPHIFY_OUT override (#1423 / #686).
+    import graphify.paths as paths
+
+    out_dir = tmp_path / "shared" / "graphify-out"
+    out_dir.mkdir(parents=True)
+    (out_dir / "config.json").write_text(
+        json.dumps({"query": {"default_budget": 7000, "default_depth": 4}}),
+        encoding="utf-8",
+    )
+    monkeypatch.setattr(paths, "GRAPHIFY_OUT", str(out_dir))
+    assert query_config_defaults() == {"budget": 7000, "depth": 4}