Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions internal/graph/meta_merge.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package graph

import "reflect"

// metaDelta returns the subset of kv whose value differs from (or is
// absent in) existing — i.e. the keys a merge would actually change.
//
// It is a pure calculation: no locks, no I/O, no mutation of either
// argument. Equality is reflect.DeepEqual so the comparison is correct
// for the value shapes that ride on Node.Meta — scalars (string, the
// JSON-decoded float64 for numbers), []string / []any tag slices, and
// nested map[string]any blobs — where a == comparison would either
// panic (slices/maps are not comparable) or compare identity rather
// than contents.
//
// A nil existing is treated as "no keys present", so every entry in kv
// is returned. An empty/nil kv returns nil (nothing to merge). The
// returned map is freshly allocated and shares no structure with kv's
// values beyond the value pointers themselves (callers store them as-is
// — these are caller-owned, JSON-decoded values).
//
// Idempotency falls straight out of this: once a key has been written,
// a second metaDelta over the now-updated map omits it, so the second
// MergeNodeMeta reports changed=false.
func metaDelta(existing, kv map[string]any) map[string]any {
if len(kv) == 0 {
return nil
}
var delta map[string]any
for k, v := range kv {
if existing != nil {
if cur, ok := existing[k]; ok && reflect.DeepEqual(cur, v) {
// Key already present with an equal value — nothing to do.
continue
}
}
if delta == nil {
delta = make(map[string]any, len(kv))
}
delta[k] = v
}
return delta
}

// MergeNodeMeta is the in-memory *Graph implementation of the
// Store.MergeNodeMeta contract: additive, idempotent, shard-locked
// merge of kv into the target node's Meta. See the Store interface doc
// for the full (changed, found) semantics.
//
// Locking: the node lives in exactly one shard (shardFor(id)), so a
// single shard write lock is sufficient and necessary — necessary
// because GetNode reads, and a concurrent AddNode/AddEdge writes, the
// same sharded maps; sufficient because the merge touches only this one
// node's Meta map and no cross-shard index. We reuse lockTwoWrite(id,
// id), which collapses to a single Lock on that shard (its a==b branch),
// rather than hand-rolling shardFor(id).mu.Lock() so this method stays
// in lock-step with the one sanctioned shard-locking helper the rest of
// the mutators use.
//
// The compare-before-write (metaDelta) runs inside the lock: it must
// observe the same Meta it is about to mutate, and computing it outside
// the lock would race a concurrent merge to the same node.
func (g *Graph) MergeNodeMeta(id string, kv map[string]any) (changed bool, found bool) {
unlock := g.lockTwoWrite(id, id)
defer unlock()

s := g.shardFor(id)
n := s.nodes[id]
if n == nil {
// Unknown id — recorded as a not-found skip by the caller; the
// batch continues. No structural state was touched.
return false, false
}

delta := metaDelta(n.Meta, kv)
if len(delta) == 0 {
// Found, but every provided key already equals the stored value:
// the merge is a no-op, so the call is idempotent.
return false, true
}

if n.Meta == nil {
n.Meta = make(map[string]any, len(delta))
}
for k, v := range delta {
n.Meta[k] = v
}
return true, true
}
199 changes: 199 additions & 0 deletions internal/graph/meta_merge_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
package graph

import (
"fmt"
"sync"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestMetaDelta_OnlyDifferingKeys proves the pure delta calculation:
// metaDelta returns exactly the keys whose value is absent or differs
// from the existing map, across the value shapes that ride on Node.Meta
// (string, []any tag slice, float64, nested map) using reflect.DeepEqual
// so slices/maps compare by contents, not identity. (AC1, pure half.)
func TestMetaDelta_OnlyDifferingKeys(t *testing.T) {
existing := map[string]any{
"ext_summary": "old summary",
"ext_tags": []any{"a", "b"},
"ext_complexity": 0.5,
"structural": "untouched",
}
kv := map[string]any{
"ext_summary": "new summary", // changed
"ext_tags": []any{"a", "b"}, // equal (deep) -> omitted
"ext_complexity": 0.5, // equal -> omitted
"ext_domain": "billing", // new key
"nested": map[string]any{"x": 1}, // new nested map
}

delta := metaDelta(existing, kv)

// Telemetry before asserts (LDD spirit).
t.Logf("[delta] keys=%v", keysOf(delta))

require.Len(t, delta, 3, "only changed/new keys should appear")
assert.Equal(t, "new summary", delta["ext_summary"])
assert.Equal(t, "billing", delta["ext_domain"])
assert.Equal(t, map[string]any{"x": 1}, delta["nested"])
_, hasTags := delta["ext_tags"]
assert.False(t, hasTags, "deep-equal tag slice must be omitted")
_, hasComplexity := delta["ext_complexity"]
assert.False(t, hasComplexity, "equal complexity must be omitted")
}

// TestMetaDelta_NilAndEmpty covers the boundary inputs: a nil existing
// map returns every kv entry; an empty/nil kv returns nil.
func TestMetaDelta_NilAndEmpty(t *testing.T) {
full := metaDelta(nil, map[string]any{"ext_summary": "s", "ext_domain": "d"})
require.Len(t, full, 2, "nil existing -> every kv key is a delta")

assert.Nil(t, metaDelta(map[string]any{"k": "v"}, nil), "nil kv -> nil delta")
assert.Nil(t, metaDelta(nil, map[string]any{}), "empty kv -> nil delta")
}

// TestMergeNodeMeta_MergeOverwriteIdempotent exercises the full
// (changed, found) contract on the in-memory store: new keys merge,
// changed keys overwrite, and a re-apply of identical input is a no-op
// (idempotent). (AC1.)
func TestMergeNodeMeta_MergeOverwriteIdempotent(t *testing.T) {
g := New()
g.AddNode(&Node{ID: "repo/a.go::Foo", Name: "Foo", Kind: KindFunction, FilePath: "repo/a.go"})

// First merge: two new keys.
changed, found := g.MergeNodeMeta("repo/a.go::Foo", map[string]any{
"ext_summary": "computes foo",
"ext_domain": "core",
})
t.Logf("[merge#1] changed=%v found=%v", changed, found)
require.True(t, found)
require.True(t, changed, "first merge writes new keys")
assert.Equal(t, "computes foo", g.GetNode("repo/a.go::Foo").Meta["ext_summary"])

// Idempotent re-apply: identical input -> no change.
changed, found = g.MergeNodeMeta("repo/a.go::Foo", map[string]any{
"ext_summary": "computes foo",
"ext_domain": "core",
})
t.Logf("[merge#2 idempotent] changed=%v found=%v", changed, found)
require.True(t, found)
assert.False(t, changed, "re-applying identical input must be a no-op")

// Overwrite: one key changes -> changed=true, only that key updated.
changed, found = g.MergeNodeMeta("repo/a.go::Foo", map[string]any{
"ext_summary": "computes foo, revised",
"ext_domain": "core", // unchanged
})
t.Logf("[merge#3 overwrite] changed=%v found=%v", changed, found)
require.True(t, found)
require.True(t, changed, "a differing value flips changed")
assert.Equal(t, "computes foo, revised", g.GetNode("repo/a.go::Foo").Meta["ext_summary"])
assert.Equal(t, "core", g.GetNode("repo/a.go::Foo").Meta["ext_domain"])
}

// TestMergeNodeMeta_UnknownIDNoPanic proves an unknown id reports
// found=false (changed=false) and never panics. (AC1, MUST NOT: unknown
// ids must not crash the batch.)
func TestMergeNodeMeta_UnknownIDNoPanic(t *testing.T) {
g := New()
changed, found := g.MergeNodeMeta("does/not/exist::Nope", map[string]any{"ext_summary": "x"})
t.Logf("[unknown] changed=%v found=%v", changed, found)
assert.False(t, found, "unknown id -> found=false")
assert.False(t, changed, "unknown id -> changed=false")
}

// TestMergeNodeMeta_LazyInitsNilMeta proves a node created without a
// Meta map gets one lazily on first merge — no nil-map write panic.
func TestMergeNodeMeta_LazyInitsNilMeta(t *testing.T) {
g := New()
g.AddNode(&Node{ID: "repo/b.go::Bar", Name: "Bar", Kind: KindFunction, FilePath: "repo/b.go"})
require.Nil(t, g.GetNode("repo/b.go::Bar").Meta, "precondition: nil Meta")

changed, found := g.MergeNodeMeta("repo/b.go::Bar", map[string]any{"ext_summary": "lazy"})
require.True(t, found)
require.True(t, changed)
assert.Equal(t, "lazy", g.GetNode("repo/b.go::Bar").Meta["ext_summary"])
}

// TestMergeNodeMeta_StructuralUntouched proves the merge never alters
// structural node fields. (AC3 / MUST NOT.)
func TestMergeNodeMeta_StructuralUntouched(t *testing.T) {
g := New()
g.AddNode(&Node{ID: "repo/c.go::Baz", Name: "Baz", Kind: KindFunction, FilePath: "repo/c.go", StartLine: 10, EndLine: 20})

g.MergeNodeMeta("repo/c.go::Baz", map[string]any{"ext_summary": "s", "ext_tags": []any{"t"}})

n := g.GetNode("repo/c.go::Baz")
assert.Equal(t, "Baz", n.Name)
assert.Equal(t, KindFunction, n.Kind)
assert.Equal(t, "repo/c.go", n.FilePath)
assert.Equal(t, 10, n.StartLine)
assert.Equal(t, 20, n.EndLine)
assert.Equal(t, "s", n.Meta["ext_summary"])
}

// TestMergeNodeMeta_ConcurrentNoRace fans out many MergeNodeMeta and
// AddEdge goroutines against a shared graph. Run under `-race` it proves
// the merge takes the shard write lock correctly: no concurrent-map
// panic, no data race. This is the first mutating tool, so this is the
// load-bearing safety test. (AC2.)
func TestMergeNodeMeta_ConcurrentNoRace(t *testing.T) {
g := New()
const nNodes = 32
for i := 0; i < nNodes; i++ {
g.AddNode(&Node{
ID: fmt.Sprintf("repo/x.go::N%d", i),
Name: fmt.Sprintf("N%d", i),
Kind: KindFunction,
FilePath: "repo/x.go",
})
}

const workers = 16
const iters = 200
var wg sync.WaitGroup
wg.Add(workers)
for w := 0; w < workers; w++ {
go func(w int) {
defer wg.Done()
for it := 0; it < iters; it++ {
id := fmt.Sprintf("repo/x.go::N%d", (w+it)%nNodes)
// Concurrent Meta merge (shard-locked) ...
g.MergeNodeMeta(id, map[string]any{
"ext_summary": fmt.Sprintf("w%d-it%d", w, it),
"ext_tags": []any{"t", fmt.Sprintf("%d", it)},
})
// ... interleaved with idempotent edge adds (also
// shard-locked) to maximise cross-shard lock contention.
other := fmt.Sprintf("repo/x.go::N%d", (w+it+1)%nNodes)
g.AddEdge(&Edge{
From: id,
To: other,
Kind: EdgeSemanticallyRelated,
Origin: "ext_annotated",
})
}
}(w)
}
wg.Wait()

t.Logf("[concurrency] nodes=%d edges=%d", g.NodeCount(), g.EdgeCount())
// Every node should carry a ext_summary from the last writer that
// touched it; the exact value is non-deterministic, presence is not.
for i := 0; i < nNodes; i++ {
n := g.GetNode(fmt.Sprintf("repo/x.go::N%d", i))
require.NotNil(t, n)
assert.Contains(t, n.Meta, "ext_summary")
}
}

// keysOf is a tiny test helper for stable delta-key logging.
func keysOf(m map[string]any) []string {
out := make([]string, 0, len(m))
for k := range m {
out = append(out, k)
}
return out
}
32 changes: 32 additions & 0 deletions internal/graph/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,38 @@ type Store interface {
AddNode(n *Node)
AddBatch(nodes []*Node, edges []*Edge)
AddEdge(e *Edge)
// MergeNodeMeta merges the key/value pairs in kv into the Meta map
// of the node identified by id, under the node's shard write lock.
// It is the ONLY sanctioned way for a caller holding a Store (rather
// than a concrete *Graph) to mutate Node.Meta: the shard lock the
// in-memory backend takes internally is private, so a handler that
// reached into GetNode(id).Meta directly would race the sharded map
// and panic on a live daemon.
//
// Semantics:
//
// - found is false (and changed is false) when no node carries id —
// callers batching many ids treat a miss as a recorded skip, not
// a failure.
// - changed is false when every key in kv already maps to an equal
// value (deep-equal compare via metaDelta) — the merge is a no-op
// and the call is idempotent. Re-applying the same kv twice yields
// changed=false on the second call.
// - changed is true when at least one key was added or its value
// differed; only the differing keys are written, and a nil Meta is
// lazily initialised before the first write.
//
// The method is deliberately scoped to additive Meta merges — it
// never deletes keys and never touches structural fields (ID, Kind,
// Name, FilePath, line ranges). Callers writing external/agent-produced
// annotations namespace their keys (e.g. ext_summary, ext_tags, …) so a
// merge can never shadow an indexer-owned Meta key by accident.
//
// Durability: the merge mutates in-memory state for the daemon
// session and rides the gob+gzip shutdown snapshot; there is no
// per-call persist. Cross-restart durability of annotations is an
// out-of-scope follow-up for this v1.
MergeNodeMeta(id string, kv map[string]any) (changed bool, found bool)
SetEdgeProvenance(e *Edge, newOrigin string) bool
ReindexEdge(e *Edge, oldTo string)
// Batched siblings of the per-edge mutators. Same semantics, but
Expand Down
Loading
Loading