@@ -16,6 +16,8 @@ package doltdb_test
1616
1717import (
1818 "context"
19+ "errors"
20+ "os"
1921 "testing"
2022
2123 "github.com/dolthub/go-mysql-server/sql"
@@ -28,7 +30,13 @@ import (
2830 "github.com/dolthub/dolt/go/libraries/doltcore/env"
2931 "github.com/dolthub/dolt/go/libraries/doltcore/ref"
3032 "github.com/dolthub/dolt/go/libraries/doltcore/sqle"
33+ "github.com/dolthub/dolt/go/libraries/utils/filesys"
3134 "github.com/dolthub/dolt/go/store/hash"
35+ "github.com/dolthub/dolt/go/store/nbs"
36+ "github.com/dolthub/dolt/go/store/prolly"
37+ "github.com/dolthub/dolt/go/store/prolly/tree"
38+ "github.com/dolthub/dolt/go/store/types"
39+ "github.com/dolthub/dolt/go/store/val"
3240)
3341
3442func TestGarbageCollection (t * testing.T ) {
@@ -40,6 +48,8 @@ func TestGarbageCollection(t *testing.T) {
4048 testGarbageCollection (t , gct )
4149 })
4250 }
51+
52+ t .Run ("HasCacheDataCorruption" , testGarbageCollectionHasCacheDataCorruptionBugFix )
4353}
4454
4555type stage struct {
@@ -140,3 +150,118 @@ func testGarbageCollection(t *testing.T, test gcTest) {
140150 require .NoError (t , err )
141151 assert .Equal (t , test .expected , actual )
142152}
153+
154+ // In September 2023, we found a failure to handle the `hasCache` in
155+ // `*NomsBlockStore` appropriately while cleaning up a memtable into which
156+ // dangling references had been written could result in writing chunks to a
157+ // database which referenced non-existant chunks.
158+ //
159+ // The general pattern was to get new chunk addresses into the hasCache, but
160+ // not written to the store, and then to have an incoming chunk add a refenece
161+ // to missing chunk. At that time, we would clear the memtable, since it had
162+ // invalid chunks in it, but we wouldn't purge the hasCache. Later writes which
163+ // attempted to reference the chunks which had made it into the hasCache would
164+ // succeed.
165+ //
166+ // One such concrete pattern for doing this is implemented below. We do:
167+ //
168+ // 1) Put a new chunk to the database -- C1.
169+ //
170+ // 2) Run a GC.
171+ //
172+ // 3) Put a new chunk to the database -- C2.
173+ //
174+ // 4) Call NBS.Commit() with a stale last hash.Hash. This causes us to cache C2
175+ // as present in the store, but it does not get written to disk, because the
176+ // optimistic concurrency control on the value of the current root hash fails.
177+ //
178+ // 5) Put a chunk referencing C1 to the database -- R1.
179+ //
180+ // 5) Call NBS.Commit(). This causes ErrDanglingRef. C1 was written before the
181+ // GC and is no longer in the store. C2 is also cleared from the pending write
182+ // set.
183+ //
184+ // 6) Put a chunk referencing C2 to the database -- R2.
185+ //
186+ // 7) Call NBS.Commit(). This should fail, since R2 references C2 and C2 is not
187+ // in the store. However, C2 is in the cache as a result of step #4, and so
188+ // this does not fail. R2 gets written to disk with a dangling reference to C2.
189+ func testGarbageCollectionHasCacheDataCorruptionBugFix (t * testing.T ) {
190+ ctx := context .Background ()
191+
192+ d , err := os .MkdirTemp (t .TempDir (), "hascachetest-" )
193+ require .NoError (t , err )
194+
195+ ddb , err := doltdb .LoadDoltDB (ctx , types .Format_DOLT , "file://" + d , filesys .LocalFS )
196+ require .NoError (t , err )
197+ defer ddb .Close ()
198+
199+ err = ddb .WriteEmptyRepo (ctx , "main" , "Aaron Son" , "aaron@dolthub.com" )
200+ require .NoError (t , err )
201+
202+ root , err := ddb .NomsRoot (ctx )
203+ require .NoError (t , err )
204+
205+ ns := ddb .NodeStore ()
206+
207+ c1 := newIntMap (t , ctx , ns , 1 , 1 )
208+ _ , err = ns .Write (ctx , c1 .Node ())
209+ require .NoError (t , err )
210+
211+ err = ddb .GC (ctx , nil )
212+ require .NoError (t , err )
213+
214+ c2 := newIntMap (t , ctx , ns , 2 , 2 )
215+ _ , err = ns .Write (ctx , c2 .Node ())
216+ require .NoError (t , err )
217+
218+ success , err := ddb .CommitRoot (ctx , c2 .HashOf (), c2 .HashOf ())
219+ require .NoError (t , err )
220+ require .False (t , success , "committing the root with a last hash which does not match the current root must fail" )
221+
222+ r1 := newAddrMap (t , ctx , ns , "r1" , c1 .HashOf ())
223+ _ , err = ns .Write (ctx , r1 .Node ())
224+ require .NoError (t , err )
225+
226+ success , err = ddb .CommitRoot (ctx , root , root )
227+ require .True (t , errors .Is (err , nbs .ErrDanglingRef ), "committing a reference to just-collected c1 must fail with ErrDanglingRef" )
228+
229+ r2 := newAddrMap (t , ctx , ns , "r2" , c2 .HashOf ())
230+ _ , err = ns .Write (ctx , r2 .Node ())
231+ require .NoError (t , err )
232+
233+ success , err = ddb .CommitRoot (ctx , root , root )
234+ require .True (t , errors .Is (err , nbs .ErrDanglingRef ), "committing a reference to c2, which was erased with the ErrDanglingRef above, must also fail with ErrDanglingRef" )
235+ }
236+
237+ func newIntMap (t * testing.T , ctx context.Context , ns tree.NodeStore , k , v int8 ) prolly.Map {
238+ desc := val .NewTupleDescriptor (val.Type {
239+ Enc : val .Int8Enc ,
240+ Nullable : false ,
241+ })
242+
243+ tb := val .NewTupleBuilder (desc )
244+ tb .PutInt8 (0 , k )
245+ keyTuple := tb .Build (ns .Pool ())
246+
247+ tb .PutInt8 (0 , v )
248+ valueTuple := tb .Build (ns .Pool ())
249+
250+ m , err := prolly .NewMapFromTuples (ctx , ns , desc , desc , keyTuple , valueTuple )
251+ require .NoError (t , err )
252+ return m
253+ }
254+
255+ func newAddrMap (t * testing.T , ctx context.Context , ns tree.NodeStore , key string , h hash.Hash ) prolly.AddressMap {
256+ m , err := prolly .NewEmptyAddressMap (ns )
257+ require .NoError (t , err )
258+
259+ editor := m .Editor ()
260+ err = editor .Add (ctx , key , h )
261+ require .NoError (t , err )
262+
263+ m , err = editor .Flush (ctx )
264+ require .NoError (t , err )
265+
266+ return m
267+ }
0 commit comments