@@ -29,6 +29,7 @@ import (
2929 "github.com/cockroachdb/cockroach/pkg/storage"
3030 "github.com/cockroachdb/cockroach/pkg/storage/engine"
3131 "github.com/cockroachdb/cockroach/pkg/storage/stateloader"
32+ "github.com/cockroachdb/cockroach/pkg/storage/txnwait"
3233 "github.com/cockroachdb/cockroach/pkg/testutils"
3334 "github.com/cockroachdb/cockroach/pkg/util/hlc"
3435 "github.com/cockroachdb/cockroach/pkg/util/leaktest"
@@ -375,6 +376,38 @@ func TestStoreRangeMergeStats(t *testing.T) {
375376 writeRandomDataToRange (t , store , aDesc .RangeID , []byte ("aaa" ))
376377 writeRandomDataToRange (t , store , bDesc .RangeID , []byte ("ccc" ))
377378
379+ // Litter some abort span records. txn1 will leave a record on the LHS, txn2
380+ // will leave a record on the RHS, and txn3 will leave a record on both. This
381+ // tests whether the merge code properly accounts for merging abort span
382+ // records for the same transaction.
383+ txn1 := client .NewTxn (store .DB (), 0 /* gatewayNodeID */ , client .RootTxn )
384+ if err := txn1 .Put (ctx , "a-txn1" , "val" ); err != nil {
385+ t .Fatal (err )
386+ }
387+ txn2 := client .NewTxn (store .DB (), 0 /* gatewayNodeID */ , client .RootTxn )
388+ if err := txn2 .Put (ctx , "c-txn2" , "val" ); err != nil {
389+ t .Fatal (err )
390+ }
391+ txn3 := client .NewTxn (store .DB (), 0 /* gatewayNodeID */ , client .RootTxn )
392+ if err := txn3 .Put (ctx , "a-txn3" , "val" ); err != nil {
393+ t .Fatal (err )
394+ }
395+ if err := txn3 .Put (ctx , "c-txn3" , "val" ); err != nil {
396+ t .Fatal (err )
397+ }
398+ hiPriTxn := client .NewTxn (store .DB (), 0 /* gatewayNodeID */ , client .RootTxn )
399+ hiPriTxn .InternalSetPriority (roachpb .MaxTxnPriority )
400+ for _ , key := range []string {"a-txn1" , "c-txn2" , "a-txn3" , "c-txn3" } {
401+ if err := hiPriTxn .Put (ctx , key , "val" ); err != nil {
402+ t .Fatal (err )
403+ }
404+ }
405+ if err := hiPriTxn .Commit (ctx ); err != nil {
406+ t .Fatal (err )
407+ }
408+ // Leave txn1-txn3 open so that their abort span records exist during the
409+ // merge below.
410+
378411 // Get the range stats for both ranges now that we have data.
379412 snap := store .Engine ().NewSnapshot ()
380413 defer snap .Close ()
@@ -418,6 +451,180 @@ func TestStoreRangeMergeStats(t *testing.T) {
418451 }
419452}
420453
454+ func TestStoreRangeMergeInFlightTxns (t * testing.T ) {
455+ defer leaktest .AfterTest (t )
456+
457+ ctx := context .Background ()
458+ sc := storage .TestStoreConfig (nil )
459+ sc .TestingKnobs .DisableReplicateQueue = true
460+ mtc := & multiTestContext {storeConfig : & sc }
461+ mtc .Start (t , 2 )
462+ defer mtc .Stop ()
463+ store := mtc .stores [0 ]
464+
465+ // Create two adjacent ranges. The left-hand range has exactly one replica,
466+ // on the first store, and the right-hand range has exactly one replica,
467+ // on the second store
468+ var lhsDesc , rhsDesc * roachpb.RangeDescriptor
469+ setupReplicas := func () {
470+ var err * roachpb.Error
471+ lhsDesc , rhsDesc , err = createSplitRanges (store )
472+ if err != nil {
473+ t .Fatal (err )
474+ }
475+ mtc .replicateRange (rhsDesc .RangeID , 1 )
476+ mtc .transferLease (ctx , rhsDesc .RangeID , 0 , 1 )
477+ mtc .unreplicateRange (rhsDesc .RangeID , 0 )
478+ }
479+
480+ // Verify that a transaction can span a merge.
481+ t .Run ("valid" , func (t * testing.T ) {
482+ setupReplicas ()
483+ lhsKey , rhsKey := roachpb .Key ("aa" ), roachpb .Key ("cc" )
484+
485+ txn := client .NewTxn (store .DB (), 0 /* gatewayNodeID */ , client .RootTxn )
486+ // Put the key on the RHS side first so ownership of the transaction record
487+ // will need to transfer to the LHS range during the merge.
488+ if err := txn .Put (ctx , rhsKey , t .Name ()); err != nil {
489+ t .Fatal (err )
490+ }
491+ if err := txn .Put (ctx , lhsKey , t .Name ()); err != nil {
492+ t .Fatal (err )
493+ }
494+ args := adminMergeArgs (lhsDesc .StartKey .AsRawKey ())
495+ if _ , err := client .SendWrapped (ctx , store .TestSender (), args ); err != nil {
496+ t .Fatal (err )
497+ }
498+ if err := txn .Commit (ctx ); err != nil {
499+ t .Fatal (err )
500+ }
501+
502+ for _ , key := range []roachpb.Key {lhsKey , rhsKey } {
503+ kv , err := store .DB ().Get (ctx , key )
504+ if err != nil {
505+ t .Fatal (err )
506+ } else if string (kv .ValueBytes ()) != t .Name () {
507+ t .Fatalf ("actual value %q did not match expected value %q" , kv .ValueBytes (), t .Name ())
508+ }
509+ }
510+ })
511+
512+ // Verify that a transaction's abort span records are preserved when the
513+ // transaction spans a merge.
514+ t .Run ("abort-span" , func (t * testing.T ) {
515+ setupReplicas ()
516+ rhsKey := roachpb .Key ("cc" )
517+
518+ // Create a transaction that will be aborted before the merge but won't
519+ // realize until after the merge.
520+ txn1 := client .NewTxn (store .DB (), 0 /* gatewayNodeID */ , client .RootTxn )
521+ // Put the key on the RHS side so ownership of the transaction record and
522+ // abort span records will need to transfer to the LHS during the merge.
523+ if err := txn1 .Put (ctx , rhsKey , t .Name ()); err != nil {
524+ t .Fatal (err )
525+ }
526+
527+ // Create and commit a txn that aborts txn1.
528+ txn2 := client .NewTxn (store .DB (), 0 /* gatewayNodeID */ , client .RootTxn )
529+ txn2 .InternalSetPriority (roachpb .MaxTxnPriority )
530+ if err := txn2 .Put (ctx , rhsKey , "muhahahah" ); err != nil {
531+ t .Fatal (err )
532+ }
533+ if err := txn2 .Commit (ctx ); err != nil {
534+ t .Fatal (err )
535+ }
536+
537+ // Complete the merge.
538+ args := adminMergeArgs (lhsDesc .StartKey .AsRawKey ())
539+ if _ , err := client .SendWrapped (ctx , store .TestSender (), args ); err != nil {
540+ t .Fatal (err )
541+ }
542+ if _ , err := txn1 .Get (ctx , rhsKey ); ! testutils .IsError (err , "txn aborted" ) {
543+ t .Fatalf ("expected 'txn aborted' error but got %v" , err )
544+ }
545+ })
546+
547+ // Verify that the transaction wait queue on the right-hand range in a merge
548+ // is cleared if the merge commits.
549+ t .Run ("wait-queue" , func (t * testing.T ) {
550+ setupReplicas ()
551+ rhsKey := roachpb .Key ("cc" )
552+
553+ // Set a timeout, and set the the transaction liveness threshold to
554+ // something much larger than our timeout. We want transactions to get stuck
555+ // in the transaction wait queue and trigger the timeout if we forget to
556+ // clear it.
557+ ctx , cancel := context .WithTimeout (ctx , testutils .DefaultSucceedsSoonDuration )
558+ defer cancel ()
559+ defer func (old time.Duration ) { txnwait .TxnLivenessThreshold = old }(txnwait .TxnLivenessThreshold )
560+ txnwait .TxnLivenessThreshold = 2 * testutils .DefaultSucceedsSoonDuration
561+
562+ // Create a transaction that won't complete until after the merge.
563+ txn1 := client .NewTxn (store .DB (), 0 /* gatewayNodeID */ , client .RootTxn )
564+ // Put the key on the RHS side so ownership of the transaction record and
565+ // abort span records will need to transfer to the LHS during the merge.
566+ if err := txn1 .Put (ctx , rhsKey , t .Name ()); err != nil {
567+ t .Fatal (err )
568+ }
569+
570+ // Create a txn that will conflict with txn1.
571+ txn2 := client .NewTxn (store .DB (), 0 /* gatewayNodeID */ , client .RootTxn )
572+ txn2ErrCh := make (chan error )
573+ go func () {
574+ txn2ErrCh <- txn2 .Put (ctx , rhsKey , "muhahahah" )
575+ }()
576+
577+ // Wait for txn2 to realize it conflicts with txn1 and enter its wait queue.
578+ {
579+ repl , err := mtc .stores [1 ].GetReplica (rhsDesc .RangeID )
580+ if err != nil {
581+ t .Fatal (err )
582+ }
583+ for {
584+ if _ , ok := repl .GetTxnWaitQueue ().TrackedTxns ()[txn1 .ID ()]; ok {
585+ break
586+ }
587+ select {
588+ case <- time .After (10 * time .Millisecond ):
589+ case <- ctx .Done ():
590+ t .Fatal ("timed out waiting for txn2 to enter wait queue" )
591+ }
592+ }
593+ }
594+
595+ // Complete the merge.
596+ args := adminMergeArgs (lhsDesc .StartKey .AsRawKey ())
597+ if _ , err := client .SendWrapped (ctx , store .TestSender (), args ); err != nil {
598+ t .Fatal (err )
599+ }
600+
601+ if err := txn1 .Commit (ctx ); err != nil {
602+ t .Fatal (err )
603+ }
604+
605+ kv , pErr := store .DB ().Get (ctx , rhsKey )
606+ if pErr != nil {
607+ t .Fatal (pErr )
608+ } else if string (kv .ValueBytes ()) != t .Name () {
609+ t .Fatalf ("actual value %q did not match expected value %q" , kv .ValueBytes (), t .Name ())
610+ }
611+
612+ // Now that txn1 has committed, txn2's put operation should complete.
613+ select {
614+ case err := <- txn2ErrCh :
615+ if err != nil {
616+ t .Fatal (err )
617+ }
618+ case <- ctx .Done ():
619+ t .Fatal ("timed out waiting for txn2 to complete put" )
620+ }
621+
622+ if err := txn2 .Commit (ctx ); err != nil {
623+ t .Fatal (err )
624+ }
625+ })
626+ }
627+
421628func TestInvalidGetSnapshotForMergeRequest (t * testing.T ) {
422629 defer leaktest .AfterTest (t )()
423630
0 commit comments