From efad221cb6f6bac903dc3fbfa0e2c29f5a0b88b3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 9 May 2016 16:54:03 +0100 Subject: [PATCH 01/78] Put server version back to dev. --HG-- branch : dev --- consts.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/consts.go b/consts.go index bfbfd49..9e0810b 100644 --- a/consts.go +++ b/consts.go @@ -5,7 +5,7 @@ import ( ) const ( - ServerVersion = "0.2" + ServerVersion = "dev" MDBInitialSize = 1048576 TwoToTheSixtyThree = 9223372036854775808 SubmissionInitialAttempts = 5 From 565dfdeb571cbfc730625392d03cee503b0fbd25 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 10 May 2016 12:08:00 +0100 Subject: [PATCH 02/78] Refactor outcome accumulator so that it uses just one map and will be faster. --HG-- branch : dev --- paxos/outcomeaccumulator.go | 193 +++++++++++++++++++++--------------- 1 file changed, 111 insertions(+), 82 deletions(-) diff --git a/paxos/outcomeaccumulator.go b/paxos/outcomeaccumulator.go index 93f9daf..6ebf327 100644 --- a/paxos/outcomeaccumulator.go +++ b/paxos/outcomeaccumulator.go @@ -14,143 +14,172 @@ import ( // distinct acceptors which all have equal Clocks, we know we have a // consensus on the result. type OutcomeAccumulator struct { - acceptorIdToTxnOutcome map[common.RMId]*txnOutcome - outcomes []*txnOutcome - decidingOutcome *txnOutcome - pendingTGC map[common.RMId]server.EmptyStruct - fInc int - acceptorCount int + acceptors common.RMIds + acceptorOutcomes map[common.RMId]*acceptorIndexWithTxnOutcome + winningOutcome *txnOutcome + allKnownOutcomes []*txnOutcome + pendingTGC int + fInc int +} + +type acceptorIndexWithTxnOutcome struct { + idx int + tgcReceived bool + tOut *txnOutcome +} + +type txnOutcome struct { + outcome *outcomeEqualId + acceptors common.RMIds + outcomeReceivedCount int } func NewOutcomeAccumulator(fInc int, acceptors common.RMIds) *OutcomeAccumulator { - pendingTGC := make(map[common.RMId]server.EmptyStruct, len(acceptors)) - for _, rmId := range acceptors { - pendingTGC[rmId] = server.EmptyStructVal + acceptorOutcomes := make(map[common.RMId]*acceptorIndexWithTxnOutcome, len(acceptors)) + ids := make([]acceptorIndexWithTxnOutcome, len(acceptors)) + for idx, rmId := range acceptors { + ptr := &ids[idx] + ptr.idx = idx + acceptorOutcomes[rmId] = ptr } return &OutcomeAccumulator{ - acceptorIdToTxnOutcome: make(map[common.RMId]*txnOutcome), - outcomes: []*txnOutcome{}, - pendingTGC: pendingTGC, - fInc: fInc, - acceptorCount: len(acceptors), + acceptors: acceptors, + acceptorOutcomes: acceptorOutcomes, + winningOutcome: nil, + allKnownOutcomes: make([]*txnOutcome, 0, 1), + pendingTGC: len(acceptors), + fInc: fInc, } } func (oa *OutcomeAccumulator) TopologyChange(topology *configuration.Topology) bool { - result := false + // We can only gain more RMsRemoved when a new topology is + // installed post barrier2 and migration. To get to barrier2, every + // live transaction must have its outcome known. Therefore by this + // point we should not have to deal with the removal of nodes + // causing winningOutcome needing to go from nil to non-nil. 
for rmId := range topology.RMsRemoved() { - if _, found := oa.pendingTGC[rmId]; found { - delete(oa.pendingTGC, rmId) - if outcome, found := oa.acceptorIdToTxnOutcome[rmId]; found { - delete(oa.acceptorIdToTxnOutcome, rmId) - outcome.outcomeReceivedCount-- + if acceptorOutcome, found := oa.acceptorOutcomes[rmId]; found { + delete(oa.acceptorOutcomes, rmId) + oa.acceptors[acceptorOutcome.idx] = common.RMIdEmpty + if l := oa.acceptors.NonEmptyLen(); l < oa.fInc { + oa.fInc = l } - oa.acceptorCount-- - if oa.acceptorCount > oa.fInc { - oa.fInc = oa.acceptorCount + if !acceptorOutcome.tgcReceived { + acceptorOutcome.tgcReceived = true + oa.pendingTGC-- } - if oa.decidingOutcome != nil { - result = result || oa.decidingOutcome.outcomeReceivedCount == oa.acceptorCount + if tOut := acceptorOutcome.tOut; tOut != nil { + acceptorOutcome.tOut = nil + tOut.outcomeReceivedCount-- + if tOut.outcomeReceivedCount == 0 { + oa.deleteFromOutcomes(tOut) + } else { + tOut.acceptors[acceptorOutcome.idx] = common.RMIdEmpty + } } } } - return result + return oa.winningOutcome != nil && oa.winningOutcome.outcomeReceivedCount == len(oa.acceptorOutcomes) } func (oa *OutcomeAccumulator) BallotOutcomeReceived(acceptorId common.RMId, outcome *msgs.Outcome) (*msgs.Outcome, bool) { outcomeEq := (*outcomeEqualId)(outcome) - if tOut, found := oa.acceptorIdToTxnOutcome[acceptorId]; found { + acceptorOutcome, found := oa.acceptorOutcomes[acceptorId] + if !found { + panic(fmt.Sprintf("BallotOutcomeReceived: Unable to find precreated acceptorIndexWithTxnOutcome for %v", acceptorId)) + } + + if tOut := acceptorOutcome.tOut; tOut != nil { if tOut.outcome.Equal(outcomeEq) { // It's completely a duplicate msg. No change to our state so just return return nil, false } else { // The acceptor has changed its mind. tOut.outcomeReceivedCount-- - // Paxos guarantees that in this case, tOut != oa.decidingOutcome + if tOut.outcomeReceivedCount == 0 { + oa.deleteFromOutcomes(tOut) + } else { + tOut.acceptors[acceptorOutcome.idx] = common.RMIdEmpty + } + // Paxos guarantees that in this case, tOut != oa.winningOutcome } } tOut := oa.getOutcome(outcomeEq) - if tOut == nil { - tOut = &txnOutcome{ - outcome: outcomeEq, - outcomeReceivedCount: 1, - } - oa.addToOutcomes(tOut) - - } else { - // We've checked for duplicate msgs above, so we don't need to - // worry about that here. - tOut.outcomeReceivedCount++ - } - oa.acceptorIdToTxnOutcome[acceptorId] = tOut - - allAgreed := tOut.outcomeReceivedCount == oa.acceptorCount - if oa.decidingOutcome == nil && oa.fInc == tOut.outcomeReceivedCount { - oa.decidingOutcome = tOut - return (*msgs.Outcome)(oa.decidingOutcome.outcome), allAgreed + // We've checked for duplicate msgs above, so we don't need to + // worry about that here. 
+ tOut.outcomeReceivedCount++ + tOut.acceptors[acceptorOutcome.idx] = acceptorId + + allAgreed := tOut.outcomeReceivedCount == len(oa.acceptorOutcomes) + if oa.winningOutcome == nil && oa.fInc == tOut.outcomeReceivedCount { + oa.winningOutcome = tOut + return (*msgs.Outcome)(oa.winningOutcome.outcome), allAgreed } return nil, allAgreed } func (oa *OutcomeAccumulator) TxnGloballyCompleteReceived(acceptorId common.RMId) bool { server.Log("TGC received from", acceptorId, "; pending:", oa.pendingTGC) - delete(oa.pendingTGC, acceptorId) - return len(oa.pendingTGC) == 0 -} - -func (oa *OutcomeAccumulator) addToOutcomes(tOut *txnOutcome) { - oa.outcomes = append(oa.outcomes, tOut) + acceptorOutcome, found := oa.acceptorOutcomes[acceptorId] + if !found { + panic(fmt.Sprintf("TxnGloballyCompleteReceived: Unable to find precreated acceptorIndexWithTxnOutcome for %v", acceptorId)) + } + if !acceptorOutcome.tgcReceived { + acceptorOutcome.tgcReceived = true + oa.pendingTGC-- + } + return oa.pendingTGC == 0 } func (oa *OutcomeAccumulator) getOutcome(outcome *outcomeEqualId) *txnOutcome { - for _, tOut := range oa.outcomes { + var empty *txnOutcome + for _, tOut := range oa.allKnownOutcomes { if tOut.outcome.Equal(outcome) { return tOut + } else if empty == nil && tOut.outcome == nil { + empty = tOut } } - return nil + if empty == nil { + empty = &txnOutcome{ + outcome: outcome, + acceptors: make([]common.RMId, len(oa.acceptors)), + outcomeReceivedCount: 0, + } + oa.allKnownOutcomes = append(oa.allKnownOutcomes, empty) + } else { + empty.outcome = outcome + empty.acceptors = make([]common.RMId, len(oa.acceptors)) + empty.outcomeReceivedCount = 0 + } + return empty +} + +func (oa *OutcomeAccumulator) deleteFromOutcomes(tOut *txnOutcome) { + tOut.outcome = nil } func (oa *OutcomeAccumulator) IsAllAborts() []common.RMId { - count := len(oa.acceptorIdToTxnOutcome) - for _, outcome := range oa.outcomes { - if outcome.outcomeReceivedCount == count && (*msgs.Outcome)(outcome.outcome).Which() == msgs.OUTCOME_ABORT { - acceptors := make([]common.RMId, 0, count) - for rmId := range oa.acceptorIdToTxnOutcome { - acceptors = append(acceptors, rmId) - } - return acceptors + count := len(oa.acceptorOutcomes) + for _, tOut := range oa.allKnownOutcomes { + if tOut.outcome != nil && tOut.outcomeReceivedCount == count && (*msgs.Outcome)(tOut.outcome).Which() == msgs.OUTCOME_ABORT { + return tOut.acceptors.NonEmpty() } } return nil } func (oa *OutcomeAccumulator) Status(sc *server.StatusConsumer) { - outcomeToAcceptors := make(map[*txnOutcome][]common.RMId) - acceptors := make([]common.RMId, 0, len(oa.acceptorIdToTxnOutcome)) - for rmId, outcome := range oa.acceptorIdToTxnOutcome { - acceptors = append(acceptors, rmId) - if list, found := outcomeToAcceptors[outcome]; found { - outcomeToAcceptors[outcome] = append(list, rmId) - } else { - outcomeToAcceptors[outcome] = []common.RMId{rmId} - } - } - sc.Emit(fmt.Sprintf("- known outcomes from acceptors: %v", acceptors)) - sc.Emit(fmt.Sprintf("- unique outcomes: %v", outcomeToAcceptors)) - sc.Emit(fmt.Sprintf("- outcome decided? %v", oa.decidingOutcome != nil)) - sc.Emit(fmt.Sprintf("- pending TGCs from: %v", oa.pendingTGC)) + sc.Emit(fmt.Sprintf("- unique outcomes: %v", oa.allKnownOutcomes)) + sc.Emit(fmt.Sprintf("- outcome decided? 
%v", oa.winningOutcome != nil)) + sc.Emit(fmt.Sprintf("- pending TGC count: %v", oa.pendingTGC)) sc.Join() } -type txnOutcome struct { - outcome *outcomeEqualId - outcomeReceivedCount int -} - func (to *txnOutcome) String() string { - return fmt.Sprintf("%v:%v", to.outcome, to.outcomeReceivedCount) + return fmt.Sprintf("%v:%v", to.outcome, to.acceptors.NonEmpty()) } type outcomeEqualId msgs.Outcome From bb4916e4ba3423cf2de56584f328fc98de6edf9b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 10 May 2016 12:59:29 +0100 Subject: [PATCH 03/78] Ensure that we don't start sending the TSC until we've scheduled the removal of the txn sender. Ref T27. --HG-- branch : T27 --- client/simpletxnsubmitter.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index 8c83fa5..11816ab 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -88,17 +88,18 @@ func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, activeRMs []c txnId := common.MakeTxnId(txnCap.Id()) server.Log(txnId, "Submitting txn") txnSender := paxos.NewRepeatingSender(server.SegToBytes(seg), activeRMs...) - var removeSenderCh chan server.EmptyStruct + var removeSenderCh chan chan server.EmptyStruct if delay == 0 { sts.connPub.AddServerConnectionSubscriber(txnSender) } else { - removeSenderCh = make(chan server.EmptyStruct) + removeSenderCh = make(chan chan server.EmptyStruct) go func() { // fmt.Printf("%v ", delay) time.Sleep(delay) sts.connPub.AddServerConnectionSubscriber(txnSender) - <-removeSenderCh + doneChan := <-removeSenderCh sts.connPub.RemoveServerConnectionSubscriber(txnSender) + close(doneChan) }() } acceptors := paxos.GetAcceptorsFromTxn(txnCap) @@ -109,7 +110,9 @@ func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, activeRMs []c if delay == 0 { sts.connPub.RemoveServerConnectionSubscriber(txnSender) } else { - close(removeSenderCh) + txnSenderRemovedChan := make(chan server.EmptyStruct) + removeSenderCh <- txnSenderRemovedChan + <-txnSenderRemovedChan } // OSS is safe here - see above. paxos.NewOneShotSender(paxos.MakeTxnSubmissionCompleteMsg(txnId), sts.connPub, acceptors...) From 36baa4db220853dc3761831c86e2c1cd029fdd19 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 12 May 2016 11:53:05 +0100 Subject: [PATCH 04/78] Improve logic. 
--HG-- branch : dev --- client/versioncache.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index 87eb613..e27ac4f 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -57,8 +57,8 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda case msgs.ACTION_MISSING: if c, found := vc[*vUUId]; found { cmp := c.txnId.Compare(txnId) - if clockElem > c.clockElem && cmp == common.EQ { - panic(fmt.Sprintf("Clock version increased on missing for %v@%v (%v > %v)", vUUId, txnId, clockElem, c.clockElem)) + if cmp == common.EQ && clockElem != c.clockElem { + panic(fmt.Sprintf("Clock version changed on missing for %v@%v (new:%v != old:%v)", vUUId, txnId, clockElem, c.clockElem)) } if clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) { delete(vc, *vUUId) @@ -69,8 +69,8 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda case msgs.ACTION_WRITE: if c, found := vc[*vUUId]; found { cmp := c.txnId.Compare(txnId) - if clockElem > c.clockElem && cmp == common.EQ { - panic(fmt.Sprintf("Clock version increased on write for %v@%v (%v > %v)", vUUId, txnId, clockElem, c.clockElem)) + if cmp == common.EQ && clockElem != c.clockElem { + panic(fmt.Sprintf("Clock version changed on write for %v@%v (new:%v != old:%v)", vUUId, txnId, clockElem, c.clockElem)) } if clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) { c.txnId = txnId From 66dca8962d12aad01bb98fe83bf9e41d01d5831a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 12 May 2016 16:41:12 +0100 Subject: [PATCH 05/78] Hmm, so doing a straight binary search is not great. This is certainly a better API, but I'm far from sure yet if it's faster or not. Ref T26. 
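
The shape of the change is easiest to see at a call site. Previously callers indexed the exported Clock map directly; now they go through accessor methods, which is what lets the representation switch to a capnp-backed initial map plus a mutable overlay without touching every caller again. A before/after sketch drawn from the call sites in this diff (clock and vUUId as in the surrounding code):

    // Before: the map is part of the public surface.
    clockElem := clock.Clock[*vUUId]
    clock.SetVarIdMax(*vUUId, clockElem)

    // After: access goes through methods, and keys are passed as
    // pointers, so the backing representation is free to change.
    clockElem := clock.At(vUUId)
    clock.SetVarIdMax(vUUId, clockElem)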
--HG-- branch : T26 --- client/versioncache.go | 6 +- paxos/ballotaccumulator.go | 18 +-- txnengine/frame.go | 81 +++++----- txnengine/var.go | 4 +- txnengine/vectorclock.go | 294 ++++++++++++++++++++++++------------- 5 files changed, 248 insertions(+), 155 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index e27ac4f..c9f1fb1 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -27,11 +27,11 @@ func (vc versionCache) UpdateFromCommit(txnId *common.TxnId, outcome *msgs.Outco vUUId := common.MakeVarUUId(action.VarId()) if c, found := vc[*vUUId]; found { c.txnId = txnId - c.clockElem = clock.Clock[*vUUId] + c.clockElem = clock.At(vUUId) } else { vc[*vUUId] = &cached{ txnId: txnId, - clockElem: clock.Clock[*vUUId], + clockElem: clock.At(vUUId), } } } @@ -51,7 +51,7 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda for idy, m := 0, actions.Len(); idy < m; idy++ { action := actions.At(idy) vUUId := common.MakeVarUUId(action.VarId()) - clockElem := clock.Clock[*vUUId] + clockElem := clock.At(vUUId) switch action.Which() { case msgs.ACTION_MISSING: diff --git a/paxos/ballotaccumulator.go b/paxos/ballotaccumulator.go index db5111e..44089ea 100644 --- a/paxos/ballotaccumulator.go +++ b/paxos/ballotaccumulator.go @@ -279,9 +279,9 @@ func (vb *varBallot) combineVote(rmBal *rmBallot, br badReads) { case cur.Vote == eng.Commit && new.Vote == eng.Commit: cur.Clock.MergeInMax(new.Clock) - case cur.Vote == eng.AbortDeadlock && len(cur.Clock.Clock) == 0: + case cur.Vote == eng.AbortDeadlock && cur.Clock.Len == 0: // Do nothing - ignore the new ballot - case new.Vote == eng.AbortDeadlock && len(new.Clock.Clock) == 0: + case new.Vote == eng.AbortDeadlock && new.Clock.Len == 0: // This has been created by abort proposer. This trumps everything. cur.Vote = eng.AbortDeadlock cur.Clock = new.Clock @@ -298,7 +298,7 @@ func (vb *varBallot) combineVote(rmBal *rmBallot, br badReads) { cur.Clock.MergeInMax(new.Clock) case new.Vote == eng.AbortDeadlock && cur.Vote == eng.AbortBadRead && - new.Clock.Clock[*vb.vUUId] < cur.Clock.Clock[*vb.vUUId]: + new.Clock.At(vb.vUUId) < cur.Clock.At(vb.vUUId): // The new Deadlock is strictly in the past of the current // BadRead, so we stay on the badread. cur.Clock.MergeInMax(new.Clock) @@ -313,7 +313,7 @@ func (vb *varBallot) combineVote(rmBal *rmBallot, br badReads) { case cur.Vote == eng.AbortBadRead: // && new.Vote == eng.AbortBadRead cur.Clock.MergeInMax(new.Clock) - case new.Clock.Clock[*vb.vUUId] > cur.Clock.Clock[*vb.vUUId]: + case new.Clock.At(vb.vUUId) > cur.Clock.At(vb.vUUId): // && cur.Vote == AbortDeadlock && new.Vote == AbortBadRead. The // new BadRead is strictly in the future of the cur Deadlock, so // we should switch to the BadRead. 
@@ -374,16 +374,16 @@ func (br badReads) combine(rmBal *rmBallot) { vUUId := common.MakeVarUUId(action.VarId()) if bra, found := br[*vUUId]; found { - bra.combine(&action, rmBal, txnId, clock.Clock[*vUUId]) + bra.combine(&action, rmBal, txnId, clock.At(vUUId)) } else if action.Which() == msgs.ACTION_READ { br[*vUUId] = &badReadAction{ rmBallot: rmBal, vUUId: vUUId, txnId: common.MakeTxnId(action.Read().Version()), - clockElem: clock.Clock[*vUUId] - 1, + clockElem: clock.At(vUUId) - 1, action: &action, } - if clock.Clock[*vUUId] == 0 { + if clock.At(vUUId) == 0 { panic(fmt.Sprintf("Just did 0 - 1 in int64 (%v, %v) (%v)", vUUId, clock, txnId)) } } else { @@ -391,7 +391,7 @@ func (br badReads) combine(rmBal *rmBallot) { rmBallot: rmBal, vUUId: vUUId, txnId: txnId, - clockElem: clock.Clock[*vUUId], + clockElem: clock.At(vUUId), action: &action, } } @@ -518,7 +518,7 @@ func (br badReads) AddToSeg(seg *capn.Segment) msgs.Update_List { panic(fmt.Sprintf("Unexpected action type (%v) for badread of %v at %v", action.Which(), action.VarId(), txnId)) } - clock.SetVarIdMax(*bra.vUUId, bra.clockElem) + clock.SetVarIdMax(bra.vUUId, bra.clockElem) } update.SetClock(clock.AddToSeg(seg)) } diff --git a/txnengine/frame.go b/txnengine/frame.go index 63b4432..dc35389 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -78,7 +78,7 @@ func (f *frame) nextState() { } func (f *frame) String() string { - return fmt.Sprintf("%v Frame %v (%v) r%v w%v", f.v.UUId, f.frameTxnId, len(f.frameTxnClock.Clock), f.readVoteClock, f.writeVoteClock) + return fmt.Sprintf("%v Frame %v (%v) r%v w%v", f.v.UUId, f.frameTxnId, f.frameTxnClock.Len, f.readVoteClock, f.writeVoteClock) } func (f *frame) Status(sc *server.StatusConsumer) { @@ -374,11 +374,12 @@ func (fo *frameOpen) ReadLearnt(action *localAction) bool { if fo.currentState != fo { panic(fmt.Sprintf("%v ReadLearnt called for %v with frame in state %v", fo.v, txn, fo.currentState)) } - actClockElem := action.outcomeClock.Clock[*fo.v.UUId] - 1 - if action.outcomeClock.Clock[*fo.v.UUId] == 0 { + actClockElem := action.outcomeClock.At(fo.v.UUId) + if actClockElem == 0 { panic("Just did 0 - 1 in int64") } - reqClockElem := fo.frameTxnClock.Clock[*fo.v.UUId] + actClockElem-- + reqClockElem := fo.frameTxnClock.At(fo.v.UUId) if action.readVsn.Compare(fo.frameTxnId) != common.EQ { // The write would be one less than the read. We want to know if // this read is of a write before or after our current frame @@ -410,11 +411,12 @@ func (fo *frameOpen) ReadLearnt(action *localAction) bool { // in the action.outcomeClock then we know that we must be // missing some TGCs - essentially we can infer TGCs by // observing the outcome clocks on future txns we learn. 
- for k, v := range fo.readVoteClock.Clock { - if _, found := action.outcomeClock.Clock[k]; !found { - fo.mask.SetVarIdMax(k, v) + fo.readVoteClock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + if action.outcomeClock.At(vUUId) == Deleted { + fo.mask.SetVarIdMax(vUUId, v) } - } + return true + }) server.Log(fo.frame, "ReadLearnt", txn, "uncommittedReads:", fo.uncommittedReads, "uncommittedWrites:", fo.uncommittedWrites) fo.maybeScheduleRoll() return true @@ -428,8 +430,8 @@ func (fo *frameOpen) WriteLearnt(action *localAction) bool { if fo.currentState != fo { panic(fmt.Sprintf("%v WriteLearnt called for %v with frame in state %v", fo.v, txn, fo.currentState)) } - actClockElem := action.outcomeClock.Clock[*fo.v.UUId] - reqClockElem := fo.frameTxnClock.Clock[*fo.v.UUId] + actClockElem := action.outcomeClock.At(fo.v.UUId) + reqClockElem := fo.frameTxnClock.At(fo.v.UUId) if actClockElem < reqClockElem || (actClockElem == reqClockElem && action.Id.Compare(fo.frameTxnId) == common.LT) { server.Log(fo.frame, "WriteLearnt", txn, "ignored, too old") return false @@ -455,11 +457,12 @@ func (fo *frameOpen) WriteLearnt(action *localAction) bool { if clock == nil { clock = fo.readVoteClock } - for k, v := range clock.Clock { - if _, found := action.outcomeClock.Clock[k]; !found { - fo.mask.SetVarIdMax(k, v) + clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + if action.outcomeClock.At(vUUId) == Deleted { + fo.mask.SetVarIdMax(vUUId, v) } - } + return true + }) server.Log(fo.frame, "WriteLearnt", txn, "uncommittedReads:", fo.uncommittedReads, "uncommittedWrites:", fo.uncommittedWrites) if fo.uncommittedReads == 0 { fo.maybeCreateChild() @@ -533,18 +536,20 @@ func (fo *frameOpen) calculateReadVoteClock() { if fo.readVoteClock == nil { clock := fo.frameTxnClock.Clone() written := fo.frameWritesClock.Clone() - for k, v := range clock.Clock { - if m, found := fo.mask.Clock[k]; found && m >= v { - delete(clock.Clock, k) + clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + if fo.mask.At(vUUId) >= v { + clock.Delete(vUUId) } - } - for k, v := range written.Clock { - if m, found := fo.mask.Clock[k]; fo.v.UUId.Compare(&k) == common.EQ || !found || m < v { - clock.SetVarIdMax(k, v+1) + return true + }) + written.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + if fo.mask.At(vUUId) < v || fo.v.UUId.Compare(vUUId) == common.EQ { + clock.SetVarIdMax(vUUId, v+1) } - } + return true + }) fo.readVoteClock = clock - if _, found := fo.frameWritesClock.Clock[*fo.v.UUId]; !found { + if fo.frameWritesClock.At(fo.v.UUId) == Deleted { panic(fmt.Sprintf("%v no write to self!", fo.frame)) } } @@ -558,19 +563,21 @@ func (fo *frameOpen) calculateWriteVoteClock() { action := node.Key.(*localAction) clock.MergeInMax(action.outcomeClock) for _, k := range action.writes { - written.SetVarIdMax(*k, action.outcomeClock.Clock[*k]) + written.SetVarIdMax(k, action.outcomeClock.At(k)) } } - for k, v := range clock.Clock { - if m, found := fo.mask.Clock[k]; found && m >= v { - delete(clock.Clock, k) + clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + if fo.mask.At(vUUId) >= v { + clock.Delete(vUUId) } - } - for k, v := range written.Clock { - if m, found := fo.mask.Clock[k]; !found || m < v { - clock.SetVarIdMax(k, v+1) + return true + }) + written.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + if fo.mask.At(vUUId) < v { + clock.SetVarIdMax(vUUId, v+1) } - } + return true + }) fo.writeVoteClock = clock } } @@ -591,7 +598,7 @@ func (fo *frameOpen) maybeCreateChild() { localElemVals := 
uint64s([]uint64{}) for node := fo.writes.First(); node != nil; node = node.Next() { action := node.Key.(*localAction) - localElemVal := action.outcomeClock.Clock[*vUUId] + localElemVal := action.outcomeClock.At(vUUId) if listPtr, found := localElemValToTxns[localElemVal]; found { *listPtr = append(*listPtr, action) } else { @@ -604,7 +611,7 @@ func (fo *frameOpen) maybeCreateChild() { var clock, written *VectorClock - elem := fo.frameTxnClock.Clock[*fo.v.UUId] + elem := fo.frameTxnClock.At(fo.v.UUId) switch { case len(localElemVals) == 1 && localElemVals[0] == elem: clock = fo.frameTxnClock.Clone() @@ -645,7 +652,7 @@ func (fo *frameOpen) maybeCreateChild() { clock.MergeInMax(action.outcomeClock) if action.writesClock == nil { for _, k := range action.writes { - written.SetVarIdMax(*k, action.outcomeClock.Clock[*k]) + written.SetVarIdMax(k, action.outcomeClock.At(k)) } } else { written.MergeInMax(action.writesClock) @@ -668,7 +675,7 @@ func (fo *frameOpen) maybeCreateChild() { func (fo *frameOpen) maybeScheduleRoll() { // do not check vm.RollAllowed here. if !fo.rollScheduled && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && - (fo.reads.Len() > fo.uncommittedReads || (len(fo.frameTxnClock.Clock) > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { + (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { fo.rollScheduled = true fo.v.vm.ScheduleCallback(func() { fo.v.applyToVar(func() { @@ -681,7 +688,7 @@ func (fo *frameOpen) maybeScheduleRoll() { func (fo *frameOpen) maybeStartRoll() { if fo.v.vm.RollAllowed && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && - (fo.reads.Len() > fo.uncommittedReads || (len(fo.frameTxnClock.Clock) > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { + (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { fo.rollActive = true ctxn, varPosMap := fo.createRollClientTxn() go func() { diff --git a/txnengine/var.go b/txnengine/var.go index 5432e71..8723658 100644 --- a/txnengine/var.go +++ b/txnengine/var.go @@ -72,8 +72,8 @@ func VarFromData(data []byte, exe *dispatcher.Executor, db *db.Databases, vm *Va func NewVar(uuid *common.VarUUId, exe *dispatcher.Executor, db *db.Databases, vm *VarManager) *Var { v := newVar(uuid, exe, db, vm) - clock := NewVectorClock().Bump(*v.UUId, 0) - written := NewVectorClock().Bump(*v.UUId, 0) + clock := NewVectorClock().Bump(v.UUId, 1) + written := NewVectorClock().Bump(v.UUId, 1) v.curFrame = NewFrame(nil, v, nil, nil, clock, written) seg := capn.NewBuffer(nil) diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index df90ab9..46d6e31 100644 --- a/txnengine/vectorclock.go +++ b/txnengine/vectorclock.go @@ -8,149 +8,229 @@ import ( ) type VectorClock struct { - Clock map[common.VarUUId]uint64 - cap *msgs.VectorClock + cap *msgs.VectorClock + initial map[common.VarUUId]uint64 + deltas map[common.VarUUId]uint64 + Len int } +const ( + Deleted uint64 = 0 +) + func VectorClockFromCap(vcCap msgs.VectorClock) *VectorClock { - vUUIds := vcCap.VarUuids() - values := vcCap.Values() + l := vcCap.VarUuids().Len() vc := &VectorClock{ - Clock: 
make(map[common.VarUUId]uint64, vUUIds.Len()), - cap: &vcCap, + cap: &vcCap, + initial: make(map[common.VarUUId]uint64, l), + deltas: make(map[common.VarUUId]uint64), + Len: l, } - for idx, l := 0, vUUIds.Len(); idx < l; idx++ { - vUUId := common.MakeVarUUId(vUUIds.At(idx)) - vc.Clock[*vUUId] = values.At(idx) + keys := vcCap.VarUuids() + values := vcCap.Values() + for idx, l := 0, keys.Len(); idx < l; idx++ { + k := common.MakeVarUUId(keys.At(idx)) + vc.initial[*k] = values.At(idx) } return vc } func NewVectorClock() *VectorClock { + return &VectorClock{deltas: make(map[common.VarUUId]uint64)} +} + +func (vcA *VectorClock) Clone() *VectorClock { + deltas := make(map[common.VarUUId]uint64, len(vcA.deltas)) + for k, v := range vcA.deltas { + deltas[k] = v + } return &VectorClock{ - Clock: make(map[common.VarUUId]uint64, 32), - cap: nil, + cap: vcA.cap, + initial: vcA.initial, + deltas: deltas, + Len: vcA.Len, } } -func (vcA *VectorClock) Clone() *VectorClock { - vcB := NewVectorClock() - vcB.MergeInMax(vcA) - vcB.cap = vcA.cap - return vcB +func (vc *VectorClock) ForEach(it func(*common.VarUUId, uint64) bool) bool { + deltaKeys := common.VarUUIds(make([]*common.VarUUId, 0, len(vc.deltas))) + for k := range vc.deltas { + kCopy := k + deltaKeys = append(deltaKeys, &kCopy) + } + deltaKeys.Sort() + if vc.cap != nil { + keys := vc.cap.VarUuids() + if l := keys.Len(); l > 0 { + values := vc.cap.Values() + idx, key := 0, common.MakeVarUUId(keys.At(0)) + nextMain := func() { + idx++ + if idx < l { + key = common.MakeVarUUId(keys.At(idx)) + } + } + if len(deltaKeys) > 0 { + dk := deltaKeys[0] + dv := vc.deltas[*dk] + nextDelta := func() { + deltaKeys = deltaKeys[1:] + if len(deltaKeys) > 0 { + dk = deltaKeys[0] + dv = vc.deltas[*dk] + } + } + for len(deltaKeys) > 0 && idx < l { + switch dk.Compare(key) { + case common.LT: + if dv != Deleted { + if !it(dk, dv) { + return false + } + } + nextDelta() + case common.EQ: + if dv != Deleted { + if !it(dk, dv) { + return false + } + } + nextDelta() + nextMain() + default: + if !it(key, values.At(idx)) { + return false + } + nextMain() + } + } + } + for idx < l { + if !it(key, values.At(idx)) { + return false + } + nextMain() + } + } + } + for _, dk := range deltaKeys { + if value := vc.deltas[*dk]; value != Deleted { + if !it(dk, value) { + return false + } + } + } + return true } func (vc *VectorClock) String() string { - return fmt.Sprintf("VC:%v (cached cap? 
%v)", vc.Clock, vc.cap != nil) + str := fmt.Sprintf("VC:(%v)", vc.Len) + vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + str += fmt.Sprintf(" %v:%v", vUUId, v) + return true + }) + return str } -func (vc *VectorClock) Bump(oid common.VarUUId, inc uint64) *VectorClock { - vc.Clock[oid] = inc + vc.Clock[oid] - vc.cap = nil +func (vc *VectorClock) At(vUUId *common.VarUUId) uint64 { + if value, found := vc.deltas[*vUUId]; found { + return value + } + if vc.cap == nil { + return Deleted + } + if value, found := vc.initial[*vUUId]; found { + return value + } + return Deleted +} + +func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { + if Deleted != vc.At(vUUId) { + vc.deltas[*vUUId] = Deleted + vc.Len-- + } return vc } -func (vc *VectorClock) SetVarIdMax(oid common.VarUUId, v uint64) bool { - if old, found := vc.Clock[oid]; found && old >= v { - return false +func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { + old := vc.At(vUUId) + if old == Deleted { + vc.deltas[*vUUId] = inc + vc.Len++ } else { - vc.Clock[oid] = v - vc.cap = nil + vc.deltas[*vUUId] = old + inc + } + return vc +} + +func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { + old := vc.At(vUUId) + if v > old { + vc.deltas[*vUUId] = v + if old == Deleted { + vc.Len++ + } return true } + return false } func (vcA *VectorClock) MergeInMax(vcB *VectorClock) bool { changed := false - for k, v := range vcB.Clock { + vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { // put "|| changed" last to avoid short-circuit - changed = vcA.SetVarIdMax(k, v) || changed - } + changed = vcA.SetVarIdMax(vUUId, v) || changed + return true + }) return changed } func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { changed := false - for k, v := range vcB.Clock { - if _, found := vcA.Clock[k]; !found { - vcA.Clock[k] = v + vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + if vcA.At(vUUId) == Deleted { + vcA.deltas[*vUUId] = v changed = true + vcA.Len++ } - } - if changed { - vcA.cap = nil - } - return changed -} - -func (vc *VectorClock) SubtractIfMatch(oid common.VarUUId, v uint64) bool { - if old, found := vc.Clock[oid]; found && old <= v { - delete(vc.Clock, oid) - vc.cap = nil return true - } - return false + }) + return changed } -func (vcA *VectorClock) Equal(vcB *VectorClock) bool { - if len(vcA.Clock) != len(vcB.Clock) { - return false - } else { - for k, vA := range vcA.Clock { - if vB, found := vcB.Clock[k]; !(found && vA == vB) { - return false - } +func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { + if old := vc.At(vUUId); old <= v { + if old != Deleted { + vc.deltas[*vUUId] = Deleted + vc.Len-- } return true } -} - -func (vcA *VectorClock) EqualOnIntersection(vcB *VectorClock) bool { - smaller, larger := vcA, vcB - if len(vcB.Clock) < len(vcA.Clock) { - smaller, larger = vcB, vcA - } - for k, vS := range smaller.Clock { - if vL, found := larger.Clock[k]; found && vS != vL { - return false - } - } - return true + return false } func (vcA *VectorClock) LessThan(vcB *VectorClock) bool { // 1. If A has more elems than B then A cannot be < B - if len(vcA.Clock) > len(vcB.Clock) { + if vcA.Len > vcB.Len { return false } ltFound := false - // 2. For every elem in A, B[e] must be >= A[e] - for k, valA := range vcA.Clock { - valB, found := vcB.Clock[k] - if !found || valB < valA { + // 2. 
For every elem e in A, B[e] must be >= A[e] + completed := vcA.ForEach(func(vUUId *common.VarUUId, valA uint64) bool { + valB := vcB.At(vUUId) + if valA > valB { return false } - // Have we found anything for which A[e] < B[e]? - ltFound = ltFound || (found && valA < valB) + ltFound = ltFound || valA < valB + return true + }) + if !completed { + return false } // 3. Everything in A is also in B and <= B. If A == B for // everything in A, then B must be > A if len(B) > len(A) - return ltFound || len(vcB.Clock) > len(vcA.Clock) -} - -func (vcA *VectorClock) LessThanOnIntersection(vcB *VectorClock) bool { - smallerFound := false - for k, vA := range vcA.Clock { - if vB, found := vcB.Clock[k]; found { - switch { - case vA > vB: - return false - case vA < vB: - smallerFound = true - } - } - } - return smallerFound + return ltFound || vcB.Len > vcA.Len } func (vc *VectorClock) AddToSeg(seg *capn.Segment) msgs.VectorClock { @@ -159,23 +239,29 @@ func (vc *VectorClock) AddToSeg(seg *capn.Segment) msgs.VectorClock { vcCap.SetVarUuids(seg.NewDataList(0)) vcCap.SetValues(seg.NewUInt64List(0)) return vcCap + } - } else if vc.cap == nil { - vcCap := msgs.NewVectorClock(seg) - vc.cap = &vcCap - vUUIds := seg.NewDataList(len(vc.Clock)) - values := seg.NewUInt64List(len(vc.Clock)) - vcCap.SetVarUuids(vUUIds) - vcCap.SetValues(values) - idx := 0 - for vUUId, ver := range vc.Clock { - vUUIds.Set(idx, vUUId[:]) - values.Set(idx, ver) - idx++ - } - return vcCap - - } else { + if len(vc.deltas) == 0 && vc.cap != nil { return *vc.cap } + + vcCap := msgs.NewVectorClock(seg) + vUUIds := seg.NewDataList(vc.Len) + values := seg.NewUInt64List(vc.Len) + vcCap.SetVarUuids(vUUIds) + vcCap.SetValues(values) + idx := 0 + vc.initial = make(map[common.VarUUId]uint64, vc.Len) + vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + vc.initial[*vUUId] = v + vUUIds.Set(idx, vUUId[:]) + values.Set(idx, v) + idx++ + return true + }) + + vc.deltas = make(map[common.VarUUId]uint64) + vc.cap = &vcCap + + return vcCap } From 24bc4f8612cd0934b5deb237bd8aa040029b7626 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 12 May 2016 17:51:47 +0100 Subject: [PATCH 06/78] Newer idea. Broken though - WIP. Ref T26. 
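
The layout this WIP patch introduces is a copy-on-write overlay: initial is shared between clones and backed directly by the capnp message, while adds, changes and deletes record per-clone divergence; At consults adds, then changes, then deletes, then initial. At least one of the breakages the message owns up to is visible in Clone: the := inside each if block creates a new local map that is discarded when the block ends, so clones silently keep aliasing the parent's maps. The corrected form, which the next patch applies, assigns instead:

    // '=' rather than ':=', so the freshly copied map replaces the
    // aliased one in the enclosing scope; likewise for changes/deletes.
    if len(adds) > 0 {
        adds = make(map[common.VarUUId]uint64, len(adds))
        for k, v := range vcA.adds {
            adds[k] = v
        }
    }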
--HG-- branch : T26 --- txnengine/vectorclock.go | 268 +++++++++++++++++++++++++-------------- 1 file changed, 171 insertions(+), 97 deletions(-) diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index 46d6e31..4c82ad3 100644 --- a/txnengine/vectorclock.go +++ b/txnengine/vectorclock.go @@ -4,26 +4,28 @@ import ( "fmt" capn "github.com/glycerine/go-capnproto" "goshawkdb.io/common" + "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" ) +const ( + Deleted uint64 = 0 +) + type VectorClock struct { cap *msgs.VectorClock initial map[common.VarUUId]uint64 - deltas map[common.VarUUId]uint64 + adds map[common.VarUUId]uint64 + changes map[common.VarUUId]uint64 + deletes map[common.VarUUId]server.EmptyStruct Len int } -const ( - Deleted uint64 = 0 -) - func VectorClockFromCap(vcCap msgs.VectorClock) *VectorClock { l := vcCap.VarUuids().Len() vc := &VectorClock{ cap: &vcCap, initial: make(map[common.VarUUId]uint64, l), - deltas: make(map[common.VarUUId]uint64), Len: l, } keys := vcCap.VarUuids() @@ -36,90 +38,59 @@ func VectorClockFromCap(vcCap msgs.VectorClock) *VectorClock { } func NewVectorClock() *VectorClock { - return &VectorClock{deltas: make(map[common.VarUUId]uint64)} + return &VectorClock{} } func (vcA *VectorClock) Clone() *VectorClock { - deltas := make(map[common.VarUUId]uint64, len(vcA.deltas)) - for k, v := range vcA.deltas { - deltas[k] = v + adds, changes, deletes := vcA.adds, vcA.changes, vcA.deletes + if len(adds) > 0 { + adds := make(map[common.VarUUId]uint64, len(adds)) + for k, v := range vcA.adds { + adds[k] = v + } + } + if len(changes) > 0 { + changes := make(map[common.VarUUId]uint64, len(changes)) + for k, v := range vcA.changes { + changes[k] = v + } + } + if len(deletes) > 0 { + deletes := make(map[common.VarUUId]server.EmptyStruct, len(deletes)) + for k := range vcA.deletes { + deletes[k] = server.EmptyStructVal + } } return &VectorClock{ cap: vcA.cap, initial: vcA.initial, - deltas: deltas, + adds: adds, + changes: changes, + deletes: deletes, Len: vcA.Len, } } func (vc *VectorClock) ForEach(it func(*common.VarUUId, uint64) bool) bool { - deltaKeys := common.VarUUIds(make([]*common.VarUUId, 0, len(vc.deltas))) - for k := range vc.deltas { - kCopy := k - deltaKeys = append(deltaKeys, &kCopy) - } - deltaKeys.Sort() - if vc.cap != nil { - keys := vc.cap.VarUuids() - if l := keys.Len(); l > 0 { - values := vc.cap.Values() - idx, key := 0, common.MakeVarUUId(keys.At(0)) - nextMain := func() { - idx++ - if idx < l { - key = common.MakeVarUUId(keys.At(idx)) - } - } - if len(deltaKeys) > 0 { - dk := deltaKeys[0] - dv := vc.deltas[*dk] - nextDelta := func() { - deltaKeys = deltaKeys[1:] - if len(deltaKeys) > 0 { - dk = deltaKeys[0] - dv = vc.deltas[*dk] - } - } - for len(deltaKeys) > 0 && idx < l { - switch dk.Compare(key) { - case common.LT: - if dv != Deleted { - if !it(dk, dv) { - return false - } - } - nextDelta() - case common.EQ: - if dv != Deleted { - if !it(dk, dv) { - return false - } - } - nextDelta() - nextMain() - default: - if !it(key, values.At(idx)) { - return false - } - nextMain() - } - } - } - for idx < l { - if !it(key, values.At(idx)) { - return false - } - nextMain() - } + for k, v := range vc.adds { + if !it(&k, v) { + return false } } - for _, dk := range deltaKeys { - if value := vc.deltas[*dk]; value != Deleted { - if !it(dk, value) { + for k, v := range vc.initial { + if ch, found := vc.changes[k]; found { + if !it(&k, ch) { return false } } + if _, found := vc.deletes[k]; found { + continue + } + if !it(&k, v) { + return false + } } 
+ return true } @@ -133,10 +104,13 @@ func (vc *VectorClock) String() string { } func (vc *VectorClock) At(vUUId *common.VarUUId) uint64 { - if value, found := vc.deltas[*vUUId]; found { + if value, found := vc.adds[*vUUId]; found { + return value + } + if value, found := vc.changes[*vUUId]; found { return value } - if vc.cap == nil { + if _, found := vc.deletes[*vUUId]; found { return Deleted } if value, found := vc.initial[*vUUId]; found { @@ -146,34 +120,93 @@ func (vc *VectorClock) At(vUUId *common.VarUUId) uint64 { } func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { - if Deleted != vc.At(vUUId) { - vc.deltas[*vUUId] = Deleted + if _, found := vc.deletes[*vUUId]; found { + return vc + } + if _, found := vc.adds[*vUUId]; found { + delete(vc.adds, *vUUId) + vc.Len-- + return vc + } + if _, found := vc.initial[*vUUId]; found { + delete(vc.changes, *vUUId) + if vc.deletes == nil { + vc.deletes = make(map[common.VarUUId]server.EmptyStruct) + } + vc.deletes[*vUUId] = server.EmptyStructVal vc.Len-- } return vc } func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { - old := vc.At(vUUId) - if old == Deleted { - vc.deltas[*vUUId] = inc + if old, found := vc.adds[*vUUId]; found { + vc.adds[*vUUId] = old + inc + return vc + } + if old, found := vc.changes[*vUUId]; found { + vc.changes[*vUUId] = old + inc + return vc + } + if _, found := vc.deletes[*vUUId]; found { + delete(vc.deletes, *vUUId) vc.Len++ - } else { - vc.deltas[*vUUId] = old + inc + if vc.changes == nil { + vc.changes = make(map[common.VarUUId]uint64) + } + vc.changes[*vUUId] = inc + return vc + } + if old, found := vc.initial[*vUUId]; found { + if vc.changes == nil { + vc.changes = make(map[common.VarUUId]uint64) + } + vc.changes[*vUUId] = old + inc + return vc } + if vc.adds == nil { + vc.adds = make(map[common.VarUUId]uint64) + } + vc.adds[*vUUId] = inc + vc.Len++ return vc } func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { - old := vc.At(vUUId) - if v > old { - vc.deltas[*vUUId] = v - if old == Deleted { - vc.Len++ + if old, found := vc.adds[*vUUId]; found { + if v > old { + vc.adds[*vUUId] = v + return true } - return true + return false } - return false + if old, found := vc.changes[*vUUId]; found { + if v > old { + vc.changes[*vUUId] = v + return true + } + return false + } + if old, found := vc.initial[*vUUId]; found { + if v > old { + if _, found := vc.deletes[*vUUId]; found { + delete(vc.deletes, *vUUId) + vc.Len++ + } + if vc.changes == nil { + vc.changes = make(map[common.VarUUId]uint64) + } + vc.changes[*vUUId] = v + return true + } + return false + } + if vc.adds == nil { + vc.adds = make(map[common.VarUUId]uint64) + } + vc.adds[*vUUId] = v + vc.Len++ + return true } func (vcA *VectorClock) MergeInMax(vcB *VectorClock) bool { @@ -189,10 +222,21 @@ func (vcA *VectorClock) MergeInMax(vcB *VectorClock) bool { func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { changed := false vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if vcA.At(vUUId) == Deleted { - vcA.deltas[*vUUId] = v - changed = true + if _, found := vcA.deletes[*vUUId]; found { + delete(vcA.deletes, *vUUId) + vcA.Len++ + if vcA.changes == nil { + vcA.changes = make(map[common.VarUUId]uint64) + } + vcA.changes[*vUUId] = v + } else if _, found := vcA.adds[*vUUId]; found { + return false + } else if _, found := vcA.initial[*vUUId]; !found { vcA.Len++ + if vcA.adds == nil { + vcA.adds = make(map[common.VarUUId]uint64) + } + vcA.adds[*vUUId] = v } return true }) @@ -200,12 
+244,40 @@ func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { } func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { - if old := vc.At(vUUId); old <= v { - if old != Deleted { - vc.deltas[*vUUId] = Deleted + if _, found := vc.deletes[*vUUId]; found { + return false + } + if old, found := vc.adds[*vUUId]; found { + if old <= v { + delete(vc.adds, *vUUId) vc.Len-- + return true } - return true + return false + } + if old, found := vc.changes[*vUUId]; found { + if old <= v { + if vc.deletes == nil { + vc.deletes = make(map[common.VarUUId]server.EmptyStruct) + } + vc.deletes[*vUUId] = server.EmptyStructVal + delete(vc.changes, *vUUId) + vc.Len-- + return true + } + return false + } + if old, found := vc.initial[*vUUId]; found { + if old <= v { + if vc.deletes == nil { + vc.deletes = make(map[common.VarUUId]server.EmptyStruct) + } + vc.deletes[*vUUId] = server.EmptyStructVal + delete(vc.changes, *vUUId) + vc.Len-- + return true + } + return false } return false } @@ -241,7 +313,7 @@ func (vc *VectorClock) AddToSeg(seg *capn.Segment) msgs.VectorClock { return vcCap } - if len(vc.deltas) == 0 && vc.cap != nil { + if vc.cap != nil && vc.adds == nil && vc.changes == nil && vc.deletes == nil { return *vc.cap } @@ -260,7 +332,9 @@ func (vc *VectorClock) AddToSeg(seg *capn.Segment) msgs.VectorClock { return true }) - vc.deltas = make(map[common.VarUUId]uint64) + vc.adds = nil + vc.changes = nil + vc.deletes = nil vc.cap = &vcCap return vcCap From 02dfc258482c434f9a9972aa1dd8714c487e7108 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 12 May 2016 18:12:25 +0100 Subject: [PATCH 07/78] It works again. And seems quicker too. Ref T26. --HG-- branch : T26 --- txnengine/frame.go | 2 +- txnengine/vectorclock.go | 82 ++++++++++++++++++++-------------------- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/txnengine/frame.go b/txnengine/frame.go index dc35389..e6e0ece 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -550,7 +550,7 @@ func (fo *frameOpen) calculateReadVoteClock() { }) fo.readVoteClock = clock if fo.frameWritesClock.At(fo.v.UUId) == Deleted { - panic(fmt.Sprintf("%v no write to self!", fo.frame)) + panic(fmt.Sprintf("%v no write to self! 
%v", fo.frame, fo.frameWritesClock)) } } } diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index 4c82ad3..7eecc37 100644 --- a/txnengine/vectorclock.go +++ b/txnengine/vectorclock.go @@ -44,19 +44,19 @@ func NewVectorClock() *VectorClock { func (vcA *VectorClock) Clone() *VectorClock { adds, changes, deletes := vcA.adds, vcA.changes, vcA.deletes if len(adds) > 0 { - adds := make(map[common.VarUUId]uint64, len(adds)) + adds = make(map[common.VarUUId]uint64, len(adds)) for k, v := range vcA.adds { adds[k] = v } } if len(changes) > 0 { - changes := make(map[common.VarUUId]uint64, len(changes)) + changes = make(map[common.VarUUId]uint64, len(changes)) for k, v := range vcA.changes { changes[k] = v } } if len(deletes) > 0 { - deletes := make(map[common.VarUUId]server.EmptyStruct, len(deletes)) + deletes = make(map[common.VarUUId]server.EmptyStruct, len(deletes)) for k := range vcA.deletes { deletes[k] = server.EmptyStructVal } @@ -82,20 +82,17 @@ func (vc *VectorClock) ForEach(it func(*common.VarUUId, uint64) bool) bool { if !it(&k, ch) { return false } - } - if _, found := vc.deletes[k]; found { + } else if _, found := vc.deletes[k]; found { continue - } - if !it(&k, v) { + } else if !it(&k, v) { return false } } - return true } func (vc *VectorClock) String() string { - str := fmt.Sprintf("VC:(%v)", vc.Len) + str := fmt.Sprintf("VC:(%v) initial %v; changes %v; deletes %v; adds %v", vc.Len, vc.initial, vc.changes, vc.deletes, vc.adds) vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { str += fmt.Sprintf(" %v:%v", vUUId, v) return true @@ -143,12 +140,10 @@ func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { if old, found := vc.adds[*vUUId]; found { vc.adds[*vUUId] = old + inc return vc - } - if old, found := vc.changes[*vUUId]; found { + } else if old, found := vc.changes[*vUUId]; found { vc.changes[*vUUId] = old + inc return vc - } - if _, found := vc.deletes[*vUUId]; found { + } else if _, found := vc.deletes[*vUUId]; found { delete(vc.deletes, *vUUId) vc.Len++ if vc.changes == nil { @@ -156,20 +151,20 @@ func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { } vc.changes[*vUUId] = inc return vc - } - if old, found := vc.initial[*vUUId]; found { + } else if old, found := vc.initial[*vUUId]; found { if vc.changes == nil { vc.changes = make(map[common.VarUUId]uint64) } vc.changes[*vUUId] = old + inc return vc + } else { + if vc.adds == nil { + vc.adds = make(map[common.VarUUId]uint64) + } + vc.adds[*vUUId] = inc + vc.Len++ + return vc } - if vc.adds == nil { - vc.adds = make(map[common.VarUUId]uint64) - } - vc.adds[*vUUId] = inc - vc.Len++ - return vc } func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { @@ -179,20 +174,22 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { return true } return false - } - if old, found := vc.changes[*vUUId]; found { + } else if old, found := vc.changes[*vUUId]; found { if v > old { vc.changes[*vUUId] = v return true } return false - } - if old, found := vc.initial[*vUUId]; found { + } else if _, found := vc.deletes[*vUUId]; found { + delete(vc.deletes, *vUUId) + vc.Len++ + if vc.changes == nil { + vc.changes = make(map[common.VarUUId]uint64) + } + vc.changes[*vUUId] = v + return true + } else if old, found := vc.initial[*vUUId]; found { if v > old { - if _, found := vc.deletes[*vUUId]; found { - delete(vc.deletes, *vUUId) - vc.Len++ - } if vc.changes == nil { vc.changes = make(map[common.VarUUId]uint64) } @@ -200,13 +197,14 @@ func (vc 
*VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { return true } return false + } else { + if vc.adds == nil { + vc.adds = make(map[common.VarUUId]uint64) + } + vc.adds[*vUUId] = v + vc.Len++ + return true } - if vc.adds == nil { - vc.adds = make(map[common.VarUUId]uint64) - } - vc.adds[*vUUId] = v - vc.Len++ - return true } func (vcA *VectorClock) MergeInMax(vcB *VectorClock) bool { @@ -229,14 +227,16 @@ func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { vcA.changes = make(map[common.VarUUId]uint64) } vcA.changes[*vUUId] = v + changed = true } else if _, found := vcA.adds[*vUUId]; found { - return false - } else if _, found := vcA.initial[*vUUId]; !found { + } else if _, found := vcA.initial[*vUUId]; found { + } else { vcA.Len++ if vcA.adds == nil { vcA.adds = make(map[common.VarUUId]uint64) } vcA.adds[*vUUId] = v + changed = true } return true }) @@ -273,7 +273,6 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { vc.deletes = make(map[common.VarUUId]server.EmptyStruct) } vc.deletes[*vUUId] = server.EmptyStructVal - delete(vc.changes, *vUUId) vc.Len-- return true } @@ -313,7 +312,7 @@ func (vc *VectorClock) AddToSeg(seg *capn.Segment) msgs.VectorClock { return vcCap } - if vc.cap != nil && vc.adds == nil && vc.changes == nil && vc.deletes == nil { + if vc.cap != nil && len(vc.adds) == 0 && len(vc.changes) == 0 && len(vc.deletes) == 0 { return *vc.cap } @@ -323,15 +322,16 @@ func (vc *VectorClock) AddToSeg(seg *capn.Segment) msgs.VectorClock { vcCap.SetVarUuids(vUUIds) vcCap.SetValues(values) idx := 0 - vc.initial = make(map[common.VarUUId]uint64, vc.Len) + initial := make(map[common.VarUUId]uint64, vc.Len) vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - vc.initial[*vUUId] = v + initial[*vUUId] = v vUUIds.Set(idx, vUUId[:]) values.Set(idx, v) idx++ return true }) + vc.initial = initial vc.adds = nil vc.changes = nil vc.deletes = nil From 92504805105cadc7b11bbce9791beb0755471cf3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 13 May 2016 11:45:48 +0100 Subject: [PATCH 08/78] Simplified vector clock. Corrected network writing! Ref T26. 
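
Two separate fixes share this patch. The vector clock simplification drops the deletes set from the previous revision and encodes removal as a zero in changes — workable because, since patch 05, live clock elements start at 1 (NewVar now bumps to 1), so the Deleted constant (0) is unambiguous as a tombstone. The network correction makes send loop until the whole buffer has been written instead of assuming a single Write suffices. A self-contained, deliberately simplified sketch of the tombstone scheme (types reduced to strings; the real code keys on common.VarUUId and also tracks an adds map and a Len counter):

    package main

    import "fmt"

    const deleted uint64 = 0 // live elements are always >= 1

    type vclock struct {
        initial map[string]uint64 // shared between clones, never mutated
        changes map[string]uint64 // per-clone overlay; 0 is a tombstone
    }

    func (vc *vclock) at(k string) uint64 {
        if v, found := vc.changes[k]; found {
            return v // may be 0, i.e. deleted
        }
        return vc.initial[k] // absent keys also read as 0
    }

    func (vc *vclock) del(k string) {
        if _, found := vc.initial[k]; found {
            vc.changes[k] = deleted // shadow, don't touch the shared map
        } else {
            delete(vc.changes, k)
        }
    }

    func main() {
        shared := map[string]uint64{"x": 3}
        a := &vclock{initial: shared, changes: map[string]uint64{}}
        b := &vclock{initial: shared, changes: map[string]uint64{}}
        b.del("x")
        fmt.Println(a.at("x"), b.at("x")) // 3 0 — clones stay independent
    }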
--HG-- branch : T26 --- network/connection.go | 31 ++++++++-- paxos/acceptor.go | 12 ++-- txnengine/transaction.go | 4 ++ txnengine/vectorclock.go | 124 ++++++++++++++++----------------------- 4 files changed, 85 insertions(+), 86 deletions(-) diff --git a/network/connection.go b/network/connection.go index eb3c1b8..dda35f1 100644 --- a/network/connection.go +++ b/network/connection.go @@ -159,8 +159,10 @@ func NewConnectionToDial(host string, cm *ConnectionManager) *Connection { } func NewConnectionFromTCPConn(socket *net.TCPConn, cm *ConnectionManager, count uint32) *Connection { - socket.SetKeepAlive(true) - socket.SetKeepAlivePeriod(time.Second) + if err := common.ConfigureSocket(socket); err != nil { + log.Println(err) + return nil + } conn := &Connection{ socket: socket, connectionManager: cm, @@ -388,8 +390,11 @@ func (cc *connectionDial) start() (bool, error) { cc.nextState(&cc.connectionDelay) return false, nil } - socket.SetKeepAlive(true) - socket.SetKeepAlivePeriod(time.Second) + if err := common.ConfigureSocket(socket); err != nil { + log.Println(err) + cc.nextState(&cc.connectionDelay) + return false, nil + } cc.socket = socket cc.nextState(nil) return false, nil @@ -449,8 +454,20 @@ func (cah *connectionAwaitHandshake) makeHello() *capn.Segment { } func (cah *connectionAwaitHandshake) send(msg []byte) error { - _, err := cah.socket.Write(msg) - return err + l := len(msg) + for l > 0 { + w, err := cah.socket.Write(msg) + if err != nil { + return err + } + if w == l { + return nil + } else { + msg = msg[w:] + l -= w + } + } + return nil } func (cah *connectionAwaitHandshake) readOne() (*capn.Segment, error) { @@ -515,11 +532,13 @@ func (cash *connectionAwaitServerHandshake) start() (bool, error) { // We came from the listener, so we're going to act as the server. config.ClientAuth = tls.RequireAndVerifyClientCert cash.socket = tls.Server(cash.socket, config) + cash.socket.SetDeadline(time.Time{}) } else { config.InsecureSkipVerify = true socket := tls.Client(cash.socket, config) cash.socket = socket + socket.SetDeadline(time.Time{}) // This is nuts: as a server, we can demand the client cert and // verify that without any concept of a client name. 
But as the diff --git a/paxos/acceptor.go b/paxos/acceptor.go index de4aaef..b26c65b 100644 --- a/paxos/acceptor.go +++ b/paxos/acceptor.go @@ -405,26 +405,26 @@ func newTwoBTxnVotesSender(outcome *msgs.Outcome, txnId *common.TxnId, submitter } func (s *twoBTxnVotesSender) ConnectedRMs(conns map[common.RMId]Connection) { - if conn, found := conns[s.submitter]; found { - conn.Send(s.submitterMsg) - } for _, rmId := range s.recipients { if conn, found := conns[rmId]; found { conn.Send(s.msg) } } + if conn, found := conns[s.submitter]; found { + conn.Send(s.submitterMsg) + } } func (s *twoBTxnVotesSender) ConnectionLost(common.RMId, map[common.RMId]Connection) {} func (s *twoBTxnVotesSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection) { - if s.submitter == rmId { - conn.Send(s.submitterMsg) - } for _, recipient := range s.recipients { if recipient == rmId { conn.Send(s.msg) break } } + if s.submitter == rmId { + conn.Send(s.submitterMsg) + } } diff --git a/txnengine/transaction.go b/txnengine/transaction.go index 70d170c..9d103c8 100644 --- a/txnengine/transaction.go +++ b/txnengine/transaction.go @@ -536,6 +536,10 @@ func (tro *txnReceiveOutcome) BallotOutcomeReceived(outcome *msgs.Outcome) { switch outcome.Which() { case msgs.OUTCOME_COMMIT: tro.outcomeClock = VectorClockFromCap(outcome.Commit()) + /* + excess := tro.outcomeClock.Len - tro.TxnCap.Actions().Len() + fmt.Printf("%v ", excess) + */ default: tro.aborted = true } diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index 7eecc37..8895773 100644 --- a/txnengine/vectorclock.go +++ b/txnengine/vectorclock.go @@ -4,7 +4,6 @@ import ( "fmt" capn "github.com/glycerine/go-capnproto" "goshawkdb.io/common" - "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" ) @@ -17,7 +16,6 @@ type VectorClock struct { initial map[common.VarUUId]uint64 adds map[common.VarUUId]uint64 changes map[common.VarUUId]uint64 - deletes map[common.VarUUId]server.EmptyStruct Len int } @@ -42,7 +40,7 @@ func NewVectorClock() *VectorClock { } func (vcA *VectorClock) Clone() *VectorClock { - adds, changes, deletes := vcA.adds, vcA.changes, vcA.deletes + adds, changes := vcA.adds, vcA.changes if len(adds) > 0 { adds = make(map[common.VarUUId]uint64, len(adds)) for k, v := range vcA.adds { @@ -55,18 +53,11 @@ func (vcA *VectorClock) Clone() *VectorClock { changes[k] = v } } - if len(deletes) > 0 { - deletes = make(map[common.VarUUId]server.EmptyStruct, len(deletes)) - for k := range vcA.deletes { - deletes[k] = server.EmptyStructVal - } - } return &VectorClock{ cap: vcA.cap, initial: vcA.initial, adds: adds, changes: changes, - deletes: deletes, Len: vcA.Len, } } @@ -77,13 +68,19 @@ func (vc *VectorClock) ForEach(it func(*common.VarUUId, uint64) bool) bool { return false } } + chCount := len(vc.changes) for k, v := range vc.initial { - if ch, found := vc.changes[k]; found { - if !it(&k, ch) { + if chCount == 0 { + if !it(&k, v) { return false } - } else if _, found := vc.deletes[k]; found { - continue + } else if ch, found := vc.changes[k]; found { + chCount-- + if ch != Deleted { + if !it(&k, ch) { + return false + } + } } else if !it(&k, v) { return false } @@ -92,7 +89,7 @@ func (vc *VectorClock) ForEach(it func(*common.VarUUId, uint64) bool) bool { } func (vc *VectorClock) String() string { - str := fmt.Sprintf("VC:(%v) initial %v; changes %v; deletes %v; adds %v", vc.Len, vc.initial, vc.changes, vc.deletes, vc.adds) + str := fmt.Sprintf("VC:(%v)", vc.Len) vc.ForEach(func(vUUId *common.VarUUId, v uint64) 
bool { str += fmt.Sprintf(" %v:%v", vUUId, v) return true @@ -103,34 +100,30 @@ func (vc *VectorClock) String() string { func (vc *VectorClock) At(vUUId *common.VarUUId) uint64 { if value, found := vc.adds[*vUUId]; found { return value - } - if value, found := vc.changes[*vUUId]; found { + } else if value, found := vc.changes[*vUUId]; found { return value - } - if _, found := vc.deletes[*vUUId]; found { - return Deleted - } - if value, found := vc.initial[*vUUId]; found { + } else if value, found := vc.initial[*vUUId]; found { return value } return Deleted } func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { - if _, found := vc.deletes[*vUUId]; found { - return vc - } if _, found := vc.adds[*vUUId]; found { delete(vc.adds, *vUUId) vc.Len-- return vc - } - if _, found := vc.initial[*vUUId]; found { - delete(vc.changes, *vUUId) - if vc.deletes == nil { - vc.deletes = make(map[common.VarUUId]server.EmptyStruct) + } else if ch, found := vc.changes[*vUUId]; found { + if ch != Deleted { + vc.Len-- + vc.changes[*vUUId] = Deleted + } + return vc + } else if _, found := vc.initial[*vUUId]; found { + if vc.changes == nil { + vc.changes = make(map[common.VarUUId]uint64) } - vc.deletes[*vUUId] = server.EmptyStructVal + vc.changes[*vUUId] = Deleted vc.Len-- } return vc @@ -141,15 +134,12 @@ func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { vc.adds[*vUUId] = old + inc return vc } else if old, found := vc.changes[*vUUId]; found { - vc.changes[*vUUId] = old + inc - return vc - } else if _, found := vc.deletes[*vUUId]; found { - delete(vc.deletes, *vUUId) - vc.Len++ - if vc.changes == nil { - vc.changes = make(map[common.VarUUId]uint64) + if old == Deleted { + vc.changes[*vUUId] = inc + vc.Len++ + } else { + vc.changes[*vUUId] = old + inc } - vc.changes[*vUUId] = inc return vc } else if old, found := vc.initial[*vUUId]; found { if vc.changes == nil { @@ -177,17 +167,12 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { } else if old, found := vc.changes[*vUUId]; found { if v > old { vc.changes[*vUUId] = v + if old == Deleted { + vc.Len++ + } return true } return false - } else if _, found := vc.deletes[*vUUId]; found { - delete(vc.deletes, *vUUId) - vc.Len++ - if vc.changes == nil { - vc.changes = make(map[common.VarUUId]uint64) - } - vc.changes[*vUUId] = v - return true } else if old, found := vc.initial[*vUUId]; found { if v > old { if vc.changes == nil { @@ -220,16 +205,17 @@ func (vcA *VectorClock) MergeInMax(vcB *VectorClock) bool { func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { changed := false vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if _, found := vcA.deletes[*vUUId]; found { - delete(vcA.deletes, *vUUId) - vcA.Len++ - if vcA.changes == nil { - vcA.changes = make(map[common.VarUUId]uint64) + if _, found := vcA.adds[*vUUId]; found { + return true + } else if ch, found := vcA.changes[*vUUId]; found { + if ch == Deleted { + vcA.Len++ + vcA.changes[*vUUId] = v + changed = true } - vcA.changes[*vUUId] = v - changed = true - } else if _, found := vcA.adds[*vUUId]; found { + return true } else if _, found := vcA.initial[*vUUId]; found { + return true } else { vcA.Len++ if vcA.adds == nil { @@ -237,16 +223,13 @@ func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { } vcA.adds[*vUUId] = v changed = true + return true } - return true }) return changed } func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { - if _, found := vc.deletes[*vUUId]; found { - return false - } 
if old, found := vc.adds[*vUUId]; found { if old <= v { delete(vc.adds, *vUUId) @@ -254,25 +237,19 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { return true } return false - } - if old, found := vc.changes[*vUUId]; found { - if old <= v { - if vc.deletes == nil { - vc.deletes = make(map[common.VarUUId]server.EmptyStruct) - } - vc.deletes[*vUUId] = server.EmptyStructVal - delete(vc.changes, *vUUId) + } else if old, found := vc.changes[*vUUId]; found { + if old != Deleted && old <= v { + vc.changes[*vUUId] = Deleted vc.Len-- return true } return false - } - if old, found := vc.initial[*vUUId]; found { + } else if old, found := vc.initial[*vUUId]; found { if old <= v { - if vc.deletes == nil { - vc.deletes = make(map[common.VarUUId]server.EmptyStruct) + if vc.changes == nil { + vc.changes = make(map[common.VarUUId]uint64) } - vc.deletes[*vUUId] = server.EmptyStructVal + vc.changes[*vUUId] = Deleted vc.Len-- return true } @@ -312,7 +289,7 @@ func (vc *VectorClock) AddToSeg(seg *capn.Segment) msgs.VectorClock { return vcCap } - if vc.cap != nil && len(vc.adds) == 0 && len(vc.changes) == 0 && len(vc.deletes) == 0 { + if vc.cap != nil && len(vc.adds) == 0 && len(vc.changes) == 0 { return *vc.cap } @@ -334,7 +311,6 @@ func (vc *VectorClock) AddToSeg(seg *capn.Segment) msgs.VectorClock { vc.initial = initial vc.adds = nil vc.changes = nil - vc.deletes = nil vc.cap = &vcCap return vcCap From ba8f2f4e66bba280fbb1639d50f5b2cedf21d5c6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 13 May 2016 15:45:47 +0100 Subject: [PATCH 09/78] Happy now with VC. Memoizing the roll txn seems to be a good idea. Much more tuning needed though. Ref T26. --HG-- branch : T26 --- txnengine/frame.go | 15 +++++++++++---- txnengine/vectorclock.go | 24 ++++++++++++------------ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/txnengine/frame.go b/txnengine/frame.go index e6e0ece..490ed20 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -142,6 +142,8 @@ type frameOpen struct { rwPresent bool rollScheduled bool rollActive bool + rollTxn *cmsgs.ClientTxn + rollTxnPos map[common.VarUUId]*common.Positions } func (fo *frameOpen) init(f *frame) { @@ -412,7 +414,7 @@ func (fo *frameOpen) ReadLearnt(action *localAction) bool { // missing some TGCs - essentially we can infer TGCs by // observing the outcome clocks on future txns we learn. fo.readVoteClock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if action.outcomeClock.At(vUUId) == Deleted { + if action.outcomeClock.At(vUUId) == 0 { fo.mask.SetVarIdMax(vUUId, v) } return true @@ -458,7 +460,7 @@ func (fo *frameOpen) WriteLearnt(action *localAction) bool { clock = fo.readVoteClock } clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if action.outcomeClock.At(vUUId) == Deleted { + if action.outcomeClock.At(vUUId) == 0 { fo.mask.SetVarIdMax(vUUId, v) } return true @@ -483,7 +485,7 @@ func (fo *frameOpen) isLocked() bool { if fo.frameTxnActions == nil || fo.parent == nil { return false } - rvcLen := len(fo.readVoteClock.Clock) + rvcLen := fo.readVoteClock.Len actionsLen := fo.frameTxnActions.Len() excess := rvcLen - actionsLen return excess > server.FrameLockMinExcessSize && rvcLen > actionsLen*server.FrameLockMinRatio @@ -549,7 +551,7 @@ func (fo *frameOpen) calculateReadVoteClock() { return true }) fo.readVoteClock = clock - if fo.frameWritesClock.At(fo.v.UUId) == Deleted { + if fo.frameWritesClock.At(fo.v.UUId) == 0 { panic(fmt.Sprintf("%v no write to self! 
%v", fo.frame, fo.frameWritesClock)) } } @@ -720,6 +722,9 @@ func (fo *frameOpen) maybeStartRoll() { } func (fo *frameOpen) createRollClientTxn() (*cmsgs.ClientTxn, map[common.VarUUId]*common.Positions) { + if fo.rollTxn != nil { + return fo.rollTxn, fo.rollTxnPos + } var origWrite *msgs.Action vUUIdBytes := fo.v.UUId[:] for idx, l := 0, fo.frameTxnActions.Len(); idx < l; idx++ { @@ -771,6 +776,8 @@ func (fo *frameOpen) createRollClientTxn() (*cmsgs.ClientTxn, map[common.VarUUId posMap[*vUUId] = &pos refVarList.Set(idx, vUUId[:]) } + fo.rollTxn = &ctxn + fo.rollTxnPos = posMap return &ctxn, posMap } diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index 8895773..72610d0 100644 --- a/txnengine/vectorclock.go +++ b/txnengine/vectorclock.go @@ -8,7 +8,7 @@ import ( ) const ( - Deleted uint64 = 0 + deleted uint64 = 0 ) type VectorClock struct { @@ -76,7 +76,7 @@ func (vc *VectorClock) ForEach(it func(*common.VarUUId, uint64) bool) bool { } } else if ch, found := vc.changes[k]; found { chCount-- - if ch != Deleted { + if ch != deleted { if !it(&k, ch) { return false } @@ -105,7 +105,7 @@ func (vc *VectorClock) At(vUUId *common.VarUUId) uint64 { } else if value, found := vc.initial[*vUUId]; found { return value } - return Deleted + return deleted } func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { @@ -114,16 +114,16 @@ func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { vc.Len-- return vc } else if ch, found := vc.changes[*vUUId]; found { - if ch != Deleted { + if ch != deleted { vc.Len-- - vc.changes[*vUUId] = Deleted + vc.changes[*vUUId] = deleted } return vc } else if _, found := vc.initial[*vUUId]; found { if vc.changes == nil { vc.changes = make(map[common.VarUUId]uint64) } - vc.changes[*vUUId] = Deleted + vc.changes[*vUUId] = deleted vc.Len-- } return vc @@ -134,7 +134,7 @@ func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { vc.adds[*vUUId] = old + inc return vc } else if old, found := vc.changes[*vUUId]; found { - if old == Deleted { + if old == deleted { vc.changes[*vUUId] = inc vc.Len++ } else { @@ -167,7 +167,7 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { } else if old, found := vc.changes[*vUUId]; found { if v > old { vc.changes[*vUUId] = v - if old == Deleted { + if old == deleted { vc.Len++ } return true @@ -208,7 +208,7 @@ func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { if _, found := vcA.adds[*vUUId]; found { return true } else if ch, found := vcA.changes[*vUUId]; found { - if ch == Deleted { + if ch == deleted { vcA.Len++ vcA.changes[*vUUId] = v changed = true @@ -238,8 +238,8 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { } return false } else if old, found := vc.changes[*vUUId]; found { - if old != Deleted && old <= v { - vc.changes[*vUUId] = Deleted + if old != deleted && old <= v { + vc.changes[*vUUId] = deleted vc.Len-- return true } @@ -249,7 +249,7 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { if vc.changes == nil { vc.changes = make(map[common.VarUUId]uint64) } - vc.changes[*vUUId] = Deleted + vc.changes[*vUUId] = deleted vc.Len-- return true } From eb28447acc873c873c9fc73aadd2922cbd95204a Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 14 May 2016 23:50:44 +0100 Subject: [PATCH 10/78] Just dropping a marker here. This is a good fall-back point if necessary. 
I still feel it would be better to get some feedback from the vars in
question when aborts happen rather than just this mechanism of backoff
delay, but not sure yet.

Ref T26.

--HG--
branch : T26
---
 client/clienttxnsubmitter.go | 31 +++++++++++++++++++++----------
 consts.go                    |  4 ++--
 paxos/ballotaccumulator.go   |  9 +++++----
 txnengine/frame.go           |  2 +-
 4 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go
index e33a65b..b27cfff 100644
--- a/client/clienttxnsubmitter.go
+++ b/client/clienttxnsubmitter.go
@@ -18,6 +18,7 @@ type ClientTxnSubmitter struct {
 	*SimpleTxnSubmitter
 	versionCache versionCache
 	txnLive      bool
+	initialDelay time.Duration
 }
 
 func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, cm paxos.ConnectionManager) *ClientTxnSubmitter {
@@ -25,6 +26,7 @@ func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, cm paxos.Connecti
 		SimpleTxnSubmitter: NewSimpleTxnSubmitter(rmId, bootCount, cm),
 		versionCache:       NewVersionCache(),
 		txnLive:            false,
+		initialDelay:       time.Duration(0),
 	}
 }
 
@@ -46,7 +48,11 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn,
 
 	curTxnId := common.MakeTxnId(ctxnCap.Id())
 
-	delay := time.Duration(0)
+	delay := cts.initialDelay
+	if delay < time.Millisecond {
+		delay = time.Duration(0)
+	}
+	start := time.Now()
 	retryCount := 0
 
 	var cont TxnCompletionConsumer
@@ -56,6 +62,9 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn,
 			continuation(nil, err)
 			return
 		}
+		end := time.Now()
+		elapsed := end.Sub(start)
+		start = end
 		switch outcome.Which() {
 		case msgs.OUTCOME_COMMIT:
 			cts.versionCache.UpdateFromCommit(txnId, outcome)
@@ -63,6 +72,8 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn,
 			clientOutcome.SetCommit()
 			cts.addCreatesToCache(outcome)
 			cts.txnLive = false
+			cts.initialDelay = delay >> 1
+			fmt.Printf("¬%v ", retryCount)
 			continuation(&clientOutcome, nil)
 			return
 
@@ -78,21 +89,20 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn,
 					clientOutcome.SetFinalId(txnId[:])
 					clientOutcome.SetAbort(cts.translateUpdates(seg, validUpdates))
 					cts.txnLive = false
+					cts.initialDelay = delay >> 1
+					fmt.Printf("¬%v ", retryCount)
 					continuation(&clientOutcome, nil)
 					return
 				}
 			}
 			server.Log("Resubmitting", txnId, "; orig resubmit?", abort.Which() == msgs.OUTCOMEABORT_RESUBMIT)
 			retryCount++
-			switch {
-			case retryCount == server.SubmissionInitialAttempts:
-				delay = server.SubmissionInitialBackoff
-			case retryCount > server.SubmissionInitialAttempts:
-				delay = delay + time.Duration(cts.rng.Intn(int(delay)))
-				if delay > server.SubmissionMaxSubmitDelay {
-					delay = time.Duration(cts.rng.Intn(int(server.SubmissionMaxSubmitDelay)))
-				}
+
+			delay = delay + time.Duration(cts.rng.Intn(int(elapsed)))
+			if delay > server.SubmissionMaxSubmitDelay {
+				delay = server.SubmissionMaxSubmitDelay + time.Duration(cts.rng.Intn(int(server.SubmissionMaxSubmitDelay)))
 			}
+			fmt.Printf("%v|%v ", retryCount, delay)
 
 			curTxnIdNum := binary.BigEndian.Uint64(txnId[:8])
 			curTxnIdNum += 1 + uint64(cts.rng.Intn(8))
@@ -104,7 +114,8 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn,
 	}
 
 	cts.txnLive = true
-	cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, cont, 0, false)
+	fmt.Printf("%v|%v ", retryCount, delay)
+	cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, cont, delay, false)
 }
 
 func (cts *ClientTxnSubmitter) addCreatesToCache(outcome *msgs.Outcome) {
diff --git a/consts.go b/consts.go
index 9e0810b..b92becd 100644 --- a/consts.go +++ b/consts.go @@ -8,10 +8,10 @@ const ( ServerVersion = "dev" MDBInitialSize = 1048576 TwoToTheSixtyThree = 9223372036854775808 - SubmissionInitialAttempts = 5 + SubmissionInitialAttempts = 0 SubmissionInitialBackoff = 2 * time.Microsecond SubmissionMaxSubmitDelay = 2 * time.Second - VarIdleTimeoutMin = 50 * time.Millisecond + VarIdleTimeoutMin = 500 * time.Millisecond VarIdleTimeoutRange = 250 FrameLockMinExcessSize = 100 FrameLockMinRatio = 2 diff --git a/paxos/ballotaccumulator.go b/paxos/ballotaccumulator.go index 44089ea..3c6aa51 100644 --- a/paxos/ballotaccumulator.go +++ b/paxos/ballotaccumulator.go @@ -372,18 +372,19 @@ func (br badReads) combine(rmBal *rmBallot) { for idx, l := 0, actions.Len(); idx < l; idx++ { action := actions.At(idx) vUUId := common.MakeVarUUId(action.VarId()) + clockElem := clock.At(vUUId) if bra, found := br[*vUUId]; found { - bra.combine(&action, rmBal, txnId, clock.At(vUUId)) + bra.combine(&action, rmBal, txnId, clockElem) } else if action.Which() == msgs.ACTION_READ { br[*vUUId] = &badReadAction{ rmBallot: rmBal, vUUId: vUUId, txnId: common.MakeTxnId(action.Read().Version()), - clockElem: clock.At(vUUId) - 1, + clockElem: clockElem - 1, action: &action, } - if clock.At(vUUId) == 0 { + if clockElem == 0 { panic(fmt.Sprintf("Just did 0 - 1 in int64 (%v, %v) (%v)", vUUId, clock, txnId)) } } else { @@ -391,7 +392,7 @@ func (br badReads) combine(rmBal *rmBallot) { rmBallot: rmBal, vUUId: vUUId, txnId: txnId, - clockElem: clock.At(vUUId), + clockElem: clockElem, action: &action, } } diff --git a/txnengine/frame.go b/txnengine/frame.go index 490ed20..b6405c0 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -692,9 +692,9 @@ func (fo *frameOpen) maybeStartRoll() { if fo.v.vm.RollAllowed && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { fo.rollActive = true - ctxn, varPosMap := fo.createRollClientTxn() go func() { server.Log(fo.frame, "Starting roll") + ctxn, varPosMap := fo.createRollClientTxn() outcome, err := fo.v.vm.RunClientTransaction(ctxn, varPosMap, true) ow := "" if outcome != nil { From 9e64644bf757576ed8d5b778fc14e0fc12adeaf9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 15 May 2016 20:33:59 +0100 Subject: [PATCH 11/78] Tidying. Ref T26. 
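
As an aside, the resubmission backoff which the previous patch
introduced (and which this tidy-up keeps) reduces to the sketch below.
This is illustrative rather than code from the tree: the backoff type
and the maxDelay constant are invented names, with maxDelay standing in
for SubmissionMaxSubmitDelay and math/rand standing in for the
submitter's own rng.

package main

import (
	"fmt"
	"math/rand"
	"time"
)

const maxDelay = 2 * time.Second // stands in for SubmissionMaxSubmitDelay

type backoff struct {
	rng   *rand.Rand
	delay time.Duration
}

// next returns the delay before the next resubmission: the current
// delay grows by a random amount bounded by how long the aborted
// attempt took, and is capped at maxDelay plus up to maxDelay of
// jitter so that contending clients do not resubmit in lockstep.
func (b *backoff) next(elapsed time.Duration) time.Duration {
	if elapsed > 0 {
		b.delay += time.Duration(b.rng.Int63n(int64(elapsed)))
	}
	if b.delay > maxDelay {
		b.delay = maxDelay + time.Duration(b.rng.Int63n(int64(maxDelay)))
	}
	return b.delay
}

// committed mirrors initialDelay = delay >> 1 above: on success, half
// the final delay is carried forward as the starting delay for the
// next transaction from this client.
func (b *backoff) committed() time.Duration {
	b.delay >>= 1
	return b.delay
}

func main() {
	b := &backoff{rng: rand.New(rand.NewSource(1))}
	for i := 0; i < 4; i++ {
		fmt.Println("resubmit after:", b.next(10*time.Millisecond))
	}
	fmt.Println("carried forward:", b.committed())
}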
--HG-- branch : T26 --- client/clienttxnsubmitter.go | 8 ++------ consts.go | 1 - network/connection.go | 20 ++++++++++++-------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index b27cfff..e6ed223 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -53,7 +53,6 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, delay = time.Duration(0) } start := time.Now() - retryCount := 0 var cont TxnCompletionConsumer cont = func(txnId *common.TxnId, outcome *msgs.Outcome, err error) { @@ -73,7 +72,6 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, cts.addCreatesToCache(outcome) cts.txnLive = false cts.initialDelay = delay >> 1 - fmt.Printf("¬%v ", retryCount) continuation(&clientOutcome, nil) return @@ -90,19 +88,17 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, clientOutcome.SetAbort(cts.translateUpdates(seg, validUpdates)) cts.txnLive = false cts.initialDelay = delay >> 1 - fmt.Printf("¬%v ", retryCount) continuation(&clientOutcome, nil) return } } server.Log("Resubmitting", txnId, "; orig resubmit?", abort.Which() == msgs.OUTCOMEABORT_RESUBMIT) - retryCount++ delay = delay + time.Duration(cts.rng.Intn(int(elapsed))) if delay > server.SubmissionMaxSubmitDelay { delay = server.SubmissionMaxSubmitDelay + time.Duration(cts.rng.Intn(int(server.SubmissionMaxSubmitDelay))) } - fmt.Printf("%v|%v ", retryCount, delay) + //fmt.Printf("%v ", delay) curTxnIdNum := binary.BigEndian.Uint64(txnId[:8]) curTxnIdNum += 1 + uint64(cts.rng.Intn(8)) @@ -114,7 +110,7 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, } cts.txnLive = true - fmt.Printf("%v|%v ", retryCount, delay) + // fmt.Printf("%v ", delay) cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, cont, delay, false) } diff --git a/consts.go b/consts.go index b92becd..d43d45d 100644 --- a/consts.go +++ b/consts.go @@ -8,7 +8,6 @@ const ( ServerVersion = "dev" MDBInitialSize = 1048576 TwoToTheSixtyThree = 9223372036854775808 - SubmissionInitialAttempts = 0 SubmissionInitialBackoff = 2 * time.Microsecond SubmissionMaxSubmitDelay = 2 * time.Second VarIdleTimeoutMin = 500 * time.Millisecond diff --git a/network/connection.go b/network/connection.go index dda35f1..da075db 100644 --- a/network/connection.go +++ b/network/connection.go @@ -456,13 +456,12 @@ func (cah *connectionAwaitHandshake) makeHello() *capn.Segment { func (cah *connectionAwaitHandshake) send(msg []byte) error { l := len(msg) for l > 0 { - w, err := cah.socket.Write(msg) - if err != nil { + switch w, err := cah.socket.Write(msg); { + case err != nil: return err - } - if w == l { + case w == l: return nil - } else { + default: msg = msg[w:] l -= w } @@ -531,14 +530,19 @@ func (cash *connectionAwaitServerHandshake) start() (bool, error) { if cash.remoteHost == "" { // We came from the listener, so we're going to act as the server. 
config.ClientAuth = tls.RequireAndVerifyClientCert - cash.socket = tls.Server(cash.socket, config) - cash.socket.SetDeadline(time.Time{}) + socket := tls.Server(cash.socket, config) + if err := socket.SetDeadline(time.Time{}); err != nil { + return cash.connectionAwaitHandshake.maybeRestartConnection(err) + } + cash.socket = socket } else { config.InsecureSkipVerify = true socket := tls.Client(cash.socket, config) + if err := socket.SetDeadline(time.Time{}); err != nil { + return cash.connectionAwaitHandshake.maybeRestartConnection(err) + } cash.socket = socket - socket.SetDeadline(time.Time{}) // This is nuts: as a server, we can demand the client cert and // verify that without any concept of a client name. But as the From 71edf73c4ff25359cc54ad16b9454e5d2c4d9d76 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 31 May 2016 15:16:38 +0100 Subject: [PATCH 12/78] Minor changes mainly as a result of improving the Go client. Don't use pointers when we don't need to (and I guess the compiler can figure out only one copy exists, so no need to actually copy); improve / correct connection shutdown logic. --HG-- branch : dev --- network/connection.go | 30 +++++++++++------------------- network/connectionmanager.go | 4 ++-- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/network/connection.go b/network/connection.go index da075db..9c1454d 100644 --- a/network/connection.go +++ b/network/connection.go @@ -252,10 +252,10 @@ func (conn *Connection) handleMsg(msg connectionMsg) (terminate bool, err error) case connectionReadError: conn.reader = nil err = conn.connectionRun.maybeRestartConnection(msgT.error) - case *connectionReadMessage: - err = conn.handleMsgFromServer((*msgs.Message)(msgT)) - case *connectionReadClientMessage: - err = conn.handleMsgFromClient((*cmsgs.ClientMessage)(msgT)) + case connectionReadMessage: + err = conn.handleMsgFromServer((msgs.Message)(msgT)) + case connectionReadClientMessage: + err = conn.handleMsgFromClient((cmsgs.ClientMessage)(msgT)) case connectionMsgSend: err = conn.sendMessage(msgT) case connectionMsgOutcomeReceived: @@ -815,7 +815,7 @@ func (cr *connectionRun) serverConnectionsChanged(servers map[common.RMId]paxos. 
} } -func (cr *connectionRun) handleMsgFromClient(msg *cmsgs.ClientMessage) error { +func (cr *connectionRun) handleMsgFromClient(msg cmsgs.ClientMessage) error { if cr.currentState != cr { // probably just draining the queue from the reader after a restart return nil @@ -846,7 +846,7 @@ func (cr *connectionRun) handleMsgFromClient(msg *cmsgs.ClientMessage) error { return nil } -func (cr *connectionRun) handleMsgFromServer(msg *msgs.Message) error { +func (cr *connectionRun) handleMsgFromServer(msg msgs.Message) error { if cr.currentState != cr { // probably just draining the queue from the reader after a restart return nil @@ -958,19 +958,11 @@ func (cr *connectionRun) maybeStopBeater() { func (cr *connectionRun) maybeStopReaderAndCloseSocket() { if cr.reader != nil { close(cr.reader.terminate) - if cr.socket != nil { - if err := cr.socket.Close(); err != nil { - log.Println(err) - } - } cr.reader.terminated.Wait() cr.reader = nil } - if cr.socket != nil { - if err := cr.socket.Close(); err != nil { - log.Println(err) - } + cr.socket.Close() cr.socket = nil } } @@ -1035,14 +1027,14 @@ func newConnectionReader(conn *Connection) *connectionReader { func (cr *connectionReader) readServer() { cr.read(func(seg *capn.Segment) bool { msg := msgs.ReadRootMessage(seg) - return cr.enqueueQuery((*connectionReadMessage)(&msg)) + return cr.enqueueQuery(connectionReadMessage(msg)) }) } func (cr *connectionReader) readClient() { cr.read(func(seg *capn.Segment) bool { msg := cmsgs.ReadRootClientMessage(seg) - return cr.enqueueQuery((*connectionReadClientMessage)(&msg)) + return cr.enqueueQuery(connectionReadClientMessage(msg)) }) } @@ -1067,11 +1059,11 @@ func (cr *connectionReader) read(fun func(*capn.Segment) bool) { type connectionReadMessage msgs.Message -func (crm *connectionReadMessage) witness() connectionMsg { return crm } +func (crm connectionReadMessage) witness() connectionMsg { return crm } type connectionReadClientMessage cmsgs.ClientMessage -func (crcm *connectionReadClientMessage) witness() connectionMsg { return crcm } +func (crcm connectionReadClientMessage) witness() connectionMsg { return crcm } type connectionReadError struct { connectionMsgBasic diff --git a/network/connectionmanager.go b/network/connectionmanager.go index 1af87b3..8694efb 100644 --- a/network/connectionmanager.go +++ b/network/connectionmanager.go @@ -52,7 +52,7 @@ type topologySubscribers struct { subscribers []map[eng.TopologySubscriber]server.EmptyStruct } -func (cm *ConnectionManager) DispatchMessage(sender common.RMId, msgType msgs.Message_Which, msg *msgs.Message) { +func (cm *ConnectionManager) DispatchMessage(sender common.RMId, msgType msgs.Message_Which, msg msgs.Message) { d := cm.Dispatchers switch msgType { case msgs.MESSAGE_TXNSUBMISSION: @@ -670,7 +670,7 @@ func (cm *ConnectionManager) Send(b []byte) { seg, _, err := capn.ReadFromMemoryZeroCopy(b) server.CheckFatal(err) msg := msgs.ReadRootMessage(seg) - cm.DispatchMessage(cm.RMId, msg.Which(), &msg) + cm.DispatchMessage(cm.RMId, msg.Which(), msg) } // serverConnSubscribers From babba47caaf7ac0c894b9c2ba1dcf68287c175c4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 10 Jun 2016 15:17:25 +0100 Subject: [PATCH 13/78] Basic change to capnp to start obj cap support. Ref T34. 
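
To make the new schema concrete before the diff: the sketch below
populates the capabilities group through the accessors this patch
generates. It is illustrative only; capn.NewBuffer is the usual
go-capnproto way to obtain a fresh segment, and the chosen grants
(read-only value, all references readable, writes through indices 0
and 1 only) are arbitrary.

package main

import (
	"fmt"

	capn "github.com/glycerine/go-capnproto"
	msgs "goshawkdb.io/server/capnp"
)

func buildVarIdPos(id []byte) msgs.VarIdPos {
	seg := capn.NewBuffer(nil)
	vip := msgs.NewRootVarIdPos(seg)
	vip.SetId(id)

	caps := vip.Capabilities()
	caps.Value().SetRead(true)   // the value may be read...
	caps.Value().SetWrite(false) // ...but not written

	// All references may be read.
	caps.References().Read().SetAll()

	// Writes are only permitted via reference indices 0 and 1.
	only := seg.NewUInt32List(2)
	only.Set(0, 0)
	only.Set(1, 1)
	caps.References().Write().SetOnly(only)

	return vip
}

func main() {
	vip := buildVarIdPos([]byte{1, 2, 3, 4})
	fmt.Println(len(vip.Id()), vip.Capabilities().Value().Read())
}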
--HG-- branch : T34 --- capnp/var.capnp | 16 ++ capnp/var.capnp.go | 499 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 506 insertions(+), 9 deletions(-) diff --git a/capnp/var.capnp b/capnp/var.capnp index 3a5e8e6..18b4e46 100644 --- a/capnp/var.capnp +++ b/capnp/var.capnp @@ -18,4 +18,20 @@ struct Var { struct VarIdPos { id @0: Data; positions @1: List(UInt8); + capabilities :group { + value :group { + read @2: Bool; + write @3: Bool; + } + references :group { + read :union { + all @4: Void; + only @5: List(UInt32); + } + write :union { + all @6: Void; + only @7: List(UInt32); + } + } + } } diff --git a/capnp/var.capnp.go b/capnp/var.capnp.go index 858930c..5753959 100644 --- a/capnp/var.capnp.go +++ b/capnp/var.capnp.go @@ -287,15 +287,70 @@ func (s Var_List) ToArray() []Var { func (s Var_List) Set(i int, item Var) { C.PointerList(s).Set(i, C.Object(item)) } type VarIdPos C.Struct +type VarIdPosCapabilities VarIdPos +type VarIdPosCapabilitiesValue VarIdPos +type VarIdPosCapabilitiesReferences VarIdPos +type VarIdPosCapabilitiesReferencesRead VarIdPos +type VarIdPosCapabilitiesReferencesWrite VarIdPos +type VarIdPosCapabilitiesReferencesRead_Which uint16 -func NewVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewStruct(0, 2)) } -func NewRootVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewRootStruct(0, 2)) } -func AutoNewVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewStructAR(0, 2)) } -func ReadRootVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.Root(0).ToStruct()) } -func (s VarIdPos) Id() []byte { return C.Struct(s).GetObject(0).ToData() } -func (s VarIdPos) SetId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } -func (s VarIdPos) Positions() C.UInt8List { return C.UInt8List(C.Struct(s).GetObject(1)) } -func (s VarIdPos) SetPositions(v C.UInt8List) { C.Struct(s).SetObject(1, C.Object(v)) } +const ( + VARIDPOSCAPABILITIESREFERENCESREAD_ALL VarIdPosCapabilitiesReferencesRead_Which = 0 + VARIDPOSCAPABILITIESREFERENCESREAD_ONLY VarIdPosCapabilitiesReferencesRead_Which = 1 +) + +type VarIdPosCapabilitiesReferencesWrite_Which uint16 + +const ( + VARIDPOSCAPABILITIESREFERENCESWRITE_ALL VarIdPosCapabilitiesReferencesWrite_Which = 0 + VARIDPOSCAPABILITIESREFERENCESWRITE_ONLY VarIdPosCapabilitiesReferencesWrite_Which = 1 +) + +func NewVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewStruct(8, 4)) } +func NewRootVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewRootStruct(8, 4)) } +func AutoNewVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewStructAR(8, 4)) } +func ReadRootVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.Root(0).ToStruct()) } +func (s VarIdPos) Id() []byte { return C.Struct(s).GetObject(0).ToData() } +func (s VarIdPos) SetId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } +func (s VarIdPos) Positions() C.UInt8List { return C.UInt8List(C.Struct(s).GetObject(1)) } +func (s VarIdPos) SetPositions(v C.UInt8List) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s VarIdPos) Capabilities() VarIdPosCapabilities { return VarIdPosCapabilities(s) } +func (s VarIdPosCapabilities) Value() VarIdPosCapabilitiesValue { return VarIdPosCapabilitiesValue(s) } +func (s VarIdPosCapabilitiesValue) Read() bool { return C.Struct(s).Get1(0) } +func (s VarIdPosCapabilitiesValue) SetRead(v bool) { C.Struct(s).Set1(0, v) } +func (s VarIdPosCapabilitiesValue) Write() bool { return C.Struct(s).Get1(1) } +func (s VarIdPosCapabilitiesValue) SetWrite(v bool) { C.Struct(s).Set1(1, v) } +func (s VarIdPosCapabilities) References() 
VarIdPosCapabilitiesReferences { + return VarIdPosCapabilitiesReferences(s) +} +func (s VarIdPosCapabilitiesReferences) Read() VarIdPosCapabilitiesReferencesRead { + return VarIdPosCapabilitiesReferencesRead(s) +} +func (s VarIdPosCapabilitiesReferencesRead) Which() VarIdPosCapabilitiesReferencesRead_Which { + return VarIdPosCapabilitiesReferencesRead_Which(C.Struct(s).Get16(2)) +} +func (s VarIdPosCapabilitiesReferencesRead) SetAll() { C.Struct(s).Set16(2, 0) } +func (s VarIdPosCapabilitiesReferencesRead) Only() C.UInt32List { + return C.UInt32List(C.Struct(s).GetObject(2)) +} +func (s VarIdPosCapabilitiesReferencesRead) SetOnly(v C.UInt32List) { + C.Struct(s).Set16(2, 1) + C.Struct(s).SetObject(2, C.Object(v)) +} +func (s VarIdPosCapabilitiesReferences) Write() VarIdPosCapabilitiesReferencesWrite { + return VarIdPosCapabilitiesReferencesWrite(s) +} +func (s VarIdPosCapabilitiesReferencesWrite) Which() VarIdPosCapabilitiesReferencesWrite_Which { + return VarIdPosCapabilitiesReferencesWrite_Which(C.Struct(s).Get16(4)) +} +func (s VarIdPosCapabilitiesReferencesWrite) SetAll() { C.Struct(s).Set16(4, 0) } +func (s VarIdPosCapabilitiesReferencesWrite) Only() C.UInt32List { + return C.UInt32List(C.Struct(s).GetObject(3)) +} +func (s VarIdPosCapabilitiesReferencesWrite) SetOnly(v C.UInt32List) { + C.Struct(s).Set16(4, 1) + C.Struct(s).SetObject(3, C.Object(v)) +} func (s VarIdPos) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -357,6 +412,219 @@ func (s VarIdPos) WriteJSON(w io.Writer) error { return err } } + err = b.WriteByte(',') + if err != nil { + return err + } + _, err = b.WriteString("\"capabilities\":") + if err != nil { + return err + } + { + s := s.Capabilities() + err = b.WriteByte('{') + if err != nil { + return err + } + _, err = b.WriteString("\"value\":") + if err != nil { + return err + } + { + s := s.Value() + err = b.WriteByte('{') + if err != nil { + return err + } + _, err = b.WriteString("\"read\":") + if err != nil { + return err + } + { + s := s.Read() + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + err = b.WriteByte(',') + if err != nil { + return err + } + _, err = b.WriteString("\"write\":") + if err != nil { + return err + } + { + s := s.Write() + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + err = b.WriteByte('}') + if err != nil { + return err + } + } + err = b.WriteByte(',') + if err != nil { + return err + } + _, err = b.WriteString("\"references\":") + if err != nil { + return err + } + { + s := s.References() + err = b.WriteByte('{') + if err != nil { + return err + } + _, err = b.WriteString("\"read\":") + if err != nil { + return err + } + { + s := s.Read() + err = b.WriteByte('{') + if err != nil { + return err + } + if s.Which() == VARIDPOSCAPABILITIESREFERENCESREAD_ALL { + _, err = b.WriteString("\"all\":") + if err != nil { + return err + } + _ = s + _, err = b.WriteString("null") + if err != nil { + return err + } + } + if s.Which() == VARIDPOSCAPABILITIESREFERENCESREAD_ONLY { + _, err = b.WriteString("\"only\":") + if err != nil { + return err + } + { + s := s.Only() + { + err = b.WriteByte('[') + if err != nil { + return err + } + for i, s := range s.ToArray() { + if i != 0 { + _, err = b.WriteString(", ") + } + if err != nil { + return err + } + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } 
+ err = b.WriteByte(']') + } + if err != nil { + return err + } + } + } + err = b.WriteByte('}') + if err != nil { + return err + } + } + err = b.WriteByte(',') + if err != nil { + return err + } + _, err = b.WriteString("\"write\":") + if err != nil { + return err + } + { + s := s.Write() + err = b.WriteByte('{') + if err != nil { + return err + } + if s.Which() == VARIDPOSCAPABILITIESREFERENCESWRITE_ALL { + _, err = b.WriteString("\"all\":") + if err != nil { + return err + } + _ = s + _, err = b.WriteString("null") + if err != nil { + return err + } + } + if s.Which() == VARIDPOSCAPABILITIESREFERENCESWRITE_ONLY { + _, err = b.WriteString("\"only\":") + if err != nil { + return err + } + { + s := s.Only() + { + err = b.WriteByte('[') + if err != nil { + return err + } + for i, s := range s.ToArray() { + if i != 0 { + _, err = b.WriteString(", ") + } + if err != nil { + return err + } + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + err = b.WriteByte(']') + } + if err != nil { + return err + } + } + } + err = b.WriteByte('}') + if err != nil { + return err + } + } + err = b.WriteByte('}') + if err != nil { + return err + } + } + err = b.WriteByte('}') + if err != nil { + return err + } + } err = b.WriteByte('}') if err != nil { return err @@ -430,6 +698,219 @@ func (s VarIdPos) WriteCapLit(w io.Writer) error { return err } } + _, err = b.WriteString(", ") + if err != nil { + return err + } + _, err = b.WriteString("capabilities = ") + if err != nil { + return err + } + { + s := s.Capabilities() + err = b.WriteByte('(') + if err != nil { + return err + } + _, err = b.WriteString("value = ") + if err != nil { + return err + } + { + s := s.Value() + err = b.WriteByte('(') + if err != nil { + return err + } + _, err = b.WriteString("read = ") + if err != nil { + return err + } + { + s := s.Read() + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + _, err = b.WriteString(", ") + if err != nil { + return err + } + _, err = b.WriteString("write = ") + if err != nil { + return err + } + { + s := s.Write() + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + err = b.WriteByte(')') + if err != nil { + return err + } + } + _, err = b.WriteString(", ") + if err != nil { + return err + } + _, err = b.WriteString("references = ") + if err != nil { + return err + } + { + s := s.References() + err = b.WriteByte('(') + if err != nil { + return err + } + _, err = b.WriteString("read = ") + if err != nil { + return err + } + { + s := s.Read() + err = b.WriteByte('(') + if err != nil { + return err + } + if s.Which() == VARIDPOSCAPABILITIESREFERENCESREAD_ALL { + _, err = b.WriteString("all = ") + if err != nil { + return err + } + _ = s + _, err = b.WriteString("null") + if err != nil { + return err + } + } + if s.Which() == VARIDPOSCAPABILITIESREFERENCESREAD_ONLY { + _, err = b.WriteString("only = ") + if err != nil { + return err + } + { + s := s.Only() + { + err = b.WriteByte('[') + if err != nil { + return err + } + for i, s := range s.ToArray() { + if i != 0 { + _, err = b.WriteString(", ") + } + if err != nil { + return err + } + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + err = b.WriteByte(']') + } + if err != nil { + return err + } + } + } + err = b.WriteByte(')') + if err != nil { + return 
err
+			}
+		}
+		_, err = b.WriteString(", ")
+		if err != nil {
+			return err
+		}
+		_, err = b.WriteString("write = ")
+		if err != nil {
+			return err
+		}
+		{
+			s := s.Write()
+			err = b.WriteByte('(')
+			if err != nil {
+				return err
+			}
+			if s.Which() == VARIDPOSCAPABILITIESREFERENCESWRITE_ALL {
+				_, err = b.WriteString("all = ")
+				if err != nil {
+					return err
+				}
+				_ = s
+				_, err = b.WriteString("null")
+				if err != nil {
+					return err
+				}
+			}
+			if s.Which() == VARIDPOSCAPABILITIESREFERENCESWRITE_ONLY {
+				_, err = b.WriteString("only = ")
+				if err != nil {
+					return err
+				}
+				{
+					s := s.Only()
+					{
+						err = b.WriteByte('[')
+						if err != nil {
+							return err
+						}
+						for i, s := range s.ToArray() {
+							if i != 0 {
+								_, err = b.WriteString(", ")
+							}
+							if err != nil {
+								return err
+							}
+							buf, err = json.Marshal(s)
+							if err != nil {
+								return err
+							}
+							_, err = b.Write(buf)
+							if err != nil {
+								return err
+							}
+						}
+						err = b.WriteByte(']')
+					}
+					if err != nil {
+						return err
+					}
+				}
+			}
+			err = b.WriteByte(')')
+			if err != nil {
+				return err
+			}
+		}
+		err = b.WriteByte(')')
+		if err != nil {
+			return err
+		}
+	}
+	err = b.WriteByte(')')
+	if err != nil {
+		return err
+	}
+}
 	err = b.WriteByte(')')
 	if err != nil {
 		return err
@@ -446,7 +927,7 @@ func (s VarIdPos) MarshalCapLit() ([]byte, error) {
 type VarIdPos_List C.PointerList
 
 func NewVarIdPosList(s *C.Segment, sz int) VarIdPos_List {
-	return VarIdPos_List(s.NewCompositeList(0, 2, sz))
+	return VarIdPos_List(s.NewCompositeList(8, 4, sz))
 }
 func (s VarIdPos_List) Len() int { return C.PointerList(s).Len() }
 func (s VarIdPos_List) At(i int) VarIdPos { return VarIdPos(C.PointerList(s).At(i).ToStruct()) }

From bc96524b49048a138791e1cd61decd2f46cd25f6 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 10 Jun 2016 20:46:31 +0100
Subject: [PATCH 14/78] Parse and verify client roots from configuration file,
 convert to capnp etc.

Ref T41.
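
Because the checks in decodeConfiguration are easy to lose in the diff,
here is a standalone sketch of the per-root validation this patch adds.
The names are illustrative (rootCaps mirrors the shape of the new
RootCapabilities struct), and sort.Slice stands in for the patch's own
SortUInt32 helper.

package main

import (
	"errors"
	"fmt"
	"sort"
)

type rootCaps struct {
	ValueRead, ValueWrite bool
	ReferencesReadAll     bool
	ReferencesReadOnly    []uint32
	ReferencesWriteAll    bool
	ReferencesWriteOnly   []uint32
}

// checkOnly enforces that "all" and an explicit "only" list are
// mutually exclusive, and that the sorted "only" list contains no
// duplicate reference indices.
func checkOnly(all bool, only []uint32) error {
	if all && len(only) != 0 {
		return errors.New("all and only are mutually exclusive")
	}
	sort.Slice(only, func(i, j int) bool { return only[i] < only[j] })
	for i := 1; i < len(only); i++ {
		if only[i] == only[i-1] {
			return fmt.Errorf("duplicate reference index %v", only[i])
		}
	}
	return nil
}

// validate rejects a root grant that is wholly empty, since such a
// capability would be useless to the client it is granted to.
func validate(rc *rootCaps) error {
	if err := checkOnly(rc.ReferencesReadAll, rc.ReferencesReadOnly); err != nil {
		return err
	}
	if err := checkOnly(rc.ReferencesWriteAll, rc.ReferencesWriteOnly); err != nil {
		return err
	}
	if !rc.ValueRead && !rc.ValueWrite &&
		!rc.ReferencesReadAll && !rc.ReferencesWriteAll &&
		len(rc.ReferencesReadOnly) == 0 && len(rc.ReferencesWriteOnly) == 0 {
		return errors.New("no capabilities granted")
	}
	return nil
}

func main() {
	// Duplicate index 0 in the read-only list: rejected.
	fmt.Println(validate(&rootCaps{ValueRead: true, ReferencesReadOnly: []uint32{0, 0}}))
}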
--HG-- branch : T41 --- capnp/configuration.capnp | 14 +- capnp/configuration.capnp.go | 316 +++++++++++++++++++++++++++++++-- capnp/var.capnp | 6 +- capnp/var.capnp.go | 43 ++++- configuration/configuration.go | 206 ++++++++++++++++++--- configuration/topology.go | 2 +- 6 files changed, 547 insertions(+), 40 deletions(-) diff --git a/capnp/configuration.capnp b/capnp/configuration.capnp index b55cb2e..604ebac 100644 --- a/capnp/configuration.capnp +++ b/capnp/configuration.capnp @@ -5,6 +5,8 @@ $Go.import("goshawkdb.io/server/capnp"); @0xbbc717d787db5c5f; +using Common = import "../../common/capnp/capabilities.capnp"; + struct Configuration { clusterId @0: Text; version @1: UInt32; @@ -14,7 +16,7 @@ struct Configuration { noSync @5: Bool; rms @6: List(UInt32); rmsRemoved @7: List(UInt32); - fingerprints @8: List(Data); + fingerprints @8: List(Fingerprint); union { transitioningTo :group { configuration @9: Configuration; @@ -31,6 +33,16 @@ struct Configuration { } } +struct Fingerprint { + sha256 @0: Data; + roots @1: List(Root); +} + +struct Root { + name @0: Text; + capabilities @1: Common.Capabilities; +} + struct ConditionPair { rmId @0: UInt32; condition @1: Condition; diff --git a/capnp/configuration.capnp.go b/capnp/configuration.capnp.go index c11d723..1705092 100644 --- a/capnp/configuration.capnp.go +++ b/capnp/configuration.capnp.go @@ -7,6 +7,7 @@ import ( "bytes" "encoding/json" C "github.com/glycerine/go-capnproto" + "goshawkdb.io/common/capnp" "io" ) @@ -41,8 +42,10 @@ func (s Configuration) Rms() C.UInt32List { return C.UInt32List(C.S func (s Configuration) SetRms(v C.UInt32List) { C.Struct(s).SetObject(2, C.Object(v)) } func (s Configuration) RmsRemoved() C.UInt32List { return C.UInt32List(C.Struct(s).GetObject(3)) } func (s Configuration) SetRmsRemoved(v C.UInt32List) { C.Struct(s).SetObject(3, C.Object(v)) } -func (s Configuration) Fingerprints() C.DataList { return C.DataList(C.Struct(s).GetObject(4)) } -func (s Configuration) SetFingerprints(v C.DataList) { C.Struct(s).SetObject(4, C.Object(v)) } +func (s Configuration) Fingerprints() Fingerprint_List { + return Fingerprint_List(C.Struct(s).GetObject(4)) +} +func (s Configuration) SetFingerprints(v Fingerprint_List) { C.Struct(s).SetObject(4, C.Object(v)) } func (s Configuration) TransitioningTo() ConfigurationTransitioningTo { return ConfigurationTransitioningTo(s) } @@ -329,11 +332,7 @@ func (s Configuration) WriteJSON(w io.Writer) error { if err != nil { return err } - buf, err = json.Marshal(s) - if err != nil { - return err - } - _, err = b.Write(buf) + err = s.WriteJSON(b) if err != nil { return err } @@ -902,11 +901,7 @@ func (s Configuration) WriteCapLit(w io.Writer) error { if err != nil { return err } - buf, err = json.Marshal(s) - if err != nil { - return err - } - _, err = b.Write(buf) + err = s.WriteCapLit(b) if err != nil { return err } @@ -1262,6 +1257,303 @@ func (s Configuration_List) ToArray() []Configuration { } func (s Configuration_List) Set(i int, item Configuration) { C.PointerList(s).Set(i, C.Object(item)) } +type Fingerprint C.Struct + +func NewFingerprint(s *C.Segment) Fingerprint { return Fingerprint(s.NewStruct(0, 2)) } +func NewRootFingerprint(s *C.Segment) Fingerprint { return Fingerprint(s.NewRootStruct(0, 2)) } +func AutoNewFingerprint(s *C.Segment) Fingerprint { return Fingerprint(s.NewStructAR(0, 2)) } +func ReadRootFingerprint(s *C.Segment) Fingerprint { return Fingerprint(s.Root(0).ToStruct()) } +func (s Fingerprint) Sha256() []byte { return C.Struct(s).GetObject(0).ToData() } +func 
(s Fingerprint) SetSha256(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } +func (s Fingerprint) Roots() Root_List { return Root_List(C.Struct(s).GetObject(1)) } +func (s Fingerprint) SetRoots(v Root_List) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s Fingerprint) WriteJSON(w io.Writer) error { + b := bufio.NewWriter(w) + var err error + var buf []byte + _ = buf + err = b.WriteByte('{') + if err != nil { + return err + } + _, err = b.WriteString("\"sha256\":") + if err != nil { + return err + } + { + s := s.Sha256() + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + err = b.WriteByte(',') + if err != nil { + return err + } + _, err = b.WriteString("\"roots\":") + if err != nil { + return err + } + { + s := s.Roots() + { + err = b.WriteByte('[') + if err != nil { + return err + } + for i, s := range s.ToArray() { + if i != 0 { + _, err = b.WriteString(", ") + } + if err != nil { + return err + } + err = s.WriteJSON(b) + if err != nil { + return err + } + } + err = b.WriteByte(']') + } + if err != nil { + return err + } + } + err = b.WriteByte('}') + if err != nil { + return err + } + err = b.Flush() + return err +} +func (s Fingerprint) MarshalJSON() ([]byte, error) { + b := bytes.Buffer{} + err := s.WriteJSON(&b) + return b.Bytes(), err +} +func (s Fingerprint) WriteCapLit(w io.Writer) error { + b := bufio.NewWriter(w) + var err error + var buf []byte + _ = buf + err = b.WriteByte('(') + if err != nil { + return err + } + _, err = b.WriteString("sha256 = ") + if err != nil { + return err + } + { + s := s.Sha256() + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + _, err = b.WriteString(", ") + if err != nil { + return err + } + _, err = b.WriteString("roots = ") + if err != nil { + return err + } + { + s := s.Roots() + { + err = b.WriteByte('[') + if err != nil { + return err + } + for i, s := range s.ToArray() { + if i != 0 { + _, err = b.WriteString(", ") + } + if err != nil { + return err + } + err = s.WriteCapLit(b) + if err != nil { + return err + } + } + err = b.WriteByte(']') + } + if err != nil { + return err + } + } + err = b.WriteByte(')') + if err != nil { + return err + } + err = b.Flush() + return err +} +func (s Fingerprint) MarshalCapLit() ([]byte, error) { + b := bytes.Buffer{} + err := s.WriteCapLit(&b) + return b.Bytes(), err +} + +type Fingerprint_List C.PointerList + +func NewFingerprintList(s *C.Segment, sz int) Fingerprint_List { + return Fingerprint_List(s.NewCompositeList(0, 2, sz)) +} +func (s Fingerprint_List) Len() int { return C.PointerList(s).Len() } +func (s Fingerprint_List) At(i int) Fingerprint { return Fingerprint(C.PointerList(s).At(i).ToStruct()) } +func (s Fingerprint_List) ToArray() []Fingerprint { + n := s.Len() + a := make([]Fingerprint, n) + for i := 0; i < n; i++ { + a[i] = s.At(i) + } + return a +} +func (s Fingerprint_List) Set(i int, item Fingerprint) { C.PointerList(s).Set(i, C.Object(item)) } + +type Root C.Struct + +func NewRoot(s *C.Segment) Root { return Root(s.NewStruct(0, 2)) } +func NewRootRoot(s *C.Segment) Root { return Root(s.NewRootStruct(0, 2)) } +func AutoNewRoot(s *C.Segment) Root { return Root(s.NewStructAR(0, 2)) } +func ReadRootRoot(s *C.Segment) Root { return Root(s.Root(0).ToStruct()) } +func (s Root) Name() string { return C.Struct(s).GetObject(0).ToText() } +func (s Root) NameBytes() []byte { return C.Struct(s).GetObject(0).ToDataTrimLastByte() } +func 
(s Root) SetName(v string) { C.Struct(s).SetObject(0, s.Segment.NewText(v)) } +func (s Root) Capabilities() capnp.Capabilities { + return capnp.Capabilities(C.Struct(s).GetObject(1).ToStruct()) +} +func (s Root) SetCapabilities(v capnp.Capabilities) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s Root) WriteJSON(w io.Writer) error { + b := bufio.NewWriter(w) + var err error + var buf []byte + _ = buf + err = b.WriteByte('{') + if err != nil { + return err + } + _, err = b.WriteString("\"name\":") + if err != nil { + return err + } + { + s := s.Name() + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + err = b.WriteByte(',') + if err != nil { + return err + } + _, err = b.WriteString("\"capabilities\":") + if err != nil { + return err + } + { + s := s.Capabilities() + err = s.WriteJSON(b) + if err != nil { + return err + } + } + err = b.WriteByte('}') + if err != nil { + return err + } + err = b.Flush() + return err +} +func (s Root) MarshalJSON() ([]byte, error) { + b := bytes.Buffer{} + err := s.WriteJSON(&b) + return b.Bytes(), err +} +func (s Root) WriteCapLit(w io.Writer) error { + b := bufio.NewWriter(w) + var err error + var buf []byte + _ = buf + err = b.WriteByte('(') + if err != nil { + return err + } + _, err = b.WriteString("name = ") + if err != nil { + return err + } + { + s := s.Name() + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + _, err = b.WriteString(", ") + if err != nil { + return err + } + _, err = b.WriteString("capabilities = ") + if err != nil { + return err + } + { + s := s.Capabilities() + err = s.WriteCapLit(b) + if err != nil { + return err + } + } + err = b.WriteByte(')') + if err != nil { + return err + } + err = b.Flush() + return err +} +func (s Root) MarshalCapLit() ([]byte, error) { + b := bytes.Buffer{} + err := s.WriteCapLit(&b) + return b.Bytes(), err +} + +type Root_List C.PointerList + +func NewRootList(s *C.Segment, sz int) Root_List { return Root_List(s.NewCompositeList(0, 2, sz)) } +func (s Root_List) Len() int { return C.PointerList(s).Len() } +func (s Root_List) At(i int) Root { return Root(C.PointerList(s).At(i).ToStruct()) } +func (s Root_List) ToArray() []Root { + n := s.Len() + a := make([]Root, n) + for i := 0; i < n; i++ { + a[i] = s.At(i) + } + return a +} +func (s Root_List) Set(i int, item Root) { C.PointerList(s).Set(i, C.Object(item)) } + type ConditionPair C.Struct func NewConditionPair(s *C.Segment) ConditionPair { return ConditionPair(s.NewStruct(8, 2)) } diff --git a/capnp/var.capnp b/capnp/var.capnp index 3a5e8e6..f546a9f 100644 --- a/capnp/var.capnp +++ b/capnp/var.capnp @@ -6,6 +6,7 @@ $Go.import("goshawkdb.io/server/capnp"); @0xc3ce226b914ee1eb; using VC = import "vectorclock.capnp"; +using Common = import "../../common/capnp/capabilities.capnp"; struct Var { id @0: Data; @@ -16,6 +17,7 @@ struct Var { } struct VarIdPos { - id @0: Data; - positions @1: List(UInt8); + id @0: Data; + positions @1: List(UInt8); + capabilities @2: Common.Capabilities; } diff --git a/capnp/var.capnp.go b/capnp/var.capnp.go index 858930c..d114b14 100644 --- a/capnp/var.capnp.go +++ b/capnp/var.capnp.go @@ -7,6 +7,7 @@ import ( "bytes" "encoding/json" C "github.com/glycerine/go-capnproto" + "goshawkdb.io/common/capnp" "io" ) @@ -288,14 +289,18 @@ func (s Var_List) Set(i int, item Var) { C.PointerList(s).Set(i, C.Object(item)) type VarIdPos C.Struct -func NewVarIdPos(s *C.Segment) VarIdPos { 
return VarIdPos(s.NewStruct(0, 2)) } -func NewRootVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewRootStruct(0, 2)) } -func AutoNewVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewStructAR(0, 2)) } +func NewVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewStruct(0, 3)) } +func NewRootVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewRootStruct(0, 3)) } +func AutoNewVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewStructAR(0, 3)) } func ReadRootVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.Root(0).ToStruct()) } func (s VarIdPos) Id() []byte { return C.Struct(s).GetObject(0).ToData() } func (s VarIdPos) SetId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } func (s VarIdPos) Positions() C.UInt8List { return C.UInt8List(C.Struct(s).GetObject(1)) } func (s VarIdPos) SetPositions(v C.UInt8List) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s VarIdPos) Capabilities() capnp.Capabilities { + return capnp.Capabilities(C.Struct(s).GetObject(2).ToStruct()) +} +func (s VarIdPos) SetCapabilities(v capnp.Capabilities) { C.Struct(s).SetObject(2, C.Object(v)) } func (s VarIdPos) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -357,6 +362,21 @@ func (s VarIdPos) WriteJSON(w io.Writer) error { return err } } + err = b.WriteByte(',') + if err != nil { + return err + } + _, err = b.WriteString("\"capabilities\":") + if err != nil { + return err + } + { + s := s.Capabilities() + err = s.WriteJSON(b) + if err != nil { + return err + } + } err = b.WriteByte('}') if err != nil { return err @@ -430,6 +450,21 @@ func (s VarIdPos) WriteCapLit(w io.Writer) error { return err } } + _, err = b.WriteString(", ") + if err != nil { + return err + } + _, err = b.WriteString("capabilities = ") + if err != nil { + return err + } + { + s := s.Capabilities() + err = s.WriteCapLit(b) + if err != nil { + return err + } + } err = b.WriteByte(')') if err != nil { return err @@ -446,7 +481,7 @@ func (s VarIdPos) MarshalCapLit() ([]byte, error) { type VarIdPos_List C.PointerList func NewVarIdPosList(s *C.Segment, sz int) VarIdPos_List { - return VarIdPos_List(s.NewCompositeList(0, 2, sz)) + return VarIdPos_List(s.NewCompositeList(0, 3, sz)) } func (s VarIdPos_List) Len() int { return C.PointerList(s).Len() } func (s VarIdPos_List) At(i int) VarIdPos { return VarIdPos(C.PointerList(s).At(i).ToStruct()) } diff --git a/configuration/configuration.go b/configuration/configuration.go index bd1077d..c062ff6 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -8,11 +8,13 @@ import ( "fmt" capn "github.com/glycerine/go-capnproto" "goshawkdb.io/common" + commsgs "goshawkdb.io/common/capnp" "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" ch "goshawkdb.io/server/consistenthash" "net" "os" + "sort" "strconv" ) @@ -23,13 +25,23 @@ type Configuration struct { F uint8 MaxRMCount uint16 NoSync bool - ClientCertificateFingerprints []string + ClientCertificateFingerprints map[string]map[string]*RootCapabilities + roots []string rms common.RMIds rmsRemoved map[common.RMId]server.EmptyStruct - fingerprints map[[sha256.Size]byte]server.EmptyStruct + fingerprints map[[sha256.Size]byte]map[string]*common.Capabilities nextConfiguration *NextConfiguration } +type RootCapabilities struct { + ValueRead bool + ValueWrite bool + ReferencesReadAll bool + ReferencesReadOnly []uint32 + ReferencesWriteAll bool + ReferencesWriteOnly []uint32 +} + type NextConfiguration struct { *Configuration AllHosts []string @@ -176,20 +188,86 @@ func 
decodeConfiguration(decoder *json.Decoder) (*Configuration, error) { if len(config.ClientCertificateFingerprints) == 0 { return nil, errors.New("No ClientCertificateFingerprints defined") } else { - fingerprints := make(map[[sha256.Size]byte]server.EmptyStruct, len(config.ClientCertificateFingerprints)) - for _, fingerprint := range config.ClientCertificateFingerprints { + rootsMap := make(map[string]uint32) + rootsName := []string{} + fingerprints := make(map[[sha256.Size]byte]map[string]*common.Capabilities, len(config.ClientCertificateFingerprints)) + for fingerprint, rootsCapabilities := range config.ClientCertificateFingerprints { fingerprintBytes, err := hex.DecodeString(fingerprint) if err != nil { return nil, err } else if l := len(fingerprintBytes); l != sha256.Size { return nil, fmt.Errorf("Invalid fingerprint: expected %v bytes, and found %v", sha256.Size, l) } + if len(rootsCapabilities) == 0 { + return nil, fmt.Errorf("No roots configured for client fingerprint %v; at least 1 needed", fingerprint) + } + roots := make(map[string]*common.Capabilities, len(rootsCapabilities)) + for name, rootCapabilities := range rootsCapabilities { + if _, found := rootsMap[name]; !found { + rootsMap[name] = 0 + rootsName = append(rootsName, name) + } + SortUInt32(rootCapabilities.ReferencesReadOnly).Sort() + SortUInt32(rootCapabilities.ReferencesWriteOnly).Sort() + if rootCapabilities.ReferencesReadAll && len(rootCapabilities.ReferencesReadOnly) != 0 { + return nil, fmt.Errorf("ReferencesReadAll and ReferencesReadOnly must be mutually exclusive for client fingerprint %v, root %s", fingerprint, name) + } + if rootCapabilities.ReferencesWriteAll && len(rootCapabilities.ReferencesWriteOnly) != 0 { + return nil, fmt.Errorf("ReferencesWriteAll and ReferencesWriteOnly must be mutually exclusive for client fingerprint %v, root %s", fingerprint, name) + } + old := uint32(0) + for idx, index := range rootCapabilities.ReferencesReadOnly { + if index == old && idx > 0 { + return nil, fmt.Errorf("Client fingerprint %v, root %s: Duplicate read only reference index %v", + fingerprint, name, index) + } + old = index + } + old = uint32(0) + for idx, index := range rootCapabilities.ReferencesWriteOnly { + if index == old && idx > 0 { + return nil, fmt.Errorf("Client fingerprint %v, root %s: Duplicate write only reference index %v", + fingerprint, name, index) + } + old = index + } + if !rootCapabilities.ValueRead && !rootCapabilities.ValueWrite && + !rootCapabilities.ReferencesReadAll && !rootCapabilities.ReferencesWriteAll && + len(rootCapabilities.ReferencesReadOnly) == 0 && len(rootCapabilities.ReferencesWriteOnly) == 0 { + return nil, fmt.Errorf("Client fingerprint %v, root %s: no capabilities have been granted.", + fingerprint, name) + } + cap := &common.Capabilities{} + switch { + case rootCapabilities.ValueRead && rootCapabilities.ValueWrite: + cap.Value = common.ReadWrite + case rootCapabilities.ValueRead: + cap.Value = common.Read + case rootCapabilities.ValueWrite: + cap.Value = common.Write + default: + cap.Value = common.None + } + if rootCapabilities.ReferencesReadAll { + cap.References.Read.All = true + } else { + cap.References.Read.Only = rootCapabilities.ReferencesReadOnly + } + if rootCapabilities.ReferencesWriteAll { + cap.References.Write.All = true + } else { + cap.References.Write.Only = rootCapabilities.ReferencesWriteOnly + } + roots[name] = cap + } ary := [sha256.Size]byte{} copy(ary[:], fingerprintBytes) - fingerprints[ary] = server.EmptyStructVal + fingerprints[ary] = roots } 
config.fingerprints = fingerprints config.ClientCertificateFingerprints = nil + sort.Strings(rootsName) + config.roots = rootsName } return &config, err } @@ -217,11 +295,41 @@ func ConfigurationFromCap(config *msgs.Configuration) *Configuration { } fingerprints := config.Fingerprints() - fingerprintsMap := make(map[[sha256.Size]byte]server.EmptyStruct, fingerprints.Len()) + fingerprintsMap := make(map[[sha256.Size]byte]map[string]*common.Capabilities, fingerprints.Len()) for idx, l := 0, fingerprints.Len(); idx < l; idx++ { + fingerprint := fingerprints.At(idx) ary := [sha256.Size]byte{} - copy(ary[:], fingerprints.At(idx)) - fingerprintsMap[ary] = server.EmptyStructVal + copy(ary[:], fingerprint.Sha256()) + rootsCap := fingerprint.Roots() + roots := make(map[string]*common.Capabilities, rootsCap.Len()) + for idy, m := 0, rootsCap.Len(); idy < m; idy++ { + rootCap := rootsCap.At(idy) + capsCap := rootCap.Capabilities() + cap := &common.Capabilities{} + switch capsCap.Value() { + case commsgs.VALUECAPABILITY_READ: + cap.Value = common.Read + case commsgs.VALUECAPABILITY_WRITE: + cap.Value = common.Write + case commsgs.VALUECAPABILITY_READWRITE: + cap.Value = common.ReadWrite + default: + cap.Value = common.None + } + refsCap := capsCap.References() + if refsReadCap := refsCap.Read(); refsReadCap.Which() == commsgs.CAPABILITIESREFERENCESREAD_ALL { + cap.References.Read.All = true + } else { + cap.References.Read.Only = refsReadCap.Only().ToArray() + } + if refsWriteCap := refsCap.Write(); refsWriteCap.Which() == commsgs.CAPABILITIESREFERENCESWRITE_ALL { + cap.References.Write.All = true + } else { + cap.References.Write.Only = refsWriteCap.Only().ToArray() + } + roots[rootCap.Name()] = cap + } + fingerprintsMap[ary] = roots } c.fingerprints = fingerprintsMap @@ -299,20 +407,26 @@ func (a *Configuration) Equal(b *Configuration) bool { return false } } - for fingerprint := range b.fingerprints { - if _, found := a.fingerprints[fingerprint]; !found { + for fingerprint, aRoots := range a.fingerprints { + if bRoots, found := b.fingerprints[fingerprint]; !found || len(aRoots) != len(bRoots) { return false + } else { + for name, aRootCaps := range aRoots { + if bRootCaps, found := bRoots[name]; !found || !aRootCaps.Equal(bRootCaps) { + return false + } + } } } return a.nextConfiguration.Equal(b.nextConfiguration) } func (config *Configuration) String() string { - return fmt.Sprintf("Configuration{ClusterId: %v, Version: %v, Hosts: %v, F: %v, MaxRMCount: %v, NoSync: %v, RMs: %v, Removed: %v}", - config.ClusterId, config.Version, config.Hosts, config.F, config.MaxRMCount, config.NoSync, config.rms, config.rmsRemoved) + return fmt.Sprintf("Configuration{ClusterId: %v, Version: %v, Hosts: %v, F: %v, MaxRMCount: %v, NoSync: %v, RMs: %v, Removed: %v, ClientCertificates: %v}", + config.ClusterId, config.Version, config.Hosts, config.F, config.MaxRMCount, config.NoSync, config.rms, config.rmsRemoved, config.fingerprints) } -func (config *Configuration) Fingerprints() map[[sha256.Size]byte]server.EmptyStruct { +func (config *Configuration) Fingerprints() map[[sha256.Size]byte]map[string]*common.Capabilities { return config.fingerprints } @@ -370,15 +484,17 @@ func (config *Configuration) Clone() *Configuration { F: config.F, MaxRMCount: config.MaxRMCount, NoSync: config.NoSync, - ClientCertificateFingerprints: make([]string, len(config.ClientCertificateFingerprints)), + ClientCertificateFingerprints: make(map[string]map[string]*RootCapabilities, len(config.ClientCertificateFingerprints)), rms: 
make([]common.RMId, len(config.rms)), rmsRemoved: make(map[common.RMId]server.EmptyStruct, len(config.rmsRemoved)), - fingerprints: make(map[[sha256.Size]byte]server.EmptyStruct, len(config.fingerprints)), + fingerprints: make(map[[sha256.Size]byte]map[string]*common.Capabilities, len(config.fingerprints)), nextConfiguration: config.nextConfiguration.Clone(), } copy(clone.Hosts, config.Hosts) - copy(clone.ClientCertificateFingerprints, config.ClientCertificateFingerprints) + for k, v := range config.ClientCertificateFingerprints { + clone.ClientCertificateFingerprints[k] = v + } copy(clone.rms, config.rms) for k, v := range config.rmsRemoved { clone.rmsRemoved[k] = v @@ -419,13 +535,56 @@ func (config *Configuration) AddToSegAutoRoot(seg *capn.Segment) msgs.Configurat } fingerprintsMap := config.fingerprints - fingerprints := seg.NewDataList(len(fingerprintsMap)) - cap.SetFingerprints(fingerprints) + fingerprintsCap := msgs.NewFingerprintList(seg, len(fingerprintsMap)) idx = 0 - for fingerprint := range fingerprintsMap { - fingerprints.Set(idx, fingerprint[:]) + for fingerprint, roots := range fingerprintsMap { + fingerprintCap := msgs.NewFingerprint(seg) + fingerprintCap.SetSha256(fingerprint[:]) + rootsCap := msgs.NewRootList(seg, len(roots)) + idy := 0 + for name, capabilities := range roots { + rootCap := msgs.NewRoot(seg) + rootCap.SetName(name) + capsCap := commsgs.NewCapabilities(seg) + switch capabilities.Value { + case common.Read: + capsCap.SetValue(commsgs.VALUECAPABILITY_READ) + case common.Write: + capsCap.SetValue(commsgs.VALUECAPABILITY_WRITE) + case common.ReadWrite: + capsCap.SetValue(commsgs.VALUECAPABILITY_READWRITE) + default: + capsCap.SetValue(commsgs.VALUECAPABILITY_NONE) + } + readRefsCap := capsCap.References().Read() + if capabilities.References.Read.All { + readRefsCap.SetAll() + } else { + onlyList := seg.NewUInt32List(len(capabilities.References.Read.Only)) + for idz, index := range capabilities.References.Read.Only { + onlyList.Set(idz, index) + } + readRefsCap.SetOnly(onlyList) + } + writeRefsCap := capsCap.References().Write() + if capabilities.References.Write.All { + writeRefsCap.SetAll() + } else { + onlyList := seg.NewUInt32List(len(capabilities.References.Write.Only)) + for idz, index := range capabilities.References.Write.Only { + onlyList.Set(idz, index) + } + writeRefsCap.SetOnly(onlyList) + } + rootCap.SetCapabilities(capsCap) + rootsCap.Set(idy, rootCap) + idy++ + } + fingerprintCap.SetRoots(rootsCap) + fingerprintsCap.Set(idx, fingerprintCap) idx++ } + cap.SetFingerprints(fingerprintsCap) if config.nextConfiguration == nil { cap.SetStable() @@ -820,3 +979,10 @@ func (g *Generator) AddToSeg(seg *capn.Segment) msgs.Condition { condCap.SetGenerator(genCap) return condCap } + +type SortUInt32 []uint32 + +func (nums SortUInt32) Sort() { sort.Sort(nums) } +func (nums SortUInt32) Len() int { return len(nums) } +func (nums SortUInt32) Less(i, j int) bool { return nums[i] < nums[j] } +func (nums SortUInt32) Swap(i, j int) { nums[i], nums[j] = nums[j], nums[i] } diff --git a/configuration/topology.go b/configuration/topology.go index 098bbfa..c8e2bd6 100644 --- a/configuration/topology.go +++ b/configuration/topology.go @@ -34,7 +34,7 @@ func BlankTopology(clusterId string) *Topology { F: 0, MaxRMCount: 0, NoSync: false, - ClientCertificateFingerprints: []string{}, + ClientCertificateFingerprints: nil, rms: []common.RMId{}, fingerprints: nil, nextConfiguration: nil, From 457c315e9152dd3ed447ea936f118e581de08c0c Mon Sep 17 00:00:00 2001 From: Matthew Sackman 
Date: Sat, 11 Jun 2016 14:37:20 +0100 Subject: [PATCH 15/78] With the possibility that roots can now change, we can't use root id as a unique cluster id, so instead introduce an explicit cluster uuid. Most multiple-root bits now taken care of, with the exception of dealing with changes to roots during topology changes. Ref T41. --HG-- branch : T41 --- capnp/configuration.capnp | 37 ++++----- capnp/configuration.capnp.go | 74 ++++++++++++++---- capnp/connection.capnp | 12 +-- capnp/connection.capnp.go | 22 +++--- client/simpletxnsubmitter.go | 6 +- configuration/configuration.go | 88 ++++++++++----------- configuration/topology.go | 48 ++++++++---- network/connection.go | 97 +++++++++++++++-------- network/connectionmanager.go | 16 ++-- network/topologytransmogrifier.go | 124 +++++++++++++++--------------- paxos/network.go | 2 +- 11 files changed, 303 insertions(+), 223 deletions(-) diff --git a/capnp/configuration.capnp b/capnp/configuration.capnp index 604ebac..f8fd09e 100644 --- a/capnp/configuration.capnp +++ b/capnp/configuration.capnp @@ -9,27 +9,28 @@ using Common = import "../../common/capnp/capabilities.capnp"; struct Configuration { clusterId @0: Text; - version @1: UInt32; - hosts @2: List(Text); - f @3: UInt8; - maxRMCount @4: UInt16; - noSync @5: Bool; - rms @6: List(UInt32); - rmsRemoved @7: List(UInt32); - fingerprints @8: List(Fingerprint); + clusterUUId @1: UInt64; + version @2: UInt32; + hosts @3: List(Text); + f @4: UInt8; + maxRMCount @5: UInt16; + noSync @6: Bool; + rms @7: List(UInt32); + rmsRemoved @8: List(UInt32); + fingerprints @9: List(Fingerprint); union { transitioningTo :group { - configuration @9: Configuration; - allHosts @10: List(Text); - newRMIds @11: List(UInt32); - survivingRMIds @12: List(UInt32); - lostRMIds @13: List(UInt32); - installedOnNew @14: Bool; - barrierReached1 @15: List(UInt32); - barrierReached2 @16: List(UInt32); - pending @17: List(ConditionPair); + configuration @10: Configuration; + allHosts @11: List(Text); + newRMIds @12: List(UInt32); + survivingRMIds @13: List(UInt32); + lostRMIds @14: List(UInt32); + installedOnNew @15: Bool; + barrierReached1 @16: List(UInt32); + barrierReached2 @17: List(UInt32); + pending @18: List(ConditionPair); } - stable @18: Void; + stable @19: Void; } } diff --git a/capnp/configuration.capnp.go b/capnp/configuration.capnp.go index 1705092..6decd61 100644 --- a/capnp/configuration.capnp.go +++ b/capnp/configuration.capnp.go @@ -20,24 +20,26 @@ const ( CONFIGURATION_STABLE Configuration_Which = 1 ) -func NewConfiguration(s *C.Segment) Configuration { return Configuration(s.NewStruct(16, 13)) } -func NewRootConfiguration(s *C.Segment) Configuration { return Configuration(s.NewRootStruct(16, 13)) } -func AutoNewConfiguration(s *C.Segment) Configuration { return Configuration(s.NewStructAR(16, 13)) } +func NewConfiguration(s *C.Segment) Configuration { return Configuration(s.NewStruct(24, 13)) } +func NewRootConfiguration(s *C.Segment) Configuration { return Configuration(s.NewRootStruct(24, 13)) } +func AutoNewConfiguration(s *C.Segment) Configuration { return Configuration(s.NewStructAR(24, 13)) } func ReadRootConfiguration(s *C.Segment) Configuration { return Configuration(s.Root(0).ToStruct()) } -func (s Configuration) Which() Configuration_Which { return Configuration_Which(C.Struct(s).Get16(8)) } +func (s Configuration) Which() Configuration_Which { return Configuration_Which(C.Struct(s).Get16(16)) } func (s Configuration) ClusterId() string { return C.Struct(s).GetObject(0).ToText() } func (s 
Configuration) ClusterIdBytes() []byte { return C.Struct(s).GetObject(0).ToDataTrimLastByte() } func (s Configuration) SetClusterId(v string) { C.Struct(s).SetObject(0, s.Segment.NewText(v)) } -func (s Configuration) Version() uint32 { return C.Struct(s).Get32(0) } -func (s Configuration) SetVersion(v uint32) { C.Struct(s).Set32(0, v) } +func (s Configuration) ClusterUUId() uint64 { return C.Struct(s).Get64(0) } +func (s Configuration) SetClusterUUId(v uint64) { C.Struct(s).Set64(0, v) } +func (s Configuration) Version() uint32 { return C.Struct(s).Get32(8) } +func (s Configuration) SetVersion(v uint32) { C.Struct(s).Set32(8, v) } func (s Configuration) Hosts() C.TextList { return C.TextList(C.Struct(s).GetObject(1)) } func (s Configuration) SetHosts(v C.TextList) { C.Struct(s).SetObject(1, C.Object(v)) } -func (s Configuration) F() uint8 { return C.Struct(s).Get8(4) } -func (s Configuration) SetF(v uint8) { C.Struct(s).Set8(4, v) } -func (s Configuration) MaxRMCount() uint16 { return C.Struct(s).Get16(6) } -func (s Configuration) SetMaxRMCount(v uint16) { C.Struct(s).Set16(6, v) } -func (s Configuration) NoSync() bool { return C.Struct(s).Get1(40) } -func (s Configuration) SetNoSync(v bool) { C.Struct(s).Set1(40, v) } +func (s Configuration) F() uint8 { return C.Struct(s).Get8(12) } +func (s Configuration) SetF(v uint8) { C.Struct(s).Set8(12, v) } +func (s Configuration) MaxRMCount() uint16 { return C.Struct(s).Get16(14) } +func (s Configuration) SetMaxRMCount(v uint16) { C.Struct(s).Set16(14, v) } +func (s Configuration) NoSync() bool { return C.Struct(s).Get1(104) } +func (s Configuration) SetNoSync(v bool) { C.Struct(s).Set1(104, v) } func (s Configuration) Rms() C.UInt32List { return C.UInt32List(C.Struct(s).GetObject(2)) } func (s Configuration) SetRms(v C.UInt32List) { C.Struct(s).SetObject(2, C.Object(v)) } func (s Configuration) RmsRemoved() C.UInt32List { return C.UInt32List(C.Struct(s).GetObject(3)) } @@ -49,7 +51,7 @@ func (s Configuration) SetFingerprints(v Fingerprint_List) { C.Struct(s).SetObje func (s Configuration) TransitioningTo() ConfigurationTransitioningTo { return ConfigurationTransitioningTo(s) } -func (s Configuration) SetTransitioningTo() { C.Struct(s).Set16(8, 0) } +func (s Configuration) SetTransitioningTo() { C.Struct(s).Set16(16, 0) } func (s ConfigurationTransitioningTo) Configuration() Configuration { return Configuration(C.Struct(s).GetObject(5).ToStruct()) } @@ -78,8 +80,8 @@ func (s ConfigurationTransitioningTo) LostRMIds() C.UInt32List { func (s ConfigurationTransitioningTo) SetLostRMIds(v C.UInt32List) { C.Struct(s).SetObject(9, C.Object(v)) } -func (s ConfigurationTransitioningTo) InstalledOnNew() bool { return C.Struct(s).Get1(41) } -func (s ConfigurationTransitioningTo) SetInstalledOnNew(v bool) { C.Struct(s).Set1(41, v) } +func (s ConfigurationTransitioningTo) InstalledOnNew() bool { return C.Struct(s).Get1(105) } +func (s ConfigurationTransitioningTo) SetInstalledOnNew(v bool) { C.Struct(s).Set1(105, v) } func (s ConfigurationTransitioningTo) BarrierReached1() C.UInt32List { return C.UInt32List(C.Struct(s).GetObject(10)) } @@ -98,7 +100,7 @@ func (s ConfigurationTransitioningTo) Pending() ConditionPair_List { func (s ConfigurationTransitioningTo) SetPending(v ConditionPair_List) { C.Struct(s).SetObject(12, C.Object(v)) } -func (s Configuration) SetStable() { C.Struct(s).Set16(8, 1) } +func (s Configuration) SetStable() { C.Struct(s).Set16(16, 1) } func (s Configuration) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -127,6 
+129,25 @@ func (s Configuration) WriteJSON(w io.Writer) error { if err != nil { return err } + _, err = b.WriteString("\"clusterUUId\":") + if err != nil { + return err + } + { + s := s.ClusterUUId() + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + err = b.WriteByte(',') + if err != nil { + return err + } _, err = b.WriteString("\"version\":") if err != nil { return err @@ -696,6 +717,25 @@ func (s Configuration) WriteCapLit(w io.Writer) error { if err != nil { return err } + _, err = b.WriteString("clusterUUId = ") + if err != nil { + return err + } + { + s := s.ClusterUUId() + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + _, err = b.WriteString(", ") + if err != nil { + return err + } _, err = b.WriteString("version = ") if err != nil { return err @@ -1241,7 +1281,7 @@ func (s Configuration) MarshalCapLit() ([]byte, error) { type Configuration_List C.PointerList func NewConfigurationList(s *C.Segment, sz int) Configuration_List { - return Configuration_List(s.NewCompositeList(16, 13, sz)) + return Configuration_List(s.NewCompositeList(24, 13, sz)) } func (s Configuration_List) Len() int { return C.PointerList(s).Len() } func (s Configuration_List) At(i int) Configuration { diff --git a/capnp/connection.capnp b/capnp/connection.capnp index feb5d6b..0bf66ed 100644 --- a/capnp/connection.capnp +++ b/capnp/connection.capnp @@ -13,12 +13,12 @@ using Config = import "configuration.capnp"; using Migration = import "migration.capnp"; struct HelloServerFromServer { - localHost @0: Text; - rmId @1: UInt32; - bootCount @2: UInt32; - tieBreak @3: UInt32; - clusterId @4: Text; - rootId @5: Data; + localHost @0: Text; + rmId @1: UInt32; + bootCount @2: UInt32; + tieBreak @3: UInt32; + clusterId @4: Text; + clusterUUId @5: UInt64; } struct Message { diff --git a/capnp/connection.capnp.go b/capnp/connection.capnp.go index a85f1fa..e1d2c2e 100644 --- a/capnp/connection.capnp.go +++ b/capnp/connection.capnp.go @@ -13,13 +13,13 @@ import ( type HelloServerFromServer C.Struct func NewHelloServerFromServer(s *C.Segment) HelloServerFromServer { - return HelloServerFromServer(s.NewStruct(16, 3)) + return HelloServerFromServer(s.NewStruct(24, 2)) } func NewRootHelloServerFromServer(s *C.Segment) HelloServerFromServer { - return HelloServerFromServer(s.NewRootStruct(16, 3)) + return HelloServerFromServer(s.NewRootStruct(24, 2)) } func AutoNewHelloServerFromServer(s *C.Segment) HelloServerFromServer { - return HelloServerFromServer(s.NewStructAR(16, 3)) + return HelloServerFromServer(s.NewStructAR(24, 2)) } func ReadRootHelloServerFromServer(s *C.Segment) HelloServerFromServer { return HelloServerFromServer(s.Root(0).ToStruct()) @@ -39,9 +39,9 @@ func (s HelloServerFromServer) ClusterId() string { return C.Struct(s).GetOb func (s HelloServerFromServer) ClusterIdBytes() []byte { return C.Struct(s).GetObject(1).ToDataTrimLastByte() } -func (s HelloServerFromServer) SetClusterId(v string) { C.Struct(s).SetObject(1, s.Segment.NewText(v)) } -func (s HelloServerFromServer) RootId() []byte { return C.Struct(s).GetObject(2).ToData() } -func (s HelloServerFromServer) SetRootId(v []byte) { C.Struct(s).SetObject(2, s.Segment.NewData(v)) } +func (s HelloServerFromServer) SetClusterId(v string) { C.Struct(s).SetObject(1, s.Segment.NewText(v)) } +func (s HelloServerFromServer) ClusterUUId() uint64 { return C.Struct(s).Get64(16) } +func (s HelloServerFromServer) 
SetClusterUUId(v uint64) { C.Struct(s).Set64(16, v) } func (s HelloServerFromServer) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -146,12 +146,12 @@ func (s HelloServerFromServer) WriteJSON(w io.Writer) error { if err != nil { return err } - _, err = b.WriteString("\"rootId\":") + _, err = b.WriteString("\"clusterUUId\":") if err != nil { return err } { - s := s.RootId() + s := s.ClusterUUId() buf, err = json.Marshal(s) if err != nil { return err @@ -277,12 +277,12 @@ func (s HelloServerFromServer) WriteCapLit(w io.Writer) error { if err != nil { return err } - _, err = b.WriteString("rootId = ") + _, err = b.WriteString("clusterUUId = ") if err != nil { return err } { - s := s.RootId() + s := s.ClusterUUId() buf, err = json.Marshal(s) if err != nil { return err @@ -308,7 +308,7 @@ func (s HelloServerFromServer) MarshalCapLit() ([]byte, error) { type HelloServerFromServer_List C.PointerList func NewHelloServerFromServerList(s *C.Segment, sz int) HelloServerFromServer_List { - return HelloServerFromServer_List(s.NewCompositeList(16, 3, sz)) + return HelloServerFromServer_List(s.NewCompositeList(24, 2, sz)) } func (s HelloServerFromServer_List) Len() int { return C.PointerList(s).Len() } func (s HelloServerFromServer_List) At(i int) HelloServerFromServer { diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index 11816ab..785cc94 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -174,8 +174,10 @@ func (sts *SimpleTxnSubmitter) TopologyChanged(topology *configuration.Topology) sts.topology = topology sts.resolver = ch.NewResolver(topology.RMs(), topology.TwoFInc) sts.hashCache.SetResolver(sts.resolver) - if topology.Root.VarUUId != nil { - sts.hashCache.AddPosition(topology.Root.VarUUId, topology.Root.Positions) + if topology.Roots != nil { + for _, root := range topology.Roots { + sts.hashCache.AddPosition(root.VarUUId, root.Positions) + } } sts.calculateDisabledHashcodes() } diff --git a/configuration/configuration.go b/configuration/configuration.go index c062ff6..90b62fa 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -12,10 +12,12 @@ import ( "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" ch "goshawkdb.io/server/consistenthash" + "math/rand" "net" "os" "sort" "strconv" + "time" ) type Configuration struct { @@ -26,6 +28,7 @@ type Configuration struct { MaxRMCount uint16 NoSync bool ClientCertificateFingerprints map[string]map[string]*RootCapabilities + clusterUUId uint64 roots []string rms common.RMIds rmsRemoved map[common.RMId]server.EmptyStruct @@ -274,12 +277,13 @@ func decodeConfiguration(decoder *json.Decoder) (*Configuration, error) { func ConfigurationFromCap(config *msgs.Configuration) *Configuration { c := &Configuration{ - ClusterId: config.ClusterId(), - Version: config.Version(), - Hosts: config.Hosts().ToArray(), - F: config.F(), - MaxRMCount: config.MaxRMCount(), - NoSync: config.NoSync(), + ClusterId: config.ClusterId(), + clusterUUId: config.ClusterUUId(), + Version: config.Version(), + Hosts: config.Hosts().ToArray(), + F: config.F(), + MaxRMCount: config.MaxRMCount(), + NoSync: config.NoSync(), } rms := config.Rms() @@ -389,7 +393,7 @@ func (a *Configuration) Equal(b *Configuration) bool { if a == nil || b == nil { return a == b } - if !(a.ClusterId == b.ClusterId && a.Version == b.Version && a.F == b.F && a.MaxRMCount == b.MaxRMCount && a.NoSync == b.NoSync && len(a.Hosts) == len(b.Hosts) && len(a.fingerprints) == len(b.fingerprints) && len(a.rms) 
== len(b.rms) && len(a.rmsRemoved) == len(b.rmsRemoved)) { + if !(a.ClusterId == b.ClusterId && a.clusterUUId == b.clusterUUId && a.Version == b.Version && a.F == b.F && a.MaxRMCount == b.MaxRMCount && a.NoSync == b.NoSync && len(a.Hosts) == len(b.Hosts) && len(a.fingerprints) == len(b.fingerprints) && len(a.rms) == len(b.rms) && len(a.rmsRemoved) == len(b.rmsRemoved)) { return false } for idx, aHost := range a.Hosts { @@ -422,14 +426,33 @@ func (a *Configuration) Equal(b *Configuration) bool { } func (config *Configuration) String() string { - return fmt.Sprintf("Configuration{ClusterId: %v, Version: %v, Hosts: %v, F: %v, MaxRMCount: %v, NoSync: %v, RMs: %v, Removed: %v, ClientCertificates: %v}", - config.ClusterId, config.Version, config.Hosts, config.F, config.MaxRMCount, config.NoSync, config.rms, config.rmsRemoved, config.fingerprints) + return fmt.Sprintf("Configuration{ClusterId: %v(%v), Version: %v, Hosts: %v, F: %v, MaxRMCount: %v, NoSync: %v, RMs: %v, Removed: %v, ClientCertificates: %v}", + config.ClusterId, config.clusterUUId, config.Version, config.Hosts, config.F, config.MaxRMCount, config.NoSync, config.rms, config.rmsRemoved, config.fingerprints) +} + +func (config *Configuration) ClusterUUId() uint64 { + return config.clusterUUId +} + +func (config *Configuration) SetClusterUUId() { + if config.clusterUUId == 0 { + rng := rand.New(rand.NewSource(time.Now().UnixNano())) + r := uint64(rng.Int63()) + for r == 0 { + r = uint64(rng.Int63()) + } + config.clusterUUId = r + } } func (config *Configuration) Fingerprints() map[[sha256.Size]byte]map[string]*common.Capabilities { return config.fingerprints } +func (config *Configuration) RootNames() []string { + return config.roots +} + func (config *Configuration) NextBarrierReached1(rmId common.RMId) bool { if config.nextConfiguration != nil { for _, r := range config.nextConfiguration.BarrierReached1 { @@ -478,12 +501,13 @@ func (config *Configuration) SetRMsRemoved(removed map[common.RMId]server.EmptyS func (config *Configuration) Clone() *Configuration { clone := &Configuration{ - ClusterId: config.ClusterId, - Version: config.Version, - Hosts: make([]string, len(config.Hosts)), - F: config.F, - MaxRMCount: config.MaxRMCount, - NoSync: config.NoSync, + ClusterId: config.ClusterId, + clusterUUId: config.clusterUUId, + Version: config.Version, + Hosts: make([]string, len(config.Hosts)), + F: config.F, + MaxRMCount: config.MaxRMCount, + NoSync: config.NoSync, ClientCertificateFingerprints: make(map[string]map[string]*RootCapabilities, len(config.ClientCertificateFingerprints)), rms: make([]common.RMId, len(config.rms)), rmsRemoved: make(map[common.RMId]server.EmptyStruct, len(config.rmsRemoved)), @@ -508,6 +532,7 @@ func (config *Configuration) Clone() *Configuration { func (config *Configuration) AddToSegAutoRoot(seg *capn.Segment) msgs.Configuration { cap := msgs.AutoNewConfiguration(seg) cap.SetClusterId(config.ClusterId) + cap.SetClusterUUId(config.clusterUUId) cap.SetVersion(config.Version) hosts := seg.NewTextList(len(config.Hosts)) @@ -545,38 +570,7 @@ func (config *Configuration) AddToSegAutoRoot(seg *capn.Segment) msgs.Configurat for name, capabilities := range roots { rootCap := msgs.NewRoot(seg) rootCap.SetName(name) - capsCap := commsgs.NewCapabilities(seg) - switch capabilities.Value { - case common.Read: - capsCap.SetValue(commsgs.VALUECAPABILITY_READ) - case common.Write: - capsCap.SetValue(commsgs.VALUECAPABILITY_WRITE) - case common.ReadWrite: - capsCap.SetValue(commsgs.VALUECAPABILITY_READWRITE) - default: - 
capsCap.SetValue(commsgs.VALUECAPABILITY_NONE) - } - readRefsCap := capsCap.References().Read() - if capabilities.References.Read.All { - readRefsCap.SetAll() - } else { - onlyList := seg.NewUInt32List(len(capabilities.References.Read.Only)) - for idz, index := range capabilities.References.Read.Only { - onlyList.Set(idz, index) - } - readRefsCap.SetOnly(onlyList) - } - writeRefsCap := capsCap.References().Write() - if capabilities.References.Write.All { - writeRefsCap.SetAll() - } else { - onlyList := seg.NewUInt32List(len(capabilities.References.Write.Only)) - for idz, index := range capabilities.References.Write.Only { - onlyList.Set(idz, index) - } - writeRefsCap.SetOnly(onlyList) - } - rootCap.SetCapabilities(capsCap) + rootCap.SetCapabilities(capabilities.AddToSeg(seg)) rootsCap.Set(idy, rootCap) idy++ } diff --git a/configuration/topology.go b/configuration/topology.go index c8e2bd6..a98f215 100644 --- a/configuration/topology.go +++ b/configuration/topology.go @@ -17,7 +17,20 @@ type Topology struct { FInc uint8 TwoFInc uint16 DBVersion *common.TxnId - Root + Roots Roots +} + +type Roots []Root + +func (r Roots) String() string { + if r == nil || len(r) == 0 { + return "No Roots" + } + roots := "" + for _, root := range r { + roots += fmt.Sprintf("%v@%v|", root.VarUUId, (*capn.UInt8List)(root.Positions).ToArray()) + } + return roots[:len(roots)-1] } type Root struct { @@ -45,31 +58,36 @@ func BlankTopology(clusterId string) *Topology { } } -func NewTopology(txnId *common.TxnId, root *msgs.VarIdPos, config *Configuration) *Topology { +func NewTopology(txnId *common.TxnId, rootsCap *msgs.VarIdPos_List, config *Configuration) *Topology { t := &Topology{ Configuration: config, FInc: config.F + 1, TwoFInc: (2 * uint16(config.F)) + 1, DBVersion: txnId, } - if root != nil { - positions := root.Positions() - t.Root = Root{ - VarUUId: common.MakeVarUUId(root.Id()), - Positions: (*common.Positions)(&positions), + if rootsCap != nil { + t.Roots = make([]Root, rootsCap.Len()) + for idx, l := 0, rootsCap.Len(); idx < l; idx++ { + rootCap := rootsCap.At(idx) + positions := rootCap.Positions() + root := &t.Roots[idx] + root.VarUUId = common.MakeVarUUId(rootCap.Id()) + root.Positions = (*common.Positions)(&positions) } } return t } func (t *Topology) Clone() *Topology { - return &Topology{ + c := &Topology{ Configuration: t.Configuration.Clone(), FInc: t.FInc, TwoFInc: t.TwoFInc, DBVersion: t.DBVersion, - Root: t.Root, + Roots: make([]Root, len(t.Roots)), } + copy(c.Roots, t.Roots) + return c } func (t *Topology) SetConfiguration(config *Configuration) { @@ -78,26 +96,22 @@ func (t *Topology) SetConfiguration(config *Configuration) { t.TwoFInc = (2 * uint16(config.F)) + 1 } -func TopologyFromCap(txnId *common.TxnId, root *msgs.VarIdPos, data []byte) (*Topology, error) { +func TopologyFromCap(txnId *common.TxnId, roots *msgs.VarIdPos_List, data []byte) (*Topology, error) { seg, _, err := capn.ReadFromMemoryZeroCopy(data) if err != nil { return nil, err } configCap := msgs.ReadRootConfiguration(seg) config := ConfigurationFromCap(&configCap) - return NewTopology(txnId, root, config), nil + return NewTopology(txnId, roots, config), nil } func (t *Topology) String() string { if t == nil { return "nil" } - root := "unset" - if t.Root.VarUUId != nil { - root = fmt.Sprintf("%v@%v", t.Root.VarUUId, (*capn.UInt8List)(t.Root.Positions).ToArray()) - } - return fmt.Sprintf("Topology{%v, F+1: %v, 2F+1: %v, DBVersion: %v, Root: %v}", - t.Configuration, t.FInc, t.TwoFInc, t.DBVersion, root) + return 
fmt.Sprintf("Topology{%v, F+1: %v, 2F+1: %v, DBVersion: %v, Roots: %v}", + t.Configuration, t.FInc, t.TwoFInc, t.DBVersion, t.Roots) } func (t *Topology) IsBlank() bool { diff --git a/network/connection.go b/network/connection.go index 9c1454d..afb6329 100644 --- a/network/connection.go +++ b/network/connection.go @@ -29,7 +29,7 @@ type Connection struct { remoteHost string remoteRMId common.RMId remoteBootCount uint32 - remoteRootId *common.VarUUId + remoteClusterUUId uint64 combinedTieBreak uint32 socket net.Conn ConnectionNumber uint32 @@ -437,7 +437,16 @@ func (cah *connectionAwaitHandshake) start() (bool, error) { return false, nil } else { - return cah.maybeRestartConnection(fmt.Errorf("Received erroneous hello from peer")) + product := hello.Product() + if l := len(common.ProductName); len(product) > l { + product = product[:l] + "..." + } + version := hello.Version() + if l := len(common.ProductVersion); len(version) > l { + version = version[:l] + "..." + } + return cah.maybeRestartConnection(fmt.Errorf("Received erroneous hello from peer: received product name '%s' (expected '%s'), product version '%s' (expected '%s')", + product, common.ProductName, version, common.ProductVersion)) } } else { return cah.maybeRestartConnection(err) @@ -586,10 +595,7 @@ func (cash *connectionAwaitServerHandshake) start() (bool, error) { fmt.Errorf("%v has been removed from topology and may not rejoin.", cash.remoteRMId)) } - rootId := hello.RootId() - if len(rootId) == common.KeyLen { - cash.remoteRootId = common.MakeVarUUId(rootId) - } + cash.remoteClusterUUId = hello.ClusterUUId() cash.remoteBootCount = hello.BootCount() cash.combinedTieBreak = cash.combinedTieBreak ^ hello.TieBreak() cash.nextState(nil) @@ -603,7 +609,16 @@ func (cash *connectionAwaitServerHandshake) start() (bool, error) { } func (cash *connectionAwaitServerHandshake) verifyTopology(topology *configuration.Topology, remote *msgs.HelloServerFromServer) bool { - return topology.ClusterId == remote.ClusterId() + if topology.ClusterId == remote.ClusterId() { + remoteUUId := remote.ClusterUUId() + localUUId := topology.ClusterUUId() + if remoteUUId == 0 || localUUId == 0 { + return true + } else { + return remoteUUId == localUUId + } + } + return false } func (cash *connectionAwaitServerHandshake) makeHelloServerFromServer(topology *configuration.Topology) *capn.Segment { @@ -617,11 +632,7 @@ func (cash *connectionAwaitServerHandshake) makeHelloServerFromServer(topology * cash.combinedTieBreak = tieBreak hello.SetTieBreak(tieBreak) hello.SetClusterId(topology.ClusterId) - if topology.Root.VarUUId == nil { - hello.SetRootId([]byte{}) - } else { - hello.SetRootId(topology.Root.VarUUId[:]) - } + hello.SetClusterUUId(topology.ClusterUUId()) return seg } @@ -630,6 +641,7 @@ func (cash *connectionAwaitServerHandshake) makeHelloServerFromServer(topology * type connectionAwaitClientHandshake struct { *Connection peerCerts []*x509.Certificate + roots map[string]*common.Capabilities } func (cach *connectionAwaitClientHandshake) connectionStateMachineComponentWitness() {} @@ -648,39 +660,39 @@ func (cach *connectionAwaitClientHandshake) start() (bool, error) { return false, err } - if cach.topology.Root.VarUUId == nil { - return false, errors.New("Root not yet known") + if cach.topology.ClusterUUId() == 0 { + return false, errors.New("Cluster not yet formed") } peerCerts := socket.ConnectionState().PeerCertificates - if authenticated, hashsum := cach.verifyPeerCerts(cach.topology, peerCerts); authenticated { + if authenticated, hashsum, roots 
:= cach.verifyPeerCerts(cach.topology, peerCerts); authenticated { cach.peerCerts = peerCerts + cach.roots = roots log.Printf("User '%s' authenticated", hex.EncodeToString(hashsum[:])) + helloFromServer := cach.makeHelloClientFromServer(cach.topology, roots) + if err := cach.send(server.SegToBytes(helloFromServer)); err != nil { + return false, err + } + cach.remoteHost = cach.socket.RemoteAddr().String() + cach.nextState(nil) + return false, nil } else { return false, errors.New("Client connection rejected: No client certificate known") } - - helloFromServer := cach.makeHelloClientFromServer(cach.topology) - if err := cach.send(server.SegToBytes(helloFromServer)); err != nil { - return false, err - } - cach.remoteHost = cach.socket.RemoteAddr().String() - cach.nextState(nil) - return false, nil } -func (cach *connectionAwaitClientHandshake) verifyPeerCerts(topology *configuration.Topology, peerCerts []*x509.Certificate) (authenticated bool, hashsum [sha256.Size]byte) { +func (cach *connectionAwaitClientHandshake) verifyPeerCerts(topology *configuration.Topology, peerCerts []*x509.Certificate) (authenticated bool, hashsum [sha256.Size]byte, roots map[string]*common.Capabilities) { fingerprints := topology.Fingerprints() for _, cert := range peerCerts { hashsum = sha256.Sum256(cert.Raw) - if _, found := fingerprints[hashsum]; found { - return true, hashsum + if roots, found := fingerprints[hashsum]; found { + return true, hashsum, roots } } - return false, hashsum + return false, hashsum, nil } -func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology *configuration.Topology) *capn.Segment { +func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology *configuration.Topology, roots map[string]*common.Capabilities) *capn.Segment { seg := capn.NewBuffer(nil) hello := cmsgs.NewRootHelloClientFromServer(seg) namespace := make([]byte, common.KeyLen-8) @@ -688,9 +700,18 @@ func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology * binary.BigEndian.PutUint32(namespace[4:8], cach.connectionManager.BootCount) binary.BigEndian.PutUint32(namespace[8:], uint32(cach.connectionManager.RMId)) hello.SetNamespace(namespace) - if topology.Root.VarUUId != nil { - hello.SetRootId(topology.Root.VarUUId[:]) + rootsCap := cmsgs.NewRootList(seg, len(roots)) + idy := 0 + for idx, name := range topology.RootNames() { + if capabilities, found := roots[name]; found { + rootCap := rootsCap.At(idy) + idy++ + rootCap.SetName(name) + rootCap.SetVarId(topology.Roots[idx].VarUUId[:]) + rootCap.SetCapabilities(capabilities.AddToSeg(seg)) + } } + hello.SetRoots(rootsCap) return seg } @@ -743,7 +764,7 @@ func (cr *connectionRun) start() (bool, error) { cr.beatBytes = server.SegToBytes(seg) if cr.isServer { - cr.connectionManager.ServerEstablished(cr.Connection, cr.remoteHost, cr.remoteRMId, cr.remoteBootCount, cr.combinedTieBreak, cr.remoteRootId) + cr.connectionManager.ServerEstablished(cr.Connection, cr.remoteHost, cr.remoteRMId, cr.remoteBootCount, cr.combinedTieBreak, cr.remoteClusterUUId) } if cr.isClient { servers := cr.connectionManager.ClientEstablished(cr.ConnectionNumber, cr.Connection) @@ -782,10 +803,22 @@ func (cr *connectionRun) topologyChanged(tc *connectionMsgTopologyChanged) error } if cr.isClient { if topology != nil { - if authenticated, _ := cr.verifyPeerCerts(topology, cr.peerCerts); !authenticated { + if authenticated, _, roots := cr.verifyPeerCerts(topology, cr.peerCerts); !authenticated { server.Log("Connection", cr.Connection, 
"topologyChanged", tc, "(client unauthed)") tc.Done() return errors.New("Client connection closed: No client certificate known") + } else if len(roots) == len(cr.roots) { + for name, capabilitiesOld := range cr.roots { + if capabilitiesNew, found := roots[name]; !found || !capabilitiesNew.Equal(capabilitiesOld) { + server.Log("Connection", cr.Connection, "topologyChanged", tc, "(roots changed)") + tc.Done() + return errors.New("Client connection closed: roots have changed") + } + } + } else { + server.Log("Connection", cr.Connection, "topologyChanged", tc, "(roots changed)") + tc.Done() + return errors.New("Client connection closed: roots have changed") } } cr.submitter.TopologyChanged(topology) diff --git a/network/connectionmanager.go b/network/connectionmanager.go index 8694efb..f3e3b42 100644 --- a/network/connectionmanager.go +++ b/network/connectionmanager.go @@ -134,7 +134,7 @@ type connectionManagerMsgServerEstablished struct { rmId common.RMId bootCount uint32 tieBreak uint32 - rootId *common.VarUUId + clusterUUId uint64 } type connectionManagerMsgServerLost struct { @@ -207,7 +207,7 @@ func (cm *ConnectionManager) SetDesiredServers(localhost string, remotehosts []s }) } -func (cm *ConnectionManager) ServerEstablished(conn *Connection, host string, rmId common.RMId, bootCount uint32, tieBreak uint32, rootId *common.VarUUId) { +func (cm *ConnectionManager) ServerEstablished(conn *Connection, host string, rmId common.RMId, bootCount uint32, tieBreak uint32, clusterUUId uint64) { cm.enqueueQuery(&connectionManagerMsgServerEstablished{ Connection: conn, send: conn.Send, @@ -216,7 +216,7 @@ func (cm *ConnectionManager) ServerEstablished(conn *Connection, host string, rm rmId: rmId, bootCount: bootCount, tieBreak: tieBreak, - rootId: rootId, + clusterUUId: clusterUUId, }) } @@ -598,11 +598,11 @@ func (cm *ConnectionManager) setTopology(topology *configuration.Topology, callb cm.topology = topology cm.topologySubscribers.TopologyChanged(topology, callbacks) cd := cm.rmToServer[cm.RMId] - if topology.Root.VarUUId.Compare(cd.rootId) != common.EQ { + if clusterUUId := topology.ClusterUUId(); cd.clusterUUId == 0 && clusterUUId != 0 { delete(cm.rmToServer, cd.rmId) cm.serverConnSubscribers.ServerConnLost(cd.rmId) cd = cd.clone() - cd.rootId = topology.Root.VarUUId + cd.clusterUUId = clusterUUId cm.rmToServer[cm.RMId] = cd cm.servers[cd.host] = cd cm.serverConnSubscribers.ServerConnEstablished(cd) @@ -758,8 +758,8 @@ func (cd *connectionManagerMsgServerEstablished) TieBreak() uint32 { return cd.tieBreak } -func (cd *connectionManagerMsgServerEstablished) RootId() *common.VarUUId { - return cd.rootId +func (cd *connectionManagerMsgServerEstablished) ClusterUUId() uint64 { + return cd.clusterUUId } func (cd *connectionManagerMsgServerEstablished) Send(msg []byte) { @@ -781,6 +781,6 @@ func (cd *connectionManagerMsgServerEstablished) clone() *connectionManagerMsgSe rmId: cd.rmId, bootCount: cd.bootCount, tieBreak: cd.tieBreak, - rootId: cd.rootId, + clusterUUId: cd.clusterUUId, } } diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go index 5b5e861..c405b13 100644 --- a/network/topologytransmogrifier.go +++ b/network/topologytransmogrifier.go @@ -178,12 +178,7 @@ func (tt *TopologyTransmogrifier) actorLoop(head *cc.ChanCellHead, config *confi v.AddWriteSubscriber(configuration.VersionOne, &eng.VarWriteSubscriber{ Observe: func(v *eng.Var, value []byte, refs *msgs.VarIdPos_List, txn *eng.Txn) { - var rootVarPosPtr *msgs.VarIdPos - if refs.Len() > 0 { - root := 
refs.At(0) - rootVarPosPtr = &root - } - topology, err := configuration.TopologyFromCap(txn.Id, rootVarPosPtr, value) + topology, err := configuration.TopologyFromCap(txn.Id, refs, value) if err != nil { panic(fmt.Errorf("Unable to deserialize new topology: %v", err)) } @@ -546,7 +541,7 @@ func (task *targetConfig) tick() error { log.Println("Topology: Ensuring local topology.") task.task = &ensureLocalTopology{task} - case task.active.Version == 0: + case task.active.ClusterUUId() == 0: log.Printf("Topology: Attempting to join cluster with configuration: %v", task.config) task.task = &joinCluster{targetConfig: task} @@ -643,13 +638,13 @@ func (task *targetConfig) partitionByActiveConnection(rmIdLists ...common.RMIds) return active, passive } -func (task *targetConfig) verifyRoots(rootId *common.VarUUId, remoteHosts []string) (bool, error) { +func (task *targetConfig) verifyClusterUUIds(clusterUUId uint64, remoteHosts []string) (bool, error) { for _, host := range remoteHosts { if cd, found := task.hostToConnection[host]; found { - switch remoteRootId := cd.RootId(); { - case remoteRootId == nil: + switch remoteClusterUUId := cd.ClusterUUId(); { + case remoteClusterUUId == 0: // they're joining - case rootId.Compare(remoteRootId) == common.EQ: + case clusterUUId == remoteClusterUUId: // all good default: return false, errors.New("Attempt made to merge different logical clusters together, which is illegal. Aborting topology change.") @@ -752,7 +747,7 @@ type joinCluster struct { } func (task *joinCluster) tick() error { - if task.active.Version != 0 { + if task.active.ClusterUUId() != 0 { return task.completed() } @@ -778,7 +773,7 @@ func (task *joinCluster) tick() error { task.shareGoalWithAll() rmIds := make([]common.RMId, 0, len(task.config.Hosts)) - var rootId *common.VarUUId + clusterUUId := uint64(0) for _, host := range task.config.Hosts { cd, found := task.hostToConnection[host] if !found { @@ -787,12 +782,12 @@ func (task *joinCluster) tick() error { return nil } rmIds = append(rmIds, cd.RMId()) - switch theirRootId := cd.RootId(); { - case theirRootId == nil: + switch theirClusterUUId := cd.ClusterUUId(); { + case theirClusterUUId == 0: // they're joining too - case rootId == nil: - rootId = theirRootId - case rootId.Compare(theirRootId) == common.EQ: + case clusterUUId == 0: + clusterUUId = theirClusterUUId + case clusterUUId == theirClusterUUId: // all good default: return task.fatal( @@ -800,7 +795,7 @@ func (task *joinCluster) tick() error { } } - if allJoining := rootId == nil; allJoining { + if allJoining := clusterUUId == 0; allJoining { // Note that the order of RMIds here matches the order of hosts. return task.allJoining(rmIds) @@ -818,6 +813,7 @@ func (task *joinCluster) tick() error { func (task *joinCluster) allJoining(allRMIds common.RMIds) error { targetTopology := configuration.NewTopology(task.active.DBVersion, nil, task.config.Configuration) targetTopology.SetRMs(allRMIds) + targetTopology.SetClusterUUId() // NB: activeWithNext never gets installed to the DB itself. activeWithNext := task.active.Clone() @@ -831,14 +827,14 @@ func (task *joinCluster) allJoining(allRMIds common.RMIds) error { // txn's topology version is acceptable to our proposers. 
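
The rule applied in both verifyTopology (connection.go) and verifyClusterUUIds above can be stated on its own: a clusterUUId of 0 means the peer has not yet formed a cluster and so is compatible with anything, while two non-zero UUIds must be equal, otherwise two separately formed clusters would be merged. A minimal standalone sketch of that rule (compatibleClusterUUIds is a hypothetical helper, not part of the patch):

    package main

    import "fmt"

    // compatibleClusterUUIds distils the check made during the server
    // handshake and the join logic: 0 means "still joining", so it is
    // compatible with anything; two formed clusters must be the same one.
    func compatibleClusterUUIds(local, remote uint64) bool {
        if local == 0 || remote == 0 {
            return true // at least one side has not yet formed a cluster
        }
        return local == remote
    }

    func main() {
        fmt.Println(compatibleClusterUUIds(0, 42))  // true: we are joining
        fmt.Println(compatibleClusterUUIds(42, 42)) // true: same cluster
        fmt.Println(compatibleClusterUUIds(42, 43)) // false: illegal merge
    }
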
task.installTopology(activeWithNext, nil) - switch resubmit, err := task.attemptCreateRoot(targetTopology); { + switch resubmit, err := task.attemptCreateRoots(targetTopology); { case err != nil: return task.fatal(err) case resubmit: server.Log("Topology: Root creation needs resubmit") task.enqueueTick(task) return nil - case targetTopology.Root.VarUUId == nil: + case targetTopology.Roots == nil: // We failed; likely we need to wait for connections to change server.Log("Topology: Root creation failed") return nil @@ -852,10 +848,10 @@ func (task *joinCluster) allJoining(allRMIds common.RMIds) error { // sure that all peers are empty and moving to the same topology // is to have all peers as active. - // If we got this far then attemptCreateRoot will have modified + // If we got this far then attemptCreateRoots will have modified // targetTopology to include the updated root. We should install // this to the connectionManager. - activeWithNext.Root = targetTopology.Root + activeWithNext.Roots = targetTopology.Roots task.installTopology(activeWithNext, nil) result, resubmit, err := task.rewriteTopology(task.active, targetTopology, allRMIds, nil) @@ -974,7 +970,7 @@ func (task *installTargetOld) calculateTargetTopology() (*configuration.Topology // the -1 is because allRemoteHosts will not include localHost hostsAddedList := allRemoteHosts[len(hostsOld)-1:] - allAddedFound, err := task.verifyRoots(task.active.Root.VarUUId, hostsAddedList) + allAddedFound, err := task.verifyClusterUUIds(task.active.ClusterUUId(), hostsAddedList) if err != nil { return nil, task.error(err) } else if !allAddedFound { @@ -1603,11 +1599,14 @@ func (task *targetConfig) createTopologyTransaction(read, write *configuration.T rw := action.Readwrite() rw.SetVersion(read.DBVersion[:]) rw.SetValue(write.Serialize()) - refs := msgs.NewVarIdPosList(seg, 1) + roots := write.Roots + refs := msgs.NewVarIdPosList(seg, len(roots)) + for idx, root := range roots { + varIdPos := refs.At(idx) + varIdPos.SetId(root.VarUUId[:]) + varIdPos.SetPositions((capn.UInt8List)(*root.Positions)) + } rw.SetReferences(refs) - varIdPos := refs.At(0) - varIdPos.SetId(write.Root.VarUUId[:]) - varIdPos.SetPositions((capn.UInt8List)(*write.Root.Positions)) } allocs := msgs.NewAllocationList(seg, len(active)+len(passive)) @@ -1686,12 +1685,8 @@ func (task *targetConfig) getTopologyFromLocalDatabase() (*configuration.Topolog return nil, fmt.Errorf("Internal error: read of topology version 0 gave non-write action") } write := updateAction.Write() - var rootPtr *msgs.VarIdPos - if refs := write.References(); refs.Len() == 1 { - root := refs.At(0) - rootPtr = &root - } - return configuration.TopologyFromCap(dbversion, rootPtr, write.Value()) + refs := write.References() + return configuration.TopologyFromCap(dbversion, &refs, write.Value()) } } @@ -1760,23 +1755,15 @@ func (task *targetConfig) rewriteTopology(read, write *configuration.Topology, a fmt.Errorf("Internal error: update action from readwrite of topology gave non-write action!") } writeAction := updateAction.Write() - var rootVarPos *msgs.VarIdPos - if refs := writeAction.References(); refs.Len() == 1 { - root := refs.At(0) - rootVarPos = &root - } else if refs.Len() > 1 { - return nil, false, - fmt.Errorf("Internal error: update action from readwrite of topology has %v references instead of 1!", - refs.Len()) - } - topology, err := configuration.TopologyFromCap(dbversion, rootVarPos, writeAction.Value()) + refs := writeAction.References() + topology, err := 
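
The fInc/twoFInc arithmetic used here follows the usual quorum model: to tolerate F failures the topology keeps TwoFInc = 2F+1 voters and requires FInc = F+1 of them active, since any two sets of F+1 out of 2F+1 must intersect. A toy illustration of the active/passive split performed when creating the roots, using made-up RM ids (not the server's code):

    package main

    import "fmt"

    func main() {
        f := 1                           // tolerated failures
        fInc, twoFInc := f+1, 2*f+1      // F+1 active voters, 2F+1 voters total
        rmIds := []uint32{101, 102, 103} // hypothetical RM ids, len >= twoFInc
        active := rmIds[:fInc]           // must be connected; they vote
        passive := rmIds[fInc:twoFInc]   // learn the outcome
        fmt.Println("active:", active, "passive:", passive)
    }
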
configuration.TopologyFromCap(dbversion, &refs, writeAction.Value()) if err != nil { return nil, false, err } return topology, false, nil } -func (task *targetConfig) attemptCreateRoot(topology *configuration.Topology) (bool, error) { +func (task *targetConfig) attemptCreateRoots(topology *configuration.Topology) (bool, error) { twoFInc, fInc, f := int(topology.TwoFInc), int(topology.FInc), int(topology.F) active := make([]common.RMId, fInc) passive := make([]common.RMId, f) @@ -1791,26 +1778,34 @@ func (task *targetConfig) attemptCreateRoot(topology *configuration.Topology) (b nonEmpties = nonEmpties[fInc:] copy(passive, nonEmpties[:f]) - server.Log("Topology: Creating Root. Actives:", active, "; Passives:", passive) + server.Log("Topology: Creating Roots. Actives:", active, "; Passives:", passive) seg := capn.NewBuffer(nil) txn := msgs.NewTxn(seg) txn.SetSubmitter(uint32(task.connectionManager.RMId)) txn.SetSubmitterBootCount(task.connectionManager.BootCount) - actions := msgs.NewActionList(seg, 1) + rootNames := topology.RootNames() + rootNamesLen := len(rootNames) + roots := make([]configuration.Root, rootNamesLen) + actions := msgs.NewActionList(seg, rootNamesLen) + for idx := range rootNames { + action := actions.At(idx) + vUUId := task.localConnection.NextVarUUId() + action.SetVarId(vUUId[:]) + action.SetCreate() + create := action.Create() + positions := seg.NewUInt8List(int(topology.MaxRMCount)) + for idy, l := 0, positions.Len(); idy < l; idy++ { + positions.Set(idy, uint8(idy)) + } + create.SetPositions(positions) + create.SetValue([]byte{}) + create.SetReferences(msgs.NewVarIdPosList(seg, 0)) + root := &roots[idx] + root.VarUUId = vUUId + root.Positions = (*common.Positions)(&positions) + } txn.SetActions(actions) - action := actions.At(0) - vUUId := task.localConnection.NextVarUUId() - action.SetVarId(vUUId[:]) - action.SetCreate() - create := action.Create() - positions := seg.NewUInt8List(int(topology.MaxRMCount)) - create.SetPositions(positions) - for idx, l := 0, positions.Len(); idx < l; idx++ { - positions.Set(idx, uint8(idx)) - } - create.SetValue([]byte{}) - create.SetReferences(msgs.NewVarIdPosList(seg, 0)) allocs := msgs.NewAllocationList(seg, twoFInc) txn.SetAllocations(allocs) offset := 0 @@ -1823,9 +1818,11 @@ func (task *targetConfig) attemptCreateRoot(topology *configuration.Topology) (b } else { alloc.SetActive(0) } - indices := seg.NewUInt16List(1) + indices := seg.NewUInt16List(rootNamesLen) alloc.SetActionIndices(indices) - indices.Set(0, 0) + for idy := range rootNames { + indices.Set(idy, uint16(idy)) + } } offset += len(rmIds) } @@ -1839,9 +1836,8 @@ func (task *targetConfig) attemptCreateRoot(topology *configuration.Topology) (b return false, nil } if result.Which() == msgs.OUTCOME_COMMIT { - server.Log("Topology: Root created in", vUUId) - topology.Root.VarUUId = vUUId - topology.Root.Positions = (*common.Positions)(&positions) + server.Log("Topology: Roots created in", roots) + topology.Roots = roots return false, nil } abort := result.Abort() diff --git a/paxos/network.go b/paxos/network.go index 604a02e..98f93da 100644 --- a/paxos/network.go +++ b/paxos/network.go @@ -39,7 +39,7 @@ type Connection interface { RMId() common.RMId BootCount() uint32 TieBreak() uint32 - RootId() *common.VarUUId + ClusterUUId() uint64 Send(msg []byte) } From f43d5c44d091a5e7759aa06445960cb0e86e9afd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 11 Jun 2016 21:16:29 +0100 Subject: [PATCH 16/78] Well that was painful. 
Wanted to allow roots to not all be placed on the same first RMs: now that roots can change, it's no longer valid to assume that's possible. That in turn meant that we really want to use the client txn api, which meant the sts needs a proper topology before creating roots. Then of course it occurred to me that we no longer need to create roots until much later in the joining process, and indeed the explicit join step can actually go away after some light munging, which took ages to sort out. So it works again now, and it's better. But: - roots are not actually being created yet! - there's no step yet to do the delta calc on roots and set all that up. Ref T41. --HG-- branch : T41 --- client/simpletxnsubmitter.go | 1 + configuration/configuration.go | 4 +- network/connection.go | 24 +++-- network/topologytransmogrifier.go | 172 +++++++++--------------------- 4 files changed, 66 insertions(+), 135 deletions(-) diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index 785cc94..67abb59 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -166,6 +166,7 @@ func (sts *SimpleTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, } func (sts *SimpleTxnSubmitter) TopologyChanged(topology *configuration.Topology) { + server.Log("STS Topology Changed", topology) if topology == nil || topology.RMs().NonEmptyLen() < int(topology.TwoFInc) { // topology is needed for client txns. As we're booting up, we // just don't care. diff --git a/configuration/configuration.go b/configuration/configuration.go index 90b62fa..17f2ea3 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -426,8 +426,8 @@ func (a *Configuration) Equal(b *Configuration) bool { } func (config *Configuration) String() string { - return fmt.Sprintf("Configuration{ClusterId: %v(%v), Version: %v, Hosts: %v, F: %v, MaxRMCount: %v, NoSync: %v, RMs: %v, Removed: %v, ClientCertificates: %v}", - config.ClusterId, config.clusterUUId, config.Version, config.Hosts, config.F, config.MaxRMCount, config.NoSync, config.rms, config.rmsRemoved, config.fingerprints) + return fmt.Sprintf("Configuration{ClusterId: %v(%v), Version: %v, Hosts: %v, F: %v, MaxRMCount: %v, NoSync: %v, RMs: %v, Removed: %v, ClientCertificates: %v, %v}", + config.ClusterId, config.clusterUUId, config.Version, config.Hosts, config.F, config.MaxRMCount, config.NoSync, config.rms, config.rmsRemoved, config.fingerprints, config.nextConfiguration) } func (config *Configuration) ClusterUUId() uint64 { diff --git a/network/connection.go b/network/connection.go index afb6329..e1c3fba 100644 --- a/network/connection.go +++ b/network/connection.go @@ -75,8 +75,12 @@ type connectionMsgTopologyChanged struct { resultChan chan struct{} } -func (cmtc *connectionMsgTopologyChanged) Done() { - close(cmtc.resultChan) +func (cmtc *connectionMsgTopologyChanged) maybeClose() { + select { + case <-cmtc.resultChan: + default: + close(cmtc.resultChan) + } } type connectionMsgStatus struct { @@ -744,7 +748,7 @@ func (cr *connectionRun) outcomeReceived(out connectionMsgOutcomeReceived) { si := cr.submitterIdle cr.submitterIdle = nil server.Log("Connection", cr.Connection, "outcomeReceived", si, "(submitterIdle)") - si.Done() + si.maybeClose() } } @@ -792,39 +796,39 @@ func (cr *connectionRun) topologyChanged(tc *connectionMsgTopologyChanged) error if si := cr.submitterIdle; si != nil { cr.submitterIdle = nil server.Log("Connection", cr.Connection, "topologyChanged:", tc, "clearing old:", si) - si.Done() + si.maybeClose() }
topology := tc.topology cr.topology = topology if cr.currentState != cr { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(not in cr)") - tc.Done() + tc.maybeClose() return nil } if cr.isClient { if topology != nil { if authenticated, _, roots := cr.verifyPeerCerts(topology, cr.peerCerts); !authenticated { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(client unauthed)") - tc.Done() + tc.maybeClose() return errors.New("Client connection closed: No client certificate known") } else if len(roots) == len(cr.roots) { for name, capabilitiesOld := range cr.roots { if capabilitiesNew, found := roots[name]; !found || !capabilitiesNew.Equal(capabilitiesOld) { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(roots changed)") - tc.Done() + tc.maybeClose() return errors.New("Client connection closed: roots have changed") } } } else { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(roots changed)") - tc.Done() + tc.maybeClose() return errors.New("Client connection closed: roots have changed") } } cr.submitter.TopologyChanged(topology) if cr.submitter.IsIdle() { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(client, submitter is idle)") - tc.Done() + tc.maybeClose() } else { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(client, submitter not idle)") cr.submitterIdle = tc @@ -832,7 +836,7 @@ func (cr *connectionRun) topologyChanged(tc *connectionMsgTopologyChanged) error } if cr.isServer { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(isServer)") - tc.Done() + tc.maybeClose() if topology != nil { if _, found := topology.RMsRemoved()[cr.remoteRMId]; found { cr.restart = false diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go index c405b13..c7ef224 100644 --- a/network/topologytransmogrifier.go +++ b/network/topologytransmogrifier.go @@ -9,6 +9,7 @@ import ( mdb "github.com/msackman/gomdb" mdbs "github.com/msackman/gomdb/server" "goshawkdb.io/common" + cmsgs "goshawkdb.io/common/capnp" "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" "goshawkdb.io/server/client" @@ -153,7 +154,7 @@ func NewTopologyTransmogrifier(db *db.Databases, cm *ConnectionManager, lc *clie cm.AddServerConnectionSubscriber(tt) - go tt.actorLoop(head, config) + go tt.actorLoop(head) return tt, tt.localEstablished } @@ -169,7 +170,7 @@ func (tt *TopologyTransmogrifier) ConnectionEstablished(rmId common.RMId, conn p tt.enqueueQuery(topologyTransmogrifierMsgSetActiveConnections(conns)) } -func (tt *TopologyTransmogrifier) actorLoop(head *cc.ChanCellHead, config *configuration.Configuration) { +func (tt *TopologyTransmogrifier) actorLoop(head *cc.ChanCellHead) { subscriberInstalled := make(chan struct{}) tt.connectionManager.Dispatchers.VarDispatcher.ApplyToVar(func(v *eng.Var) { if v == nil { @@ -262,6 +263,7 @@ func (tt *TopologyTransmogrifier) activeConnectionsChange(conns map[common.RMId] } func (tt *TopologyTransmogrifier) setActive(topology *configuration.Topology) error { + server.Log("Topology: setActive:", topology) if tt.active != nil { switch { case tt.active.ClusterId != topology.ClusterId: @@ -541,7 +543,7 @@ func (task *targetConfig) tick() error { log.Println("Topology: Ensuring local topology.") task.task = &ensureLocalTopology{task} - case task.active.ClusterUUId() == 0: + case task.active.Next() != nil && task.active.Next().ClusterUUId() == 0: log.Printf("Topology: Attempting to join cluster with configuration: %v", task.config) task.task = 
&joinCluster{targetConfig: task} @@ -747,8 +749,12 @@ type joinCluster struct { } func (task *joinCluster) tick() error { - if task.active.ClusterUUId() != 0 { - return task.completed() + if !(task.active.Next() != nil && task.active.Next().ClusterUUId() == 0) { + if err := task.completed(); err != nil { + return err + } + task.selectGoal(task.config) + return nil } localHost, remoteHosts, err := task.config.LocalRemoteHosts(task.listenPort) @@ -811,63 +817,13 @@ func (task *joinCluster) tick() error { } func (task *joinCluster) allJoining(allRMIds common.RMIds) error { - targetTopology := configuration.NewTopology(task.active.DBVersion, nil, task.config.Configuration) - targetTopology.SetRMs(allRMIds) - targetTopology.SetClusterUUId() - - // NB: activeWithNext never gets installed to the DB itself. - activeWithNext := task.active.Clone() - activeWithNext.SetNext(&configuration.NextConfiguration{ - Configuration: targetTopology.Configuration, - AllHosts: activeWithNext.Hosts, - NewRMIds: allRMIds, - }) - - // We're about to create and run a txn, so we must make sure that - // txn's topology version is acceptable to our proposers. - task.installTopology(activeWithNext, nil) - - switch resubmit, err := task.attemptCreateRoots(targetTopology); { - case err != nil: - return task.fatal(err) - case resubmit: - server.Log("Topology: Root creation needs resubmit") - task.enqueueTick(task) - return nil - case targetTopology.Roots == nil: - // We failed; likely we need to wait for connections to change - server.Log("Topology: Root creation failed") - return nil - } + // NB: activeWithRMIds never gets installed to the DB itself. + activeWithRMIds := task.active.Clone() + activeWithRMIds.Hosts = task.config.Hosts + activeWithRMIds.SetRMs(allRMIds) + activeWithRMIds.SetNext(nil) - // Finally we need to rewrite the topology. For allJoining, we - // must use everyone as active. This is because we could have - // seen one of our peers when it had no RootId, but we've since - // lost that connection and in fact that peer has gone off and - // joined another cluster. So the only way to be instantaneously - // sure that all peers are empty and moving to the same topology - // is to have all peers as active. - - // If we got this far then attemptCreateRoots will have modified - // targetTopology to include the updated root. We should install - // this to the connectionManager. - activeWithNext.Roots = targetTopology.Roots - task.installTopology(activeWithNext, nil) - - result, resubmit, err := task.rewriteTopology(task.active, targetTopology, allRMIds, nil) - if err != nil { - return task.fatal(err) - } - if resubmit { - server.Log("Topology: Topology rewrite needs resubmit", allRMIds, result) - task.enqueueTick(task) - return nil - } - // !resubmit, so MUST be a BadRead, or success. By definition, - // if allJoining, everyone is active. So even if we weren't - // successful rewriting ourself, we're guaranteed to be sent - // someone else's write through the subscriber.
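
A side note on the connection.go hunk in this patch: Done() becomes maybeClose() so that completion can be signalled more than once without panicking, since closing an already-closed channel panics. A minimal sketch of the idiom; the probing select is only safe because these messages are completed from a single actor goroutine:

    package main

    import "fmt"

    // maybeClose closes ch only if it is not already closed. Not safe
    // under concurrent callers, but fine from one actor goroutine.
    func maybeClose(ch chan struct{}) {
        select {
        case <-ch: // already closed
        default:
            close(ch)
        }
    }

    func main() {
        ch := make(chan struct{})
        maybeClose(ch)
        maybeClose(ch) // second call is a no-op rather than a panic
        fmt.Println("closed twice without panic")
    }
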
- return nil + return task.setActive(activeWithRMIds) } // installTargetOld @@ -1063,6 +1019,7 @@ func (task *installTargetOld) calculateTargetTopology() (*configuration.Topology next := task.config.Configuration.Clone() next.SetRMs(rmIdsNew) next.Hosts = hostsNew + next.SetClusterUUId() // Pointer semantics, so we need to copy into our new set removed := make(map[common.RMId]server.EmptyStruct) @@ -1174,7 +1131,7 @@ func (task *installTargetNew) tick() error { fInc := ((len(active) + len(passive)) >> 1) + 1 active, passive = active[:fInc], append(active[fInc:], passive...) - newActive := task.active.Next().NewRMIds + newActive := next.NewRMIds for _, rmId := range newActive { if _, found := task.activeConnections[rmId]; !found { log.Printf("Topology: awaiting connections to new cluster members.") @@ -1763,88 +1720,57 @@ func (task *targetConfig) rewriteTopology(read, write *configuration.Topology, a return topology, false, nil } -func (task *targetConfig) attemptCreateRoots(topology *configuration.Topology) (bool, error) { - twoFInc, fInc, f := int(topology.TwoFInc), int(topology.FInc), int(topology.F) - active := make([]common.RMId, fInc) - passive := make([]common.RMId, f) - // this is valid only because root's positions are hardcoded - nonEmpties := topology.RMs().NonEmpty() - for _, rmId := range nonEmpties { - if _, found := task.activeConnections[rmId]; !found { - return false, nil - } - } - copy(active, nonEmpties[:fInc]) - nonEmpties = nonEmpties[fInc:] - copy(passive, nonEmpties[:f]) - - server.Log("Topology: Creating Roots. Actives:", active, "; Passives:", passive) +func (task *targetConfig) attemptCreateRoots(rootCount int) (bool, configuration.Roots, error) { + server.Log("Topology: Creating Roots.") seg := capn.NewBuffer(nil) - txn := msgs.NewTxn(seg) - txn.SetSubmitter(uint32(task.connectionManager.RMId)) - txn.SetSubmitterBootCount(task.connectionManager.BootCount) - rootNames := topology.RootNames() - rootNamesLen := len(rootNames) - roots := make([]configuration.Root, rootNamesLen) - actions := msgs.NewActionList(seg, rootNamesLen) - for idx := range rootNames { + ctxn := cmsgs.NewClientTxn(seg) + ctxn.SetRetry(false) + roots := make([]configuration.Root, rootCount) + actions := cmsgs.NewClientActionList(seg, rootCount) + for idx := range roots { action := actions.At(idx) vUUId := task.localConnection.NextVarUUId() action.SetVarId(vUUId[:]) action.SetCreate() create := action.Create() - positions := seg.NewUInt8List(int(topology.MaxRMCount)) - for idy, l := 0, positions.Len(); idy < l; idy++ { - positions.Set(idy, uint8(idy)) - } - create.SetPositions(positions) create.SetValue([]byte{}) - create.SetReferences(msgs.NewVarIdPosList(seg, 0)) + create.SetReferences(seg.NewDataList(0)) root := &roots[idx] root.VarUUId = vUUId - root.Positions = (*common.Positions)(&positions) } - txn.SetActions(actions) - allocs := msgs.NewAllocationList(seg, twoFInc) - txn.SetAllocations(allocs) - offset := 0 - for idx, rmIds := range []common.RMIds{active, passive} { - for idy, rmId := range rmIds { - alloc := allocs.At(idy + offset) - alloc.SetRmId(uint32(rmId)) - if idx == 0 { - alloc.SetActive(task.activeConnections[rmId].BootCount()) - } else { - alloc.SetActive(0) - } - indices := seg.NewUInt16List(rootNamesLen) - alloc.SetActionIndices(indices) - for idy := range rootNames { - indices.Set(idy, uint16(idy)) - } - } - offset += len(rmIds) - } - txn.SetFInc(topology.FInc) - txn.SetTopologyVersion(topology.Version) - result, err := task.localConnection.RunTransaction(&txn, true, 
active...) + ctxn.SetActions(actions) + result, err := task.localConnection.RunClientTransaction(&ctxn, nil, true) + log.Println("Create root result", result, err) if err != nil { - return false, err + return false, nil, err } if result == nil { // shutdown - return false, nil + return false, nil, nil } if result.Which() == msgs.OUTCOME_COMMIT { + actions := result.Txn().Actions() + for idx := range roots { + root := &roots[idx] + action := actions.At(idx) + vUUId := common.MakeVarUUId(action.VarId()) + if vUUId.Compare(root.VarUUId) != common.EQ { + return false, nil, fmt.Errorf("Internal error: actions changed order! At %v expecting %v, found %v", idx, root.VarUUId, vUUId) + } + if action.Which() != msgs.ACTION_CREATE { + return false, nil, fmt.Errorf("Internal error: actions changed type! At %v expecting create, found %v", idx, action.Which()) + } + positions := action.Create().Positions() + root.Positions = (*common.Positions)(&positions) + } server.Log("Topology: Roots created in", roots) - topology.Roots = roots - return false, nil + return false, roots, nil } abort := result.Abort() if abort.Which() == msgs.OUTCOMEABORT_RESUBMIT { - return true, nil + return true, nil, nil } - return false, fmt.Errorf("Internal error: creation of root gave rerun outcome") + return false, nil, fmt.Errorf("Internal error: creation of root gave rerun outcome") } // emigrator From 818238b729171cdc73571a940594100ac26ec08e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Jun 2016 12:35:55 +0100 Subject: [PATCH 17/78] Fixed a bug in which rootNames was not being repopulated when loading out of the db. Also decided there is no good reason to have both a capnp and Go form of capabilities, so rip that out. Ref T41. --HG-- branch : T41 --- configuration/configuration.go | 100 ++++++++++++++++----------------- configuration/topology.go | 8 ++- network/connection.go | 12 ++-- 3 files changed, 62 insertions(+), 58 deletions(-) diff --git a/configuration/configuration.go b/configuration/configuration.go index 17f2ea3..269e655 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -8,7 +8,7 @@ import ( "fmt" capn "github.com/glycerine/go-capnproto" "goshawkdb.io/common" - commsgs "goshawkdb.io/common/capnp" + cmsgs "goshawkdb.io/common/capnp" "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" ch "goshawkdb.io/server/consistenthash" @@ -32,7 +32,7 @@ type Configuration struct { roots []string rms common.RMIds rmsRemoved map[common.RMId]server.EmptyStruct - fingerprints map[[sha256.Size]byte]map[string]*common.Capabilities + fingerprints map[[sha256.Size]byte]map[string]*cmsgs.Capabilities nextConfiguration *NextConfiguration } @@ -191,9 +191,10 @@ func decodeConfiguration(decoder *json.Decoder) (*Configuration, error) { if len(config.ClientCertificateFingerprints) == 0 { return nil, errors.New("No ClientCertificateFingerprints defined") } else { - rootsMap := make(map[string]uint32) + rootsMap := make(map[string]server.EmptyStruct) rootsName := []string{} - fingerprints := make(map[[sha256.Size]byte]map[string]*common.Capabilities, len(config.ClientCertificateFingerprints)) + fingerprints := make(map[[sha256.Size]byte]map[string]*cmsgs.Capabilities, len(config.ClientCertificateFingerprints)) + seg := capn.NewBuffer(nil) for fingerprint, rootsCapabilities := range config.ClientCertificateFingerprints { fingerprintBytes, err := hex.DecodeString(fingerprint) if err != nil { @@ -204,10 +205,10 @@ func decodeConfiguration(decoder *json.Decoder) (*Configuration, error) { if 
len(rootsCapabilities) == 0 { return nil, fmt.Errorf("No roots configured for client fingerprint %v; at least 1 needed", fingerprint) } - roots := make(map[string]*common.Capabilities, len(rootsCapabilities)) + roots := make(map[string]*cmsgs.Capabilities, len(rootsCapabilities)) for name, rootCapabilities := range rootsCapabilities { if _, found := rootsMap[name]; !found { - rootsMap[name] = 0 + rootsMap[name] = server.EmptyStructVal rootsName = append(rootsName, name) } SortUInt32(rootCapabilities.ReferencesReadOnly).Sort() @@ -240,28 +241,37 @@ func decodeConfiguration(decoder *json.Decoder) (*Configuration, error) { return nil, fmt.Errorf("Client fingerprint %v, root %s: no capabilities have been granted.", fingerprint, name) } - cap := &common.Capabilities{} + cap := cmsgs.NewCapabilities(seg) switch { case rootCapabilities.ValueRead && rootCapabilities.ValueWrite: - cap.Value = common.ReadWrite + cap.SetValue(cmsgs.VALUECAPABILITY_READWRITE) case rootCapabilities.ValueRead: - cap.Value = common.Read + cap.SetValue(cmsgs.VALUECAPABILITY_READ) case rootCapabilities.ValueWrite: - cap.Value = common.Write + cap.SetValue(cmsgs.VALUECAPABILITY_WRITE) default: - cap.Value = common.None + cap.SetValue(cmsgs.VALUECAPABILITY_NONE) } + capRefs := cap.References() if rootCapabilities.ReferencesReadAll { - cap.References.Read.All = true + capRefs.Read().SetAll() } else { - cap.References.Read.Only = rootCapabilities.ReferencesReadOnly + only := seg.NewUInt32List(len(rootCapabilities.ReferencesReadOnly)) + for idx, index := range rootCapabilities.ReferencesReadOnly { + only.Set(idx, index) + } + capRefs.Read().SetOnly(only) } if rootCapabilities.ReferencesWriteAll { - cap.References.Write.All = true + capRefs.Write().SetAll() } else { - cap.References.Write.Only = rootCapabilities.ReferencesWriteOnly + only := seg.NewUInt32List(len(rootCapabilities.ReferencesWriteOnly)) + for idx, index := range rootCapabilities.ReferencesWriteOnly { + only.Set(idx, index) + } + capRefs.Write().SetOnly(only) } - roots[name] = cap + roots[name] = &cap } ary := [sha256.Size]byte{} copy(ary[:], fingerprintBytes) @@ -298,44 +308,31 @@ func ConfigurationFromCap(config *msgs.Configuration) *Configuration { c.rmsRemoved[common.RMId(rmsRemoved.At(idx))] = server.EmptyStructVal } + rootsName := []string{} + rootsMap := make(map[string]server.EmptyStruct) fingerprints := config.Fingerprints() - fingerprintsMap := make(map[[sha256.Size]byte]map[string]*common.Capabilities, fingerprints.Len()) + fingerprintsMap := make(map[[sha256.Size]byte]map[string]*cmsgs.Capabilities, fingerprints.Len()) for idx, l := 0, fingerprints.Len(); idx < l; idx++ { fingerprint := fingerprints.At(idx) ary := [sha256.Size]byte{} copy(ary[:], fingerprint.Sha256()) rootsCap := fingerprint.Roots() - roots := make(map[string]*common.Capabilities, rootsCap.Len()) + roots := make(map[string]*cmsgs.Capabilities, rootsCap.Len()) for idy, m := 0, rootsCap.Len(); idy < m; idy++ { rootCap := rootsCap.At(idy) - capsCap := rootCap.Capabilities() - cap := &common.Capabilities{} - switch capsCap.Value() { - case commsgs.VALUECAPABILITY_READ: - cap.Value = common.Read - case commsgs.VALUECAPABILITY_WRITE: - cap.Value = common.Write - case commsgs.VALUECAPABILITY_READWRITE: - cap.Value = common.ReadWrite - default: - cap.Value = common.None - } - refsCap := capsCap.References() - if refsReadCap := refsCap.Read(); refsReadCap.Which() == commsgs.CAPABILITIESREFERENCESREAD_ALL { - cap.References.Read.All = true - } else { - cap.References.Read.Only = 
refsReadCap.Only().ToArray() + name := rootCap.Name() + capabilities := rootCap.Capabilities() + roots[name] = &capabilities + if _, found := rootsMap[name]; !found { + rootsName = append(rootsName, name) + rootsMap[name] = server.EmptyStructVal } - if refsWriteCap := refsCap.Write(); refsWriteCap.Which() == commsgs.CAPABILITIESREFERENCESWRITE_ALL { - cap.References.Write.All = true - } else { - cap.References.Write.Only = refsWriteCap.Only().ToArray() - } - roots[rootCap.Name()] = cap } fingerprintsMap[ary] = roots } c.fingerprints = fingerprintsMap + sort.Strings(rootsName) + c.roots = rootsName if config.Which() == msgs.CONFIGURATION_TRANSITIONINGTO { next := config.TransitioningTo() @@ -416,7 +413,7 @@ func (a *Configuration) Equal(b *Configuration) bool { return false } else { for name, aRootCaps := range aRoots { - if bRootCaps, found := bRoots[name]; !found || !aRootCaps.Equal(bRootCaps) { + if bRootCaps, found := bRoots[name]; !found || !common.EqualCapabilities(aRootCaps, bRootCaps) { return false } } @@ -426,8 +423,8 @@ func (a *Configuration) Equal(b *Configuration) bool { } func (config *Configuration) String() string { - return fmt.Sprintf("Configuration{ClusterId: %v(%v), Version: %v, Hosts: %v, F: %v, MaxRMCount: %v, NoSync: %v, RMs: %v, Removed: %v, ClientCertificates: %v, %v}", - config.ClusterId, config.clusterUUId, config.Version, config.Hosts, config.F, config.MaxRMCount, config.NoSync, config.rms, config.rmsRemoved, config.fingerprints, config.nextConfiguration) + return fmt.Sprintf("Configuration{ClusterId: %v(%v), Version: %v, Hosts: %v, F: %v, MaxRMCount: %v, NoSync: %v, RMs: %v, Removed: %v, %v}", + config.ClusterId, config.clusterUUId, config.Version, config.Hosts, config.F, config.MaxRMCount, config.NoSync, config.rms, config.rmsRemoved, config.nextConfiguration) } func (config *Configuration) ClusterUUId() uint64 { @@ -445,7 +442,7 @@ func (config *Configuration) SetClusterUUId() { } } -func (config *Configuration) Fingerprints() map[[sha256.Size]byte]map[string]*common.Capabilities { +func (config *Configuration) Fingerprints() map[[sha256.Size]byte]map[string]*cmsgs.Capabilities { return config.fingerprints } @@ -508,16 +505,19 @@ func (config *Configuration) Clone() *Configuration { F: config.F, MaxRMCount: config.MaxRMCount, NoSync: config.NoSync, - ClientCertificateFingerprints: make(map[string]map[string]*RootCapabilities, len(config.ClientCertificateFingerprints)), + ClientCertificateFingerprints: nil, rms: make([]common.RMId, len(config.rms)), rmsRemoved: make(map[common.RMId]server.EmptyStruct, len(config.rmsRemoved)), - fingerprints: make(map[[sha256.Size]byte]map[string]*common.Capabilities, len(config.fingerprints)), + fingerprints: make(map[[sha256.Size]byte]map[string]*cmsgs.Capabilities, len(config.fingerprints)), nextConfiguration: config.nextConfiguration.Clone(), } copy(clone.Hosts, config.Hosts) - for k, v := range config.ClientCertificateFingerprints { - clone.ClientCertificateFingerprints[k] = v + if config.ClientCertificateFingerprints != nil { + clone.ClientCertificateFingerprints = make(map[string]map[string]*RootCapabilities, len(config.ClientCertificateFingerprints)) + for k, v := range config.ClientCertificateFingerprints { + clone.ClientCertificateFingerprints[k] = v + } } copy(clone.rms, config.rms) for k, v := range config.rmsRemoved { @@ -570,7 +570,7 @@ func (config *Configuration) AddToSegAutoRoot(seg *capn.Segment) msgs.Configurat for name, capabilities := range roots { rootCap := msgs.NewRoot(seg) rootCap.SetName(name) - 
rootCap.SetCapabilities(capabilities.AddToSeg(seg)) + rootCap.SetCapabilities(*capabilities) rootsCap.Set(idy, rootCap) idy++ } diff --git a/configuration/topology.go b/configuration/topology.go index a98f215..d3b5dcb 100644 --- a/configuration/topology.go +++ b/configuration/topology.go @@ -66,8 +66,12 @@ func NewTopology(txnId *common.TxnId, rootsCap *msgs.VarIdPos_List, config *Conf DBVersion: txnId, } if rootsCap != nil { - t.Roots = make([]Root, rootsCap.Len()) - for idx, l := 0, rootsCap.Len(); idx < l; idx++ { + t.Roots = make([]Root, len(config.RootNames())) + if rootsCap.Len() != len(t.Roots) { + panic(fmt.Sprintf("NewTopology expected to find %v roots by reference, but actually found %v", + len(t.Roots), rootsCap.Len())) + } + for idx := range t.Roots { rootCap := rootsCap.At(idx) positions := rootCap.Positions() root := &t.Roots[idx] diff --git a/network/connection.go b/network/connection.go index e1c3fba..4358fec 100644 --- a/network/connection.go +++ b/network/connection.go @@ -645,7 +645,7 @@ func (cash *connectionAwaitServerHandshake) makeHelloServerFromServer(topology * type connectionAwaitClientHandshake struct { *Connection peerCerts []*x509.Certificate - roots map[string]*common.Capabilities + roots map[string]*cmsgs.Capabilities } func (cach *connectionAwaitClientHandshake) connectionStateMachineComponentWitness() {} @@ -685,7 +685,7 @@ func (cach *connectionAwaitClientHandshake) start() (bool, error) { } } -func (cach *connectionAwaitClientHandshake) verifyPeerCerts(topology *configuration.Topology, peerCerts []*x509.Certificate) (authenticated bool, hashsum [sha256.Size]byte, roots map[string]*common.Capabilities) { +func (cach *connectionAwaitClientHandshake) verifyPeerCerts(topology *configuration.Topology, peerCerts []*x509.Certificate) (authenticated bool, hashsum [sha256.Size]byte, roots map[string]*cmsgs.Capabilities) { fingerprints := topology.Fingerprints() for _, cert := range peerCerts { hashsum = sha256.Sum256(cert.Raw) @@ -696,7 +696,7 @@ func (cach *connectionAwaitClientHandshake) verifyPeerCerts(topology *configurat return false, hashsum, nil } -func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology *configuration.Topology, roots map[string]*common.Capabilities) *capn.Segment { +func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology *configuration.Topology, roots map[string]*cmsgs.Capabilities) *capn.Segment { seg := capn.NewBuffer(nil) hello := cmsgs.NewRootHelloClientFromServer(seg) namespace := make([]byte, common.KeyLen-8) @@ -712,7 +712,7 @@ func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology * idy++ rootCap.SetName(name) rootCap.SetVarId(topology.Roots[idx].VarUUId[:]) - rootCap.SetCapabilities(capabilities.AddToSeg(seg)) + rootCap.SetCapabilities(*capabilities) } } hello.SetRoots(rootsCap) @@ -812,8 +812,8 @@ func (cr *connectionRun) topologyChanged(tc *connectionMsgTopologyChanged) error tc.maybeClose() return errors.New("Client connection closed: No client certificate known") } else if len(roots) == len(cr.roots) { - for name, capabilitiesOld := range cr.roots { - if capabilitiesNew, found := roots[name]; !found || !capabilitiesNew.Equal(capabilitiesOld) { + for name, capsOld := range cr.roots { + if capsNew, found := roots[name]; !found || !common.EqualCapabilities(capsNew, capsOld) { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(roots changed)") tc.maybeClose() return errors.New("Client connection closed: roots have changed") From 
efd48f7aa86525e74886103276b2c6acb19876d8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Jun 2016 22:17:29 +0100 Subject: [PATCH 18/78] WIP. Tidying and bug fixes as I think things through. Looks like we'll need an index of roots in the configuration, and we need to relax the checking on observation: the list must be == only if there is no next; otherwise the list can be >. Ref T41 --HG-- branch : T41 --- client/simpletxnsubmitter.go | 7 ++-- cmd/goshawkdb/main.go | 3 -- configuration/configuration.go | 16 ++++--- configuration/topology.go | 17 ++++---- network/connection.go | 2 + network/topologytransmogrifier.go | 70 ++++++++++++++++++++----------- 6 files changed, 71 insertions(+), 44 deletions(-) diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index 67abb59..89d4d14 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -167,7 +167,7 @@ func (sts *SimpleTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, func (sts *SimpleTxnSubmitter) TopologyChanged(topology *configuration.Topology) { server.Log("STS Topology Changed", topology) - if topology == nil || topology.RMs().NonEmptyLen() < int(topology.TwoFInc) { + if topology.IsBlank() { // topology is needed for client txns. As we're booting up, we // just don't care. return @@ -184,6 +184,7 @@ func (sts *SimpleTxnSubmitter) TopologyChanged(topology *configuration.Topology) } func (sts *SimpleTxnSubmitter) ServerConnectionsChanged(servers map[common.RMId]paxos.Connection) { + server.Log("STS ServerConnectionsChanged", servers) sts.connections = servers sts.calculateDisabledHashcodes() } @@ -200,10 +201,10 @@ func (sts *SimpleTxnSubmitter) calculateDisabledHashcodes() { sts.disabledHashCodes[rmId] = server.EmptyStructVal } } - server.Log("TM disabled hash codes", sts.disabledHashCodes) + server.Log("STS disabled hash codes", sts.disabledHashCodes) // need to wait until we've updated disabledHashCodes before // starting up any buffered txns.
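[Illustrative sketch, not part of the patch; names are hypothetical. The IsBlank() guard above plus the flush below form a buffer-then-replay idiom: client txns that arrive before a usable topology is installed are queued, then run in order once a non-blank topology lands. The idiom in isolation:]

    package main

    import "fmt"

    // deferrer queues work until it is ready, then replays the queue in order.
    type deferrer struct {
        ready    bool
        buffered []func() error
    }

    // run executes f immediately when ready, otherwise buffers it for later.
    func (d *deferrer) run(f func() error) error {
        if !d.ready {
            d.buffered = append(d.buffered, f)
            return nil
        }
        return f()
    }

    // becomeReady flips the flag before flushing, so replayed functions that
    // submit further work execute directly instead of being re-buffered.
    func (d *deferrer) becomeReady() error {
        d.ready = true
        funcs := d.buffered
        d.buffered = nil
        for _, f := range funcs {
            if err := f(); err != nil {
                return err
            }
        }
        return nil
    }

    func main() {
        d := &deferrer{}
        d.run(func() error { fmt.Println("buffered txn"); return nil })
        d.becomeReady() // prints "buffered txn"
    }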
- if sts.topology != nil && !sts.topology.IsBlank() && sts.bufferedSubmissions != nil { + if !sts.topology.IsBlank() && sts.bufferedSubmissions != nil { funcs := sts.bufferedSubmissions sts.bufferedSubmissions = nil for _, fun := range funcs { diff --git a/cmd/goshawkdb/main.go b/cmd/goshawkdb/main.go index c04a751..7cc59bb 100644 --- a/cmd/goshawkdb/main.go +++ b/cmd/goshawkdb/main.go @@ -158,9 +158,6 @@ func (s *server) start() { commandLineConfig, err := s.commandLineConfig() s.maybeShutdown(err) - if commandLineConfig == nil { - commandLineConfig = configuration.BlankTopology("").Configuration - } nodeCertPrivKeyPair, err := certs.GenerateNodeCertificatePrivateKeyPair(s.certificate) for idx := range s.certificate { diff --git a/configuration/configuration.go b/configuration/configuration.go index 269e655..e6e0a9b 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -431,14 +431,18 @@ func (config *Configuration) ClusterUUId() uint64 { return config.clusterUUId } -func (config *Configuration) SetClusterUUId() { +func (config *Configuration) SetClusterUUId(uuid uint64) { if config.clusterUUId == 0 { - rng := rand.New(rand.NewSource(time.Now().UnixNano())) - r := uint64(rng.Int63()) - for r == 0 { - r = uint64(rng.Int63()) + if uuid == 0 { + rng := rand.New(rand.NewSource(time.Now().UnixNano())) + r := uint64(rng.Int63()) + for r == 0 { + r = uint64(rng.Int63()) + } + config.clusterUUId = r + } else { + config.clusterUUId = uuid } - config.clusterUUId = r } } diff --git a/configuration/topology.go b/configuration/topology.go index d3b5dcb..d3be089 100644 --- a/configuration/topology.go +++ b/configuration/topology.go @@ -38,15 +38,16 @@ type Root struct { Positions *common.Positions } -func BlankTopology(clusterId string) *Topology { +func BlankTopology() *Topology { return &Topology{ Configuration: &Configuration{ - ClusterId: clusterId, - Version: 0, - Hosts: []string{}, - F: 0, - MaxRMCount: 0, - NoSync: false, + ClusterId: "", + clusterUUId: 0, + Version: 0, + Hosts: []string{}, + F: 0, + MaxRMCount: 0, + NoSync: false, ClientCertificateFingerprints: nil, rms: []common.RMId{}, fingerprints: nil, @@ -119,5 +120,5 @@ func (t *Topology) String() string { } func (t *Topology) IsBlank() bool { - return t == nil || t.Version == 0 + return t == nil || t.MaxRMCount == 0 || t.RMs().NonEmptyLen() < int(t.TwoFInc) } diff --git a/network/connection.go b/network/connection.go index 4358fec..929561e 100644 --- a/network/connection.go +++ b/network/connection.go @@ -666,6 +666,8 @@ func (cach *connectionAwaitClientHandshake) start() (bool, error) { if cach.topology.ClusterUUId() == 0 { return false, errors.New("Cluster not yet formed") + } else if len(cach.topology.RootNames()) == 0 { + return false, errors.New("No roots: cluster not yet formed") } peerCerts := socket.ConnectionState().PeerCertificates diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go index c7ef224..87a6455 100644 --- a/network/topologytransmogrifier.go +++ b/network/topologytransmogrifier.go @@ -266,7 +266,7 @@ func (tt *TopologyTransmogrifier) setActive(topology *configuration.Topology) er server.Log("Topology: setActive:", topology) if tt.active != nil { switch { - case tt.active.ClusterId != topology.ClusterId: + case tt.active.ClusterId != topology.ClusterId && tt.active.ClusterId != "": return fmt.Errorf("Topology: Fatal: config with ClusterId change from '%s' to '%s'.", tt.active.ClusterId, topology.ClusterId) @@ -352,15 +352,21 @@ func (tt 
*TopologyTransmogrifier) installTopology(topology *configuration.Topolo func (tt *TopologyTransmogrifier) selectGoal(goal *configuration.NextConfiguration) { if tt.active != nil { + activeClusterUUId, goalClusterUUId := tt.active.ClusterUUId(), goal.ClusterUUId() switch { case goal.Version == 0: return // done. - case goal.ClusterId != tt.active.ClusterId: + case goal.ClusterId != tt.active.ClusterId && tt.active.ClusterId != "": log.Printf("Topology: Illegal config: ClusterId should be '%s' instead of '%s'.", tt.active.ClusterId, goal.ClusterId) return + case goalClusterUUId != 0 && activeClusterUUId != 0 && goalClusterUUId != activeClusterUUId: + log.Printf("Topology: Illegal config: ClusterUUId should be '%v' instead of '%v'.", + activeClusterUUId, goalClusterUUId) + return + case goal.MaxRMCount != tt.active.MaxRMCount && tt.active.Version != 0: log.Printf("Topology: Illegal config change: Currently changes to MaxRMCount are not supported, sorry.") return @@ -374,6 +380,7 @@ func (tt *TopologyTransmogrifier) selectGoal(goal *configuration.NextConfigurati log.Printf("Topology: Config transition to version %v completed.", goal.Version) return } + goal.SetClusterUUId(activeClusterUUId) } if tt.task != nil { @@ -543,7 +550,7 @@ func (task *targetConfig) tick() error { log.Println("Topology: Ensuring local topology.") task.task = &ensureLocalTopology{task} - case task.active.Next() != nil && task.active.Next().ClusterUUId() == 0: + case task.active.ClusterId == "": log.Printf("Topology: Attempting to join cluster with configuration: %v", task.config) task.task = &joinCluster{targetConfig: task} @@ -698,7 +705,7 @@ type ensureLocalTopology struct { func (task *ensureLocalTopology) tick() error { if task.active != nil { - // the fact we're here means we're done - there is a topology + // The fact we're here means we're done - there is a topology // discovered one way or another. if err := task.completed(); err != nil { return err @@ -706,13 +713,6 @@ func (task *ensureLocalTopology) tick() error { // However, just because we have a local config doesn't mean it // actually satisfies the goal. Essentially, we're pretending // that the goal is in Next(). - task.installTopology(task.active, map[eng.TopologyChangeSubscriberType]func() error{ - eng.ConnectionManagerSubscriber: func() error { - if task.task != nil { - return task.task.tick() - } - return nil - }}) task.selectGoal(task.config) return nil } @@ -726,7 +726,7 @@ func (task *ensureLocalTopology) tick() error { return task.fatal(err) } - if topology == nil && task.config.ClusterId == "" { + if topology == nil && (task.config == nil || task.config.Configuration == nil || task.config.ClusterId == "") { return task.fatal(errors.New("No configuration supplied and no configuration found in local store. Cannot continue.")) } else if topology == nil { @@ -749,10 +749,13 @@ type joinCluster struct { } func (task *joinCluster) tick() error { - if !(task.active.Next() != nil && task.active.Next().ClusterUUId() == 0) { + if !(task.active.ClusterId == "") { if err := task.completed(); err != nil { return err } + // Exactly the same logic as in ensureLocalTopology: the active + // probably doesn't have a Next set; even if it does, it may + // have no relationship to task.config. 
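[Illustrative sketch, not part of the patch; names are hypothetical. The checks added to selectGoal above treat a zero ClusterUUId, like an empty ClusterId, as "not yet assigned": a mismatch is only fatal when both sides are set, and SetClusterUUId lets the goal inherit the active value. The three-way check in isolation:]

    package main

    import "fmt"

    // compatibleUUIds reports whether an active and a goal cluster UUId can
    // coexist: zero means "not yet assigned" and is compatible with anything.
    func compatibleUUIds(active, goal uint64) bool {
        return active == 0 || goal == 0 || active == goal
    }

    func main() {
        fmt.Println(compatibleUUIds(0, 42))  // true: cluster not yet formed
        fmt.Println(compatibleUUIds(42, 0))  // true: goal inherits 42
        fmt.Println(compatibleUUIds(42, 43)) // false: illegal config change
    }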
task.selectGoal(task.config) return nil } @@ -764,8 +767,12 @@ func (task *joinCluster) tick() error { return task.fatal(err) } - // must install to connectionManager before launching any connections - task.installTopology(task.active, nil) + // Set up the ClusterId so that we can actually create some connections. + active := task.active.Clone() + active.ClusterId = task.config.ClusterId + + // Must install to connectionManager before launching any connections + task.installTopology(active, nil) // we may not have the youngest topology and there could be other // hosts who have connected to us who are trying to send us a more // up to date topology. So we shouldn't kill off those connections. @@ -817,13 +824,19 @@ func (task *joinCluster) tick() error { } func (task *joinCluster) allJoining(allRMIds common.RMIds) error { - // NB: activeWithHosts never gets installed to the DB itself. - activeWithRMIds := task.active.Clone() - activeWithRMIds.Hosts = task.config.Hosts - activeWithRMIds.SetRMs(allRMIds) - activeWithRMIds.SetNext(nil) + // NB: active never gets installed to the DB itself. + config := task.config + config1 := configuration.BlankTopology().Configuration + config1.ClusterId = config.ClusterId + config1.Hosts = config.Hosts + config1.F = config.F + config1.MaxRMCount = config.MaxRMCount + config1.SetRMs(allRMIds) - return task.setActive(activeWithRMIds) + active := task.active.Clone() + active.SetConfiguration(config1) + + return task.setActive(active) } // installTargetOld @@ -870,7 +883,17 @@ func (task *installTargetOld) tick() error { log.Printf("Topology: Calculated target topology: %v (active: %v, passive: %v)", targetTopology.Next(), active, passive) - _, resubmit, err := task.rewriteTopology(task.active, targetTopology, active, passive) + resubmit, roots, err := task.attemptCreateRoots(3) + if err != nil { + return task.fatal(err) + } + if resubmit { + task.enqueueTick(task) + return nil + } + targetTopology.Roots = roots + + _, resubmit, err = task.rewriteTopology(task.active, targetTopology, active, passive) if err != nil { return task.fatal(err) } @@ -1019,7 +1042,6 @@ func (task *installTargetOld) calculateTargetTopology() (*configuration.Topology next := task.config.Configuration.Clone() next.SetRMs(rmIdsNew) next.Hosts = hostsNew - next.SetClusterUUId() // Pointer semantics, so we need to copy into our new set removed := make(map[common.RMId]server.EmptyStruct) @@ -1648,7 +1670,7 @@ func (task *targetConfig) getTopologyFromLocalDatabase() (*configuration.Topolog } func (task *targetConfig) createTopologyZero(config *configuration.NextConfiguration) (*configuration.Topology, error) { - topology := configuration.BlankTopology(config.ClusterId) + topology := configuration.BlankTopology() topology.SetNext(config) txn := task.createTopologyTransaction(nil, topology, []common.RMId{task.connectionManager.RMId}, nil) txnId := topology.DBVersion From 6679d7a08a965d599477585dbc88bf56c2f7f927 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 12 Jun 2016 22:24:51 +0100 Subject: [PATCH 19/78] Make the reparsing of roots work again. Realised we just need the extra tracking info in NextConfiguration, which makes a whole lot of sense. Ref T41. 
--HG-- branch : T41 --- configuration/topology.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/configuration/topology.go b/configuration/topology.go index d3be089..71888b6 100644 --- a/configuration/topology.go +++ b/configuration/topology.go @@ -67,11 +67,11 @@ func NewTopology(txnId *common.TxnId, rootsCap *msgs.VarIdPos_List, config *Conf DBVersion: txnId, } if rootsCap != nil { - t.Roots = make([]Root, len(config.RootNames())) - if rootsCap.Len() != len(t.Roots) { - panic(fmt.Sprintf("NewTopology expected to find %v roots by reference, but actually found %v", - len(t.Roots), rootsCap.Len())) + if rootsCap.Len() < len(config.RootNames()) { + panic(fmt.Sprintf("NewTopology expected to find at least %v roots by reference, but actually found %v", + len(config.RootNames()), rootsCap.Len())) } + t.Roots = make([]Root, rootsCap.Len()) for idx := range t.Roots { rootCap := rootsCap.At(idx) positions := rootCap.Positions() From be5ad95de5f010c0a23b56a37177ee64f842e437 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 15 Jun 2016 16:03:45 +0100 Subject: [PATCH 20/78] Ok, not yet completely convinced I've got this algorithm right, but it certainly could be! Ref T41. --HG-- branch : T41 --- capnp/configuration.capnp | 11 ++-- capnp/configuration.capnp.go | 100 +++++++++++++++++++++++++++--- configuration/configuration.go | 33 ++++++++-- network/topologytransmogrifier.go | 65 ++++++++++++++----- 4 files changed, 173 insertions(+), 36 deletions(-) diff --git a/capnp/configuration.capnp b/capnp/configuration.capnp index f8fd09e..33471b2 100644 --- a/capnp/configuration.capnp +++ b/capnp/configuration.capnp @@ -25,12 +25,13 @@ struct Configuration { newRMIds @12: List(UInt32); survivingRMIds @13: List(UInt32); lostRMIds @14: List(UInt32); - installedOnNew @15: Bool; - barrierReached1 @16: List(UInt32); - barrierReached2 @17: List(UInt32); - pending @18: List(ConditionPair); + rootIndices @15: List(UInt32); + installedOnNew @16: Bool; + barrierReached1 @17: List(UInt32); + barrierReached2 @18: List(UInt32); + pending @19: List(ConditionPair); } - stable @19: Void; + stable @20: Void; } } diff --git a/capnp/configuration.capnp.go b/capnp/configuration.capnp.go index 6decd61..2c67f2d 100644 --- a/capnp/configuration.capnp.go +++ b/capnp/configuration.capnp.go @@ -20,9 +20,9 @@ const ( CONFIGURATION_STABLE Configuration_Which = 1 ) -func NewConfiguration(s *C.Segment) Configuration { return Configuration(s.NewStruct(24, 13)) } -func NewRootConfiguration(s *C.Segment) Configuration { return Configuration(s.NewRootStruct(24, 13)) } -func AutoNewConfiguration(s *C.Segment) Configuration { return Configuration(s.NewStructAR(24, 13)) } +func NewConfiguration(s *C.Segment) Configuration { return Configuration(s.NewStruct(24, 14)) } +func NewRootConfiguration(s *C.Segment) Configuration { return Configuration(s.NewRootStruct(24, 14)) } +func AutoNewConfiguration(s *C.Segment) Configuration { return Configuration(s.NewStructAR(24, 14)) } func ReadRootConfiguration(s *C.Segment) Configuration { return Configuration(s.Root(0).ToStruct()) } func (s Configuration) Which() Configuration_Which { return Configuration_Which(C.Struct(s).Get16(16)) } func (s Configuration) ClusterId() string { return C.Struct(s).GetObject(0).ToText() } @@ -80,25 +80,31 @@ func (s ConfigurationTransitioningTo) LostRMIds() C.UInt32List { func (s ConfigurationTransitioningTo) SetLostRMIds(v C.UInt32List) { C.Struct(s).SetObject(9, C.Object(v)) } +func (s ConfigurationTransitioningTo) RootIndices() 
C.UInt32List { + return C.UInt32List(C.Struct(s).GetObject(10)) +} +func (s ConfigurationTransitioningTo) SetRootIndices(v C.UInt32List) { + C.Struct(s).SetObject(10, C.Object(v)) +} func (s ConfigurationTransitioningTo) InstalledOnNew() bool { return C.Struct(s).Get1(105) } func (s ConfigurationTransitioningTo) SetInstalledOnNew(v bool) { C.Struct(s).Set1(105, v) } func (s ConfigurationTransitioningTo) BarrierReached1() C.UInt32List { - return C.UInt32List(C.Struct(s).GetObject(10)) + return C.UInt32List(C.Struct(s).GetObject(11)) } func (s ConfigurationTransitioningTo) SetBarrierReached1(v C.UInt32List) { - C.Struct(s).SetObject(10, C.Object(v)) + C.Struct(s).SetObject(11, C.Object(v)) } func (s ConfigurationTransitioningTo) BarrierReached2() C.UInt32List { - return C.UInt32List(C.Struct(s).GetObject(11)) + return C.UInt32List(C.Struct(s).GetObject(12)) } func (s ConfigurationTransitioningTo) SetBarrierReached2(v C.UInt32List) { - C.Struct(s).SetObject(11, C.Object(v)) + C.Struct(s).SetObject(12, C.Object(v)) } func (s ConfigurationTransitioningTo) Pending() ConditionPair_List { - return ConditionPair_List(C.Struct(s).GetObject(12)) + return ConditionPair_List(C.Struct(s).GetObject(13)) } func (s ConfigurationTransitioningTo) SetPending(v ConditionPair_List) { - C.Struct(s).SetObject(12, C.Object(v)) + C.Struct(s).SetObject(13, C.Object(v)) } func (s Configuration) SetStable() { C.Struct(s).Set16(16, 1) } func (s Configuration) WriteJSON(w io.Writer) error { @@ -538,6 +544,43 @@ func (s Configuration) WriteJSON(w io.Writer) error { if err != nil { return err } + _, err = b.WriteString("\"rootIndices\":") + if err != nil { + return err + } + { + s := s.RootIndices() + { + err = b.WriteByte('[') + if err != nil { + return err + } + for i, s := range s.ToArray() { + if i != 0 { + _, err = b.WriteString(", ") + } + if err != nil { + return err + } + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + err = b.WriteByte(']') + } + if err != nil { + return err + } + } + err = b.WriteByte(',') + if err != nil { + return err + } _, err = b.WriteString("\"installedOnNew\":") if err != nil { return err @@ -1126,6 +1169,43 @@ func (s Configuration) WriteCapLit(w io.Writer) error { if err != nil { return err } + _, err = b.WriteString("rootIndices = ") + if err != nil { + return err + } + { + s := s.RootIndices() + { + err = b.WriteByte('[') + if err != nil { + return err + } + for i, s := range s.ToArray() { + if i != 0 { + _, err = b.WriteString(", ") + } + if err != nil { + return err + } + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) + if err != nil { + return err + } + } + err = b.WriteByte(']') + } + if err != nil { + return err + } + } + _, err = b.WriteString(", ") + if err != nil { + return err + } _, err = b.WriteString("installedOnNew = ") if err != nil { return err @@ -1281,7 +1361,7 @@ func (s Configuration) MarshalCapLit() ([]byte, error) { type Configuration_List C.PointerList func NewConfigurationList(s *C.Segment, sz int) Configuration_List { - return Configuration_List(s.NewCompositeList(24, 13, sz)) + return Configuration_List(s.NewCompositeList(24, 14, sz)) } func (s Configuration_List) Len() int { return C.PointerList(s).Len() } func (s Configuration_List) At(i int) Configuration { diff --git a/configuration/configuration.go b/configuration/configuration.go index e6e0a9b..426e7c3 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ 
-51,6 +51,7 @@ type NextConfiguration struct { NewRMIds common.RMIds SurvivingRMIds common.RMIds LostRMIds common.RMIds + RootIndices []uint32 InstalledOnNew bool BarrierReached1 common.RMIds BarrierReached2 common.RMIds @@ -58,8 +59,8 @@ type NextConfiguration struct { } func (next *NextConfiguration) String() string { - return fmt.Sprintf("Next Configuration:\n AllHosts: %v;\n NewRMIds: %v;\n SurvivingRMIds: %v;\n LostRMIds: %v;\n InstalledOnNew: %v;\n BarrierReached1: %v;\n BarrierReached2: %v;\n Pending:%v;\n Configuration: %v", - next.AllHosts, next.NewRMIds, next.SurvivingRMIds, next.LostRMIds, next.InstalledOnNew, next.BarrierReached1, next.BarrierReached2, next.Pending, next.Configuration) + return fmt.Sprintf("Next Configuration:\n AllHosts: %v;\n NewRMIds: %v;\n SurvivingRMIds: %v;\n LostRMIds: %v;\n RootIndices: %v;\n InstalledOnNew: %v;\n BarrierReached1: %v;\n BarrierReached2: %v;\n Pending:%v;\n Configuration: %v", + next.AllHosts, next.NewRMIds, next.SurvivingRMIds, next.LostRMIds, next.RootIndices, next.InstalledOnNew, next.BarrierReached1, next.BarrierReached2, next.Pending, next.Configuration) } func (a *NextConfiguration) Equal(b *NextConfiguration) bool { @@ -75,6 +76,15 @@ func (a *NextConfiguration) Equal(b *NextConfiguration) bool { return false } } + if len(a.RootIndices) == len(b.RootIndices) { + for idx, aIndex := range a.RootIndices { + if aIndex != b.RootIndices[idx] { + return false + } + } + } else { + return false + } return a.NewRMIds.Equal(b.NewRMIds) && a.SurvivingRMIds.Equal(b.SurvivingRMIds) && a.LostRMIds.Equal(b.LostRMIds) && @@ -102,6 +112,9 @@ func (next *NextConfiguration) Clone() *NextConfiguration { lostRMIds := make([]common.RMId, len(next.LostRMIds)) copy(lostRMIds, next.LostRMIds) + rootIndices := make([]uint32, len(next.RootIndices)) + copy(rootIndices, next.RootIndices) + barrierReached1 := make([]common.RMId, len(next.BarrierReached1)) copy(barrierReached1, next.BarrierReached1) @@ -127,6 +140,7 @@ func (next *NextConfiguration) Clone() *NextConfiguration { NewRMIds: newRMIds, SurvivingRMIds: survivingRMIds, LostRMIds: lostRMIds, + RootIndices: rootIndices, InstalledOnNew: next.InstalledOnNew, BarrierReached1: barrierReached1, BarrierReached2: barrierReached2, @@ -356,6 +370,8 @@ func ConfigurationFromCap(config *msgs.Configuration) *Configuration { lostRMIds[idx] = common.RMId(lostRMIdsCap.At(idx)) } + rootIndices := next.RootIndices().ToArray() + barrierReached1Cap := next.BarrierReached1() barrierReached1 := make([]common.RMId, barrierReached1Cap.Len()) for idx := range barrierReached1 { @@ -376,6 +392,7 @@ func ConfigurationFromCap(config *msgs.Configuration) *Configuration { NewRMIds: newRMIds, SurvivingRMIds: survivingRMIds, LostRMIds: lostRMIds, + RootIndices: rootIndices, InstalledOnNew: next.InstalledOnNew(), BarrierReached1: barrierReached1, BarrierReached2: barrierReached2, @@ -423,8 +440,8 @@ func (a *Configuration) Equal(b *Configuration) bool { } func (config *Configuration) String() string { - return fmt.Sprintf("Configuration{ClusterId: %v(%v), Version: %v, Hosts: %v, F: %v, MaxRMCount: %v, NoSync: %v, RMs: %v, Removed: %v, %v}", - config.ClusterId, config.clusterUUId, config.Version, config.Hosts, config.F, config.MaxRMCount, config.NoSync, config.rms, config.rmsRemoved, config.nextConfiguration) + return fmt.Sprintf("Configuration{ClusterId: %v(%v), Version: %v, Hosts: %v, F: %v, MaxRMCount: %v, NoSync: %v, RMs: %v, Removed: %v, RootNames: %v, %v}", + config.ClusterId, config.clusterUUId, config.Version, config.Hosts, 
config.F, config.MaxRMCount, config.NoSync, config.rms, config.rmsRemoved, config.roots, config.nextConfiguration) } func (config *Configuration) ClusterUUId() uint64 { @@ -510,6 +527,7 @@ func (config *Configuration) Clone() *Configuration { MaxRMCount: config.MaxRMCount, NoSync: config.NoSync, ClientCertificateFingerprints: nil, + roots: make([]string, len(config.roots)), rms: make([]common.RMId, len(config.rms)), rmsRemoved: make(map[common.RMId]server.EmptyStruct, len(config.rmsRemoved)), fingerprints: make(map[[sha256.Size]byte]map[string]*cmsgs.Capabilities, len(config.fingerprints)), @@ -523,6 +541,7 @@ func (config *Configuration) Clone() *Configuration { clone.ClientCertificateFingerprints[k] = v } } + copy(clone.roots, config.roots) copy(clone.rms, config.rms) for k, v := range config.rmsRemoved { clone.rmsRemoved[k] = v @@ -616,6 +635,12 @@ func (config *Configuration) AddToSegAutoRoot(seg *capn.Segment) msgs.Configurat } next.SetLostRMIds(lostRMIdsCap) + rootIndicesCap := seg.NewUInt32List(len(nextConfig.RootIndices)) + for idx, index := range nextConfig.RootIndices { + rootIndicesCap.Set(idx, index) + } + next.SetRootIndices(rootIndicesCap) + barrierReached1Cap := seg.NewUInt32List(len(nextConfig.BarrierReached1)) for idx, rmId := range nextConfig.BarrierReached1 { barrierReached1Cap.Set(idx, uint32(rmId)) diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go index 87a6455..ab83122 100644 --- a/network/topologytransmogrifier.go +++ b/network/topologytransmogrifier.go @@ -864,7 +864,7 @@ func (task *installTargetOld) tick() error { // the others so they might calculate different targets and then // we'd be racing. - targetTopology, err := task.calculateTargetTopology() + targetTopology, rootsRequired, err := task.calculateTargetTopology() if err != nil || targetTopology == nil { return err } @@ -881,19 +881,21 @@ func (task *installTargetOld) tick() error { // add on all new (if there are any) as passives passive = append(passive, targetTopology.Next().NewRMIds...) - log.Printf("Topology: Calculated target topology: %v (active: %v, passive: %v)", targetTopology.Next(), active, passive) + log.Printf("Topology: Calculated target topology: %v (new rootsRequired: %v, active: %v, passive: %v)", targetTopology.Next(), rootsRequired, active, passive) - resubmit, roots, err := task.attemptCreateRoots(3) - if err != nil { - return task.fatal(err) - } - if resubmit { - task.enqueueTick(task) - return nil + if rootsRequired != 0 { + resubmit, roots, err := task.attemptCreateRoots(rootsRequired) + if err != nil { + return task.fatal(err) + } + if resubmit { + task.enqueueTick(task) + return nil + } + targetTopology.Roots = append(targetTopology.Roots, roots...) 
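[Illustrative sketch, not part of the patch. rootsRequired above comes from the name-matching in calculateTargetTopology below: each root name in the next configuration either keeps the index of a surviving old root or is assigned a fresh slot appended past the old ones. The same computation, runnable stand-alone:]

    package main

    import "fmt"

    // planRoots maps each new root name to the index it will occupy: the old
    // index if the name survives, or a slot appended after the old roots if
    // it is new. It also returns how many new roots must be created.
    func planRoots(oldNames, newNames []string) ([]uint32, int) {
        oldIndices := make(map[string]uint32, len(oldNames))
        for idx, name := range oldNames {
            oldIndices[name] = uint32(idx)
        }
        indices := make([]uint32, len(newNames))
        required := 0
        for idx, name := range newNames {
            if index, found := oldIndices[name]; found {
                indices[idx] = index
            } else {
                indices[idx] = uint32(len(oldNames) + required)
                required++
            }
        }
        return indices, required
    }

    func main() {
        indices, required := planRoots([]string{"a", "b"}, []string{"b", "c"})
        fmt.Println(indices, required) // [1 2] 1: "b" survives at 1, "c" is created at 2
    }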
} - targetTopology.Roots = roots - _, resubmit, err = task.rewriteTopology(task.active, targetTopology, active, passive) + _, resubmit, err := task.rewriteTopology(task.active, targetTopology, active, passive) if err != nil { return task.fatal(err) } @@ -906,10 +908,10 @@ func (task *installTargetOld) tick() error { return nil } -func (task *installTargetOld) calculateTargetTopology() (*configuration.Topology, error) { +func (task *installTargetOld) calculateTargetTopology() (*configuration.Topology, int, error) { localHost, err := task.firstLocalHost(task.active.Configuration) if err != nil { - return nil, task.fatal(err) + return nil, 0, task.fatal(err) } hostsSurvived, hostsRemoved, hostsAdded := @@ -951,9 +953,9 @@ func (task *installTargetOld) calculateTargetTopology() (*configuration.Topology hostsAddedList := allRemoteHosts[len(hostsOld)-1:] allAddedFound, err := task.verifyClusterUUIds(task.active.ClusterUUId(), hostsAddedList) if err != nil { - return nil, task.error(err) + return nil, 0, task.error(err) } else if !allAddedFound { - return nil, nil + return nil, 0, nil } // map(old -> new) @@ -971,7 +973,7 @@ func (task *installTargetOld) calculateTargetTopology() (*configuration.Topology for host := range hostsAdded { cd, found := task.hostToConnection[host] if !found { - return nil, nil + return nil, 0, nil } hostsAdded[host] = cd connsAdded = append(connsAdded, cd) @@ -1060,17 +1062,39 @@ func (task *installTargetOld) calculateTargetTopology() (*configuration.Topology } conds := calculateMigrationConditions(rmIdsAdded, rmIdsLost, rmIdsSurvived, task.active.Configuration, next) + // now figure out which roots have survived and how many new ones + // we need to create. + oldNamesList := targetTopology.RootNames() + oldNamesCount := len(oldNamesList) + oldNames := make(map[string]uint32, oldNamesCount) + for idx, name := range oldNamesList { + oldNames[name] = uint32(idx) + } + newNames := next.RootNames() + rootsRequired := 0 + rootIndices := make([]uint32, len(newNames)) + for idx, name := range newNames { + if index, found := oldNames[name]; found { + rootIndices[idx] = index + } else { + rootIndices[idx] = uint32(oldNamesCount + rootsRequired) + rootsRequired++ + } + } + targetTopology.Roots = targetTopology.Roots[:oldNamesCount] + targetTopology.SetNext(&configuration.NextConfiguration{ Configuration: next, AllHosts: append(allRemoteHosts, localHost), NewRMIds: rmIdsAdded, SurvivingRMIds: rmIdsSurvived, LostRMIds: rmIdsLost, + RootIndices: rootIndices, InstalledOnNew: len(rmIdsAdded) == 0, Pending: conds, }) - return targetTopology, nil + return targetTopology, rootsRequired, nil } func calculateMigrationConditions(added, lost, survived []common.RMId, from, to *configuration.Configuration) configuration.Conds { @@ -1522,6 +1546,13 @@ func (task *installCompletion) tick() error { topology := task.active.Clone() topology.SetConfiguration(next.Configuration) + oldRoots := task.active.Roots + newRoots := make([]configuration.Root, len(next.RootIndices)) + for idx, index := range next.RootIndices { + newRoots[idx] = oldRoots[index] + } + topology.Roots = newRoots + _, resubmit, err := task.rewriteTopology(task.active, topology, active, passive) if err != nil { return task.fatal(err) From 00ca5870d515e405011496d0ede8e80baa5e9f67 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 17 Jun 2016 12:31:49 +0100 Subject: [PATCH 21/78] Odd... must have failed to regenerate at some point. 
Ref T34 --HG-- branch : T34 --- capnp/var.capnp.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/capnp/var.capnp.go b/capnp/var.capnp.go index d953324..d114b14 100644 --- a/capnp/var.capnp.go +++ b/capnp/var.capnp.go @@ -288,12 +288,6 @@ func (s Var_List) ToArray() []Var { func (s Var_List) Set(i int, item Var) { C.PointerList(s).Set(i, C.Object(item)) } type VarIdPos C.Struct -type VarIdPosCapabilities VarIdPos -type VarIdPosCapabilitiesValue VarIdPos -type VarIdPosCapabilitiesReferences VarIdPos -type VarIdPosCapabilitiesReferencesRead VarIdPos -type VarIdPosCapabilitiesReferencesWrite VarIdPos -type VarIdPosCapabilitiesReferencesRead_Which uint16 func NewVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewStruct(0, 3)) } func NewRootVarIdPos(s *C.Segment) VarIdPos { return VarIdPos(s.NewRootStruct(0, 3)) } From 84df7ef500ab971f26a93f6a8a2668fd2562308c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 18 Jun 2016 12:05:37 +0100 Subject: [PATCH 22/78] Have versionCache keep track of capabilities - ish. There is currently no culling of values or refs based on capabilities. Ref T34. --HG-- branch : T34 --- client/clienttxnsubmitter.go | 10 +- client/versioncache.go | 202 +++++++++++++++++++++++++++++++---- network/connection.go | 17 +-- 3 files changed, 198 insertions(+), 31 deletions(-) diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index e6ed223..0aefc49 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -21,10 +21,10 @@ type ClientTxnSubmitter struct { initialDelay time.Duration } -func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, cm paxos.ConnectionManager) *ClientTxnSubmitter { +func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, roots map[common.VarUUId]*cmsgs.Capabilities, cm paxos.ConnectionManager) *ClientTxnSubmitter { return &ClientTxnSubmitter{ SimpleTxnSubmitter: NewSimpleTxnSubmitter(rmId, bootCount, cm), - versionCache: NewVersionCache(), + versionCache: NewVersionCache(roots), txnLive: false, initialDelay: time.Duration(0), } @@ -126,17 +126,17 @@ func (cts *ClientTxnSubmitter) addCreatesToCache(outcome *msgs.Outcome) { } } -func (cts *ClientTxnSubmitter) translateUpdates(seg *capn.Segment, updates map[*msgs.Update][]*msgs.Action) cmsgs.ClientUpdate_List { +func (cts *ClientTxnSubmitter) translateUpdates(seg *capn.Segment, updates map[*msgs.Update]*[]*msgs.Action) cmsgs.ClientUpdate_List { clientUpdates := cmsgs.NewClientUpdateList(seg, len(updates)) idx := 0 for update, actions := range updates { clientUpdate := clientUpdates.At(idx) idx++ clientUpdate.SetVersion(update.TxnId()) - clientActions := cmsgs.NewClientActionList(seg, len(actions)) + clientActions := cmsgs.NewClientActionList(seg, len(*actions)) clientUpdate.SetActions(clientActions) - for idy, action := range actions { + for idy, action := range *actions { clientAction := clientActions.At(idy) clientAction.SetVarId(action.VarId()) switch action.Which() { diff --git a/client/versioncache.go b/client/versioncache.go index c9f1fb1..a47f396 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -2,7 +2,9 @@ package client import ( "fmt" + capn "github.com/glycerine/go-capnproto" "goshawkdb.io/common" + cmsgs "goshawkdb.io/common/capnp" msgs "goshawkdb.io/server/capnp" eng "goshawkdb.io/server/txnengine" ) @@ -12,10 +14,27 @@ type versionCache map[common.VarUUId]*cached type cached struct { txnId *common.TxnId clockElem uint64 + caps *cmsgs.Capabilities } -func NewVersionCache() versionCache { - return 
make(map[common.VarUUId]*cached) +var maxCapsCap *cmsgs.Capabilities + +func init() { + seg := capn.NewBuffer(nil) + cap := cmsgs.NewCapabilities(seg) + cap.SetValue(cmsgs.VALUECAPABILITY_READWRITE) + ref := cap.References() + ref.Read().SetAll() + ref.Write().SetAll() + maxCapsCap = &cap +} + +func NewVersionCache(roots map[common.VarUUId]*cmsgs.Capabilities) versionCache { + cache := make(map[common.VarUUId]*cached) + for vUUId, caps := range roots { + cache[vUUId] = &cached{caps: caps} + } + return cache } func (vc versionCache) UpdateFromCommit(txnId *common.TxnId, outcome *msgs.Outcome) { @@ -23,30 +42,43 @@ func (vc versionCache) UpdateFromCommit(txnId *common.TxnId, outcome *msgs.Outco actions := outcome.Txn().Actions() for idx, l := 0, actions.Len(); idx < l; idx++ { action := actions.At(idx) - if action.Which() != msgs.ACTION_READ { + if act := action.Which(); act != msgs.ACTION_READ { vUUId := common.MakeVarUUId(action.VarId()) if c, found := vc[*vUUId]; found { c.txnId = txnId c.clockElem = clock.At(vUUId) - } else { + } else if act == msgs.ACTION_CREATE { vc[*vUUId] = &cached{ txnId: txnId, clockElem: clock.At(vUUId), + caps: maxCapsCap, } + } else { + panic(fmt.Sprintf("%v contained action (%v) for unknown %v", txnId, act, vUUId)) } } } } -func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Update][]*msgs.Action { - validUpdates := make(map[*msgs.Update][]*msgs.Action) +type unreached struct { + cached *cached + action *msgs.Action + actions *[]*msgs.Action +} + +func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Update]*[]*msgs.Action { + l := updates.Len() + validUpdates := make(map[*msgs.Update]*[]*msgs.Action, l) + unreachedMap := make(map[common.VarUUId]unreached, l) - for idx, l := 0, updates.Len(); idx < l; idx++ { + for idx := 0; idx < l; idx++ { update := updates.At(idx) txnId := common.MakeTxnId(update.TxnId()) clock := eng.VectorClockFromCap(update.Clock()) actions := update.Actions() - validActions := make([]*msgs.Action, 0, actions.Len()) + validActionsList := make([]*msgs.Action, 0, actions.Len()) + validActions := &validActionsList + validUpdates[&update] = validActions for idy, m := 0, actions.Len(); idy < m; idy++ { action := actions.At(idy) @@ -55,14 +87,19 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda switch action.Which() { case msgs.ACTION_MISSING: - if c, found := vc[*vUUId]; found { + // In this context, ACTION_MISSING means we know there was + // a write of vUUId by txnId, but we have no idea what the + // value written was. 
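[Illustrative sketch, not part of the patch. The MISSING branch below and the WRITE branch after it gate on the same dominance test: an incoming version supersedes the cached one when its clock element is strictly greater, or equal with the larger txn id breaking the tie (cmp == common.LT means the cached id is the smaller). The predicate in isolation, with byte slices standing in for common.TxnId:]

    package main

    import (
        "bytes"
        "fmt"
    )

    // supersedes reports whether an incoming version (newElem, newTxnId)
    // should replace the cached one (oldElem, oldTxnId): a strictly greater
    // clock element wins; equal elements tie-break on the larger txn id.
    func supersedes(newElem, oldElem uint64, newTxnId, oldTxnId []byte) bool {
        if newElem != oldElem {
            return newElem > oldElem
        }
        return bytes.Compare(oldTxnId, newTxnId) < 0
    }

    func main() {
        fmt.Println(supersedes(2, 1, []byte{1}, []byte{9})) // true: newer clock element
        fmt.Println(supersedes(1, 1, []byte{9}, []byte{1})) // true: tie, larger txn id
        fmt.Println(supersedes(1, 1, []byte{1}, []byte{9})) // false: older version loses
    }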
+ //log.Printf("%v contains missing write action of %v\n", txnId, vUUId) + if c, found := vc[*vUUId]; found && c.txnId != nil { cmp := c.txnId.Compare(txnId) if cmp == common.EQ && clockElem != c.clockElem { panic(fmt.Sprintf("Clock version changed on missing for %v@%v (new:%v != old:%v)", vUUId, txnId, clockElem, c.clockElem)) } if clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) { - delete(vc, *vUUId) - validActions = append(validActions, &action) + c.txnId = nil + c.clockElem = 0 + *validActions = append(*validActions, &action) } } @@ -72,27 +109,152 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda if cmp == common.EQ && clockElem != c.clockElem { panic(fmt.Sprintf("Clock version changed on write for %v@%v (new:%v != old:%v)", vUUId, txnId, clockElem, c.clockElem)) } - if clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) { + if c.txnId == nil || clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) { c.txnId = txnId c.clockElem = clockElem - validActions = append(validActions, &action) + *validActions = append(*validActions, &action) + refs := action.Write().References() + worklist := []*msgs.VarIdPos_List{&refs} + for len(worklist) > 0 { + refs, worklist = *worklist[0], worklist[1:] + for idz, n := 0, refs.Len(); idz < n; idz++ { + ref := refs.At(idz) + caps := ref.Capabilities() + vUUId := common.MakeVarUUId(ref.Id()) + if c, found := vc[*vUUId]; found { + c.caps = mergeCaps(c.caps, &caps) + } else if ur, found := unreachedMap[*vUUId]; found { + delete(unreachedMap, *vUUId) + c := ur.cached + c.caps = &caps + vc[*vUUId] = c + *ur.actions = append(*ur.actions, ur.action) + refs1 := ur.action.Write().References() + worklist = append(worklist, &refs1) + } else { + vc[*vUUId] = &cached{caps: &caps} + } + } + } } + } else if _, found := unreachedMap[*vUUId]; found { + panic(fmt.Sprintf("%v reported twice in same update (and appeared in unreachedMap twice!)", vUUId)) } else { - vc[*vUUId] = &cached{ - txnId: txnId, - clockElem: clockElem, + //log.Printf("%v contains write action of %v\n", txnId, vUUId) + unreachedMap[*vUUId] = unreached{ + cached: &cached{ + txnId: txnId, + clockElem: clockElem, + }, + action: &action, + actions: validActions, } - validActions = append(validActions, &action) } default: - panic(fmt.Sprintf("%v", action.Which())) + panic(fmt.Sprintf("Unexpected action for %v on %v: %v", txnId, vUUId, action.Which())) } } + } - if len(validActions) != 0 { - validUpdates[&update] = validActions + for update, actions := range validUpdates { + if len(*actions) == 0 { + delete(validUpdates, update) } } return validUpdates } + +func mergeCaps(a, b *cmsgs.Capabilities) *cmsgs.Capabilities { + if a == maxCapsCap || b == maxCapsCap { + return maxCapsCap + } + + aValue := a.Value() + aRefsRead := a.References().Read() + aRefsWrite := a.References().Write() + if aValue == cmsgs.VALUECAPABILITY_READWRITE && + aRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL && + aRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL { + return a + } + + seg := capn.NewBuffer(nil) + cap := cmsgs.NewCapabilities(seg) + + bValue := b.Value() + valueRead := aValue == cmsgs.VALUECAPABILITY_READWRITE || aValue == cmsgs.VALUECAPABILITY_READ || + bValue == cmsgs.VALUECAPABILITY_READWRITE || bValue == cmsgs.VALUECAPABILITY_READ + valueWrite := aValue == cmsgs.VALUECAPABILITY_READWRITE || aValue == cmsgs.VALUECAPABILITY_WRITE || + bValue == cmsgs.VALUECAPABILITY_READWRITE || bValue == 
cmsgs.VALUECAPABILITY_WRITE + switch { + case valueRead && valueWrite: + cap.SetValue(cmsgs.VALUECAPABILITY_READWRITE) + case valueWrite: + cap.SetValue(cmsgs.VALUECAPABILITY_WRITE) + case valueRead: + cap.SetValue(cmsgs.VALUECAPABILITY_READ) + default: + cap.SetValue(cmsgs.VALUECAPABILITY_NONE) + } + + isMax := valueRead && valueWrite + + bRefsRead := b.References().Read() + readAll := aRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL || + bRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL + if readAll { + cap.References().Read().SetAll() + } else { + isMax = false + aOnly, bOnly := aRefsRead.Only().ToArray(), bRefsRead.Only().ToArray() + cap.References().Read().SetOnly(mergeOnlies(seg, aOnly, bOnly)) + } + + bRefsWrite := b.References().Write() + writeAll := aRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL || + bRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL + if writeAll { + cap.References().Write().SetAll() + } else { + isMax = false + aOnly, bOnly := aRefsWrite.Only().ToArray(), bRefsWrite.Only().ToArray() + cap.References().Write().SetOnly(mergeOnlies(seg, aOnly, bOnly)) + } + + if isMax { + return maxCapsCap + } else { + return &cap + } +} + +func mergeOnlies(seg *capn.Segment, a, b []uint32) capn.UInt32List { + only := make([]uint32, 0, len(a)+len(b)) + for len(a) > 0 && len(b) > 0 { + aIndex, bIndex := a[0], b[0] + switch { + case aIndex < bIndex: + only = append(only, aIndex) + a = a[1:] + case aIndex > bIndex: + only = append(only, bIndex) + b = b[1:] + default: + only = append(only, bIndex) + a = a[1:] + b = b[1:] + } + } + if len(a) > 0 { + only = append(only, a...) + } else { + only = append(only, b...) + } + + cap := seg.NewUInt32List(len(only)) + for idx, index := range only { + cap.Set(idx, index) + } + return cap +} diff --git a/network/connection.go b/network/connection.go index 929561e..6d6148f 100644 --- a/network/connection.go +++ b/network/connection.go @@ -646,6 +646,7 @@ type connectionAwaitClientHandshake struct { *Connection peerCerts []*x509.Certificate roots map[string]*cmsgs.Capabilities + rootsVar map[common.VarUUId]*cmsgs.Capabilities } func (cach *connectionAwaitClientHandshake) connectionStateMachineComponentWitness() {} @@ -675,7 +676,7 @@ func (cach *connectionAwaitClientHandshake) start() (bool, error) { cach.peerCerts = peerCerts cach.roots = roots log.Printf("User '%s' authenticated", hex.EncodeToString(hashsum[:])) - helloFromServer := cach.makeHelloClientFromServer(cach.topology, roots) + helloFromServer := cach.makeHelloClientFromServer(cach.topology) if err := cach.send(server.SegToBytes(helloFromServer)); err != nil { return false, err } @@ -698,7 +699,7 @@ func (cach *connectionAwaitClientHandshake) verifyPeerCerts(topology *configurat return false, hashsum, nil } -func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology *configuration.Topology, roots map[string]*cmsgs.Capabilities) *capn.Segment { +func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology *configuration.Topology) *capn.Segment { seg := capn.NewBuffer(nil) hello := cmsgs.NewRootHelloClientFromServer(seg) namespace := make([]byte, common.KeyLen-8) @@ -706,18 +707,22 @@ func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology * binary.BigEndian.PutUint32(namespace[4:8], cach.connectionManager.BootCount) binary.BigEndian.PutUint32(namespace[8:], uint32(cach.connectionManager.RMId)) hello.SetNamespace(namespace) - rootsCap :=
cmsgs.NewRootList(seg, len(cach.roots)) idy := 0 + rootsVar := make(map[common.VarUUId]*cmsgs.Capabilities, len(cach.roots)) for idx, name := range topology.RootNames() { - if capabilities, found := roots[name]; found { + if capabilities, found := cach.roots[name]; found { rootCap := rootsCap.At(idy) idy++ + vUUId := topology.Roots[idx].VarUUId rootCap.SetName(name) - rootCap.SetVarId(topology.Roots[idx].VarUUId[:]) + rootCap.SetVarId(vUUId[:]) rootCap.SetCapabilities(*capabilities) + rootsVar[*vUUId] = capabilities } } hello.SetRoots(rootsCap) + cach.rootsVar = rootsVar return seg } @@ -774,7 +779,7 @@ func (cr *connectionRun) start() (bool, error) { } if cr.isClient { servers := cr.connectionManager.ClientEstablished(cr.ConnectionNumber, cr.Connection) - cr.submitter = client.NewClientTxnSubmitter(cr.connectionManager.RMId, cr.connectionManager.BootCount, cr.connectionManager) + cr.submitter = client.NewClientTxnSubmitter(cr.connectionManager.RMId, cr.connectionManager.BootCount, cr.rootsVar, cr.connectionManager) cr.submitter.TopologyChanged(cr.topology) cr.submitter.ServerConnectionsChanged(servers) } From d24c04ff9bd5a365603b7d33fec1ac6bb28645ae Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 25 Jun 2016 20:30:44 +0100 Subject: [PATCH 23/78] Initial stab at implementing capabilities. This is fairly ugly in a number of places, plus I don't actually have a way to test this just yet as none of the clients support them yet! Ref T34. --HG-- branch : T34 --- client/clienttxnsubmitter.go | 61 +++--- client/localconnection.go | 18 +- client/simpletxnsubmitter.go | 241 +++++++++++++++++------ client/versioncache.go | 304 ++++++++++++++++++++++++------ configuration/configuration.go | 11 +- network/connection.go | 31 +-- network/topologytransmogrifier.go | 2 +- txnengine/frame.go | 6 +- 8 files changed, 496 insertions(+), 178 deletions(-) diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index 0aefc49..0a1298d 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -12,7 +12,7 @@ import ( "time" ) -type ClientTxnCompletionConsumer func(*cmsgs.ClientTxnOutcome, error) +type ClientTxnCompletionConsumer func(*cmsgs.ClientTxnOutcome, error) error type ClientTxnSubmitter struct { *SimpleTxnSubmitter @@ -36,10 +36,13 @@ func (cts *ClientTxnSubmitter) Status(sc *server.StatusConsumer) { sc.Join() } -func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, continuation ClientTxnCompletionConsumer) { +func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, continuation ClientTxnCompletionConsumer) error { if cts.txnLive { - continuation(nil, fmt.Errorf("Cannot submit client as a live txn already exists")) - return + return continuation(nil, fmt.Errorf("Cannot submit client as a live txn already exists")) + } + + if err := cts.versionCache.ValidateTransaction(ctxnCap); err != nil { + return continuation(nil, err) } seg := capn.NewBuffer(nil) @@ -55,11 +58,10 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, start := time.Now() var cont TxnCompletionConsumer - cont = func(txnId *common.TxnId, outcome *msgs.Outcome, err error) { + cont = func(txnId *common.TxnId, outcome *msgs.Outcome, err error) error { if outcome == nil || err != nil { // node is shutting down or error cts.txnLive = false - continuation(nil, err) - return + return continuation(nil, err) } end := time.Now() elapsed := end.Sub(start) @@ -72,8 +74,7 @@ func (cts *ClientTxnSubmitter) 
SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, cts.addCreatesToCache(outcome) cts.txnLive = false cts.initialDelay = delay >> 1 - continuation(&clientOutcome, nil) - return + return continuation(&clientOutcome, nil) default: abort := outcome.Abort() @@ -88,8 +89,7 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, clientOutcome.SetAbort(cts.translateUpdates(seg, validUpdates)) cts.txnLive = false cts.initialDelay = delay >> 1 - continuation(&clientOutcome, nil) - return + return continuation(&clientOutcome, nil) } } server.Log("Resubmitting", txnId, "; orig resubmit?", abort.Which() == msgs.OUTCOMEABORT_RESUBMIT) @@ -105,13 +105,13 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, binary.BigEndian.PutUint64(curTxnId[:8], curTxnIdNum) ctxnCap.SetId(curTxnId[:]) - cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, cont, delay, false) + return cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, cont, delay, false, cts.versionCache) } } cts.txnLive = true // fmt.Printf("%v ", delay) - cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, cont, delay, false) + return cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, cont, delay, false, cts.versionCache) } func (cts *ClientTxnSubmitter) addCreatesToCache(outcome *msgs.Outcome) { @@ -126,38 +126,41 @@ func (cts *ClientTxnSubmitter) addCreatesToCache(outcome *msgs.Outcome) { } } -func (cts *ClientTxnSubmitter) translateUpdates(seg *capn.Segment, updates map[*msgs.Update]*[]*msgs.Action) cmsgs.ClientUpdate_List { +func (cts *ClientTxnSubmitter) translateUpdates(seg *capn.Segment, updates map[common.TxnId]*[]*update) cmsgs.ClientUpdate_List { clientUpdates := cmsgs.NewClientUpdateList(seg, len(updates)) idx := 0 - for update, actions := range updates { + for txnId, actions := range updates { clientUpdate := clientUpdates.At(idx) idx++ - clientUpdate.SetVersion(update.TxnId()) + clientUpdate.SetVersion(txnId[:]) clientActions := cmsgs.NewClientActionList(seg, len(*actions)) clientUpdate.SetActions(clientActions) for idy, action := range *actions { clientAction := clientActions.At(idy) - clientAction.SetVarId(action.VarId()) - switch action.Which() { - case msgs.ACTION_MISSING: + clientAction.SetVarId(action.varUUId[:]) + if value := action.Value(); value == nil { clientAction.SetDelete() - case msgs.ACTION_WRITE: + } else { clientAction.SetWrite() - write := action.Write() clientWrite := clientAction.Write() - clientWrite.SetValue(write.Value()) - references := write.References() - clientReferences := seg.NewDataList(references.Len()) + clientWrite.SetValue(value) + references := action.references + clientReferences := cmsgs.NewClientVarIdPosList(seg, len(references)) clientWrite.SetReferences(clientReferences) - for idz, n := 0, references.Len(); idz < n; idz++ { - ref := references.At(idz) - clientReferences.Set(idz, ref.Id()) + referencesMask := action.ReferencesReadMask() + for idz, ref := range references { + varIdPos := clientReferences.At(idz) + if len(referencesMask) != 0 && referencesMask[0] == uint32(idz) { + referencesMask = referencesMask[1:] + varIdPos.SetVarId(ref.Id()) + varIdPos.SetCapabilities(ref.Capabilities()) + } else { + varIdPos.SetVarId([]byte{}) + } positions := common.Positions(ref.Positions()) cts.hashCache.AddPosition(common.MakeVarUUId(ref.Id()), &positions) } - default: - panic(fmt.Sprintf("Unexpected action type: %v", action.Which())) } } } diff --git a/client/localconnection.go b/client/localconnection.go index dfec185..cf73885 
100644 --- a/client/localconnection.go +++ b/client/localconnection.go @@ -92,10 +92,11 @@ type localConnectionMsgRunClientTxn struct { outcome *msgs.Outcome } -func (lcmrct *localConnectionMsgRunClientTxn) consumer(txnId *common.TxnId, outcome *msgs.Outcome, err error) { +func (lcmrct *localConnectionMsgRunClientTxn) consumer(txnId *common.TxnId, outcome *msgs.Outcome, err error) error { lcmrct.outcome = outcome lcmrct.err = err lcmrct.maybeClose() + return nil } type localConnectionMsgRunTxn struct { @@ -107,10 +108,11 @@ type localConnectionMsgRunTxn struct { outcome *msgs.Outcome } -func (lcmrt *localConnectionMsgRunTxn) consumer(txnId *common.TxnId, outcome *msgs.Outcome, err error) { +func (lcmrt *localConnectionMsgRunTxn) consumer(txnId *common.TxnId, outcome *msgs.Outcome, err error) error { lcmrt.outcome = outcome lcmrt.err = err lcmrt.maybeClose() + return nil } func (lc *LocalConnection) NextVarUUId() *common.VarUUId { @@ -283,16 +285,16 @@ func (lc *LocalConnection) actorLoop(head *cc.ChanCellHead) { case localConnectionMsgShutdown: terminate = true case *localConnectionMsgTopologyChanged: - lc.submitter.TopologyChanged(msgT.topology) + err = lc.submitter.TopologyChanged(msgT.topology) msgT.maybeClose() case *localConnectionMsgRunTxn: lc.runTransaction(msgT) case *localConnectionMsgRunClientTxn: - lc.runClientTransaction(msgT) + err = lc.runClientTransaction(msgT) case localConnectionMsgOutcomeReceived: - lc.submitter.SubmissionOutcomeReceived(msgT.sender, msgT.txnId, msgT.outcome) + err = lc.submitter.SubmissionOutcomeReceived(msgT.sender, msgT.txnId, msgT.outcome) case localConnectionMsgServerConnectionsChanged: - lc.submitter.ServerConnectionsChanged((map[common.RMId]paxos.Connection)(msgT)) + err = lc.submitter.ServerConnectionsChanged((map[common.RMId]paxos.Connection)(msgT)) case localConnectionMsgStatus: lc.status(msgT.StatusConsumer) default: @@ -310,7 +312,7 @@ func (lc *LocalConnection) actorLoop(head *cc.ChanCellHead) { lc.cellTail.Terminate() } -func (lc *LocalConnection) runClientTransaction(txnQuery *localConnectionMsgRunClientTxn) { +func (lc *LocalConnection) runClientTransaction(txnQuery *localConnectionMsgRunClientTxn) error { txn := txnQuery.txn if txnQuery.assignTxnId { txnId := lc.getNextTxnId() @@ -320,7 +322,7 @@ func (lc *LocalConnection) runClientTransaction(txnQuery *localConnectionMsgRunC if varPosMap := txnQuery.varPosMap; varPosMap != nil { lc.submitter.EnsurePositions(varPosMap) } - lc.submitter.SubmitClientTransaction(txn, txnQuery.consumer, 0, true) + return lc.submitter.SubmitClientTransaction(txn, txnQuery.consumer, 0, true, nil) } func (lc *LocalConnection) runTransaction(txnQuery *localConnectionMsgRunTxn) { diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index 89d4d14..5df174c 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -23,16 +23,16 @@ type SimpleTxnSubmitter struct { connections map[common.RMId]paxos.Connection connPub paxos.ServerConnectionPublisher outcomeConsumers map[common.TxnId]txnOutcomeConsumer - onShutdown map[*func(bool)]server.EmptyStruct + onShutdown map[*func(bool) error]server.EmptyStruct resolver *ch.Resolver hashCache *ch.ConsistentHashCache topology *configuration.Topology rng *rand.Rand - bufferedSubmissions []func() + bufferedSubmissions []func() error } -type txnOutcomeConsumer func(common.RMId, *common.TxnId, *msgs.Outcome) -type TxnCompletionConsumer func(*common.TxnId, *msgs.Outcome, error) +type txnOutcomeConsumer func(common.RMId, *common.TxnId, 
*msgs.Outcome) error +type TxnCompletionConsumer func(*common.TxnId, *msgs.Outcome, error) error func NewSimpleTxnSubmitter(rmId common.RMId, bootCount uint32, connPub paxos.ServerConnectionPublisher) *SimpleTxnSubmitter { rng := rand.New(rand.NewSource(time.Now().UnixNano())) @@ -44,7 +44,7 @@ func NewSimpleTxnSubmitter(rmId common.RMId, bootCount uint32, connPub paxos.Ser connections: nil, connPub: connPub, outcomeConsumers: make(map[common.TxnId]txnOutcomeConsumer), - onShutdown: make(map[*func(bool)]server.EmptyStruct), + onShutdown: make(map[*func(bool) error]server.EmptyStruct), hashCache: cache, rng: rng, } @@ -71,12 +71,13 @@ func (sts *SimpleTxnSubmitter) EnsurePositions(varPosMap map[common.VarUUId]*com } } -func (sts *SimpleTxnSubmitter) SubmissionOutcomeReceived(sender common.RMId, txnId *common.TxnId, outcome *msgs.Outcome) { +func (sts *SimpleTxnSubmitter) SubmissionOutcomeReceived(sender common.RMId, txnId *common.TxnId, outcome *msgs.Outcome) error { if consumer, found := sts.outcomeConsumers[*txnId]; found { - consumer(sender, txnId, outcome) + return consumer(sender, txnId, outcome) } else { // OSS is safe here - it's the default action on receipt of an unknown txnid paxos.NewOneShotSender(paxos.MakeTxnSubmissionCompleteMsg(txnId), sts.connPub, sender) + return nil } } @@ -104,7 +105,7 @@ func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, activeRMs []c } acceptors := paxos.GetAcceptorsFromTxn(txnCap) - shutdownFun := func(shutdown bool) { + shutdownFun := func(shutdown bool) error { delete(sts.outcomeConsumers, *txnId) // fmt.Printf("sts%v ", len(sts.outcomeConsumers)) if delay == 0 { @@ -124,53 +125,59 @@ func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, activeRMs []c // problem with these msgs getting to the propposers. paxos.NewOneShotSender(paxos.MakeTxnSubmissionAbortMsg(txnId), sts.connPub, activeRMs...) } - continuation(txnId, nil, nil) + return continuation(txnId, nil, nil) + } else { + return nil } } shutdownFunPtr := &shutdownFun sts.onShutdown[shutdownFunPtr] = server.EmptyStructVal outcomeAccumulator := paxos.NewOutcomeAccumulator(int(txnCap.FInc()), acceptors) - consumer := func(sender common.RMId, txnId *common.TxnId, outcome *msgs.Outcome) { + consumer := func(sender common.RMId, txnId *common.TxnId, outcome *msgs.Outcome) error { if outcome, _ = outcomeAccumulator.BallotOutcomeReceived(sender, outcome); outcome != nil { delete(sts.onShutdown, shutdownFunPtr) - shutdownFun(false) - continuation(txnId, outcome, nil) + if err := shutdownFun(false); err != nil { + return err + } else { + return continuation(txnId, outcome, nil) + } } + return nil } sts.outcomeConsumers[*txnId] = consumer // fmt.Printf("sts%v ", len(sts.outcomeConsumers)) } -func (sts *SimpleTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, continuation TxnCompletionConsumer, delay time.Duration, useNextVersion bool) { +func (sts *SimpleTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, continuation TxnCompletionConsumer, delay time.Duration, useNextVersion bool, vc versionCache) error { // Frames could attempt rolls before we have a topology. 
if sts.topology.IsBlank() || (sts.topology.Next() != nil && (!useNextVersion || !sts.topology.NextBarrierReached1(sts.rmId))) { - fun := func() { sts.SubmitClientTransaction(ctxnCap, continuation, delay, useNextVersion) } + fun := func() error { return sts.SubmitClientTransaction(ctxnCap, continuation, delay, useNextVersion, vc) } if sts.bufferedSubmissions == nil { - sts.bufferedSubmissions = []func(){fun} + sts.bufferedSubmissions = []func() error{fun} } else { sts.bufferedSubmissions = append(sts.bufferedSubmissions, fun) } - return + return nil } version := sts.topology.Version if next := sts.topology.Next(); next != nil && useNextVersion { version = next.Version } - txnCap, activeRMs, _, err := sts.clientToServerTxn(ctxnCap, version) + txnCap, activeRMs, _, err := sts.clientToServerTxn(ctxnCap, version, vc) if err != nil { - continuation(nil, nil, err) - return + return continuation(nil, nil, err) } sts.SubmitTransaction(txnCap, activeRMs, continuation, delay) + return nil } -func (sts *SimpleTxnSubmitter) TopologyChanged(topology *configuration.Topology) { +func (sts *SimpleTxnSubmitter) TopologyChanged(topology *configuration.Topology) error { server.Log("STS Topology Changed", topology) if topology.IsBlank() { // topology is needed for client txns. As we're booting up, we // just don't care. - return + return nil } sts.topology = topology sts.resolver = ch.NewResolver(topology.RMs(), topology.TwoFInc) @@ -180,18 +187,18 @@ func (sts *SimpleTxnSubmitter) TopologyChanged(topology *configuration.Topology) sts.hashCache.AddPosition(root.VarUUId, root.Positions) } } - sts.calculateDisabledHashcodes() + return sts.calculateDisabledHashcodes() } -func (sts *SimpleTxnSubmitter) ServerConnectionsChanged(servers map[common.RMId]paxos.Connection) { +func (sts *SimpleTxnSubmitter) ServerConnectionsChanged(servers map[common.RMId]paxos.Connection) error { server.Log("STS ServerConnectionsChanged", servers) sts.connections = servers - sts.calculateDisabledHashcodes() + return sts.calculateDisabledHashcodes() } -func (sts *SimpleTxnSubmitter) calculateDisabledHashcodes() { +func (sts *SimpleTxnSubmitter) calculateDisabledHashcodes() error { if sts.topology == nil || sts.connections == nil { - return + return nil } sts.disabledHashCodes = make(map[common.RMId]server.EmptyStruct, len(sts.topology.RMs())) for _, rmId := range sts.topology.RMs() { @@ -208,9 +215,12 @@ func (sts *SimpleTxnSubmitter) calculateDisabledHashcodes() { funcs := sts.bufferedSubmissions sts.bufferedSubmissions = nil for _, fun := range funcs { - fun() + if err := fun(); err != nil { + return err + } } } + return nil } func (sts *SimpleTxnSubmitter) Shutdown() { @@ -219,7 +229,7 @@ func (sts *SimpleTxnSubmitter) Shutdown() { } } -func (sts *SimpleTxnSubmitter) clientToServerTxn(clientTxnCap *cmsgs.ClientTxn, topologyVersion uint32) (*msgs.Txn, []common.RMId, []common.RMId, error) { +func (sts *SimpleTxnSubmitter) clientToServerTxn(clientTxnCap *cmsgs.ClientTxn, topologyVersion uint32, vc versionCache) (*msgs.Txn, []common.RMId, []common.RMId, error) { outgoingSeg := capn.NewBuffer(nil) txnCap := msgs.NewTxn(outgoingSeg) @@ -235,7 +245,7 @@ func (sts *SimpleTxnSubmitter) clientToServerTxn(clientTxnCap *cmsgs.ClientTxn, txnCap.SetActions(actions) picker := ch.NewCombinationPicker(int(sts.topology.FInc), sts.disabledHashCodes) - rmIdToActionIndices, err := sts.translateActions(outgoingSeg, picker, &actions, &clientActions) + rmIdToActionIndices, err := sts.translateActions(outgoingSeg, picker, &actions, &clientActions, vc) if err 
!= nil { return nil, nil, nil, err } @@ -274,7 +284,7 @@ func (sts *SimpleTxnSubmitter) setAllocations(allocIdx int, rmIdToActionIndices } } -func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picker *ch.CombinationPicker, actions *msgs.Action_List, clientActions *cmsgs.ClientAction_List) (map[common.RMId]*[]int, error) { +func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picker *ch.CombinationPicker, actions *msgs.Action_List, clientActions *cmsgs.ClientAction_List, vc versionCache) (map[common.RMId]*[]int, error) { referencesInNeedOfPositions := []*msgs.VarIdPos{} rmIdToActionIndices := make(map[common.RMId]*[]int) @@ -284,6 +294,7 @@ func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picke clientAction := clientActions.At(idx) action := actions.At(idx) action.SetVarId(clientAction.VarId()) + vUUId := common.MakeVarUUId(clientAction.VarId()) var err error var hashCodes []common.RMId @@ -293,18 +304,17 @@ func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picke sts.translateRead(&action, &clientAction) case cmsgs.CLIENTACTION_WRITE: - sts.translateWrite(outgoingSeg, &referencesInNeedOfPositions, &action, &clientAction) + sts.translateWrite(outgoingSeg, &referencesInNeedOfPositions, &action, &clientAction, vUUId, vc) case cmsgs.CLIENTACTION_READWRITE: - sts.translateReadWrite(outgoingSeg, &referencesInNeedOfPositions, &action, &clientAction) + sts.translateReadWrite(outgoingSeg, &referencesInNeedOfPositions, &action, &clientAction, vUUId, vc) case cmsgs.CLIENTACTION_CREATE: var positions *common.Positions - positions, hashCodes, err = sts.translateCreate(outgoingSeg, &referencesInNeedOfPositions, &action, &clientAction) + positions, hashCodes, err = sts.translateCreate(outgoingSeg, &referencesInNeedOfPositions, &action, &clientAction, vUUId, vc) if err != nil { return nil, err } - vUUId := common.MakeVarUUId(clientAction.VarId()) createdPositions[*vUUId] = positions case cmsgs.CLIENTACTION_ROLL: @@ -366,6 +376,9 @@ func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picke positions, found := createdPositions[*vUUId] if !found { positions = sts.hashCache.GetPositions(vUUId) + if !vc.EnsureSubset(vUUId, vUUIdPos.Capabilities()) { + return nil, fmt.Errorf("Reference created to %v attempts to extend known capabilities.", vUUId) + } } if positions == nil { return nil, fmt.Errorf("Txn contains reference to unknown var %v", vUUId) @@ -382,44 +395,37 @@ func (sts *SimpleTxnSubmitter) translateRead(action *msgs.Action, clientAction * read.SetVersion(clientRead.Version()) } -func (sts *SimpleTxnSubmitter) translateWrite(outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientAction *cmsgs.ClientAction) { +func (sts *SimpleTxnSubmitter) translateWrite(outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientAction *cmsgs.ClientAction, vUUId *common.VarUUId, vc versionCache) { action.SetWrite() clientWrite := clientAction.Write() write := action.Write() - write.SetValue(clientWrite.Value()) + write.SetValue(vc.ValueForWrite(vUUId, clientWrite.Value())) clientReferences := clientWrite.References() - references := msgs.NewVarIdPosList(outgoingSeg, clientReferences.Len()) - write.SetReferences(references) - copyReferences(&clientReferences, &references, referencesInNeedOfPositions) + write.SetReferences(copyReferences(&clientReferences, outgoingSeg, referencesInNeedOfPositions, vUUId, vc)) } -func 
(sts *SimpleTxnSubmitter) translateReadWrite(outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientAction *cmsgs.ClientAction) { +func (sts *SimpleTxnSubmitter) translateReadWrite(outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientAction *cmsgs.ClientAction, vUUId *common.VarUUId, vc versionCache) { action.SetReadwrite() clientReadWrite := clientAction.Readwrite() readWrite := action.Readwrite() readWrite.SetVersion(clientReadWrite.Version()) - readWrite.SetValue(clientReadWrite.Value()) + readWrite.SetValue(vc.ValueForWrite(vUUId, clientReadWrite.Value())) clientReferences := clientReadWrite.References() - references := msgs.NewVarIdPosList(outgoingSeg, clientReferences.Len()) - readWrite.SetReferences(references) - copyReferences(&clientReferences, &references, referencesInNeedOfPositions) + readWrite.SetReferences(copyReferences(&clientReferences, outgoingSeg, referencesInNeedOfPositions, vUUId, vc)) } -func (sts *SimpleTxnSubmitter) translateCreate(outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientAction *cmsgs.ClientAction) (*common.Positions, []common.RMId, error) { +func (sts *SimpleTxnSubmitter) translateCreate(outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientAction *cmsgs.ClientAction, vUUId *common.VarUUId, vc versionCache) (*common.Positions, []common.RMId, error) { action.SetCreate() clientCreate := clientAction.Create() create := action.Create() create.SetValue(clientCreate.Value()) - vUUId := common.MakeVarUUId(clientAction.VarId()) positions, hashCodes, err := sts.hashCache.CreatePositions(vUUId, int(sts.topology.MaxRMCount)) if err != nil { return nil, nil, err } create.SetPositions((capn.UInt8List)(*positions)) clientReferences := clientCreate.References() - references := msgs.NewVarIdPosList(outgoingSeg, clientReferences.Len()) - create.SetReferences(references) - copyReferences(&clientReferences, &references, referencesInNeedOfPositions) + create.SetReferences(copyReferences(&clientReferences, outgoingSeg, referencesInNeedOfPositions, nil, vc)) return positions, hashCodes, nil } @@ -430,16 +436,135 @@ func (sts *SimpleTxnSubmitter) translateRoll(outgoingSeg *capn.Segment, referenc roll.SetVersion(clientRoll.Version()) roll.SetValue(clientRoll.Value()) clientReferences := clientRoll.References() - references := msgs.NewVarIdPosList(outgoingSeg, clientReferences.Len()) - roll.SetReferences(references) - copyReferences(&clientReferences, &references, referencesInNeedOfPositions) + roll.SetReferences(copyReferences(&clientReferences, outgoingSeg, referencesInNeedOfPositions, nil, nil)) } -func copyReferences(clientReferences *capn.DataList, references *msgs.VarIdPos_List, referencesInNeedOfPositions *[]*msgs.VarIdPos) { - for idx, l := 0, clientReferences.Len(); idx < l; idx++ { - vUUIdPos := references.At(idx) - vUUId := clientReferences.At(idx) - vUUIdPos.SetId(vUUId) - *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) +func copyReferences(clientReferences *cmsgs.ClientVarIdPos_List, seg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, vUUId *common.VarUUId, vc versionCache) msgs.VarIdPos_List { + all, mask, existingRefs := vc.ReferencesWriteMask(vUUId) + if all { + refs := msgs.NewVarIdPosList(seg, clientReferences.Len()) + for idx, l := 0, clientReferences.Len(); idx < l; idx++ { + clientRef := clientReferences.At(idx) + vUUIdPos := 
refs.At(idx) + vUUIdPos.SetId(clientRef.VarId()) + vUUIdPos.SetCapabilities(translateCapabilities(seg, clientRef.Capabilities())) + *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) + } + return refs + } else { + refs := msgs.NewVarIdPosList(seg, len(existingRefs)) + clientRefLen := clientReferences.Len() + if clientRefLen > len(existingRefs) { + clientRefLen = len(existingRefs) + } + idx := 0 + for ; idx < clientRefLen; idx++ { + vUUIdPos := refs.At(idx) + if len(mask) > 0 && mask[0] == uint32(idx) { + mask = mask[1:] + clientRef := clientReferences.At(idx) + vUUIdPos.SetId(clientRef.VarId()) + vUUIdPos.SetCapabilities(translateCapabilities(seg, clientRef.Capabilities())) + } else { + existing := existingRefs[idx] + vUUIdPos.SetId(existing.Id()) + vUUIdPos.SetCapabilities(existing.Capabilities()) + } + *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) + } + for ; idx < len(existingRefs); idx++ { + vUUIdPos := refs.At(idx) + existing := existingRefs[idx] + vUUIdPos.SetId(existing.Id()) + vUUIdPos.SetCapabilities(existing.Capabilities()) + } + return refs + } +} + +func translateCapabilities(seg *capn.Segment, cap cmsgs.Capabilities) cmsgs.Capabilities { + readWhich, writeWhich := cap.References().Read().Which(), cap.References().Write().Which() + if readWhich == cmsgs.CAPABILITIESREFERENCESREAD_ALL && + writeWhich == cmsgs.CAPABILITIESREFERENCESWRITE_ALL { + return cap + } + rebuild := false + if readWhich == cmsgs.CAPABILITIESREFERENCESREAD_ONLY { + only := cap.References().Read().Only().ToArray() + if len(only) > 1 { + old := only[0] + for _, index := range only[1:] { + if old >= index { + rebuild = true + break + } + old = index + } + } + } + if !rebuild && writeWhich == cmsgs.CAPABILITIESREFERENCESWRITE_ONLY { + only := cap.References().Write().Only().ToArray() + if len(only) > 1 { + old := only[0] + for _, index := range only[1:] { + if old >= index { + rebuild = true + break + } + old = index + } + } + } + if !rebuild { + return cap + } + capNew := cmsgs.NewCapabilities(seg) + capNew.SetValue(cap.Value()) + readNew := capNew.References().Read() + if readWhich == cmsgs.CAPABILITIESREFERENCESREAD_ALL { + readNew.SetAll() + } else { + only := cap.References().Read().Only().ToArray() + common.SortUInt32(only).Sort() + if len(only) > 1 { + old := only[0] + for idx := 1; idx < len(only); idx++ { + cur := only[idx] + if cur == old { + only = append(only[:idx], only[idx+1:]...) + idx-- + } + old = cur + } + } + onlyNew := seg.NewUInt32List(len(only)) + for idx, index := range only { + onlyNew.Set(idx, index) + } + readNew.SetOnly(onlyNew) + } + writeNew := capNew.References().Write() + if writeWhich == cmsgs.CAPABILITIESREFERENCESWRITE_ALL { + writeNew.SetAll() + } else { + only := cap.References().Write().Only().ToArray() + common.SortUInt32(only).Sort() + if len(only) > 1 { + old := only[0] + for idx := 1; idx < len(only); idx++ { + cur := only[idx] + if cur == old { + only = append(only[:idx], only[idx+1:]...) 
+ idx-- + } + old = cur + } + } + onlyNew := seg.NewUInt32List(len(only)) + for idx, index := range only { + onlyNew.Set(idx, index) + } + writeNew.SetOnly(onlyNew) } + return capNew } diff --git a/client/versioncache.go b/client/versioncache.go index a47f396..3b786cb 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -12,9 +12,21 @@ import ( type versionCache map[common.VarUUId]*cached type cached struct { - txnId *common.TxnId - clockElem uint64 - caps *cmsgs.Capabilities + txnId *common.TxnId + clockElem uint64 + caps *cmsgs.Capabilities + value []byte + references []msgs.VarIdPos +} + +type update struct { + *cached + varUUId *common.VarUUId +} + +type unreached struct { + update *update + updates *[]*update } var maxCapsCap *cmsgs.Capabilities @@ -37,6 +49,148 @@ func NewVersionCache(roots map[common.VarUUId]*cmsgs.Capabilities) versionCache return cache } +func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { + actions := cTxn.Actions() + if cTxn.Retry() { + for idx, l := 0, actions.Len(); idx < l; idx++ { + action := actions.At(idx) + if which := action.Which(); which != cmsgs.CLIENTACTION_READ { + return fmt.Errorf("Retry transaction should only include reads. Found %v", which) + } + } + + } else { + for idx, l := 0, actions.Len(); idx < l; idx++ { + action := actions.At(idx) + switch action.Which() { + case cmsgs.CLIENTACTION_READ: + // do nothing + case cmsgs.CLIENTACTION_WRITE, cmsgs.CLIENTACTION_READWRITE: + vUUId := common.MakeVarUUId(action.VarId()) + if _, found := vc[*vUUId]; !found { + return fmt.Errorf("Transaction manipulates unknown object %v", vUUId) + } + + case cmsgs.CLIENTACTION_CREATE: + vUUId := common.MakeVarUUId(action.VarId()) + if _, found := vc[*vUUId]; found { + return fmt.Errorf("Transaction tries to create known object %v", vUUId) + } + + default: + return fmt.Errorf("Only read, write, readwrite or create actions allowed in client transaction, found %v", action.Which()) + } + } + } + return nil +} + +func (vc versionCache) ValueForWrite(vUUId *common.VarUUId, value []byte) []byte { + if vc == nil { + return value + } + if c, found := vc[*vUUId]; !found { + panic(fmt.Errorf("ValueForWrite called for unknown %v", vUUId)) + } else { + switch c.caps.Value() { + case cmsgs.VALUECAPABILITY_WRITE, cmsgs.VALUECAPABILITY_READWRITE: + return value + default: + return c.value + } + } +} + +func (vc versionCache) ReferencesWriteMask(vUUId *common.VarUUId) (bool, []uint32, []msgs.VarIdPos) { + if vc == nil || vUUId == nil { + return true, nil, nil + } + if c, found := vc[*vUUId]; !found { + panic(fmt.Errorf("ReferencesWriteMask called for unknown %v", vUUId)) + } else { + write := c.caps.References().Write() + switch write.Which() { + case cmsgs.CAPABILITIESREFERENCESWRITE_ALL: + return true, nil, c.references + default: + return false, write.Only().ToArray(), c.references + } + } +} + +func (vc versionCache) EnsureSubset(vUUId *common.VarUUId, cap cmsgs.Capabilities) bool { + if vc == nil { + return true + } + if c, found := vc[*vUUId]; found { + if c.caps == maxCapsCap { + return true + } + valueNew, valueOld := cap.Value(), c.caps.Value() + if valueNew > valueOld { + return false + } + + readNew, readOld := cap.References().Read(), c.caps.References().Read() + if readOld.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ONLY { + if readNew.Which() != cmsgs.CAPABILITIESREFERENCESREAD_ONLY { + return false + } + readNewOnly, readOldOnly := readNew.Only().ToArray(), readOld.Only().ToArray() + if len(readNewOnly) > len(readOldOnly) { + 
return false + } + common.SortUInt32(readNewOnly).Sort() + common.SortUInt32(readOldOnly).Sort() + for idx, indexNew := range readNewOnly { + indexOld := readOldOnly[0] + readOldOnly = readOldOnly[1:] + if indexNew < indexOld { + return false + } else { + for ; indexNew > indexOld && len(readOldOnly) > 0; readOldOnly = readOldOnly[1:] { + indexOld = readOldOnly[0] + } + if len(readNewOnly)-idx > len(readOldOnly) { + return false + } + } + } + } + + writeNew, writeOld := cap.References().Write(), c.caps.References().Write() + if writeOld.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ONLY { + if writeNew.Which() != cmsgs.CAPABILITIESREFERENCESWRITE_ONLY { + return false + } + writeNewOnly, writeOldOnly := writeNew.Only().ToArray(), writeOld.Only().ToArray() + if len(writeNewOnly) > len(writeOldOnly) { + return false + } + common.SortUInt32(writeNewOnly).Sort() + common.SortUInt32(writeOldOnly).Sort() + for idx, indexNew := range writeNewOnly { + indexOld := writeOldOnly[0] + writeOldOnly = writeOldOnly[1:] + if indexNew < indexOld { + return false + } else { + for ; indexNew > indexOld && len(writeOldOnly) > 0; writeOldOnly = writeOldOnly[1:] { + indexOld = writeOldOnly[0] + } + if len(writeNewOnly)-idx > len(writeOldOnly) { + return false + } + } + } + } + + return true + } else { + return true + } +} + func (vc versionCache) UpdateFromCommit(txnId *common.TxnId, outcome *msgs.Outcome) { clock := eng.VectorClockFromCap(outcome.Commit()) actions := outcome.Txn().Actions() @@ -60,37 +214,32 @@ func (vc versionCache) UpdateFromCommit(txnId *common.TxnId, outcome *msgs.Outco } } -type unreached struct { - cached *cached - action *msgs.Action - actions *[]*msgs.Action -} - -func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Update]*[]*msgs.Action { - l := updates.Len() - validUpdates := make(map[*msgs.Update]*[]*msgs.Action, l) +func (vc versionCache) UpdateFromAbort(updatesCap *msgs.Update_List) map[common.TxnId]*[]*update { + l := updatesCap.Len() + validUpdates := make(map[common.TxnId]*[]*update, l) unreachedMap := make(map[common.VarUUId]unreached, l) for idx := 0; idx < l; idx++ { - update := updates.At(idx) - txnId := common.MakeTxnId(update.TxnId()) - clock := eng.VectorClockFromCap(update.Clock()) - actions := update.Actions() - validActionsList := make([]*msgs.Action, 0, actions.Len()) - validActions := &validActionsList - validUpdates[&update] = validActions - - for idy, m := 0, actions.Len(); idy < m; idy++ { - action := actions.At(idy) - vUUId := common.MakeVarUUId(action.VarId()) + updateCap := updatesCap.At(idx) + txnId := common.MakeTxnId(updateCap.TxnId()) + clock := eng.VectorClockFromCap(updateCap.Clock()) + actionsCap := updateCap.Actions() + updatesList := make([]*update, 0, actionsCap.Len()) + updatesListPtr := &updatesList + validUpdates[*txnId] = updatesListPtr + + for idy, m := 0, actionsCap.Len(); idy < m; idy++ { + actionCap := actionsCap.At(idy) + vUUId := common.MakeVarUUId(actionCap.VarId()) clockElem := clock.At(vUUId) - switch action.Which() { + switch actionCap.Which() { case msgs.ACTION_MISSING: // In this context, ACTION_MISSING means we know there was // a write of vUUId by txnId, but we have no idea what the - // value written was. - //log.Printf("%v contains missing write action of %v\n", txnId, vUUId) + // value written was. The only safe thing we can do is + // remove it from the client. 
+ // log.Printf("%v contains missing write action of %v\n", txnId, vUUId) if c, found := vc[*vUUId]; found && c.txnId != nil { cmp := c.txnId.Compare(txnId) if cmp == common.EQ && clockElem != c.clockElem { @@ -99,11 +248,17 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda if clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) { c.txnId = nil c.clockElem = 0 - *validActions = append(*validActions, &action) + c.value = nil + c.references = nil + *updatesListPtr = append(*updatesListPtr, &update{ + cached: c, + varUUId: vUUId, + }) } } case msgs.ACTION_WRITE: + write := actionCap.Write() if c, found := vc[*vUUId]; found { cmp := c.txnId.Compare(txnId) if cmp == common.EQ && clockElem != c.clockElem { @@ -112,28 +267,28 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda if c.txnId == nil || clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) { c.txnId = txnId c.clockElem = clockElem - *validActions = append(*validActions, &action) - refs := action.Write().References() - worklist := []*msgs.VarIdPos_List{&refs} - for len(worklist) > 0 { - refs, worklist = *worklist[0], worklist[1:] - for idz, n := 0, refs.Len(); idz < n; idz++ { - ref := refs.At(idz) - caps := ref.Capabilities() - vUUId := common.MakeVarUUId(ref.Id()) - if c, found := vc[*vUUId]; found { - c.caps = mergeCaps(c.caps, &caps) - } else if ur, found := unreachedMap[*vUUId]; found { - delete(unreachedMap, *vUUId) - c := ur.cached - c.caps = &caps - vc[*vUUId] = c - *ur.actions = append(*ur.actions, ur.action) - refs1 := ur.action.Write().References() - worklist = append(worklist, &refs1) - } else { - vc[*vUUId] = &cached{caps: &caps} - } + c.value = write.Value() + refs := write.References().ToArray() + c.references = refs + *updatesListPtr = append(*updatesListPtr, &update{ + cached: c, + varUUId: vUUId, + }) + for ; len(refs) > 0; refs = refs[1:] { + ref := refs[0] + caps := ref.Capabilities() + vUUId := common.MakeVarUUId(ref.Id()) + if c, found := vc[*vUUId]; found { + c.caps = mergeCaps(c.caps, &caps) + } else if ur, found := unreachedMap[*vUUId]; found { + delete(unreachedMap, *vUUId) + c := ur.update.cached + c.caps = &caps + vc[*vUUId] = c + *ur.updates = append(*ur.updates, ur.update) + refs = append(refs, ur.update.references...) 
+ } else { + vc[*vUUId] = &cached{caps: &caps} } } } @@ -142,24 +297,28 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda } else { //log.Printf("%v contains write action of %v\n", txnId, vUUId) unreachedMap[*vUUId] = unreached{ - cached: &cached{ - txnId: txnId, - clockElem: clockElem, + update: &update{ + cached: &cached{ + txnId: txnId, + clockElem: clockElem, + value: write.Value(), + references: write.References().ToArray(), + }, + varUUId: vUUId, }, - action: &action, - actions: validActions, + updates: updatesListPtr, } } default: - panic(fmt.Sprintf("Unexpected action for %v on %v: %v", txnId, vUUId, action.Which())) + panic(fmt.Sprintf("Unexpected action for %v on %v: %v", txnId, vUUId, actionCap.Which())) } } } - for update, actions := range validUpdates { - if len(*actions) == 0 { - delete(validUpdates, update) + for txnId, updates := range validUpdates { + if len(*updates) == 0 { + delete(validUpdates, txnId) } } return validUpdates @@ -258,3 +417,32 @@ func mergeOnlies(seg *capn.Segment, a, b []uint32) capn.UInt32List { } return cap } + +func (u *update) Value() []byte { + if u.value == nil { + return nil + } + switch u.caps.Value() { + case cmsgs.VALUECAPABILITY_READ, cmsgs.VALUECAPABILITY_READWRITE: + return u.value + default: + return []byte{} + } +} + +func (u *update) ReferencesReadMask() []uint32 { + if u.value == nil { + return nil + } + read := u.caps.References().Read() + switch read.Which() { + case cmsgs.CAPABILITIESREFERENCESREAD_ALL: + mask := make([]uint32, len(u.references)) + for idx := range mask { + mask[idx] = uint32(idx) + } + return mask + default: + return read.Only().ToArray() + } +} diff --git a/configuration/configuration.go b/configuration/configuration.go index 426e7c3..789d4d6 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -225,8 +225,8 @@ func decodeConfiguration(decoder *json.Decoder) (*Configuration, error) { rootsMap[name] = server.EmptyStructVal rootsName = append(rootsName, name) } - SortUInt32(rootCapabilities.ReferencesReadOnly).Sort() - SortUInt32(rootCapabilities.ReferencesWriteOnly).Sort() + common.SortUInt32(rootCapabilities.ReferencesReadOnly).Sort() + common.SortUInt32(rootCapabilities.ReferencesWriteOnly).Sort() if rootCapabilities.ReferencesReadAll && len(rootCapabilities.ReferencesReadOnly) != 0 { return nil, fmt.Errorf("ReferencesReadAll and ReferencesReadOnly must be mutually exclusive for client fingerprint %v, root %s", fingerprint, name) } @@ -1002,10 +1002,3 @@ func (g *Generator) AddToSeg(seg *capn.Segment) msgs.Condition { condCap.SetGenerator(genCap) return condCap } - -type SortUInt32 []uint32 - -func (nums SortUInt32) Sort() { sort.Sort(nums) } -func (nums SortUInt32) Len() int { return len(nums) } -func (nums SortUInt32) Less(i, j int) bool { return nums[i] < nums[j] } -func (nums SortUInt32) Swap(i, j int) { nums[i], nums[j] = nums[j], nums[i] } diff --git a/network/connection.go b/network/connection.go index 6d6148f..cd80ba2 100644 --- a/network/connection.go +++ b/network/connection.go @@ -263,11 +263,11 @@ func (conn *Connection) handleMsg(msg connectionMsg) (terminate bool, err error) case connectionMsgSend: err = conn.sendMessage(msgT) case connectionMsgOutcomeReceived: - conn.outcomeReceived(msgT) + err = conn.outcomeReceived(msgT) case *connectionMsgTopologyChanged: err = conn.topologyChanged(msgT) case connectionMsgServerConnectionsChanged: - conn.serverConnectionsChanged(msgT) + err = conn.serverConnectionsChanged(msgT) case 
connectionMsgStatus: conn.status(msgT.StatusConsumer) default: @@ -746,17 +746,18 @@ func (cr *connectionRun) init(conn *Connection) { cr.Connection = conn } -func (cr *connectionRun) outcomeReceived(out connectionMsgOutcomeReceived) { +func (cr *connectionRun) outcomeReceived(out connectionMsgOutcomeReceived) error { if cr.currentState != cr { - return + return nil } - cr.submitter.SubmissionOutcomeReceived(out.sender, out.txnId, out.outcome) + err := cr.submitter.SubmissionOutcomeReceived(out.sender, out.txnId, out.outcome) if cr.submitterIdle != nil && cr.submitter.IsIdle() { si := cr.submitterIdle cr.submitterIdle = nil server.Log("Connection", cr.Connection, "outcomeReceived", si, "(submitterIdle)") si.maybeClose() } + return err } func (cr *connectionRun) start() (bool, error) { @@ -832,7 +833,10 @@ func (cr *connectionRun) topologyChanged(tc *connectionMsgTopologyChanged) error return errors.New("Client connection closed: roots have changed") } } - cr.submitter.TopologyChanged(topology) + if err := cr.submitter.TopologyChanged(topology); err != nil { + tc.maybeClose() + return err + } if cr.submitter.IsIdle() { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(client, submitter is idle)") tc.maybeClose() @@ -853,10 +857,11 @@ func (cr *connectionRun) topologyChanged(tc *connectionMsgTopologyChanged) error return nil } -func (cr *connectionRun) serverConnectionsChanged(servers map[common.RMId]paxos.Connection) { +func (cr *connectionRun) serverConnectionsChanged(servers map[common.RMId]paxos.Connection) error { if cr.submitter != nil { - cr.submitter.ServerConnectionsChanged(servers) + return cr.submitter.ServerConnectionsChanged(servers) } + return nil } func (cr *connectionRun) handleMsgFromClient(msg cmsgs.ClientMessage) error { @@ -868,26 +873,26 @@ func (cr *connectionRun) handleMsgFromClient(msg cmsgs.ClientMessage) error { switch which := msg.Which(); which { case cmsgs.CLIENTMESSAGE_HEARTBEAT: // do nothing + return nil case cmsgs.CLIENTMESSAGE_CLIENTTXNSUBMISSION: ctxn := msg.ClientTxnSubmission() origTxnId := common.MakeTxnId(ctxn.Id()) - cr.submitter.SubmitClientTransaction(&ctxn, func(clientOutcome *cmsgs.ClientTxnOutcome, err error) { + return cr.submitter.SubmitClientTransaction(&ctxn, func(clientOutcome *cmsgs.ClientTxnOutcome, err error) error { switch { case err != nil: - cr.clientTxnError(&ctxn, err, origTxnId) + return cr.clientTxnError(&ctxn, err, origTxnId) case clientOutcome == nil: // shutdown - return + return nil default: seg := capn.NewBuffer(nil) msg := cmsgs.NewRootClientMessage(seg) msg.SetClientTxnOutcome(*clientOutcome) - cr.sendMessage(server.SegToBytes(msg.Segment)) + return cr.sendMessage(server.SegToBytes(msg.Segment)) } }) default: return cr.maybeRestartConnection(fmt.Errorf("Unexpected message type received from client: %v", which)) } - return nil } func (cr *connectionRun) handleMsgFromServer(msg msgs.Message) error { diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go index ab83122..04809c0 100644 --- a/network/topologytransmogrifier.go +++ b/network/topologytransmogrifier.go @@ -1788,7 +1788,7 @@ func (task *targetConfig) attemptCreateRoots(rootCount int) (bool, configuration action.SetCreate() create := action.Create() create.SetValue([]byte{}) - create.SetReferences(seg.NewDataList(0)) + create.SetReferences(cmsgs.NewClientVarIdPosList(seg, 0)) root := &roots[idx] root.VarUUId = vUUId } diff --git a/txnengine/frame.go b/txnengine/frame.go index b6405c0..10a5d7e 100644 --- a/txnengine/frame.go +++ 
b/txnengine/frame.go
@@ -767,14 +767,16 @@ func (fo *frameOpen) createRollClientTxn() (*cmsgs.ClientTxn, map[common.VarUUId
 	}
 	posMap := make(map[common.VarUUId]*common.Positions)
 	posMap[*fo.v.UUId] = fo.v.positions
-	refVarList := seg.NewDataList(refs.Len())
+	refVarList := cmsgs.NewClientVarIdPosList(seg, refs.Len())
 	roll.SetReferences(refVarList)
 	for idx, l := 0, refs.Len(); idx < l; idx++ {
 		ref := refs.At(idx)
 		vUUId := common.MakeVarUUId(ref.Id())
 		pos := common.Positions(ref.Positions())
 		posMap[*vUUId] = &pos
-		refVarList.Set(idx, vUUId[:])
+		varIdPos := refVarList.At(idx)
+		varIdPos.SetVarId(vUUId[:])
+		varIdPos.SetCapabilities(ref.Capabilities())
 	}
 	fo.rollTxn = &ctxn
 	fo.rollTxnPos = posMap

From f9421daac294d7715ce764a50d43449c05813154 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Sat, 23 Jul 2016 08:07:59 +0100
Subject: [PATCH 24/78] Correct the rerolling logic, which I think will fix a bug where shutdowns were being ignored.

--HG--
branch : dev
---
 txnengine/frame.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/txnengine/frame.go b/txnengine/frame.go
index b6405c0..dda2fca 100644
--- a/txnengine/frame.go
+++ b/txnengine/frame.go
@@ -705,7 +705,7 @@ func (fo *frameOpen) maybeStartRoll() {
 			}
 			// fmt.Printf("r%v ", ow)
 			server.Log(fo.frame, "Roll finished: outcome", ow, "; err:", err)
-			if outcome == nil || outcome.Which() != msgs.OUTCOME_COMMIT {
+			if (outcome == nil && err != nil) || (outcome != nil && outcome.Which() != msgs.OUTCOME_COMMIT) {
 				fo.v.applyToVar(func() {
 					fo.rollActive = false
 					if err == AbortRollNotInPermutation {

From d327a3b3f4b553d8b6ad30d7ef937f9ba624bc64 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Sun, 24 Jul 2016 22:43:16 +0100
Subject: [PATCH 25/78] Rework VC so that it's cached as []byte, which avoids reserialization costs. Ref T42.
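In outline, the rework below keeps the serialised clock bytes alongside the
decoded form: a VectorClock is now built from []byte, decoded lazily on first
access via init(), and AsData() hands back the cached bytes untouched unless
the clock has been mutated. The following is a minimal, self-contained sketch
of that caching pattern only; the names here (LazyClock, FromData) are
illustrative rather than from the codebase, and encoding/json stands in for
the Cap'n Proto serialisation the real code uses:

    // Sketch of the cache-the-serialised-form pattern, under the
    // assumptions above: encoding/json replaces Cap'n Proto, and
    // string keys replace common.VarUUId.
    package main

    import (
        "encoding/json"
        "fmt"
    )

    type LazyClock struct {
        data    []byte            // cached serialised form
        elems   map[string]uint64 // decoded lazily on first use
        decoded bool
        dirty   bool // set once elems diverges from data
    }

    func FromData(data []byte) *LazyClock { return &LazyClock{data: data} }

    // init decodes data on first access, so a clock that is never
    // inspected is never deserialised.
    func (c *LazyClock) init() {
        if c.decoded {
            return
        }
        c.decoded = true
        c.elems = make(map[string]uint64)
        if len(c.data) > 0 {
            if err := json.Unmarshal(c.data, &c.elems); err != nil {
                panic(err) // corrupt clock data is unrecoverable
            }
        }
    }

    func (c *LazyClock) At(k string) uint64 { c.init(); return c.elems[k] }

    func (c *LazyClock) Bump(k string, n uint64) {
        c.init()
        c.elems[k] += n
        c.dirty = true
    }

    // AsData re-serialises only if the clock was mutated; a clock that
    // is merely read or passed through returns its original bytes.
    func (c *LazyClock) AsData() []byte {
        if !c.dirty {
            return c.data
        }
        data, err := json.Marshal(c.elems)
        if err != nil {
            panic(err)
        }
        c.data, c.dirty = data, false
        return c.data
    }

    func main() {
        clock := FromData([]byte(`{"x":1}`))
        fmt.Println(clock.At("x")) // 1 (decodes here, exactly once)
        clock.Bump("x", 2)
        fmt.Println(string(clock.AsData())) // {"x":3}
    }

The real VectorClock additionally splits the decoded state into initial,
adds, and changes maps so that Clone can share the decoded initial map
between copies; the sketch collapses that detail into a single map.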
--HG-- branch : T42 --- capnp/ballot.capnp | 3 +- capnp/ballot.capnp.go | 16 +++- capnp/outcome.capnp | 5 +- capnp/outcome.capnp.go | 34 ++++++--- capnp/var.capnp | 5 +- capnp/var.capnp.go | 52 ++++++++----- client/versioncache.go | 4 +- paxos/ballotaccumulator.go | 8 +- txnengine/ballot.go | 5 +- txnengine/frame.go | 4 +- txnengine/transaction.go | 9 ++- txnengine/var.go | 8 +- txnengine/vectorclock.go | 148 ++++++++++++++++++++++++------------- 13 files changed, 192 insertions(+), 109 deletions(-) diff --git a/capnp/ballot.capnp b/capnp/ballot.capnp index 0356b82..bd51112 100644 --- a/capnp/ballot.capnp +++ b/capnp/ballot.capnp @@ -5,12 +5,11 @@ $Go.import("goshawkdb.io/server/capnp"); @0x960e5f709149380d; -using VC = import "vectorclock.capnp"; using Txn = import "transaction.capnp"; struct Ballot { varId @0: Data; - clock @1: VC.VectorClock; + clock @1: Data; vote @2: Vote; } diff --git a/capnp/ballot.capnp.go b/capnp/ballot.capnp.go index 7cf9891..c53437a 100644 --- a/capnp/ballot.capnp.go +++ b/capnp/ballot.capnp.go @@ -18,8 +18,8 @@ func AutoNewBallot(s *C.Segment) Ballot { return Ballot(s.NewStructAR(0, 3)) } func ReadRootBallot(s *C.Segment) Ballot { return Ballot(s.Root(0).ToStruct()) } func (s Ballot) VarId() []byte { return C.Struct(s).GetObject(0).ToData() } func (s Ballot) SetVarId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } -func (s Ballot) Clock() VectorClock { return VectorClock(C.Struct(s).GetObject(1).ToStruct()) } -func (s Ballot) SetClock(v VectorClock) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s Ballot) Clock() []byte { return C.Struct(s).GetObject(1).ToData() } +func (s Ballot) SetClock(v []byte) { C.Struct(s).SetObject(1, s.Segment.NewData(v)) } func (s Ballot) Vote() Vote { return Vote(C.Struct(s).GetObject(2).ToStruct()) } func (s Ballot) SetVote(v Vote) { C.Struct(s).SetObject(2, C.Object(v)) } func (s Ballot) WriteJSON(w io.Writer) error { @@ -56,7 +56,11 @@ func (s Ballot) WriteJSON(w io.Writer) error { } { s := s.Clock() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -122,7 +126,11 @@ func (s Ballot) WriteCapLit(w io.Writer) error { } { s := s.Clock() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } diff --git a/capnp/outcome.capnp b/capnp/outcome.capnp index fd694d8..84e5a72 100644 --- a/capnp/outcome.capnp +++ b/capnp/outcome.capnp @@ -6,13 +6,12 @@ $Go.import("goshawkdb.io/server/capnp"); @0xe10cac715301f488; using Txn = import "transaction.capnp"; -using Vec = import "vectorclock.capnp"; struct Outcome { id @0: List(OutcomeId); txn @1: Txn.Txn; union { - commit @2: Vec.VectorClock; + commit @2: Data; abort :group { union { resubmit @3: Void; @@ -25,7 +24,7 @@ struct Outcome { struct Update { txnId @0: Data; actions @1: List(Txn.Action); - clock @2: Vec.VectorClock; + clock @2: Data; } struct OutcomeId { diff --git a/capnp/outcome.capnp.go b/capnp/outcome.capnp.go index e4c3194..8d5515b 100644 --- a/capnp/outcome.capnp.go +++ b/capnp/outcome.capnp.go @@ -35,10 +35,10 @@ func (s Outcome) Id() OutcomeId_List { return OutcomeId_List(C.Struct(s).G func (s Outcome) SetId(v OutcomeId_List) { C.Struct(s).SetObject(0, C.Object(v)) } func (s Outcome) Txn() Txn { return Txn(C.Struct(s).GetObject(1).ToStruct()) } func (s Outcome) SetTxn(v Txn) { C.Struct(s).SetObject(1, C.Object(v)) } -func (s Outcome) Commit() VectorClock { return 
VectorClock(C.Struct(s).GetObject(2).ToStruct()) } -func (s Outcome) SetCommit(v VectorClock) { +func (s Outcome) Commit() []byte { return C.Struct(s).GetObject(2).ToData() } +func (s Outcome) SetCommit(v []byte) { C.Struct(s).Set16(0, 0) - C.Struct(s).SetObject(2, C.Object(v)) + C.Struct(s).SetObject(2, s.Segment.NewData(v)) } func (s Outcome) Abort() OutcomeAbort { return OutcomeAbort(s) } func (s Outcome) SetAbort() { C.Struct(s).Set16(0, 1) } @@ -109,7 +109,11 @@ func (s Outcome) WriteJSON(w io.Writer) error { } { s := s.Commit() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -246,7 +250,11 @@ func (s Outcome) WriteCapLit(w io.Writer) error { } { s := s.Commit() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -351,8 +359,8 @@ func (s Update) TxnId() []byte { return C.Struct(s).GetObject(0).ToDa func (s Update) SetTxnId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } func (s Update) Actions() Action_List { return Action_List(C.Struct(s).GetObject(1)) } func (s Update) SetActions(v Action_List) { C.Struct(s).SetObject(1, C.Object(v)) } -func (s Update) Clock() VectorClock { return VectorClock(C.Struct(s).GetObject(2).ToStruct()) } -func (s Update) SetClock(v VectorClock) { C.Struct(s).SetObject(2, C.Object(v)) } +func (s Update) Clock() []byte { return C.Struct(s).GetObject(2).ToData() } +func (s Update) SetClock(v []byte) { C.Struct(s).SetObject(2, s.Segment.NewData(v)) } func (s Update) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -420,7 +428,11 @@ func (s Update) WriteJSON(w io.Writer) error { } { s := s.Clock() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -504,7 +516,11 @@ func (s Update) WriteCapLit(w io.Writer) error { } { s := s.Clock() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } diff --git a/capnp/var.capnp b/capnp/var.capnp index f546a9f..0a0d4a7 100644 --- a/capnp/var.capnp +++ b/capnp/var.capnp @@ -5,15 +5,14 @@ $Go.import("goshawkdb.io/server/capnp"); @0xc3ce226b914ee1eb; -using VC = import "vectorclock.capnp"; using Common = import "../../common/capnp/capabilities.capnp"; struct Var { id @0: Data; positions @1: List(UInt8); writeTxnId @2: Data; - writeTxnClock @3: VC.VectorClock; - writesClock @4: VC.VectorClock; + writeTxnClock @3: Data; + writesClock @4: Data; } struct VarIdPos { diff --git a/capnp/var.capnp.go b/capnp/var.capnp.go index d114b14..d4734b4 100644 --- a/capnp/var.capnp.go +++ b/capnp/var.capnp.go @@ -13,20 +13,20 @@ import ( type Var C.Struct -func NewVar(s *C.Segment) Var { return Var(s.NewStruct(0, 5)) } -func NewRootVar(s *C.Segment) Var { return Var(s.NewRootStruct(0, 5)) } -func AutoNewVar(s *C.Segment) Var { return Var(s.NewStructAR(0, 5)) } -func ReadRootVar(s *C.Segment) Var { return Var(s.Root(0).ToStruct()) } -func (s Var) Id() []byte { return C.Struct(s).GetObject(0).ToData() } -func (s Var) SetId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } -func (s Var) Positions() C.UInt8List { return C.UInt8List(C.Struct(s).GetObject(1)) } -func (s Var) SetPositions(v C.UInt8List) { C.Struct(s).SetObject(1, C.Object(v)) } -func (s Var) WriteTxnId() []byte { return C.Struct(s).GetObject(2).ToData() } -func (s Var) 
SetWriteTxnId(v []byte) { C.Struct(s).SetObject(2, s.Segment.NewData(v)) } -func (s Var) WriteTxnClock() VectorClock { return VectorClock(C.Struct(s).GetObject(3).ToStruct()) } -func (s Var) SetWriteTxnClock(v VectorClock) { C.Struct(s).SetObject(3, C.Object(v)) } -func (s Var) WritesClock() VectorClock { return VectorClock(C.Struct(s).GetObject(4).ToStruct()) } -func (s Var) SetWritesClock(v VectorClock) { C.Struct(s).SetObject(4, C.Object(v)) } +func NewVar(s *C.Segment) Var { return Var(s.NewStruct(0, 5)) } +func NewRootVar(s *C.Segment) Var { return Var(s.NewRootStruct(0, 5)) } +func AutoNewVar(s *C.Segment) Var { return Var(s.NewStructAR(0, 5)) } +func ReadRootVar(s *C.Segment) Var { return Var(s.Root(0).ToStruct()) } +func (s Var) Id() []byte { return C.Struct(s).GetObject(0).ToData() } +func (s Var) SetId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } +func (s Var) Positions() C.UInt8List { return C.UInt8List(C.Struct(s).GetObject(1)) } +func (s Var) SetPositions(v C.UInt8List) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s Var) WriteTxnId() []byte { return C.Struct(s).GetObject(2).ToData() } +func (s Var) SetWriteTxnId(v []byte) { C.Struct(s).SetObject(2, s.Segment.NewData(v)) } +func (s Var) WriteTxnClock() []byte { return C.Struct(s).GetObject(3).ToData() } +func (s Var) SetWriteTxnClock(v []byte) { C.Struct(s).SetObject(3, s.Segment.NewData(v)) } +func (s Var) WritesClock() []byte { return C.Struct(s).GetObject(4).ToData() } +func (s Var) SetWritesClock(v []byte) { C.Struct(s).SetObject(4, s.Segment.NewData(v)) } func (s Var) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -117,7 +117,11 @@ func (s Var) WriteJSON(w io.Writer) error { } { s := s.WriteTxnClock() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -132,7 +136,11 @@ func (s Var) WriteJSON(w io.Writer) error { } { s := s.WritesClock() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -239,7 +247,11 @@ func (s Var) WriteCapLit(w io.Writer) error { } { s := s.WriteTxnClock() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -254,7 +266,11 @@ func (s Var) WriteCapLit(w io.Writer) error { } { s := s.WritesClock() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } diff --git a/client/versioncache.go b/client/versioncache.go index c9f1fb1..0bbb075 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -19,7 +19,7 @@ func NewVersionCache() versionCache { } func (vc versionCache) UpdateFromCommit(txnId *common.TxnId, outcome *msgs.Outcome) { - clock := eng.VectorClockFromCap(outcome.Commit()) + clock := eng.VectorClockFromData(outcome.Commit()) actions := outcome.Txn().Actions() for idx, l := 0, actions.Len(); idx < l; idx++ { action := actions.At(idx) @@ -44,7 +44,7 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda for idx, l := 0, updates.Len(); idx < l; idx++ { update := updates.At(idx) txnId := common.MakeTxnId(update.TxnId()) - clock := eng.VectorClockFromCap(update.Clock()) + clock := eng.VectorClockFromData(update.Clock()) actions := update.Actions() validActions := make([]*msgs.Action, 0, actions.Len()) diff --git a/paxos/ballotaccumulator.go 
b/paxos/ballotaccumulator.go index 3c6aa51..8d2bcdc 100644 --- a/paxos/ballotaccumulator.go +++ b/paxos/ballotaccumulator.go @@ -219,7 +219,7 @@ func (ba *BallotAccumulator) determineOutcome() *outcomeEqualId { } else { outcome.SetTxn(*ba.Txn) - outcome.SetCommit(combinedClock.AddToSeg(seg)) + outcome.SetCommit(combinedClock.AsData()) } ba.outcome = (*outcomeEqualId)(&outcome) @@ -279,9 +279,9 @@ func (vb *varBallot) combineVote(rmBal *rmBallot, br badReads) { case cur.Vote == eng.Commit && new.Vote == eng.Commit: cur.Clock.MergeInMax(new.Clock) - case cur.Vote == eng.AbortDeadlock && cur.Clock.Len == 0: + case cur.Vote == eng.AbortDeadlock && cur.Clock.Len() == 0: // Do nothing - ignore the new ballot - case new.Vote == eng.AbortDeadlock && new.Clock.Len == 0: + case new.Vote == eng.AbortDeadlock && new.Clock.Len() == 0: // This has been created by abort proposer. This trumps everything. cur.Vote = eng.AbortDeadlock cur.Clock = new.Clock @@ -521,7 +521,7 @@ func (br badReads) AddToSeg(seg *capn.Segment) msgs.Update_List { } clock.SetVarIdMax(bra.vUUId, bra.clockElem) } - update.SetClock(clock.AddToSeg(seg)) + update.SetClock(clock.AsData()) } return updates diff --git a/txnengine/ballot.go b/txnengine/ballot.go index def3a94..9402577 100644 --- a/txnengine/ballot.go +++ b/txnengine/ballot.go @@ -35,6 +35,7 @@ type Ballot struct { func NewBallot(vUUId *common.VarUUId, vote Vote, clock *VectorClock) *Ballot { if clock != nil { + clock.AsData() // force serialisation now and hopefully therefore fewer times clock = clock.Clone() } return &Ballot{ @@ -50,7 +51,7 @@ func BallotFromCap(ballotCap *msgs.Ballot) *Ballot { voteCap := ballotCap.Vote() ballot := &Ballot{ VarUUId: common.MakeVarUUId(ballotCap.VarId()), - Clock: VectorClockFromCap(ballotCap.Clock()), + Clock: VectorClockFromData(ballotCap.Clock()), Vote: Vote(voteCap.Which()), BallotCap: ballotCap, VoteCap: &voteCap, @@ -76,7 +77,7 @@ func (ballot *Ballot) CreateBadReadCap(txnId *common.TxnId, actions *msgs.Action func (ballot *Ballot) AddToSeg(seg *capn.Segment) msgs.Ballot { ballotCap := msgs.NewBallot(seg) ballotCap.SetVarId(ballot.VarUUId[:]) - ballotCap.SetClock(ballot.Clock.AddToSeg(seg)) + ballotCap.SetClock(ballot.Clock.AsData()) if ballot.VoteCap == nil { voteCap := msgs.NewVote(seg) diff --git a/txnengine/frame.go b/txnengine/frame.go index dda2fca..84bf3ad 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -677,7 +677,7 @@ func (fo *frameOpen) maybeCreateChild() { func (fo *frameOpen) maybeScheduleRoll() { // do not check vm.RollAllowed here. 
if !fo.rollScheduled && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && - (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { + (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { fo.rollScheduled = true fo.v.vm.ScheduleCallback(func() { fo.v.applyToVar(func() { @@ -690,7 +690,7 @@ func (fo *frameOpen) maybeScheduleRoll() { func (fo *frameOpen) maybeStartRoll() { if fo.v.vm.RollAllowed && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && - (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { + (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { fo.rollActive = true go func() { server.Log(fo.frame, "Starting roll") diff --git a/txnengine/transaction.go b/txnengine/transaction.go index 9d103c8..5c9b763 100644 --- a/txnengine/transaction.go +++ b/txnengine/transaction.go @@ -161,8 +161,10 @@ func ImmigrationTxnFromCap(exe *dispatcher.Executor, vd *VarDispatcher, stateCha action.writeTxnActions = &txnActions positions := varCap.Positions() action.createPositions = (*common.Positions)(&positions) - action.outcomeClock = VectorClockFromCap(varCap.WriteTxnClock()) - action.writesClock = VectorClockFromCap(varCap.WritesClock()) + action.outcomeClock = VectorClockFromData(varCap.WriteTxnClock()) + action.outcomeClock.init() + action.writesClock = VectorClockFromData(varCap.WritesClock()) + action.writesClock.init() actionsMap[*action.vUUId] = action } @@ -535,7 +537,8 @@ func (tro *txnReceiveOutcome) BallotOutcomeReceived(outcome *msgs.Outcome) { } switch outcome.Which() { case msgs.OUTCOME_COMMIT: - tro.outcomeClock = VectorClockFromCap(outcome.Commit()) + tro.outcomeClock = VectorClockFromData(outcome.Commit()) + tro.outcomeClock.init() /* excess := tro.outcomeClock.Len - tro.TxnCap.Actions().Len() fmt.Printf("%v ", excess) diff --git a/txnengine/var.go b/txnengine/var.go index 8723658..be4e77e 100644 --- a/txnengine/var.go +++ b/txnengine/var.go @@ -46,8 +46,8 @@ func VarFromData(data []byte, exe *dispatcher.Executor, db *db.Databases, vm *Va } writeTxnId := common.MakeTxnId(varCap.WriteTxnId()) - writeTxnClock := VectorClockFromCap(varCap.WriteTxnClock()) - writesClock := VectorClockFromCap(varCap.WritesClock()) + writeTxnClock := VectorClockFromData(varCap.WriteTxnClock()) + writesClock := VectorClockFromData(varCap.WritesClock()) server.Log(v.UUId, "Restored", writeTxnId) if result, err := db.ReadonlyTransaction(func(rtxn *mdbs.RTxn) interface{} { @@ -250,8 +250,8 @@ func (v *Var) maybeWriteFrame(f *frame, action *localAction, positions *common.P } varCap.SetWriteTxnId(f.frameTxnId[:]) - varCap.SetWriteTxnClock(f.frameTxnClock.AddToSeg(varSeg)) - varCap.SetWritesClock(f.frameWritesClock.AddToSeg(varSeg)) + varCap.SetWriteTxnClock(f.frameTxnClock.AsData()) + varCap.SetWritesClock(f.frameWritesClock.AsData()) varData := server.SegToBytes(varSeg) txnBytes := action.TxnRootBytes() diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index 72610d0..2c65a55 100644 --- a/txnengine/vectorclock.go 
+++ b/txnengine/vectorclock.go @@ -4,6 +4,7 @@ import ( "fmt" capn "github.com/glycerine/go-capnproto" "goshawkdb.io/common" + "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" ) @@ -12,57 +13,86 @@ const ( ) type VectorClock struct { - cap *msgs.VectorClock + data []byte initial map[common.VarUUId]uint64 adds map[common.VarUUId]uint64 changes map[common.VarUUId]uint64 - Len int + length int + inited bool } -func VectorClockFromCap(vcCap msgs.VectorClock) *VectorClock { - l := vcCap.VarUuids().Len() - vc := &VectorClock{ - cap: &vcCap, - initial: make(map[common.VarUUId]uint64, l), - Len: l, +func VectorClockFromData(vcData []byte) *VectorClock { + return &VectorClock{data: vcData} +} + +func NewVectorClock() *VectorClock { + return &VectorClock{ + data: []byte{}, + inited: true, } +} + +func (vc *VectorClock) init() { + if vc == nil || vc.inited { + return + } + vc.inited = true + if len(vc.data) == 0 { + return + } + seg, _, err := capn.ReadFromMemoryZeroCopy(vc.data) + if err != nil { + panic(fmt.Sprintf("Error when decoding vector clock: %v", err)) + } + vcCap := msgs.ReadRootVectorClock(seg) + l := vcCap.VarUuids().Len() + vc.length = l + vc.initial = make(map[common.VarUUId]uint64, l) keys := vcCap.VarUuids() values := vcCap.Values() for idx, l := 0, keys.Len(); idx < l; idx++ { k := common.MakeVarUUId(keys.At(idx)) vc.initial[*k] = values.At(idx) } - return vc -} - -func NewVectorClock() *VectorClock { - return &VectorClock{} } func (vcA *VectorClock) Clone() *VectorClock { - adds, changes := vcA.adds, vcA.changes - if len(adds) > 0 { - adds = make(map[common.VarUUId]uint64, len(adds)) + if vcA == nil { + return nil + } + if !vcA.inited { + return VectorClockFromData(vcA.data) + } + vcB := &VectorClock{ + data: vcA.data, + initial: vcA.initial, + length: vcA.length, + inited: true, + } + if len(vcA.adds) > 0 { + adds := make(map[common.VarUUId]uint64, len(vcA.adds)) for k, v := range vcA.adds { adds[k] = v } + vcB.adds = adds } - if len(changes) > 0 { - changes = make(map[common.VarUUId]uint64, len(changes)) + if len(vcA.changes) > 0 { + changes := make(map[common.VarUUId]uint64, len(vcA.changes)) for k, v := range vcA.changes { changes[k] = v } + vcB.changes = changes } - return &VectorClock{ - cap: vcA.cap, - initial: vcA.initial, - adds: adds, - changes: changes, - Len: vcA.Len, - } + return vcB +} + +func (vc *VectorClock) Len() int { + vc.init() + return vc.length } func (vc *VectorClock) ForEach(it func(*common.VarUUId, uint64) bool) bool { + vc.init() for k, v := range vc.adds { if !it(&k, v) { return false @@ -89,7 +119,10 @@ func (vc *VectorClock) ForEach(it func(*common.VarUUId, uint64) bool) bool { } func (vc *VectorClock) String() string { - str := fmt.Sprintf("VC:(%v)", vc.Len) + if !vc.inited { + return "VC:(undecoded)" + } + str := fmt.Sprintf("VC:(%v)", vc.Len()) vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { str += fmt.Sprintf(" %v:%v", vUUId, v) return true @@ -98,6 +131,7 @@ func (vc *VectorClock) String() string { } func (vc *VectorClock) At(vUUId *common.VarUUId) uint64 { + vc.init() if value, found := vc.adds[*vUUId]; found { return value } else if value, found := vc.changes[*vUUId]; found { @@ -109,13 +143,14 @@ func (vc *VectorClock) At(vUUId *common.VarUUId) uint64 { } func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { + vc.init() if _, found := vc.adds[*vUUId]; found { delete(vc.adds, *vUUId) - vc.Len-- + vc.length-- return vc } else if ch, found := vc.changes[*vUUId]; found { if ch != deleted { - vc.Len-- + vc.length-- 
vc.changes[*vUUId] = deleted } return vc @@ -124,19 +159,20 @@ func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { vc.changes = make(map[common.VarUUId]uint64) } vc.changes[*vUUId] = deleted - vc.Len-- + vc.length-- } return vc } func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { + vc.init() if old, found := vc.adds[*vUUId]; found { vc.adds[*vUUId] = old + inc return vc } else if old, found := vc.changes[*vUUId]; found { if old == deleted { vc.changes[*vUUId] = inc - vc.Len++ + vc.length++ } else { vc.changes[*vUUId] = old + inc } @@ -152,12 +188,13 @@ func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { vc.adds = make(map[common.VarUUId]uint64) } vc.adds[*vUUId] = inc - vc.Len++ + vc.length++ return vc } } func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { + vc.init() if old, found := vc.adds[*vUUId]; found { if v > old { vc.adds[*vUUId] = v @@ -168,7 +205,7 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { if v > old { vc.changes[*vUUId] = v if old == deleted { - vc.Len++ + vc.length++ } return true } @@ -187,12 +224,14 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { vc.adds = make(map[common.VarUUId]uint64) } vc.adds[*vUUId] = v - vc.Len++ + vc.length++ return true } } func (vcA *VectorClock) MergeInMax(vcB *VectorClock) bool { + vcA.init() + vcB.init() changed := false vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { // put "|| changed" last to avoid short-circuit @@ -203,13 +242,15 @@ func (vcA *VectorClock) MergeInMax(vcB *VectorClock) bool { } func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { + vcA.init() + vcB.init() changed := false vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { if _, found := vcA.adds[*vUUId]; found { return true } else if ch, found := vcA.changes[*vUUId]; found { if ch == deleted { - vcA.Len++ + vcA.length++ vcA.changes[*vUUId] = v changed = true } @@ -217,7 +258,7 @@ func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { } else if _, found := vcA.initial[*vUUId]; found { return true } else { - vcA.Len++ + vcA.length++ if vcA.adds == nil { vcA.adds = make(map[common.VarUUId]uint64) } @@ -230,17 +271,18 @@ func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { } func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { + vc.init() if old, found := vc.adds[*vUUId]; found { if old <= v { delete(vc.adds, *vUUId) - vc.Len-- + vc.length-- return true } return false } else if old, found := vc.changes[*vUUId]; found { if old != deleted && old <= v { vc.changes[*vUUId] = deleted - vc.Len-- + vc.length-- return true } return false @@ -250,7 +292,7 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { vc.changes = make(map[common.VarUUId]uint64) } vc.changes[*vUUId] = deleted - vc.Len-- + vc.length-- return true } return false @@ -259,8 +301,10 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { } func (vcA *VectorClock) LessThan(vcB *VectorClock) bool { + vcA.init() + vcB.init() // 1. If A has more elems than B then A cannot be < B - if vcA.Len > vcB.Len { + if vcA.length > vcB.length { return false } ltFound := false @@ -278,28 +322,26 @@ func (vcA *VectorClock) LessThan(vcB *VectorClock) bool { } // 3. Everything in A is also in B and <= B. 
If A == B for // everything in A, then B must be > A if len(B) > len(A) - return ltFound || vcB.Len > vcA.Len + return ltFound || vcB.length > vcA.length } -func (vc *VectorClock) AddToSeg(seg *capn.Segment) msgs.VectorClock { +func (vc *VectorClock) AsData() []byte { if vc == nil { - vcCap := msgs.NewVectorClock(seg) - vcCap.SetVarUuids(seg.NewDataList(0)) - vcCap.SetValues(seg.NewUInt64List(0)) - return vcCap + return []byte{} } - if vc.cap != nil && len(vc.adds) == 0 && len(vc.changes) == 0 { - return *vc.cap + if len(vc.adds) == 0 && len(vc.changes) == 0 { + return vc.data } - vcCap := msgs.NewVectorClock(seg) - vUUIds := seg.NewDataList(vc.Len) - values := seg.NewUInt64List(vc.Len) + seg := capn.NewBuffer(nil) + vcCap := msgs.NewRootVectorClock(seg) + vUUIds := seg.NewDataList(vc.length) + values := seg.NewUInt64List(vc.length) vcCap.SetVarUuids(vUUIds) vcCap.SetValues(values) idx := 0 - initial := make(map[common.VarUUId]uint64, vc.Len) + initial := make(map[common.VarUUId]uint64, vc.length) vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { initial[*vUUId] = v vUUIds.Set(idx, vUUId[:]) @@ -311,7 +353,7 @@ func (vc *VectorClock) AddToSeg(seg *capn.Segment) msgs.VectorClock { vc.initial = initial vc.adds = nil vc.changes = nil - vc.cap = &vcCap + vc.data = server.SegToBytes(seg) - return vcCap + return vc.data } From 21c1652c4ba4c8eb8d14960935e8fe6aa03b87a6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 25 Jul 2016 20:38:45 +0100 Subject: [PATCH 26/78] Figured out even more ways to do less work. Ref T42. --HG-- branch : T42 --- paxos/ballotaccumulator.go | 39 ++++++++++++---------- txnengine/ballot.go | 19 ++++++----- txnengine/vectorclock.go | 68 +++++++++++++++++++++++++------------- 3 files changed, 76 insertions(+), 50 deletions(-) diff --git a/paxos/ballotaccumulator.go b/paxos/ballotaccumulator.go index 8d2bcdc..f5f4e46 100644 --- a/paxos/ballotaccumulator.go +++ b/paxos/ballotaccumulator.go @@ -62,6 +62,7 @@ func NewBallotAccumulator(txnId *common.TxnId, txn *msgs.Txn) *BallotAccumulator type varBallot struct { vUUId *common.VarUUId result *eng.Ballot + clock *eng.VectorClock rmToBallot rmBallots voters int } @@ -258,71 +259,74 @@ func (ba *BallotAccumulator) Status(sc *server.StatusConsumer) { } func (vb *varBallot) CalculateResult(br badReads, clock *eng.VectorClock) { - vb.result = eng.NewBallot(vb.vUUId, eng.Commit, eng.NewVectorClock()) + vb.result = eng.NewBallot(vb.vUUId, eng.Commit, nil) + vb.clock = eng.NewVectorClock() for _, rmBal := range vb.rmToBallot { vb.combineVote(rmBal, br) } + vb.result.ClockData = vb.clock.AsData() if !vb.result.Aborted() { - clock.MergeInMax(vb.result.Clock) + clock.MergeInMax(vb.clock) } } func (vb *varBallot) combineVote(rmBal *rmBallot, br badReads) { cur := vb.result new := rmBal.ballot + newClock := eng.VectorClockFromData(new.ClockData) if new.Vote == eng.AbortBadRead { - br.combine(rmBal) + br.combine(rmBal, newClock) } switch { case cur.Vote == eng.Commit && new.Vote == eng.Commit: - cur.Clock.MergeInMax(new.Clock) + vb.clock.MergeInMax(newClock) - case cur.Vote == eng.AbortDeadlock && cur.Clock.Len() == 0: + case cur.Vote == eng.AbortDeadlock && vb.clock.Len() == 0: // Do nothing - ignore the new ballot - case new.Vote == eng.AbortDeadlock && new.Clock.Len() == 0: + case new.Vote == eng.AbortDeadlock && newClock.Len() == 0: // This has been created by abort proposer. This trumps everything. 
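// Aside (illustrative sketch, not part of the patch): VectorClockFromData
// above is an instance of lazy decoding -- keep the serialised bytes and
// only decode them on first structured access, caching the result for all
// later accesses. The type below (lazyClock with a fake decode) is a
// hypothetical stand-in for the capnproto-backed VectorClock.
package main

import "fmt"

type lazyClock struct {
	data    []byte
	decoded bool
	elems   map[byte]uint64
}

func (lc *lazyClock) decode() {
	if lc.decoded {
		return
	}
	lc.decoded = true
	lc.elems = make(map[byte]uint64)
	// Stand-in for the capnproto decode: count occurrences of each key.
	for _, k := range lc.data {
		lc.elems[k]++
	}
}

func (lc *lazyClock) At(key byte) uint64 {
	lc.decode() // pay the decode cost only when structure is needed
	return lc.elems[key]
}

func main() {
	lc := &lazyClock{data: []byte{1, 1, 2}}
	fmt.Println(lc.At(1)) // 2: this first access triggers the decode
	fmt.Println(lc.At(2)) // 1: served from the cached map
}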
cur.Vote = eng.AbortDeadlock - cur.Clock = new.Clock + vb.clock = newClock case cur.Vote == eng.Commit: // new.Vote != eng.Commit otherwise we'd have hit first case. cur.Vote = new.Vote - cur.Clock = new.Clock.Clone() + vb.clock = newClock.Clone() case new.Vote == eng.Commit: // But we know cur.Vote != eng.Commit. Do nothing. case new.Vote == eng.AbortDeadlock && cur.Vote == eng.AbortDeadlock: - cur.Clock.MergeInMax(new.Clock) + vb.clock.MergeInMax(newClock) case new.Vote == eng.AbortDeadlock && cur.Vote == eng.AbortBadRead && - new.Clock.At(vb.vUUId) < cur.Clock.At(vb.vUUId): + newClock.At(vb.vUUId) < vb.clock.At(vb.vUUId): // The new Deadlock is strictly in the past of the current // BadRead, so we stay on the badread. - cur.Clock.MergeInMax(new.Clock) + vb.clock.MergeInMax(newClock) case new.Vote == eng.AbortDeadlock && cur.Vote == eng.AbortBadRead: // The new Deadlock is equal or greater than (by clock local // elem) than the current Badread. We should switch to the // Deadlock cur.Vote = eng.AbortDeadlock - cur.Clock.MergeInMax(new.Clock) + vb.clock.MergeInMax(newClock) case cur.Vote == eng.AbortBadRead: // && new.Vote == eng.AbortBadRead - cur.Clock.MergeInMax(new.Clock) + vb.clock.MergeInMax(newClock) - case new.Clock.At(vb.vUUId) > cur.Clock.At(vb.vUUId): + case newClock.At(vb.vUUId) > vb.clock.At(vb.vUUId): // && cur.Vote == AbortDeadlock && new.Vote == AbortBadRead. The // new BadRead is strictly in the future of the cur Deadlock, so // we should switch to the BadRead. cur.Vote = eng.AbortBadRead - cur.Clock.MergeInMax(new.Clock) + vb.clock.MergeInMax(newClock) default: // cur.Vote == AbortDeadlock && new.Vote == AbortBadRead. - cur.Clock.MergeInMax(new.Clock) + vb.clock.MergeInMax(newClock) } } @@ -363,8 +367,7 @@ func NewBadReads() badReads { return make(map[common.VarUUId]*badReadAction) } -func (br badReads) combine(rmBal *rmBallot) { - clock := rmBal.ballot.Clock +func (br badReads) combine(rmBal *rmBallot, clock *eng.VectorClock) { badRead := rmBal.ballot.VoteCap.AbortBadRead() txnId := common.MakeTxnId(badRead.TxnId()) actions := badRead.TxnActions() diff --git a/txnengine/ballot.go b/txnengine/ballot.go index 9402577..0fa3e4c 100644 --- a/txnengine/ballot.go +++ b/txnengine/ballot.go @@ -27,31 +27,32 @@ func (v Vote) ToVoteEnum() msgs.VoteEnum { type Ballot struct { VarUUId *common.VarUUId - Clock *VectorClock + ClockData []byte Vote Vote BallotCap *msgs.Ballot VoteCap *msgs.Vote } func NewBallot(vUUId *common.VarUUId, vote Vote, clock *VectorClock) *Ballot { - if clock != nil { - clock.AsData() // force serialisation now and hopefully therefore fewer times - clock = clock.Clone() - } - return &Ballot{ + ballot := &Ballot{ VarUUId: vUUId, - Clock: clock, Vote: vote, BallotCap: nil, VoteCap: nil, } + if clock == nil { + ballot.ClockData = []byte{} + } else { + ballot.ClockData = clock.AsData() + } + return ballot } func BallotFromCap(ballotCap *msgs.Ballot) *Ballot { voteCap := ballotCap.Vote() ballot := &Ballot{ VarUUId: common.MakeVarUUId(ballotCap.VarId()), - Clock: VectorClockFromData(ballotCap.Clock()), + ClockData: ballotCap.Clock(), Vote: Vote(voteCap.Which()), BallotCap: ballotCap, VoteCap: &voteCap, @@ -77,7 +78,7 @@ func (ballot *Ballot) CreateBadReadCap(txnId *common.TxnId, actions *msgs.Action func (ballot *Ballot) AddToSeg(seg *capn.Segment) msgs.Ballot { ballotCap := msgs.NewBallot(seg) ballotCap.SetVarId(ballot.VarUUId[:]) - ballotCap.SetClock(ballot.Clock.AsData()) + ballotCap.SetClock(ballot.ClockData) if ballot.VoteCap == nil { voteCap := 
msgs.NewVote(seg) diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index 2c65a55..5fa5e6d 100644 --- a/txnengine/vectorclock.go +++ b/txnengine/vectorclock.go @@ -22,7 +22,9 @@ type VectorClock struct { } func VectorClockFromData(vcData []byte) *VectorClock { - return &VectorClock{data: vcData} + return &VectorClock{ + data: vcData, + } } func NewVectorClock() *VectorClock { @@ -147,11 +149,13 @@ func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { if _, found := vc.adds[*vUUId]; found { delete(vc.adds, *vUUId) vc.length-- + vc.data = nil return vc } else if ch, found := vc.changes[*vUUId]; found { if ch != deleted { vc.length-- vc.changes[*vUUId] = deleted + vc.data = nil } return vc } else if _, found := vc.initial[*vUUId]; found { @@ -160,6 +164,7 @@ func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { } vc.changes[*vUUId] = deleted vc.length-- + vc.data = nil } return vc } @@ -168,6 +173,7 @@ func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { vc.init() if old, found := vc.adds[*vUUId]; found { vc.adds[*vUUId] = old + inc + vc.data = nil return vc } else if old, found := vc.changes[*vUUId]; found { if old == deleted { @@ -176,12 +182,14 @@ func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { } else { vc.changes[*vUUId] = old + inc } + vc.data = nil return vc } else if old, found := vc.initial[*vUUId]; found { if vc.changes == nil { vc.changes = make(map[common.VarUUId]uint64) } vc.changes[*vUUId] = old + inc + vc.data = nil return vc } else { if vc.adds == nil { @@ -189,6 +197,7 @@ func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { } vc.adds[*vUUId] = inc vc.length++ + vc.data = nil return vc } } @@ -198,6 +207,7 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { if old, found := vc.adds[*vUUId]; found { if v > old { vc.adds[*vUUId] = v + vc.data = nil return true } return false @@ -207,6 +217,7 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { if old == deleted { vc.length++ } + vc.data = nil return true } return false @@ -216,6 +227,7 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { vc.changes = make(map[common.VarUUId]uint64) } vc.changes[*vUUId] = v + vc.data = nil return true } return false @@ -225,12 +237,18 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { } vc.adds[*vUUId] = v vc.length++ + vc.data = nil return true } } func (vcA *VectorClock) MergeInMax(vcB *VectorClock) bool { vcA.init() + if vcA.length == 0 && vcB.data != nil { + vcA.inited = false + vcA.data = vcB.data + return len(vcA.data) > 0 + } vcB.init() changed := false vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { @@ -267,6 +285,9 @@ func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { return true } }) + if changed { + vcA.data = nil + } return changed } @@ -276,6 +297,7 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { if old <= v { delete(vc.adds, *vUUId) vc.length-- + vc.data = nil return true } return false @@ -283,6 +305,7 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { if old != deleted && old <= v { vc.changes[*vUUId] = deleted vc.length-- + vc.data = nil return true } return false @@ -293,6 +316,7 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { } vc.changes[*vUUId] = deleted vc.length-- + vc.data = nil return true } return false @@ -330,30 +354,28 @@ func 
(vc *VectorClock) AsData() []byte { return []byte{} } - if len(vc.adds) == 0 && len(vc.changes) == 0 { - return vc.data - } + if vc.data == nil { - seg := capn.NewBuffer(nil) - vcCap := msgs.NewRootVectorClock(seg) - vUUIds := seg.NewDataList(vc.length) - values := seg.NewUInt64List(vc.length) - vcCap.SetVarUuids(vUUIds) - vcCap.SetValues(values) - idx := 0 - initial := make(map[common.VarUUId]uint64, vc.length) - vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - initial[*vUUId] = v - vUUIds.Set(idx, vUUId[:]) - values.Set(idx, v) - idx++ - return true - }) + if vc.length == 0 { + vc.data = []byte{} - vc.initial = initial - vc.adds = nil - vc.changes = nil - vc.data = server.SegToBytes(seg) + } else { + seg := capn.NewBuffer(nil) + vcCap := msgs.NewRootVectorClock(seg) + vUUIds := seg.NewDataList(vc.length) + values := seg.NewUInt64List(vc.length) + vcCap.SetVarUuids(vUUIds) + vcCap.SetValues(values) + idx := 0 + vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + vUUIds.Set(idx, vUUId[:]) + values.Set(idx, v) + idx++ + return true + }) + vc.data = server.SegToBytes(seg) + } + } return vc.data } From f9fa8c002143c82c6da44a48c1a3fb168bf9cd57 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 29 Jul 2016 17:24:44 +0100 Subject: [PATCH 27/78] Well this is a hell of a lot nicer, and I think this shows the path that we're going to take going forwards. Ref T42. --HG-- branch : T42 --- client/versioncache.go | 4 +- paxos/ballotaccumulator.go | 68 ++++--- paxos/proposermanager.go | 2 +- txnengine/ballot.go | 87 +++++---- txnengine/frame.go | 77 ++++---- txnengine/transaction.go | 24 +-- txnengine/var.go | 8 +- txnengine/vectorclock.go | 362 +++++++++++++++++++++---------------- 8 files changed, 363 insertions(+), 269 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index 0bbb075..f516a78 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -19,7 +19,7 @@ func NewVersionCache() versionCache { } func (vc versionCache) UpdateFromCommit(txnId *common.TxnId, outcome *msgs.Outcome) { - clock := eng.VectorClockFromData(outcome.Commit()) + clock := eng.VectorClockFromData(outcome.Commit(), false) actions := outcome.Txn().Actions() for idx, l := 0, actions.Len(); idx < l; idx++ { action := actions.At(idx) @@ -44,7 +44,7 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda for idx, l := 0, updates.Len(); idx < l; idx++ { update := updates.At(idx) txnId := common.MakeTxnId(update.TxnId()) - clock := eng.VectorClockFromData(update.Clock()) + clock := eng.VectorClockFromData(update.Clock(), false) actions := update.Actions() validActions := make([]*msgs.Action, 0, actions.Len()) diff --git a/paxos/ballotaccumulator.go b/paxos/ballotaccumulator.go index f5f4e46..06beab1 100644 --- a/paxos/ballotaccumulator.go +++ b/paxos/ballotaccumulator.go @@ -62,7 +62,6 @@ func NewBallotAccumulator(txnId *common.TxnId, txn *msgs.Txn) *BallotAccumulator type varBallot struct { vUUId *common.VarUUId result *eng.Ballot - clock *eng.VectorClock rmToBallot rmBallots voters int } @@ -170,7 +169,7 @@ func (ba *BallotAccumulator) determineOutcome() *outcomeEqualId { } ba.dirty = false - combinedClock := eng.NewVectorClock() + combinedClock := eng.NewVectorClock().AsMutable() aborted, deadlock := false, false vUUIds := common.VarUUIds(make([]*common.VarUUId, 0, len(ba.vUUIdToBallots))) @@ -258,75 +257,89 @@ func (ba *BallotAccumulator) Status(sc *server.StatusConsumer) { sc.Join() } -func (vb *varBallot) CalculateResult(br badReads, clock 
*eng.VectorClock) { - vb.result = eng.NewBallot(vb.vUUId, eng.Commit, nil) - vb.clock = eng.NewVectorClock() +type varBallotReducer struct { + vUUId *common.VarUUId + *eng.BallotBuilder + badReads +} + +func (vb *varBallot) CalculateResult(br badReads, clock *eng.VectorClockMutable) { + result := &varBallotReducer{ + vUUId: vb.vUUId, + BallotBuilder: eng.NewBallotBuilder(vb.vUUId, eng.Commit, eng.NewVectorClock().AsMutable()), + badReads: br, + } for _, rmBal := range vb.rmToBallot { - vb.combineVote(rmBal, br) + result.combineVote(rmBal) } - vb.result.ClockData = vb.clock.AsData() - if !vb.result.Aborted() { - clock.MergeInMax(vb.clock) + if !result.Aborted() { + clock.MergeInMax(result.Clock) } + vb.result = result.ToBallot() } -func (vb *varBallot) combineVote(rmBal *rmBallot, br badReads) { - cur := vb.result +func (cur *varBallotReducer) combineVote(rmBal *rmBallot) { new := rmBal.ballot - newClock := eng.VectorClockFromData(new.ClockData) if new.Vote == eng.AbortBadRead { - br.combine(rmBal, newClock) + cur.badReads.combine(rmBal) } + curClock := cur.Clock + newClock := rmBal.ballot.Clock + switch { case cur.Vote == eng.Commit && new.Vote == eng.Commit: - vb.clock.MergeInMax(newClock) + curClock.MergeInMax(newClock) - case cur.Vote == eng.AbortDeadlock && vb.clock.Len() == 0: + case cur.Vote == eng.AbortDeadlock && curClock.Len() == 0: // Do nothing - ignore the new ballot case new.Vote == eng.AbortDeadlock && newClock.Len() == 0: // This has been created by abort proposer. This trumps everything. cur.Vote = eng.AbortDeadlock - vb.clock = newClock + cur.VoteCap = new.VoteCap + cur.Clock = newClock.AsMutable() case cur.Vote == eng.Commit: // new.Vote != eng.Commit otherwise we'd have hit first case. cur.Vote = new.Vote - vb.clock = newClock.Clone() + cur.VoteCap = new.VoteCap + cur.Clock = newClock.AsMutable() case new.Vote == eng.Commit: // But we know cur.Vote != eng.Commit. Do nothing. case new.Vote == eng.AbortDeadlock && cur.Vote == eng.AbortDeadlock: - vb.clock.MergeInMax(newClock) + curClock.MergeInMax(newClock) case new.Vote == eng.AbortDeadlock && cur.Vote == eng.AbortBadRead && - newClock.At(vb.vUUId) < vb.clock.At(vb.vUUId): + newClock.At(cur.vUUId) < curClock.At(cur.vUUId): // The new Deadlock is strictly in the past of the current // BadRead, so we stay on the badread. - vb.clock.MergeInMax(newClock) + curClock.MergeInMax(newClock) case new.Vote == eng.AbortDeadlock && cur.Vote == eng.AbortBadRead: // The new Deadlock is equal or greater than (by clock local // elem) than the current Badread. We should switch to the // Deadlock cur.Vote = eng.AbortDeadlock - vb.clock.MergeInMax(newClock) + cur.VoteCap = new.VoteCap + curClock.MergeInMax(newClock) case cur.Vote == eng.AbortBadRead: // && new.Vote == eng.AbortBadRead - vb.clock.MergeInMax(newClock) + curClock.MergeInMax(newClock) - case newClock.At(vb.vUUId) > vb.clock.At(vb.vUUId): + case newClock.At(cur.vUUId) > curClock.At(cur.vUUId): // && cur.Vote == AbortDeadlock && new.Vote == AbortBadRead. The // new BadRead is strictly in the future of the cur Deadlock, so // we should switch to the BadRead. cur.Vote = eng.AbortBadRead - vb.clock.MergeInMax(newClock) + cur.VoteCap = new.VoteCap + curClock.MergeInMax(newClock) default: // cur.Vote == AbortDeadlock && new.Vote == AbortBadRead. 
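// Aside (illustrative sketch, not part of the patch): the switch above
// encodes a precedence order over votes. Below is a simplified, runnable
// model of that order, with clocks reduced to the single local element of
// the voted-on var and the abort-proposer case (empty deadlock clock)
// left out; ballot, combine and maxU are hypothetical names.
package main

import "fmt"

type vote int

const (
	commit vote = iota
	abortDeadlock
	abortBadRead
)

type ballot struct {
	vote  vote
	local uint64 // the clock element for the var being voted on
}

func maxU(a, b uint64) uint64 {
	if a > b {
		return a
	}
	return b
}

func combine(cur, b ballot) ballot {
	switch {
	case cur.vote == commit && b.vote == commit:
		cur.local = maxU(cur.local, b.local) // both commit: merge clocks
	case cur.vote == commit:
		cur = b // any abort displaces a commit, adopting its clock
	case b.vote == commit:
		// a commit never displaces an existing abort
	case b.vote == abortDeadlock && cur.vote == abortBadRead && b.local < cur.local:
		cur.local = maxU(cur.local, b.local) // deadlock strictly in the past: keep the bad read
	case b.vote == abortDeadlock && cur.vote == abortBadRead:
		cur = ballot{abortDeadlock, maxU(cur.local, b.local)} // deadlock at/after the bad read wins
	case b.vote == abortBadRead && cur.vote == abortDeadlock && b.local > cur.local:
		cur = ballot{abortBadRead, maxU(cur.local, b.local)} // bad read strictly in the future wins
	default:
		cur.local = maxU(cur.local, b.local) // same abort kind: merge clocks
	}
	return cur
}

func main() {
	cur := ballot{commit, 3}
	cur = combine(cur, ballot{abortBadRead, 5})       // abort displaces commit
	cur = combine(cur, ballot{abortDeadlock, 5})      // deadlock not in the past wins
	fmt.Println(cur.vote == abortDeadlock, cur.local) // true 5
}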
- vb.clock.MergeInMax(newClock) + curClock.MergeInMax(newClock) } } @@ -367,8 +380,9 @@ func NewBadReads() badReads { return make(map[common.VarUUId]*badReadAction) } -func (br badReads) combine(rmBal *rmBallot, clock *eng.VectorClock) { +func (br badReads) combine(rmBal *rmBallot) { badRead := rmBal.ballot.VoteCap.AbortBadRead() + clock := rmBal.ballot.Clock txnId := common.MakeTxnId(badRead.TxnId()) actions := badRead.TxnActions() @@ -484,7 +498,7 @@ func (br badReads) AddToSeg(seg *capn.Segment) msgs.Update_List { update.SetTxnId(txnId[:]) actionList := msgs.NewActionList(seg, len(*badReadActions)) update.SetActions(actionList) - clock := eng.NewVectorClock() + clock := eng.NewVectorClock().AsMutable() for idy, bra := range *badReadActions { action := bra.action switch action.Which() { diff --git a/paxos/proposermanager.go b/paxos/proposermanager.go index 3dedee6..f552dd4 100644 --- a/paxos/proposermanager.go +++ b/paxos/proposermanager.go @@ -381,7 +381,7 @@ func MakeAbortBallots(txn *msgs.Txn, alloc *msgs.Allocation) []*eng.Ballot { for idx, l := 0, actionIndices.Len(); idx < l; idx++ { action := actions.At(int(actionIndices.At(idx))) vUUId := common.MakeVarUUId(action.VarId()) - ballots[idx] = eng.NewBallot(vUUId, eng.AbortDeadlock, nil) + ballots[idx] = eng.NewBallotBuilder(vUUId, eng.AbortDeadlock, nil).ToBallot() } return ballots } diff --git a/txnengine/ballot.go b/txnengine/ballot.go index 0fa3e4c..7b5680d 100644 --- a/txnengine/ballot.go +++ b/txnengine/ballot.go @@ -27,45 +27,51 @@ func (v Vote) ToVoteEnum() msgs.VoteEnum { type Ballot struct { VarUUId *common.VarUUId - ClockData []byte + Clock *VectorClock Vote Vote BallotCap *msgs.Ballot VoteCap *msgs.Vote } -func NewBallot(vUUId *common.VarUUId, vote Vote, clock *VectorClock) *Ballot { - ballot := &Ballot{ - VarUUId: vUUId, - Vote: vote, - BallotCap: nil, - VoteCap: nil, - } - if clock == nil { - ballot.ClockData = []byte{} - } else { - ballot.ClockData = clock.AsData() - } - return ballot +type BallotBuilder struct { + *Ballot + Clock *VectorClockMutable + seg *capn.Segment } func BallotFromCap(ballotCap *msgs.Ballot) *Ballot { voteCap := ballotCap.Vote() - ballot := &Ballot{ + return &Ballot{ VarUUId: common.MakeVarUUId(ballotCap.VarId()), - ClockData: ballotCap.Clock(), + Clock: VectorClockFromData(ballotCap.Clock(), false), Vote: Vote(voteCap.Which()), BallotCap: ballotCap, VoteCap: &voteCap, } - return ballot } func (ballot *Ballot) Aborted() bool { return ballot.Vote != Commit } -func (ballot *Ballot) CreateBadReadCap(txnId *common.TxnId, actions *msgs.Action_List) { +func (ballot *Ballot) AddToSeg(seg *capn.Segment) msgs.Ballot { + return *ballot.BallotCap +} + +func NewBallotBuilder(vUUId *common.VarUUId, vote Vote, clock *VectorClockMutable) *BallotBuilder { + ballot := &Ballot{ + VarUUId: vUUId, + Vote: vote, + } + return &BallotBuilder{ + Ballot: ballot, + Clock: clock, + } +} + +func (ballot *BallotBuilder) CreateBadReadCap(txnId *common.TxnId, actions *msgs.Action_List) *BallotBuilder { seg := capn.NewBuffer(nil) + ballot.seg = seg voteCap := msgs.NewVote(seg) voteCap.SetAbortBadRead() badReadCap := voteCap.AbortBadRead() @@ -73,27 +79,36 @@ func (ballot *Ballot) CreateBadReadCap(txnId *common.TxnId, actions *msgs.Action badReadCap.SetTxnActions(*actions) ballot.VoteCap = &voteCap ballot.Vote = AbortBadRead + return ballot } -func (ballot *Ballot) AddToSeg(seg *capn.Segment) msgs.Ballot { - ballotCap := msgs.NewBallot(seg) - ballotCap.SetVarId(ballot.VarUUId[:]) - ballotCap.SetClock(ballot.ClockData) +func 
(ballot *BallotBuilder) ToBallot() *Ballot { + if ballot.BallotCap == nil { + if ballot.seg == nil { + ballot.seg = capn.NewBuffer(nil) + } + seg := ballot.seg + ballotCap := msgs.NewBallot(seg) + ballotCap.SetVarId(ballot.VarUUId[:]) + clockData := ballot.Clock.AsData() + ballot.Ballot.Clock = VectorClockFromData(clockData, false) + ballotCap.SetClock(clockData) - if ballot.VoteCap == nil { - voteCap := msgs.NewVote(seg) - ballot.VoteCap = &voteCap - switch ballot.Vote { - case Commit: - voteCap.SetCommit() - case AbortDeadlock: - voteCap.SetAbortDeadlock() - case AbortBadRead: - voteCap.SetAbortBadRead() + if ballot.VoteCap == nil { + voteCap := msgs.NewVote(seg) + ballot.VoteCap = &voteCap + switch ballot.Vote { + case Commit: + voteCap.SetCommit() + case AbortDeadlock: + voteCap.SetAbortDeadlock() + case AbortBadRead: + voteCap.SetAbortBadRead() + } } - } - ballotCap.SetVote(*ballot.VoteCap) - ballot.BallotCap = &ballotCap - return ballotCap + ballotCap.SetVote(*ballot.VoteCap) + ballot.BallotCap = &ballotCap + } + return ballot.Ballot } diff --git a/txnengine/frame.go b/txnengine/frame.go index 84bf3ad..ab7ba86 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -22,18 +22,18 @@ type frame struct { v *Var frameTxnId *common.TxnId frameTxnActions *msgs.Action_List - frameTxnClock *VectorClock - frameWritesClock *VectorClock - readVoteClock *VectorClock + frameTxnClock *VectorClockMutable // the clock (including merge missing) of the frame txn + frameWritesClock *VectorClockMutable // max elems from all writes of all txns in parent frame + readVoteClock *VectorClockMutable positionsFound bool - mask *VectorClock + mask *VectorClockMutable frameOpen frameClosed frameErase currentState frameStateMachineComponent } -func NewFrame(parent *frame, v *Var, txnId *common.TxnId, txnActions *msgs.Action_List, txnClock *VectorClock, writesClock *VectorClock) *frame { +func NewFrame(parent *frame, v *Var, txnId *common.TxnId, txnActions *msgs.Action_List, txnClock, writesClock *VectorClockMutable) *frame { f := &frame{ parent: parent, v: v, @@ -44,13 +44,12 @@ func NewFrame(parent *frame, v *Var, txnId *common.TxnId, txnActions *msgs.Actio positionsFound: false, } if parent == nil { - f.mask = NewVectorClock() + f.mask = NewVectorClock().AsMutable() } else { f.mask = parent.mask } f.init() server.Log(f, "NewFrame") - f.calculateReadVoteClock() f.maybeScheduleRoll() return f } @@ -78,7 +77,7 @@ func (f *frame) nextState() { } func (f *frame) String() string { - return fmt.Sprintf("%v Frame %v (%v) r%v w%v", f.v.UUId, f.frameTxnId, f.frameTxnClock.Len, f.readVoteClock, f.writeVoteClock) + return fmt.Sprintf("%v Frame %v (%v) r%v w%v", f.v.UUId, f.frameTxnId, f.frameTxnClock.Len(), f.readVoteClock, f.writeVoteClock) } func (f *frame) Status(sc *server.StatusConsumer) { @@ -135,7 +134,7 @@ type frameOpen struct { learntFutureReads []*localAction maxUncommittedRead *localAction uncommittedReads uint - writeVoteClock *VectorClock + writeVoteClock *VectorClockMutable writes *sl.SkipList clientWrites map[[common.ClientLen]byte]server.EmptyStruct uncommittedWrites uint @@ -192,6 +191,7 @@ func (fo *frameOpen) AddRead(action *localAction) { fo.maxUncommittedRead = action } action.frame = fo.frame + fo.calculateReadVoteClock() if !action.VoteCommit(fo.readVoteClock) { fo.ReadAborted(action) } @@ -413,6 +413,7 @@ func (fo *frameOpen) ReadLearnt(action *localAction) bool { // in the action.outcomeClock then we know that we must be // missing some TGCs - essentially we can infer TGCs by // observing the 
outcome clocks on future txns we learn. + fo.calculateReadVoteClock() fo.readVoteClock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { if action.outcomeClock.At(vUUId) == 0 { fo.mask.SetVarIdMax(vUUId, v) @@ -457,6 +458,7 @@ func (fo *frameOpen) WriteLearnt(action *localAction) bool { // See corresponding comment in ReadLearnt clock := fo.writeVoteClock if clock == nil { + fo.calculateReadVoteClock() clock = fo.readVoteClock } clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { @@ -485,7 +487,7 @@ func (fo *frameOpen) isLocked() bool { if fo.frameTxnActions == nil || fo.parent == nil { return false } - rvcLen := fo.readVoteClock.Len + rvcLen := fo.readVoteClock.Len() actionsLen := fo.frameTxnActions.Len() excess := rvcLen - actionsLen return excess > server.FrameLockMinExcessSize && rvcLen > actionsLen*server.FrameLockMinRatio @@ -536,31 +538,31 @@ func (fo *frameOpen) maybeStartWrites() { func (fo *frameOpen) calculateReadVoteClock() { if fo.readVoteClock == nil { + if fo.frameWritesClock.At(fo.v.UUId) == 0 { + panic(fmt.Sprintf("%v no write to self! %v", fo.frame, fo.frameWritesClock)) + } clock := fo.frameTxnClock.Clone() - written := fo.frameWritesClock.Clone() clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { if fo.mask.At(vUUId) >= v { clock.Delete(vUUId) } return true }) - written.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + fo.frameWritesClock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { if fo.mask.At(vUUId) < v || fo.v.UUId.Compare(vUUId) == common.EQ { clock.SetVarIdMax(vUUId, v+1) } return true }) fo.readVoteClock = clock - if fo.frameWritesClock.At(fo.v.UUId) == 0 { - panic(fmt.Sprintf("%v no write to self! %v", fo.frame, fo.frameWritesClock)) - } } } func (fo *frameOpen) calculateWriteVoteClock() { if fo.writeVoteClock == nil { + fo.calculateReadVoteClock() clock := fo.readVoteClock.Clone() - written := NewVectorClock() + written := NewVectorClock().AsMutable() for node := fo.reads.First(); node != nil; node = node.Next() { action := node.Key.(*localAction) clock.MergeInMax(action.outcomeClock) @@ -611,30 +613,39 @@ func (fo *frameOpen) maybeCreateChild() { } localElemVals.Sort() - var clock, written *VectorClock + var clock, written *VectorClockMutable elem := fo.frameTxnClock.At(fo.v.UUId) switch { case len(localElemVals) == 1 && localElemVals[0] == elem: + // We must have learnt one or more writes that have the same + // local elem as the frame txn so they were siblings of the + // frame txn. By dfn, there can have been no successful reads of + // this frame txn. clock = fo.frameTxnClock.Clone() written = fo.frameWritesClock.Clone() - for fo.reads.Len() != 0 { + if fo.reads.Len() != 0 { panic(fmt.Sprintf("%v has committed reads even though frame has younger siblings", fo.frame)) } case localElemVals[0] == elem: + // We learnt of some siblings to this frame txn, but we also did + // further work. Again, there can not have been any reads of + // this frame txn. We can also ignore our siblings because the + // further work will by definition include the consequences of + // the siblings to this frame. 
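// Aside (illustrative sketch, not part of the patch): the read-vote clock
// built by calculateReadVoteClock above can be modelled with plain maps --
// start from the frame txn's clock, drop elements already subsumed by the
// mask, then bump every surviving write (and the frame var itself) by one
// so that reads sort after the frame txn's writes. readVoteClock here is
// a hypothetical simplification of the VectorClockMutable version.
package main

import "fmt"

func readVoteClock(frameTxn, writes, mask map[string]uint64, self string) map[string]uint64 {
	clock := make(map[string]uint64)
	for k, v := range frameTxn {
		if mask[k] < v { // keep only elements the mask has not subsumed
			clock[k] = v
		}
	}
	for k, v := range writes {
		if mask[k] < v || k == self {
			if clock[k] < v+1 {
				clock[k] = v + 1 // SetVarIdMax(k, v+1) in the real code
			}
		}
	}
	return clock
}

func main() {
	frameTxn := map[string]uint64{"a": 2, "b": 1}
	writes := map[string]uint64{"a": 2}
	mask := map[string]uint64{"b": 1}
	fmt.Println(readVoteClock(frameTxn, writes, mask, "a")) // map[a:3]
}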
localElemVals = localElemVals[1:] - for fo.reads.Len() != 0 { + if fo.reads.Len() != 0 { panic(fmt.Sprintf("%v has committed reads even though frame has younger siblings", fo.frame)) } fo.calculateWriteVoteClock() - clock = fo.writeVoteClock.Clone() - written = NewVectorClock() + clock = fo.writeVoteClock + written = NewVectorClock().AsMutable() default: fo.calculateWriteVoteClock() - clock = fo.writeVoteClock.Clone() - written = NewVectorClock() + clock = fo.writeVoteClock + written = NewVectorClock().AsMutable() } var winner *localAction @@ -643,18 +654,19 @@ func (fo *frameOpen) maybeCreateChild() { for _, localElemVal := range localElemVals { actions := localElemValToTxns[localElemVal] for _, action := range *actions { - action.outcomeClock = action.outcomeClock.Clone() - action.outcomeClock.MergeInMissing(clock) + outcomeClock := action.outcomeClock.AsMutable() + action.outcomeClock = outcomeClock + outcomeClock.MergeInMissing(clock) winner = maxTxnByOutcomeClock(winner, action) if positions == nil && action.createPositions != nil { positions = action.createPositions } - clock.MergeInMax(action.outcomeClock) + clock.MergeInMax(outcomeClock) if action.writesClock == nil { for _, k := range action.writes { - written.SetVarIdMax(k, action.outcomeClock.At(k)) + written.SetVarIdMax(k, outcomeClock.At(k)) } } else { written.MergeInMax(action.writesClock) @@ -662,7 +674,7 @@ func (fo *frameOpen) maybeCreateChild() { } } - fo.child = NewFrame(fo.frame, fo.v, winner.Id, winner.writeTxnActions, winner.outcomeClock, written) + fo.child = NewFrame(fo.frame, fo.v, winner.Id, winner.writeTxnActions, winner.outcomeClock.AsMutable(), written) for _, action := range fo.learntFutureReads { action.frame = nil if !fo.child.ReadLearnt(action) { @@ -671,6 +683,10 @@ func (fo *frameOpen) maybeCreateChild() { } fo.learntFutureReads = nil fo.nextState() + fo.readVoteClock = nil + fo.writeVoteClock = nil + fo.clientWrites = nil + fo.rollTxn = nil fo.v.SetCurFrame(fo.child, winner, positions) } @@ -781,13 +797,12 @@ func (fo *frameOpen) createRollClientTxn() (*cmsgs.ClientTxn, map[common.VarUUId return &ctxn, posMap } -func (fo *frameOpen) subtractClock(clock *VectorClock) { +func (fo *frameOpen) subtractClock(clock VectorClockInterface) { if fo.currentState != fo { panic(fmt.Sprintf("%v subtractClock called with frame in state %v", fo.v, fo.currentState)) } if changed := fo.mask.MergeInMax(clock); changed && fo.reads.Len() == 0 && fo.writeVoteClock == nil { fo.readVoteClock = nil - fo.calculateReadVoteClock() } } @@ -902,7 +917,7 @@ func (fe *frameErase) maybeErase() { server.Log(fe.frame, "maybeErase") child := fe.child child.parent = nil - child.MaybeCompleteTxns() + child.MaybeCompleteTxns() // child may be in frame open! 
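// Aside (illustrative sketch, not part of the patch): the VectorClock /
// VectorClockMutable split used throughout this patch separates a shared
// immutable base from a per-user mutable overlay, so AsMutable never has
// to copy the decoded map. clock and mutableClock below are hypothetical
// stand-ins for the real types.
package main

import "fmt"

type clock struct {
	initial map[string]uint64 // immutable once built; shared by reference
}

type mutableClock struct {
	base    *clock
	changes map[string]uint64 // overlay of pending mutations
}

func (c *clock) AsMutable() *mutableClock {
	return &mutableClock{base: c, changes: make(map[string]uint64)}
}

func (m *mutableClock) At(k string) uint64 {
	if v, found := m.changes[k]; found {
		return v
	}
	return m.base.initial[k]
}

func (m *mutableClock) Bump(k string, inc uint64) {
	m.changes[k] = m.At(k) + inc // the base map is never written to
}

func main() {
	base := &clock{initial: map[string]uint64{"a": 1}}
	m1, m2 := base.AsMutable(), base.AsMutable()
	m1.Bump("a", 2)
	fmt.Println(m1.At("a"), m2.At("a"), base.initial["a"]) // 3 1 1
}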
fe.nextState() } } diff --git a/txnengine/transaction.go b/txnengine/transaction.go index 5c9b763..434a01c 100644 --- a/txnengine/transaction.go +++ b/txnengine/transaction.go @@ -64,7 +64,7 @@ type localAction struct { writeAction *msgs.Action createPositions *common.Positions roll bool - outcomeClock *VectorClock + outcomeClock VectorClockInterface writesClock *VectorClock } @@ -84,24 +84,23 @@ func (action *localAction) IsImmigrant() bool { return action.writesClock != nil } -func (action *localAction) VoteDeadlock(clock *VectorClock) { +func (action *localAction) VoteDeadlock(clock *VectorClockMutable) { if action.ballot == nil { - action.ballot = NewBallot(action.vUUId, AbortDeadlock, clock) + action.ballot = NewBallotBuilder(action.vUUId, AbortDeadlock, clock).ToBallot() action.voteCast(action.ballot, true) } } -func (action *localAction) VoteBadRead(clock *VectorClock, txnId *common.TxnId, actions *msgs.Action_List) { +func (action *localAction) VoteBadRead(clock *VectorClockMutable, txnId *common.TxnId, actions *msgs.Action_List) { if action.ballot == nil { - action.ballot = NewBallot(action.vUUId, AbortBadRead, clock) - action.ballot.CreateBadReadCap(txnId, actions) + action.ballot = NewBallotBuilder(action.vUUId, AbortBadRead, clock).CreateBadReadCap(txnId, actions).ToBallot() action.voteCast(action.ballot, true) } } -func (action *localAction) VoteCommit(clock *VectorClock) bool { +func (action *localAction) VoteCommit(clock *VectorClockMutable) bool { if action.ballot == nil { - action.ballot = NewBallot(action.vUUId, Commit, clock) + action.ballot = NewBallotBuilder(action.vUUId, Commit, clock).ToBallot() return !action.voteCast(action.ballot, false) } return false @@ -161,10 +160,8 @@ func ImmigrationTxnFromCap(exe *dispatcher.Executor, vd *VarDispatcher, stateCha action.writeTxnActions = &txnActions positions := varCap.Positions() action.createPositions = (*common.Positions)(&positions) - action.outcomeClock = VectorClockFromData(varCap.WriteTxnClock()) - action.outcomeClock.init() - action.writesClock = VectorClockFromData(varCap.WritesClock()) - action.writesClock.init() + action.outcomeClock = VectorClockFromData(varCap.WriteTxnClock(), false) + action.writesClock = VectorClockFromData(varCap.WritesClock(), false) actionsMap[*action.vUUId] = action } @@ -537,8 +534,7 @@ func (tro *txnReceiveOutcome) BallotOutcomeReceived(outcome *msgs.Outcome) { } switch outcome.Which() { case msgs.OUTCOME_COMMIT: - tro.outcomeClock = VectorClockFromData(outcome.Commit()) - tro.outcomeClock.init() + tro.outcomeClock = VectorClockFromData(outcome.Commit(), true) /* excess := tro.outcomeClock.Len - tro.TxnCap.Actions().Len() fmt.Printf("%v ", excess) diff --git a/txnengine/var.go b/txnengine/var.go index be4e77e..540eeb8 100644 --- a/txnengine/var.go +++ b/txnengine/var.go @@ -46,8 +46,8 @@ func VarFromData(data []byte, exe *dispatcher.Executor, db *db.Databases, vm *Va } writeTxnId := common.MakeTxnId(varCap.WriteTxnId()) - writeTxnClock := VectorClockFromData(varCap.WriteTxnClock()) - writesClock := VectorClockFromData(varCap.WritesClock()) + writeTxnClock := VectorClockFromData(varCap.WriteTxnClock(), true).AsMutable() + writesClock := VectorClockFromData(varCap.WritesClock(), true).AsMutable() server.Log(v.UUId, "Restored", writeTxnId) if result, err := db.ReadonlyTransaction(func(rtxn *mdbs.RTxn) interface{} { @@ -72,8 +72,8 @@ func VarFromData(data []byte, exe *dispatcher.Executor, db *db.Databases, vm *Va func NewVar(uuid *common.VarUUId, exe *dispatcher.Executor, db *db.Databases, 
vm *VarManager) *Var { v := newVar(uuid, exe, db, vm) - clock := NewVectorClock().Bump(v.UUId, 1) - written := NewVectorClock().Bump(v.UUId, 1) + clock := NewVectorClock().AsMutable().Bump(v.UUId, 1) + written := NewVectorClock().AsMutable().Bump(v.UUId, 1) v.curFrame = NewFrame(nil, v, nil, nil, clock, written) seg := capn.NewBuffer(nil) diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index 5fa5e6d..4226555 100644 --- a/txnengine/vectorclock.go +++ b/txnengine/vectorclock.go @@ -12,33 +12,76 @@ const ( deleted uint64 = 0 ) +type VectorClockInterface interface { + Len() int + ForEach(func(*common.VarUUId, uint64) bool) bool + At(*common.VarUUId) uint64 + LessThan(VectorClockInterface) bool + AsMutable() *VectorClockMutable + AsData() []byte +} + +func lessThan(a, b VectorClockInterface) bool { + // 1. If A has more elems than B then A cannot be < B + aLen, bLen := a.Len(), b.Len() + if aLen > bLen { + return false + } + ltFound := false + // 2. For every elem e in A, B[e] must be >= A[e] + completed := a.ForEach(func(vUUId *common.VarUUId, valA uint64) bool { + valB := b.At(vUUId) + if valA > valB { + return false + } + ltFound = ltFound || valA < valB + return true + }) + if !completed { + return false + } + // 3. Everything in A is also in B and <= B. If A == B for + // everything in A, then B must be > A if len(B) > len(A) + return ltFound || bLen > aLen +} + type VectorClock struct { data []byte initial map[common.VarUUId]uint64 + decoded bool +} + +type VectorClockMutable struct { + *VectorClock + data []byte adds map[common.VarUUId]uint64 changes map[common.VarUUId]uint64 length int - inited bool } -func VectorClockFromData(vcData []byte) *VectorClock { - return &VectorClock{ - data: vcData, +func VectorClockFromData(vcData []byte, forceDecode bool) *VectorClock { + vc := &VectorClock{ + data: vcData, + decoded: false, } + if forceDecode { + vc.decode() + } + return vc } func NewVectorClock() *VectorClock { return &VectorClock{ - data: []byte{}, - inited: true, + data: []byte{}, + decoded: true, } } -func (vc *VectorClock) init() { - if vc == nil || vc.inited { +func (vc *VectorClock) decode() { + if vc == nil || vc.decoded { return } - vc.inited = true + vc.decoded = true if len(vc.data) == 0 { return } @@ -48,7 +91,6 @@ func (vc *VectorClock) init() { } vcCap := msgs.ReadRootVectorClock(seg) l := vcCap.VarUuids().Len() - vc.length = l vc.initial = make(map[common.VarUUId]uint64, l) keys := vcCap.VarUuids() values := vcCap.Values() @@ -58,18 +100,84 @@ func (vc *VectorClock) init() { } } -func (vcA *VectorClock) Clone() *VectorClock { +func (vc *VectorClock) Len() int { + vc.decode() + return len(vc.initial) +} + +func (vc *VectorClock) At(vUUId *common.VarUUId) uint64 { + vc.decode() + if value, found := vc.initial[*vUUId]; found { + return value + } + return deleted +} + +func (vc *VectorClock) ForEach(it func(*common.VarUUId, uint64) bool) bool { + vc.decode() + for k, v := range vc.initial { + if !it(&k, v) { + return false + } + } + return true +} + +func (vcA *VectorClock) LessThan(vcB VectorClockInterface) bool { + return lessThan(vcA, vcB) +} + +func (vc *VectorClock) AsData() []byte { + if vc == nil { + return []byte{} + } + return vc.data +} + +func (vc *VectorClock) AsMutable() *VectorClockMutable { + return &VectorClockMutable{ + VectorClock: vc, + length: vc.Len(), // forces decode + data: vc.data, + } +} + +func (vc *VectorClock) String() string { + if !vc.decoded { + return "VC:(undecoded)" + } + str := fmt.Sprintf("VC:(%v)", vc.Len()) + 
vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + str += fmt.Sprintf(" %v:%v", vUUId, v) + return true + }) + return str +} + +func (vc *VectorClockMutable) ensureChanges() { + if vc.changes == nil { + vc.changes = make(map[common.VarUUId]uint64) + } +} + +func (vc *VectorClockMutable) ensureAdds() { + if vc.adds == nil { + vc.adds = make(map[common.VarUUId]uint64) + } +} + +func (vc *VectorClockMutable) AsMutable() *VectorClockMutable { + return vc +} + +func (vcA *VectorClockMutable) Clone() *VectorClockMutable { if vcA == nil { return nil } - if !vcA.inited { - return VectorClockFromData(vcA.data) - } - vcB := &VectorClock{ - data: vcA.data, - initial: vcA.initial, - length: vcA.length, - inited: true, + vcB := &VectorClockMutable{ + VectorClock: vcA.VectorClock, + data: vcA.data, + length: vcA.Len(), } if len(vcA.adds) > 0 { adds := make(map[common.VarUUId]uint64, len(vcA.adds)) @@ -88,64 +196,44 @@ func (vcA *VectorClock) Clone() *VectorClock { return vcB } -func (vc *VectorClock) Len() int { - vc.init() +func (vc *VectorClockMutable) Len() int { return vc.length } -func (vc *VectorClock) ForEach(it func(*common.VarUUId, uint64) bool) bool { - vc.init() +func (vc *VectorClockMutable) At(vUUId *common.VarUUId) uint64 { + if value, found := vc.adds[*vUUId]; found { + return value + } else if value, found := vc.changes[*vUUId]; found { + return value + } else { + return vc.VectorClock.At(vUUId) + } +} + +func (vc *VectorClockMutable) ForEach(it func(*common.VarUUId, uint64) bool) bool { for k, v := range vc.adds { if !it(&k, v) { return false } } chCount := len(vc.changes) - for k, v := range vc.initial { + return vc.VectorClock.ForEach(func(k *common.VarUUId, v uint64) bool { if chCount == 0 { - if !it(&k, v) { - return false - } - } else if ch, found := vc.changes[k]; found { + return it(k, v) + } else if ch, found := vc.changes[*k]; found { chCount-- - if ch != deleted { - if !it(&k, ch) { - return false - } + if ch == deleted { + return true + } else { + return it(k, ch) } - } else if !it(&k, v) { - return false + } else { + return it(k, v) } - } - return true -} - -func (vc *VectorClock) String() string { - if !vc.inited { - return "VC:(undecoded)" - } - str := fmt.Sprintf("VC:(%v)", vc.Len()) - vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - str += fmt.Sprintf(" %v:%v", vUUId, v) - return true }) - return str } -func (vc *VectorClock) At(vUUId *common.VarUUId) uint64 { - vc.init() - if value, found := vc.adds[*vUUId]; found { - return value - } else if value, found := vc.changes[*vUUId]; found { - return value - } else if value, found := vc.initial[*vUUId]; found { - return value - } - return deleted -} - -func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { - vc.init() +func (vc *VectorClockMutable) Delete(vUUId *common.VarUUId) *VectorClockMutable { if _, found := vc.adds[*vUUId]; found { delete(vc.adds, *vUUId) vc.length-- @@ -159,9 +247,7 @@ func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { } return vc } else if _, found := vc.initial[*vUUId]; found { - if vc.changes == nil { - vc.changes = make(map[common.VarUUId]uint64) - } + vc.ensureChanges() vc.changes[*vUUId] = deleted vc.length-- vc.data = nil @@ -169,8 +255,7 @@ func (vc *VectorClock) Delete(vUUId *common.VarUUId) *VectorClock { return vc } -func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { - vc.init() +func (vc *VectorClockMutable) Bump(vUUId *common.VarUUId, inc uint64) *VectorClockMutable { if old, found := vc.adds[*vUUId]; found { 
vc.adds[*vUUId] = old + inc vc.data = nil @@ -185,16 +270,12 @@ func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { vc.data = nil return vc } else if old, found := vc.initial[*vUUId]; found { - if vc.changes == nil { - vc.changes = make(map[common.VarUUId]uint64) - } + vc.ensureChanges() vc.changes[*vUUId] = old + inc vc.data = nil return vc } else { - if vc.adds == nil { - vc.adds = make(map[common.VarUUId]uint64) - } + vc.ensureAdds() vc.adds[*vUUId] = inc vc.length++ vc.data = nil @@ -202,8 +283,7 @@ func (vc *VectorClock) Bump(vUUId *common.VarUUId, inc uint64) *VectorClock { } } -func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { - vc.init() +func (vc *VectorClockMutable) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { if old, found := vc.adds[*vUUId]; found { if v > old { vc.adds[*vUUId] = v @@ -223,18 +303,14 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { return false } else if old, found := vc.initial[*vUUId]; found { if v > old { - if vc.changes == nil { - vc.changes = make(map[common.VarUUId]uint64) - } + vc.ensureChanges() vc.changes[*vUUId] = v vc.data = nil return true } return false } else { - if vc.adds == nil { - vc.adds = make(map[common.VarUUId]uint64) - } + vc.ensureAdds() vc.adds[*vUUId] = v vc.length++ vc.data = nil @@ -242,57 +318,7 @@ func (vc *VectorClock) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { } } -func (vcA *VectorClock) MergeInMax(vcB *VectorClock) bool { - vcA.init() - if vcA.length == 0 && vcB.data != nil { - vcA.inited = false - vcA.data = vcB.data - return len(vcA.data) > 0 - } - vcB.init() - changed := false - vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - // put "|| changed" last to avoid short-circuit - changed = vcA.SetVarIdMax(vUUId, v) || changed - return true - }) - return changed -} - -func (vcA *VectorClock) MergeInMissing(vcB *VectorClock) bool { - vcA.init() - vcB.init() - changed := false - vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if _, found := vcA.adds[*vUUId]; found { - return true - } else if ch, found := vcA.changes[*vUUId]; found { - if ch == deleted { - vcA.length++ - vcA.changes[*vUUId] = v - changed = true - } - return true - } else if _, found := vcA.initial[*vUUId]; found { - return true - } else { - vcA.length++ - if vcA.adds == nil { - vcA.adds = make(map[common.VarUUId]uint64) - } - vcA.adds[*vUUId] = v - changed = true - return true - } - }) - if changed { - vcA.data = nil - } - return changed -} - -func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { - vc.init() +func (vc *VectorClockMutable) DeleteIfMatch(vUUId *common.VarUUId, v uint64) bool { if old, found := vc.adds[*vUUId]; found { if old <= v { delete(vc.adds, *vUUId) @@ -311,9 +337,7 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { return false } else if old, found := vc.initial[*vUUId]; found { if old <= v { - if vc.changes == nil { - vc.changes = make(map[common.VarUUId]uint64) - } + vc.ensureChanges() vc.changes[*vUUId] = deleted vc.length-- vc.data = nil @@ -324,41 +348,62 @@ func (vc *VectorClock) SubtractIfMatch(vUUId *common.VarUUId, v uint64) bool { return false } -func (vcA *VectorClock) LessThan(vcB *VectorClock) bool { - vcA.init() - vcB.init() - // 1. 
If A has more elems than B then A cannot be < B - if vcA.length > vcB.length { +func (vcA *VectorClockMutable) LessThan(vcB VectorClockInterface) bool { + return lessThan(vcA, vcB) +} + +func (vcA *VectorClockMutable) MergeInMax(vcB VectorClockInterface) bool { + if vcB.Len() == 0 { return false } - ltFound := false - // 2. For every elem e in A, B[e] must be >= A[e] - completed := vcA.ForEach(func(vUUId *common.VarUUId, valA uint64) bool { - valB := vcB.At(vUUId) - if valA > valB { - return false - } - ltFound = ltFound || valA < valB + changed := false + vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + changed = vcA.SetVarIdMax(vUUId, v) || changed return true }) - if !completed { - return false + return changed +} + +func (vcA *VectorClockMutable) MergeInMissing(vcB VectorClockInterface) bool { + changed := false + vcB.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + if _, found := vcA.adds[*vUUId]; found { + return true + } else if ch, found := vcA.changes[*vUUId]; found { + if ch == deleted { + vcA.length++ + vcA.changes[*vUUId] = v + changed = true + } + return true + } else if _, found := vcA.initial[*vUUId]; found { + return true + } else { + vcA.length++ + vcA.ensureAdds() + vcA.adds[*vUUId] = v + changed = true + return true + } + }) + if changed { + vcA.data = nil } - // 3. Everything in A is also in B and <= B. If A == B for - // everything in A, then B must be > A if len(B) > len(A) - return ltFound || vcB.length > vcA.length + return changed } -func (vc *VectorClock) AsData() []byte { +func (vc *VectorClockMutable) AsData() []byte { if vc == nil { return []byte{} } if vc.data == nil { - if vc.length == 0 { vc.data = []byte{} + } else if len(vc.adds) == 0 && len(vc.changes) == 0 { + vc.data = vc.VectorClock.data + } else { seg := capn.NewBuffer(nil) vcCap := msgs.NewRootVectorClock(seg) @@ -379,3 +424,12 @@ func (vc *VectorClock) AsData() []byte { return vc.data } + +func (vc *VectorClockMutable) String() string { + str := fmt.Sprintf("VCb:(%v)", vc.Len()) + vc.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + str += fmt.Sprintf(" %v:%v", vUUId, v) + return true + }) + return str +} From 8eca00e32d72643d3fee057591a1761a9f4a51c4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 29 Jul 2016 22:59:44 +0100 Subject: [PATCH 28/78] Made same set of changes to Ballot. Action list is probably next. Ref T42. 
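The pattern here, as with the vector clocks, is to carry nested structures over the wire as opaque bytes (capnp Data) and decode them only where they are inspected, rather than embedding a Ballot struct in every message. Below is a minimal sketch of that round-trip, using gob over a plain struct as a hypothetical stand-in for the capnproto serialisation behind Ballot.Data and BallotFromData:

package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
)

type ballot struct {
	VarId []byte
	Vote  int
}

// asData plays the role of Ballot.Data: serialise once, pass bytes around.
func asData(b *ballot) []byte {
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(b); err != nil {
		panic(err)
	}
	return buf.Bytes()
}

// ballotFromData plays the role of eng.BallotFromData: decode only at the
// point where the ballot's contents are actually needed.
func ballotFromData(data []byte) *ballot {
	b := &ballot{}
	if err := gob.NewDecoder(bytes.NewReader(data)).Decode(b); err != nil {
		panic(err)
	}
	return b
}

func main() {
	data := asData(&ballot{VarId: []byte{1, 2}, Vote: 2})
	fmt.Println(ballotFromData(data).Vote) // 2
}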
--HG-- branch : T42 --- capnp/acceptor.capnp | 5 +- capnp/acceptor.capnp.go | 32 +++++++++---- capnp/paxostxnvote.capnp | 5 +- capnp/paxostxnvote.capnp.go | 32 +++++++++---- paxos/acceptormanager.go | 19 ++++---- paxos/ballotaccumulator.go | 25 +++++----- paxos/proposal.go | 16 +++---- txnengine/ballot.go | 95 +++++++++++++++++++------------------ txnengine/transaction.go | 2 +- 9 files changed, 130 insertions(+), 101 deletions(-) diff --git a/capnp/acceptor.capnp b/capnp/acceptor.capnp index b3da079..8ee28b7 100644 --- a/capnp/acceptor.capnp +++ b/capnp/acceptor.capnp @@ -5,7 +5,6 @@ $Go.import("goshawkdb.io/server/capnp"); @0xefa5a1e88b6da9e3; -using Ballot = import "ballot.capnp"; using Txn = import "transaction.capnp"; using Outcome = import "outcome.capnp"; @@ -19,11 +18,11 @@ struct AcceptorState { struct InstancesForVar { varId @0: Data; instances @1: List(AcceptedInstance); - result @2: Ballot.Ballot; + result @2: Data; } struct AcceptedInstance { rmId @0: UInt32; roundNumber @1: UInt64; - ballot @2: Ballot.Ballot; + ballot @2: Data; } diff --git a/capnp/acceptor.capnp.go b/capnp/acceptor.capnp.go index 5d70fab..cfca635 100644 --- a/capnp/acceptor.capnp.go +++ b/capnp/acceptor.capnp.go @@ -260,8 +260,8 @@ func (s InstancesForVar) Instances() AcceptedInstance_List { return AcceptedInstance_List(C.Struct(s).GetObject(1)) } func (s InstancesForVar) SetInstances(v AcceptedInstance_List) { C.Struct(s).SetObject(1, C.Object(v)) } -func (s InstancesForVar) Result() Ballot { return Ballot(C.Struct(s).GetObject(2).ToStruct()) } -func (s InstancesForVar) SetResult(v Ballot) { C.Struct(s).SetObject(2, C.Object(v)) } +func (s InstancesForVar) Result() []byte { return C.Struct(s).GetObject(2).ToData() } +func (s InstancesForVar) SetResult(v []byte) { C.Struct(s).SetObject(2, s.Segment.NewData(v)) } func (s InstancesForVar) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -329,7 +329,11 @@ func (s InstancesForVar) WriteJSON(w io.Writer) error { } { s := s.Result() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -413,7 +417,11 @@ func (s InstancesForVar) WriteCapLit(w io.Writer) error { } { s := s.Result() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -468,8 +476,8 @@ func (s AcceptedInstance) RmId() uint32 { return C.Struct(s).Get32(0) func (s AcceptedInstance) SetRmId(v uint32) { C.Struct(s).Set32(0, v) } func (s AcceptedInstance) RoundNumber() uint64 { return C.Struct(s).Get64(8) } func (s AcceptedInstance) SetRoundNumber(v uint64) { C.Struct(s).Set64(8, v) } -func (s AcceptedInstance) Ballot() Ballot { return Ballot(C.Struct(s).GetObject(0).ToStruct()) } -func (s AcceptedInstance) SetBallot(v Ballot) { C.Struct(s).SetObject(0, C.Object(v)) } +func (s AcceptedInstance) Ballot() []byte { return C.Struct(s).GetObject(0).ToData() } +func (s AcceptedInstance) SetBallot(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } func (s AcceptedInstance) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -523,7 +531,11 @@ func (s AcceptedInstance) WriteJSON(w io.Writer) error { } { s := s.Ballot() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -593,7 +605,11 @@ func (s AcceptedInstance) WriteCapLit(w io.Writer) error { } { s := s.Ballot() - err = s.WriteCapLit(b) + buf, 
err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } diff --git a/capnp/paxostxnvote.capnp b/capnp/paxostxnvote.capnp index 4cb5a85..feb5ebb 100644 --- a/capnp/paxostxnvote.capnp +++ b/capnp/paxostxnvote.capnp @@ -5,7 +5,6 @@ $Go.import("goshawkdb.io/server/capnp"); @0xd3af64eb7d699620; -using Ballot = import "ballot.capnp"; using Txn = import "transaction.capnp"; using Outcome = import "outcome.capnp"; @@ -50,14 +49,14 @@ struct TxnVotePromise { freeChoice @2: Void; accepted :group { roundNumber @3: UInt64; - ballot @4: Ballot.Ballot; + ballot @4: Data; } roundNumberTooLow @5: UInt32; } } struct TxnVoteAcceptRequest { - ballot @0: Ballot.Ballot; + ballot @0: Data; roundNumber @1: UInt64; } diff --git a/capnp/paxostxnvote.capnp.go b/capnp/paxostxnvote.capnp.go index 19be4b1..f7e7ade 100644 --- a/capnp/paxostxnvote.capnp.go +++ b/capnp/paxostxnvote.capnp.go @@ -1093,8 +1093,8 @@ func (s TxnVotePromise) Accepted() TxnVotePromiseAccepted { return TxnVotePromis func (s TxnVotePromise) SetAccepted() { C.Struct(s).Set16(8, 1) } func (s TxnVotePromiseAccepted) RoundNumber() uint64 { return C.Struct(s).Get64(16) } func (s TxnVotePromiseAccepted) SetRoundNumber(v uint64) { C.Struct(s).Set64(16, v) } -func (s TxnVotePromiseAccepted) Ballot() Ballot { return Ballot(C.Struct(s).GetObject(1).ToStruct()) } -func (s TxnVotePromiseAccepted) SetBallot(v Ballot) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s TxnVotePromiseAccepted) Ballot() []byte { return C.Struct(s).GetObject(1).ToData() } +func (s TxnVotePromiseAccepted) SetBallot(v []byte) { C.Struct(s).SetObject(1, s.Segment.NewData(v)) } func (s TxnVotePromise) RoundNumberTooLow() uint32 { return C.Struct(s).Get32(16) } func (s TxnVotePromise) SetRoundNumberTooLow(v uint32) { C.Struct(s).Set16(8, 2) @@ -1190,7 +1190,11 @@ func (s TxnVotePromise) WriteJSON(w io.Writer) error { } { s := s.Ballot() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -1320,7 +1324,11 @@ func (s TxnVotePromise) WriteCapLit(w io.Writer) error { } { s := s.Ballot() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -1394,8 +1402,8 @@ func AutoNewTxnVoteAcceptRequest(s *C.Segment) TxnVoteAcceptRequest { func ReadRootTxnVoteAcceptRequest(s *C.Segment) TxnVoteAcceptRequest { return TxnVoteAcceptRequest(s.Root(0).ToStruct()) } -func (s TxnVoteAcceptRequest) Ballot() Ballot { return Ballot(C.Struct(s).GetObject(0).ToStruct()) } -func (s TxnVoteAcceptRequest) SetBallot(v Ballot) { C.Struct(s).SetObject(0, C.Object(v)) } +func (s TxnVoteAcceptRequest) Ballot() []byte { return C.Struct(s).GetObject(0).ToData() } +func (s TxnVoteAcceptRequest) SetBallot(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } func (s TxnVoteAcceptRequest) RoundNumber() uint64 { return C.Struct(s).Get64(0) } func (s TxnVoteAcceptRequest) SetRoundNumber(v uint64) { C.Struct(s).Set64(0, v) } func (s TxnVoteAcceptRequest) WriteJSON(w io.Writer) error { @@ -1413,7 +1421,11 @@ func (s TxnVoteAcceptRequest) WriteJSON(w io.Writer) error { } { s := s.Ballot() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -1464,7 +1476,11 @@ func (s TxnVoteAcceptRequest) WriteCapLit(w io.Writer) error { } { s := s.Ballot() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err 
!= nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } diff --git a/paxos/acceptormanager.go b/paxos/acceptormanager.go index 3992cab..0bfa912 100644 --- a/paxos/acceptormanager.go +++ b/paxos/acceptormanager.go @@ -118,13 +118,13 @@ func (am *AcceptorManager) loadFromData(txnId *common.TxnId, data []byte) error for idy, m := 0, acceptedInstances.Len(); idy < m; idy++ { acceptedInstance := acceptedInstances.At(idy) roundNumber := acceptedInstance.RoundNumber() - ballot := acceptedInstance.Ballot() + ballotData := acceptedInstance.Ballot() instance := &instance{ manager: am, vUUId: vUUId, promiseNum: paxosNumber(roundNumber), acceptedNum: paxosNumber(roundNumber), - accepted: &ballot, + accepted: eng.BallotFromData(ballotData), } binary.BigEndian.PutUint32(instIdSlice[common.KeyLen:], acceptedInstance.RmId()) copy(instIdSlice[common.KeyLen+4:], vUUId[:]) @@ -213,10 +213,11 @@ func (am *AcceptorManager) TwoATxnVotesReceived(sender common.RMId, txnId *commo for idx, l := 0, requests.Len(); idx < l; idx++ { request := requests.At(idx) - vUUId := common.MakeVarUUId(request.Ballot().VarId()) + ballot := eng.BallotFromData(request.Ballot()) + vUUId := ballot.VarUUId copy(instIdSlice[common.KeyLen+4:], vUUId[:]) inst := am.ensureInstance(txnId, &instId, vUUId) - accepted, rejected := inst.TwoATxnVotesReceived(&request) + accepted, rejected := inst.TwoATxnVotesReceived(paxosNumber(request.RoundNumber()), ballot) if accepted { a.BallotAccepted(instanceRMId, inst, vUUId, &txnCap) } else if rejected { @@ -353,7 +354,7 @@ type instance struct { vUUId *common.VarUUId promiseNum paxosNumber acceptedNum paxosNumber - accepted *msgs.Ballot + accepted *eng.Ballot } func (i *instance) OneATxnVotesReceived(proposal *msgs.TxnVoteProposal, promise *msgs.TxnVotePromise) { @@ -367,23 +368,21 @@ func (i *instance) OneATxnVotesReceived(proposal *msgs.TxnVoteProposal, promise promise.SetAccepted() accepted := promise.Accepted() accepted.SetRoundNumber(uint64(i.acceptedNum)) - accepted.SetBallot(*i.accepted) + accepted.SetBallot(i.accepted.Data) } } else { promise.SetRoundNumberTooLow(uint32(i.promiseNum >> 32)) } } -func (i *instance) TwoATxnVotesReceived(request *msgs.TxnVoteAcceptRequest) (accepted bool, rejected bool) { - roundNumber := paxosNumber(request.RoundNumber()) +func (i *instance) TwoATxnVotesReceived(roundNumber paxosNumber, ballot *eng.Ballot) (accepted bool, rejected bool) { if roundNumber == i.acceptedNum && i.accepted != nil { // duplicate 2a. Don't issue any response. 
return } else if roundNumber >= i.promiseNum || i.promiseNum == 0 { i.promiseNum = roundNumber i.acceptedNum = roundNumber - ballot := request.Ballot() - i.accepted = &ballot + i.accepted = ballot accepted = true return } else { diff --git a/paxos/ballotaccumulator.go b/paxos/ballotaccumulator.go index 06beab1..6cf9506 100644 --- a/paxos/ballotaccumulator.go +++ b/paxos/ballotaccumulator.go @@ -101,16 +101,14 @@ func BallotAccumulatorFromData(txnId *common.TxnId, txn *msgs.Txn, outcome *outc vBallot.rmToBallot = rmBals for idy, m := 0, acceptedInstances.Len(); idy < m; idy++ { acceptedInstance := acceptedInstances.At(idy) - ballot := acceptedInstance.Ballot() rmBal := &rmBallot{ instanceRMId: common.RMId(acceptedInstance.RmId()), - ballot: eng.BallotFromCap(&ballot), + ballot: eng.BallotFromData(acceptedInstance.Ballot()), roundNumber: paxosNumber(acceptedInstance.RoundNumber()), } rmBals[idy] = rmBal } - result := instancesForVar.Result() - vBallot.result = eng.BallotFromCap(&result) + vBallot.result = eng.BallotFromData(instancesForVar.Result()) } return ba @@ -128,18 +126,17 @@ func (ba *BallotAccumulator) BallotReceived(instanceRMId common.RMId, inst *inst if vBallot.rmToBallot == nil { vBallot.rmToBallot = rmBallots(make([]*rmBallot, 0, vBallot.voters)) } - ballot := eng.BallotFromCap(inst.accepted) found := false for idx, rBal := range vBallot.rmToBallot { if found = rBal.instanceRMId == instanceRMId; found { - vBallot.rmToBallot[idx].ballot = ballot + vBallot.rmToBallot[idx].ballot = inst.accepted break } } if !found { rmBal := &rmBallot{ instanceRMId: instanceRMId, - ballot: ballot, + ballot: inst.accepted, roundNumber: inst.acceptedNum, } vBallot.rmToBallot = append(vBallot.rmToBallot, rmBal) @@ -237,14 +234,14 @@ func (ba *BallotAccumulator) AddInstancesToSeg(seg *capn.Segment) msgs.Instances instancesForVar := instances.At(idx) idx++ instancesForVar.SetVarId(vUUIdCopy[:]) - instancesForVar.SetResult(vBallot.result.AddToSeg(seg)) + instancesForVar.SetResult(vBallot.result.Data) acceptedInstances := msgs.NewAcceptedInstanceList(seg, len(vBallot.rmToBallot)) instancesForVar.SetInstances(acceptedInstances) for idy, rmBal := range vBallot.rmToBallot { acceptedInstance := acceptedInstances.At(idy) acceptedInstance.SetRmId(uint32(rmBal.instanceRMId)) acceptedInstance.SetRoundNumber(uint64(rmBal.roundNumber)) - acceptedInstance.SetBallot(*rmBal.ballot.BallotCap) + acceptedInstance.SetBallot(rmBal.ballot.Data) } } return instances @@ -264,18 +261,18 @@ type varBallotReducer struct { } func (vb *varBallot) CalculateResult(br badReads, clock *eng.VectorClockMutable) { - result := &varBallotReducer{ + reducer := &varBallotReducer{ vUUId: vb.vUUId, BallotBuilder: eng.NewBallotBuilder(vb.vUUId, eng.Commit, eng.NewVectorClock().AsMutable()), badReads: br, } for _, rmBal := range vb.rmToBallot { - result.combineVote(rmBal) + reducer.combineVote(rmBal) } - if !result.Aborted() { - clock.MergeInMax(result.Clock) + if !reducer.Aborted() { + clock.MergeInMax(reducer.Clock) } - vb.result = result.ToBallot() + vb.result = reducer.ToBallot() } func (cur *varBallotReducer) combineVote(rmBal *rmBallot) { diff --git a/paxos/proposal.go b/paxos/proposal.go index 672b760..1904193 100644 --- a/paxos/proposal.go +++ b/paxos/proposal.go @@ -290,7 +290,7 @@ type proposalOneB struct { *proposalInstance promisesReceivedFrom []common.RMId winningRound paxosNumber - winningBallot *msgs.Ballot + winningBallot []byte } func (oneB *proposalOneB) proposalInstanceComponentWitness() {} @@ -328,8 +328,7 @@ func (oneB 
*proposalOneB) oneBTxnVotesReceived(sender common.RMId, promise *msgs accepted := promise.Accepted() if roundNumber = paxosNumber(accepted.RoundNumber()); roundNumber > oneB.winningRound { oneB.winningRound = roundNumber - ballot := accepted.Ballot() - oneB.winningBallot = &ballot + oneB.winningBallot = accepted.Ballot() } default: panic(fmt.Sprintf("Unexpected promise type: %v", promise.Which())) @@ -366,19 +365,18 @@ func (twoA *proposalTwoA) init(pi *proposalInstance) { func (twoA *proposalTwoA) start() {} func (twoA *proposalTwoA) addTwoAToAcceptRequest(seg *capn.Segment, acceptRequest *msgs.TxnVoteAcceptRequest, sender *proposalSender) bool { - var ballotPtr *msgs.Ballot + var ballotData []byte if twoA.winningBallot == nil { // free choice from everyone - ballot := twoA.ballot.AddToSeg(seg) - ballotPtr = &ballot + ballotData = twoA.ballot.Data } else { - ballotPtr = twoA.winningBallot + ballotData = twoA.winningBallot } - acceptRequest.SetBallot(*ballotPtr) + acceptRequest.SetBallot(ballotData) acceptRequest.SetRoundNumber(uint64(twoA.currentRoundNumber)) twoA.twoASender = sender twoA.nextState(nil) - return ballotPtr.Vote().Which() != msgs.VOTE_COMMIT + return eng.BallotFromData(ballotData).Vote != eng.Commit } // twoB diff --git a/txnengine/ballot.go b/txnengine/ballot.go index 7b5680d..1f72544 100644 --- a/txnengine/ballot.go +++ b/txnengine/ballot.go @@ -1,8 +1,10 @@ package txnengine import ( + "fmt" capn "github.com/glycerine/go-capnproto" "goshawkdb.io/common" + "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" ) @@ -26,27 +28,32 @@ func (v Vote) ToVoteEnum() msgs.VoteEnum { } type Ballot struct { - VarUUId *common.VarUUId - Clock *VectorClock - Vote Vote - BallotCap *msgs.Ballot - VoteCap *msgs.Vote + VarUUId *common.VarUUId + Data []byte + VoteCap *msgs.Vote + Clock *VectorClock + Vote Vote } type BallotBuilder struct { *Ballot Clock *VectorClockMutable - seg *capn.Segment } -func BallotFromCap(ballotCap *msgs.Ballot) *Ballot { +func BallotFromData(data []byte) *Ballot { + seg, _, err := capn.ReadFromMemoryZeroCopy(data) + if err != nil { + panic(fmt.Sprintf("Error when decoding ballot: %v", err)) + } + ballotCap := msgs.ReadRootBallot(seg) voteCap := ballotCap.Vote() + vUUId := common.MakeVarUUId(ballotCap.VarId()) return &Ballot{ - VarUUId: common.MakeVarUUId(ballotCap.VarId()), - Clock: VectorClockFromData(ballotCap.Clock(), false), - Vote: Vote(voteCap.Which()), - BallotCap: ballotCap, - VoteCap: &voteCap, + VarUUId: vUUId, + Data: data, + VoteCap: &voteCap, + Clock: VectorClockFromData(ballotCap.Clock(), false), + Vote: Vote(voteCap.Which()), } } @@ -54,10 +61,6 @@ func (ballot *Ballot) Aborted() bool { return ballot.Vote != Commit } -func (ballot *Ballot) AddToSeg(seg *capn.Segment) msgs.Ballot { - return *ballot.BallotCap -} - func NewBallotBuilder(vUUId *common.VarUUId, vote Vote, clock *VectorClockMutable) *BallotBuilder { ballot := &Ballot{ VarUUId: vUUId, @@ -69,46 +72,48 @@ func NewBallotBuilder(vUUId *common.VarUUId, vote Vote, clock *VectorClockMutabl } } -func (ballot *BallotBuilder) CreateBadReadCap(txnId *common.TxnId, actions *msgs.Action_List) *BallotBuilder { +func (ballot *BallotBuilder) buildSeg() (*capn.Segment, msgs.Ballot) { seg := capn.NewBuffer(nil) - ballot.seg = seg + ballotCap := msgs.NewRootBallot(seg) + ballotCap.SetVarId(ballot.VarUUId[:]) + clockData := ballot.Clock.AsData() + ballot.Ballot.Clock = VectorClockFromData(clockData, false) + ballotCap.SetClock(clockData) + return seg, ballotCap +} + +func (ballot *BallotBuilder) 
CreateBadReadBallot(txnId *common.TxnId, actions *msgs.Action_List) *Ballot { + ballot.Vote = AbortBadRead + seg, ballotCap := ballot.buildSeg() + voteCap := msgs.NewVote(seg) + ballot.VoteCap = &voteCap voteCap.SetAbortBadRead() badReadCap := voteCap.AbortBadRead() badReadCap.SetTxnId(txnId[:]) badReadCap.SetTxnActions(*actions) - ballot.VoteCap = &voteCap - ballot.Vote = AbortBadRead - return ballot + ballotCap.SetVote(voteCap) + ballot.Data = server.SegToBytes(seg) + return ballot.Ballot } func (ballot *BallotBuilder) ToBallot() *Ballot { - if ballot.BallotCap == nil { - if ballot.seg == nil { - ballot.seg = capn.NewBuffer(nil) - } - seg := ballot.seg - ballotCap := msgs.NewBallot(seg) - ballotCap.SetVarId(ballot.VarUUId[:]) - clockData := ballot.Clock.AsData() - ballot.Ballot.Clock = VectorClockFromData(clockData, false) - ballotCap.SetClock(clockData) + seg, ballotCap := ballot.buildSeg() - if ballot.VoteCap == nil { - voteCap := msgs.NewVote(seg) - ballot.VoteCap = &voteCap - switch ballot.Vote { - case Commit: - voteCap.SetCommit() - case AbortDeadlock: - voteCap.SetAbortDeadlock() - case AbortBadRead: - voteCap.SetAbortBadRead() - } + if ballot.VoteCap == nil { + voteCap := msgs.NewVote(seg) + ballot.VoteCap = &voteCap + switch ballot.Vote { + case Commit: + voteCap.SetCommit() + case AbortDeadlock: + voteCap.SetAbortDeadlock() + default: + panic("ToBallot called for Abort Badread vote") } - - ballotCap.SetVote(*ballot.VoteCap) - ballot.BallotCap = &ballotCap } + + ballotCap.SetVote(*ballot.VoteCap) + ballot.Data = server.SegToBytes(seg) return ballot.Ballot } diff --git a/txnengine/transaction.go b/txnengine/transaction.go index 434a01c..d88f556 100644 --- a/txnengine/transaction.go +++ b/txnengine/transaction.go @@ -93,7 +93,7 @@ func (action *localAction) VoteDeadlock(clock *VectorClockMutable) { func (action *localAction) VoteBadRead(clock *VectorClockMutable, txnId *common.TxnId, actions *msgs.Action_List) { if action.ballot == nil { - action.ballot = NewBallotBuilder(action.vUUId, AbortBadRead, clock).CreateBadReadCap(txnId, actions).ToBallot() + action.ballot = NewBallotBuilder(action.vUUId, AbortBadRead, clock).CreateBadReadBallot(txnId, actions) action.voteCast(action.ballot, true) } } From 456649d512413917227868572823ea04b68f4687 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 29 Jul 2016 23:27:57 +0100 Subject: [PATCH 29/78] Switch VC to use *uint64 rather than uint64 in the adds and changes. Ref T42. 
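(Editorial note, not part of the patch: a minimal sketch with toy types of why pointer-valued map entries pay off here. With map[K]uint64 every bump is a lookup plus a second map assignment; with map[K]*uint64 the counter is mutated in place through the pointer, and a clone copies all pointed-to values into one backing slice, which is exactly the shape of the new Clone in vectorclock.go below.)

    package main

    import "fmt"

    type clock struct{ adds map[string]*uint64 }

    func (c *clock) bump(k string, inc uint64) {
        if v, found := c.adds[k]; found {
            *v += inc // in place: no second map assignment
        } else {
            c.adds[k] = &inc // inc escapes; the map owns the pointer
        }
    }

    func (c *clock) clone() *clock {
        backing := make([]uint64, len(c.adds)) // one allocation backs every copied counter
        adds := make(map[string]*uint64, len(c.adds))
        idx := 0
        for k, v := range c.adds {
            backing[idx] = *v
            adds[k] = &backing[idx]
            idx++
        }
        return &clock{adds: adds}
    }

    func main() {
        c := &clock{adds: make(map[string]*uint64)}
        c.bump("x", 3)
        c.bump("x", 2)
        d := c.clone()
        c.bump("x", 1)
        fmt.Println(*c.adds["x"], *d.adds["x"]) // prints "6 5": the clone is independent
    }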
--HG-- branch : T42 --- txnengine/vectorclock.go | 81 ++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index 4226555..597d476 100644 --- a/txnengine/vectorclock.go +++ b/txnengine/vectorclock.go @@ -54,8 +54,8 @@ type VectorClock struct { type VectorClockMutable struct { *VectorClock data []byte - adds map[common.VarUUId]uint64 - changes map[common.VarUUId]uint64 + adds map[common.VarUUId]*uint64 + changes map[common.VarUUId]*uint64 length int } @@ -156,13 +156,13 @@ func (vc *VectorClock) String() string { func (vc *VectorClockMutable) ensureChanges() { if vc.changes == nil { - vc.changes = make(map[common.VarUUId]uint64) + vc.changes = make(map[common.VarUUId]*uint64) } } func (vc *VectorClockMutable) ensureAdds() { if vc.adds == nil { - vc.adds = make(map[common.VarUUId]uint64) + vc.adds = make(map[common.VarUUId]*uint64) } } @@ -179,17 +179,23 @@ func (vcA *VectorClockMutable) Clone() *VectorClockMutable { data: vcA.data, length: vcA.Len(), } + copies := make([]uint64, len(vcA.adds)+len(vcA.changes)) + idx := 0 if len(vcA.adds) > 0 { - adds := make(map[common.VarUUId]uint64, len(vcA.adds)) + adds := make(map[common.VarUUId]*uint64, len(vcA.adds)) for k, v := range vcA.adds { - adds[k] = v + copies[idx] = *v + adds[k] = &copies[idx] + idx++ } vcB.adds = adds } if len(vcA.changes) > 0 { - changes := make(map[common.VarUUId]uint64, len(vcA.changes)) + changes := make(map[common.VarUUId]*uint64, len(vcA.changes)) for k, v := range vcA.changes { - changes[k] = v + copies[idx] = *v + changes[k] = &copies[idx] + idx++ } vcB.changes = changes } @@ -202,9 +208,9 @@ func (vc *VectorClockMutable) Len() int { func (vc *VectorClockMutable) At(vUUId *common.VarUUId) uint64 { if value, found := vc.adds[*vUUId]; found { - return value + return *value } else if value, found := vc.changes[*vUUId]; found { - return value + return *value } else { return vc.VectorClock.At(vUUId) } @@ -212,7 +218,7 @@ func (vc *VectorClockMutable) At(vUUId *common.VarUUId) uint64 { func (vc *VectorClockMutable) ForEach(it func(*common.VarUUId, uint64) bool) bool { for k, v := range vc.adds { - if !it(&k, v) { + if !it(&k, *v) { return false } } @@ -222,10 +228,10 @@ func (vc *VectorClockMutable) ForEach(it func(*common.VarUUId, uint64) bool) boo return it(k, v) } else if ch, found := vc.changes[*k]; found { chCount-- - if ch == deleted { + if *ch == deleted { return true } else { - return it(k, ch) + return it(k, *ch) } } else { return it(k, v) @@ -240,15 +246,16 @@ func (vc *VectorClockMutable) Delete(vUUId *common.VarUUId) *VectorClockMutable vc.data = nil return vc } else if ch, found := vc.changes[*vUUId]; found { - if ch != deleted { + if *ch != deleted { vc.length-- - vc.changes[*vUUId] = deleted + *ch = deleted vc.data = nil } return vc } else if _, found := vc.initial[*vUUId]; found { vc.ensureChanges() - vc.changes[*vUUId] = deleted + v := deleted + vc.changes[*vUUId] = &v vc.length-- vc.data = nil } @@ -257,26 +264,27 @@ func (vc *VectorClockMutable) Delete(vUUId *common.VarUUId) *VectorClockMutable func (vc *VectorClockMutable) Bump(vUUId *common.VarUUId, inc uint64) *VectorClockMutable { if old, found := vc.adds[*vUUId]; found { - vc.adds[*vUUId] = old + inc + *old = *old + inc vc.data = nil return vc } else if old, found := vc.changes[*vUUId]; found { - if old == deleted { - vc.changes[*vUUId] = inc + if *old == deleted { + *old = inc vc.length++ } else { - vc.changes[*vUUId] = old + inc + *old = *old + 
inc } vc.data = nil return vc } else if old, found := vc.initial[*vUUId]; found { vc.ensureChanges() - vc.changes[*vUUId] = old + inc + inc += old + vc.changes[*vUUId] = &inc vc.data = nil return vc } else { vc.ensureAdds() - vc.adds[*vUUId] = inc + vc.adds[*vUUId] = &inc vc.length++ vc.data = nil return vc @@ -285,18 +293,18 @@ func (vc *VectorClockMutable) Bump(vUUId *common.VarUUId, inc uint64) *VectorClo func (vc *VectorClockMutable) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool { if old, found := vc.adds[*vUUId]; found { - if v > old { - vc.adds[*vUUId] = v + if v > *old { + *old = v vc.data = nil return true } return false } else if old, found := vc.changes[*vUUId]; found { - if v > old { - vc.changes[*vUUId] = v - if old == deleted { + if v > *old { + if *old == deleted { vc.length++ } + *old = v vc.data = nil return true } @@ -304,14 +312,14 @@ func (vc *VectorClockMutable) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool } else if old, found := vc.initial[*vUUId]; found { if v > old { vc.ensureChanges() - vc.changes[*vUUId] = v + vc.changes[*vUUId] = &v vc.data = nil return true } return false } else { vc.ensureAdds() - vc.adds[*vUUId] = v + vc.adds[*vUUId] = &v vc.length++ vc.data = nil return true @@ -320,7 +328,7 @@ func (vc *VectorClockMutable) SetVarIdMax(vUUId *common.VarUUId, v uint64) bool func (vc *VectorClockMutable) DeleteIfMatch(vUUId *common.VarUUId, v uint64) bool { if old, found := vc.adds[*vUUId]; found { - if old <= v { + if *old <= v { delete(vc.adds, *vUUId) vc.length-- vc.data = nil @@ -328,8 +336,8 @@ func (vc *VectorClockMutable) DeleteIfMatch(vUUId *common.VarUUId, v uint64) boo } return false } else if old, found := vc.changes[*vUUId]; found { - if old != deleted && old <= v { - vc.changes[*vUUId] = deleted + if *old != deleted && *old <= v { + *old = deleted vc.length-- vc.data = nil return true @@ -338,7 +346,8 @@ func (vc *VectorClockMutable) DeleteIfMatch(vUUId *common.VarUUId, v uint64) boo } else if old, found := vc.initial[*vUUId]; found { if old <= v { vc.ensureChanges() - vc.changes[*vUUId] = deleted + v = deleted + vc.changes[*vUUId] = &v vc.length-- vc.data = nil return true @@ -370,9 +379,9 @@ func (vcA *VectorClockMutable) MergeInMissing(vcB VectorClockInterface) bool { if _, found := vcA.adds[*vUUId]; found { return true } else if ch, found := vcA.changes[*vUUId]; found { - if ch == deleted { + if *ch == deleted { vcA.length++ - vcA.changes[*vUUId] = v + vcA.changes[*vUUId] = &v changed = true } return true @@ -381,7 +390,7 @@ func (vcA *VectorClockMutable) MergeInMissing(vcB VectorClockInterface) bool { } else { vcA.length++ vcA.ensureAdds() - vcA.adds[*vUUId] = v + vcA.adds[*vUUId] = &v changed = true return true } From 0c613d918e8665736174dc2e63f554e06631d5df Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 6 Aug 2016 17:09:14 +0100 Subject: [PATCH 30/78] Same approach for txn actions and the txn itself. This seems to have had a substantial perf improvement. 
Ref T42 --HG-- branch : T42 --- capnp/acceptor.capnp | 8 +- capnp/acceptor.capnp.go | 48 ++------ capnp/ballot.capnp | 2 +- capnp/ballot.capnp.go | 70 ++++-------- capnp/connection.capnp | 3 +- capnp/connection.capnp.go | 18 ++- capnp/migration.capnp | 3 +- capnp/migration.capnp.go | 16 ++- capnp/outcome.capnp | 6 +- capnp/outcome.capnp.go | 80 +++++-------- capnp/paxostxnvote.capnp | 3 +- capnp/paxostxnvote.capnp.go | 16 ++- capnp/transaction.capnp | 6 +- capnp/transaction.capnp.go | 184 ++++++++++++++++++++++++------ client/clienttxnsubmitter.go | 16 +-- client/localconnection.go | 78 +++++++------ client/simpletxnsubmitter.go | 40 ++++--- client/versioncache.go | 7 +- cmd/goshawkdb/main.go | 3 + db/transaction.go | 18 --- network/connection.go | 8 +- network/connectionmanager.go | 9 +- network/topologytransmogrifier.go | 51 ++++----- paxos/acceptor.go | 37 +++--- paxos/acceptordispatcher.go | 6 +- paxos/acceptormanager.go | 18 +-- paxos/ballotaccumulator.go | 85 +++++--------- paxos/network.go | 2 +- paxos/proposal.go | 43 ++++--- paxos/proposer.go | 28 ++--- paxos/proposerdispatcher.go | 18 +-- paxos/proposermanager.go | 53 +++++---- txnengine/ballot.go | 4 +- txnengine/frame.go | 15 +-- txnengine/transaction.go | 82 ++++++------- txnengine/utils.go | 137 ++++++++++++++++++++++ txnengine/var.go | 18 +-- txnengine/vardispatcher.go | 2 +- txnengine/varmanager.go | 3 +- 39 files changed, 688 insertions(+), 556 deletions(-) create mode 100644 txnengine/utils.go diff --git a/capnp/acceptor.capnp b/capnp/acceptor.capnp index 8ee28b7..7b28638 100644 --- a/capnp/acceptor.capnp +++ b/capnp/acceptor.capnp @@ -5,14 +5,12 @@ $Go.import("goshawkdb.io/server/capnp"); @0xefa5a1e88b6da9e3; -using Txn = import "transaction.capnp"; using Outcome = import "outcome.capnp"; struct AcceptorState { - txn @0: Txn.Txn; - outcome @1: Outcome.Outcome; - sendToAll @2: Bool; - instances @3: List(InstancesForVar); + outcome @0: Outcome.Outcome; + sendToAll @1: Bool; + instances @2: List(InstancesForVar); } struct InstancesForVar { diff --git a/capnp/acceptor.capnp.go b/capnp/acceptor.capnp.go index cfca635..bb346a5 100644 --- a/capnp/acceptor.capnp.go +++ b/capnp/acceptor.capnp.go @@ -12,20 +12,18 @@ import ( type AcceptorState C.Struct -func NewAcceptorState(s *C.Segment) AcceptorState { return AcceptorState(s.NewStruct(8, 3)) } -func NewRootAcceptorState(s *C.Segment) AcceptorState { return AcceptorState(s.NewRootStruct(8, 3)) } -func AutoNewAcceptorState(s *C.Segment) AcceptorState { return AcceptorState(s.NewStructAR(8, 3)) } +func NewAcceptorState(s *C.Segment) AcceptorState { return AcceptorState(s.NewStruct(8, 2)) } +func NewRootAcceptorState(s *C.Segment) AcceptorState { return AcceptorState(s.NewRootStruct(8, 2)) } +func AutoNewAcceptorState(s *C.Segment) AcceptorState { return AcceptorState(s.NewStructAR(8, 2)) } func ReadRootAcceptorState(s *C.Segment) AcceptorState { return AcceptorState(s.Root(0).ToStruct()) } -func (s AcceptorState) Txn() Txn { return Txn(C.Struct(s).GetObject(0).ToStruct()) } -func (s AcceptorState) SetTxn(v Txn) { C.Struct(s).SetObject(0, C.Object(v)) } -func (s AcceptorState) Outcome() Outcome { return Outcome(C.Struct(s).GetObject(1).ToStruct()) } -func (s AcceptorState) SetOutcome(v Outcome) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s AcceptorState) Outcome() Outcome { return Outcome(C.Struct(s).GetObject(0).ToStruct()) } +func (s AcceptorState) SetOutcome(v Outcome) { C.Struct(s).SetObject(0, C.Object(v)) } func (s AcceptorState) SendToAll() bool { return 
C.Struct(s).Get1(0) } func (s AcceptorState) SetSendToAll(v bool) { C.Struct(s).Set1(0, v) } func (s AcceptorState) Instances() InstancesForVar_List { - return InstancesForVar_List(C.Struct(s).GetObject(2)) + return InstancesForVar_List(C.Struct(s).GetObject(1)) } -func (s AcceptorState) SetInstances(v InstancesForVar_List) { C.Struct(s).SetObject(2, C.Object(v)) } +func (s AcceptorState) SetInstances(v InstancesForVar_List) { C.Struct(s).SetObject(1, C.Object(v)) } func (s AcceptorState) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -35,21 +33,6 @@ func (s AcceptorState) WriteJSON(w io.Writer) error { if err != nil { return err } - _, err = b.WriteString("\"txn\":") - if err != nil { - return err - } - { - s := s.Txn() - err = s.WriteJSON(b) - if err != nil { - return err - } - } - err = b.WriteByte(',') - if err != nil { - return err - } _, err = b.WriteString("\"outcome\":") if err != nil { return err @@ -134,21 +117,6 @@ func (s AcceptorState) WriteCapLit(w io.Writer) error { if err != nil { return err } - _, err = b.WriteString("txn = ") - if err != nil { - return err - } - { - s := s.Txn() - err = s.WriteCapLit(b) - if err != nil { - return err - } - } - _, err = b.WriteString(", ") - if err != nil { - return err - } _, err = b.WriteString("outcome = ") if err != nil { return err @@ -228,7 +196,7 @@ func (s AcceptorState) MarshalCapLit() ([]byte, error) { type AcceptorState_List C.PointerList func NewAcceptorStateList(s *C.Segment, sz int) AcceptorState_List { - return AcceptorState_List(s.NewCompositeList(8, 3, sz)) + return AcceptorState_List(s.NewCompositeList(8, 2, sz)) } func (s AcceptorState_List) Len() int { return C.PointerList(s).Len() } func (s AcceptorState_List) At(i int) AcceptorState { diff --git a/capnp/ballot.capnp b/capnp/ballot.capnp index bd51112..ae19903 100644 --- a/capnp/ballot.capnp +++ b/capnp/ballot.capnp @@ -18,7 +18,7 @@ struct Vote { commit @0: Void; abortBadRead :group { txnId @1: Data; - txnActions @2: List(Txn.Action); + txnActions @2: Data; } abortDeadlock @3: Void; } diff --git a/capnp/ballot.capnp.go b/capnp/ballot.capnp.go index c53437a..0848c11 100644 --- a/capnp/ballot.capnp.go +++ b/capnp/ballot.capnp.go @@ -188,19 +188,19 @@ const ( VOTE_ABORTDEADLOCK Vote_Which = 2 ) -func NewVote(s *C.Segment) Vote { return Vote(s.NewStruct(8, 2)) } -func NewRootVote(s *C.Segment) Vote { return Vote(s.NewRootStruct(8, 2)) } -func AutoNewVote(s *C.Segment) Vote { return Vote(s.NewStructAR(8, 2)) } -func ReadRootVote(s *C.Segment) Vote { return Vote(s.Root(0).ToStruct()) } -func (s Vote) Which() Vote_Which { return Vote_Which(C.Struct(s).Get16(0)) } -func (s Vote) SetCommit() { C.Struct(s).Set16(0, 0) } -func (s Vote) AbortBadRead() VoteAbortBadRead { return VoteAbortBadRead(s) } -func (s Vote) SetAbortBadRead() { C.Struct(s).Set16(0, 1) } -func (s VoteAbortBadRead) TxnId() []byte { return C.Struct(s).GetObject(0).ToData() } -func (s VoteAbortBadRead) SetTxnId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } -func (s VoteAbortBadRead) TxnActions() Action_List { return Action_List(C.Struct(s).GetObject(1)) } -func (s VoteAbortBadRead) SetTxnActions(v Action_List) { C.Struct(s).SetObject(1, C.Object(v)) } -func (s Vote) SetAbortDeadlock() { C.Struct(s).Set16(0, 2) } +func NewVote(s *C.Segment) Vote { return Vote(s.NewStruct(8, 2)) } +func NewRootVote(s *C.Segment) Vote { return Vote(s.NewRootStruct(8, 2)) } +func AutoNewVote(s *C.Segment) Vote { return Vote(s.NewStructAR(8, 2)) } +func ReadRootVote(s *C.Segment) Vote { return 
Vote(s.Root(0).ToStruct()) } +func (s Vote) Which() Vote_Which { return Vote_Which(C.Struct(s).Get16(0)) } +func (s Vote) SetCommit() { C.Struct(s).Set16(0, 0) } +func (s Vote) AbortBadRead() VoteAbortBadRead { return VoteAbortBadRead(s) } +func (s Vote) SetAbortBadRead() { C.Struct(s).Set16(0, 1) } +func (s VoteAbortBadRead) TxnId() []byte { return C.Struct(s).GetObject(0).ToData() } +func (s VoteAbortBadRead) SetTxnId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } +func (s VoteAbortBadRead) TxnActions() []byte { return C.Struct(s).GetObject(1).ToData() } +func (s VoteAbortBadRead) SetTxnActions(v []byte) { C.Struct(s).SetObject(1, s.Segment.NewData(v)) } +func (s Vote) SetAbortDeadlock() { C.Struct(s).Set16(0, 2) } func (s Vote) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -257,25 +257,11 @@ func (s Vote) WriteJSON(w io.Writer) error { } { s := s.TxnActions() - { - err = b.WriteByte('[') - if err != nil { - return err - } - for i, s := range s.ToArray() { - if i != 0 { - _, err = b.WriteString(", ") - } - if err != nil { - return err - } - err = s.WriteJSON(b) - if err != nil { - return err - } - } - err = b.WriteByte(']') + buf, err = json.Marshal(s) + if err != nil { + return err } + _, err = b.Write(buf) if err != nil { return err } @@ -365,25 +351,11 @@ func (s Vote) WriteCapLit(w io.Writer) error { } { s := s.TxnActions() - { - err = b.WriteByte('[') - if err != nil { - return err - } - for i, s := range s.ToArray() { - if i != 0 { - _, err = b.WriteString(", ") - } - if err != nil { - return err - } - err = s.WriteCapLit(b) - if err != nil { - return err - } - } - err = b.WriteByte(']') + buf, err = json.Marshal(s) + if err != nil { + return err } + _, err = b.Write(buf) if err != nil { return err } diff --git a/capnp/connection.capnp b/capnp/connection.capnp index 0bf66ed..dd83cdf 100644 --- a/capnp/connection.capnp +++ b/capnp/connection.capnp @@ -7,7 +7,6 @@ $Go.import("goshawkdb.io/server/capnp"); using PTV = import "paxostxnvote.capnp"; using Outcome = import "outcome.capnp"; -using Txn = import "transaction.capnp"; using TxnCompletion = import "txncompletion.capnp"; using Config = import "configuration.capnp"; using Migration = import "migration.capnp"; @@ -25,7 +24,7 @@ struct Message { union { heartbeat @0: Void; connectionError @1: Text; - txnSubmission @2: Txn.Txn; + txnSubmission @2: Data; submissionOutcome @3: Outcome.Outcome; submissionComplete @4: TxnCompletion.TxnSubmissionComplete; submissionAbort @5: TxnCompletion.TxnSubmissionAbort; diff --git a/capnp/connection.capnp.go b/capnp/connection.capnp.go index e1d2c2e..1335c56 100644 --- a/capnp/connection.capnp.go +++ b/capnp/connection.capnp.go @@ -359,10 +359,10 @@ func (s Message) SetConnectionError(v string) { C.Struct(s).Set16(0, 1) C.Struct(s).SetObject(0, s.Segment.NewText(v)) } -func (s Message) TxnSubmission() Txn { return Txn(C.Struct(s).GetObject(0).ToStruct()) } -func (s Message) SetTxnSubmission(v Txn) { +func (s Message) TxnSubmission() []byte { return C.Struct(s).GetObject(0).ToData() } +func (s Message) SetTxnSubmission(v []byte) { C.Struct(s).Set16(0, 2) - C.Struct(s).SetObject(0, C.Object(v)) + C.Struct(s).SetObject(0, s.Segment.NewData(v)) } func (s Message) SubmissionOutcome() Outcome { return Outcome(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetSubmissionOutcome(v Outcome) { @@ -480,7 +480,11 @@ func (s Message) WriteJSON(w io.Writer) error { } { s := s.TxnSubmission() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + 
return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -698,7 +702,11 @@ func (s Message) WriteCapLit(w io.Writer) error { } { s := s.TxnSubmission() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } diff --git a/capnp/migration.capnp b/capnp/migration.capnp index 6914c0b..60574ee 100644 --- a/capnp/migration.capnp +++ b/capnp/migration.capnp @@ -5,7 +5,6 @@ $Go.import("goshawkdb.io/server/capnp"); @0x83d51cd76711395c; -using Txn = import "transaction.capnp"; using Outcome = import "outcome.capnp"; using Var = import "var.capnp"; @@ -19,6 +18,6 @@ struct MigrationComplete { } struct MigrationElement { - txn @0: Txn.Txn; + txn @0: Data; vars @1: List(Var.Var); } diff --git a/capnp/migration.capnp.go b/capnp/migration.capnp.go index 9349201..abeb34b 100644 --- a/capnp/migration.capnp.go +++ b/capnp/migration.capnp.go @@ -298,8 +298,8 @@ func AutoNewMigrationElement(s *C.Segment) MigrationElement { func ReadRootMigrationElement(s *C.Segment) MigrationElement { return MigrationElement(s.Root(0).ToStruct()) } -func (s MigrationElement) Txn() Txn { return Txn(C.Struct(s).GetObject(0).ToStruct()) } -func (s MigrationElement) SetTxn(v Txn) { C.Struct(s).SetObject(0, C.Object(v)) } +func (s MigrationElement) Txn() []byte { return C.Struct(s).GetObject(0).ToData() } +func (s MigrationElement) SetTxn(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } func (s MigrationElement) Vars() Var_List { return Var_List(C.Struct(s).GetObject(1)) } func (s MigrationElement) SetVars(v Var_List) { C.Struct(s).SetObject(1, C.Object(v)) } func (s MigrationElement) WriteJSON(w io.Writer) error { @@ -317,7 +317,11 @@ func (s MigrationElement) WriteJSON(w io.Writer) error { } { s := s.Txn() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -382,7 +386,11 @@ func (s MigrationElement) WriteCapLit(w io.Writer) error { } { s := s.Txn() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } diff --git a/capnp/outcome.capnp b/capnp/outcome.capnp index 84e5a72..923a02f 100644 --- a/capnp/outcome.capnp +++ b/capnp/outcome.capnp @@ -5,11 +5,9 @@ $Go.import("goshawkdb.io/server/capnp"); @0xe10cac715301f488; -using Txn = import "transaction.capnp"; - struct Outcome { id @0: List(OutcomeId); - txn @1: Txn.Txn; + txn @1: Data; union { commit @2: Data; abort :group { @@ -23,7 +21,7 @@ struct Outcome { struct Update { txnId @0: Data; - actions @1: List(Txn.Action); + actions @1: Data; clock @2: Data; } diff --git a/capnp/outcome.capnp.go b/capnp/outcome.capnp.go index 8d5515b..8cf90c7 100644 --- a/capnp/outcome.capnp.go +++ b/capnp/outcome.capnp.go @@ -33,8 +33,8 @@ func ReadRootOutcome(s *C.Segment) Outcome { return Outcome(s.Root(0).ToStruct() func (s Outcome) Which() Outcome_Which { return Outcome_Which(C.Struct(s).Get16(0)) } func (s Outcome) Id() OutcomeId_List { return OutcomeId_List(C.Struct(s).GetObject(0)) } func (s Outcome) SetId(v OutcomeId_List) { C.Struct(s).SetObject(0, C.Object(v)) } -func (s Outcome) Txn() Txn { return Txn(C.Struct(s).GetObject(1).ToStruct()) } -func (s Outcome) SetTxn(v Txn) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s Outcome) Txn() []byte { return C.Struct(s).GetObject(1).ToData() } +func (s Outcome) SetTxn(v []byte) { C.Struct(s).SetObject(1, s.Segment.NewData(v)) } func (s Outcome) Commit() []byte 
{ return C.Struct(s).GetObject(2).ToData() } func (s Outcome) SetCommit(v []byte) { C.Struct(s).Set16(0, 0) @@ -97,7 +97,11 @@ func (s Outcome) WriteJSON(w io.Writer) error { } { s := s.Txn() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -238,7 +242,11 @@ func (s Outcome) WriteCapLit(w io.Writer) error { } { s := s.Txn() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -351,16 +359,16 @@ func (s Outcome_List) Set(i int, item Outcome) { C.PointerList(s).Set(i, C.Objec type Update C.Struct -func NewUpdate(s *C.Segment) Update { return Update(s.NewStruct(0, 3)) } -func NewRootUpdate(s *C.Segment) Update { return Update(s.NewRootStruct(0, 3)) } -func AutoNewUpdate(s *C.Segment) Update { return Update(s.NewStructAR(0, 3)) } -func ReadRootUpdate(s *C.Segment) Update { return Update(s.Root(0).ToStruct()) } -func (s Update) TxnId() []byte { return C.Struct(s).GetObject(0).ToData() } -func (s Update) SetTxnId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } -func (s Update) Actions() Action_List { return Action_List(C.Struct(s).GetObject(1)) } -func (s Update) SetActions(v Action_List) { C.Struct(s).SetObject(1, C.Object(v)) } -func (s Update) Clock() []byte { return C.Struct(s).GetObject(2).ToData() } -func (s Update) SetClock(v []byte) { C.Struct(s).SetObject(2, s.Segment.NewData(v)) } +func NewUpdate(s *C.Segment) Update { return Update(s.NewStruct(0, 3)) } +func NewRootUpdate(s *C.Segment) Update { return Update(s.NewRootStruct(0, 3)) } +func AutoNewUpdate(s *C.Segment) Update { return Update(s.NewStructAR(0, 3)) } +func ReadRootUpdate(s *C.Segment) Update { return Update(s.Root(0).ToStruct()) } +func (s Update) TxnId() []byte { return C.Struct(s).GetObject(0).ToData() } +func (s Update) SetTxnId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } +func (s Update) Actions() []byte { return C.Struct(s).GetObject(1).ToData() } +func (s Update) SetActions(v []byte) { C.Struct(s).SetObject(1, s.Segment.NewData(v)) } +func (s Update) Clock() []byte { return C.Struct(s).GetObject(2).ToData() } +func (s Update) SetClock(v []byte) { C.Struct(s).SetObject(2, s.Segment.NewData(v)) } func (s Update) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -395,25 +403,11 @@ func (s Update) WriteJSON(w io.Writer) error { } { s := s.Actions() - { - err = b.WriteByte('[') - if err != nil { - return err - } - for i, s := range s.ToArray() { - if i != 0 { - _, err = b.WriteString(", ") - } - if err != nil { - return err - } - err = s.WriteJSON(b) - if err != nil { - return err - } - } - err = b.WriteByte(']') + buf, err = json.Marshal(s) + if err != nil { + return err } + _, err = b.Write(buf) if err != nil { return err } @@ -483,25 +477,11 @@ func (s Update) WriteCapLit(w io.Writer) error { } { s := s.Actions() - { - err = b.WriteByte('[') - if err != nil { - return err - } - for i, s := range s.ToArray() { - if i != 0 { - _, err = b.WriteString(", ") - } - if err != nil { - return err - } - err = s.WriteCapLit(b) - if err != nil { - return err - } - } - err = b.WriteByte(']') + buf, err = json.Marshal(s) + if err != nil { + return err } + _, err = b.Write(buf) if err != nil { return err } diff --git a/capnp/paxostxnvote.capnp b/capnp/paxostxnvote.capnp index feb5ebb..0d40150 100644 --- a/capnp/paxostxnvote.capnp +++ b/capnp/paxostxnvote.capnp @@ -5,7 +5,6 @@ 
$Go.import("goshawkdb.io/server/capnp"); @0xd3af64eb7d699620; -using Txn = import "transaction.capnp"; using Outcome = import "outcome.capnp"; struct OneATxnVotes { @@ -21,7 +20,7 @@ struct OneBTxnVotes { } struct TwoATxnVotes { - txn @0: Txn.Txn; + txn @0: Data; rmId @1: UInt32; acceptRequests @2: List(TxnVoteAcceptRequest); } diff --git a/capnp/paxostxnvote.capnp.go b/capnp/paxostxnvote.capnp.go index f7e7ade..c6c690e 100644 --- a/capnp/paxostxnvote.capnp.go +++ b/capnp/paxostxnvote.capnp.go @@ -436,8 +436,8 @@ func NewTwoATxnVotes(s *C.Segment) TwoATxnVotes { return TwoATxnVotes(s.New func NewRootTwoATxnVotes(s *C.Segment) TwoATxnVotes { return TwoATxnVotes(s.NewRootStruct(8, 2)) } func AutoNewTwoATxnVotes(s *C.Segment) TwoATxnVotes { return TwoATxnVotes(s.NewStructAR(8, 2)) } func ReadRootTwoATxnVotes(s *C.Segment) TwoATxnVotes { return TwoATxnVotes(s.Root(0).ToStruct()) } -func (s TwoATxnVotes) Txn() Txn { return Txn(C.Struct(s).GetObject(0).ToStruct()) } -func (s TwoATxnVotes) SetTxn(v Txn) { C.Struct(s).SetObject(0, C.Object(v)) } +func (s TwoATxnVotes) Txn() []byte { return C.Struct(s).GetObject(0).ToData() } +func (s TwoATxnVotes) SetTxn(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } func (s TwoATxnVotes) RmId() uint32 { return C.Struct(s).Get32(0) } func (s TwoATxnVotes) SetRmId(v uint32) { C.Struct(s).Set32(0, v) } func (s TwoATxnVotes) AcceptRequests() TxnVoteAcceptRequest_List { @@ -461,7 +461,11 @@ func (s TwoATxnVotes) WriteJSON(w io.Writer) error { } { s := s.Txn() - err = s.WriteJSON(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } @@ -545,7 +549,11 @@ func (s TwoATxnVotes) WriteCapLit(w io.Writer) error { } { s := s.Txn() - err = s.WriteCapLit(b) + buf, err = json.Marshal(s) + if err != nil { + return err + } + _, err = b.Write(buf) if err != nil { return err } diff --git a/capnp/transaction.capnp b/capnp/transaction.capnp index b8b89e7..483b465 100644 --- a/capnp/transaction.capnp +++ b/capnp/transaction.capnp @@ -12,12 +12,16 @@ struct Txn { submitter @1: UInt32; submitterBootCount @2: UInt32; retry @3: Bool; - actions @4: List(Action); + actions @4: Data; allocations @5: List(Allocation); fInc @6: UInt8; topologyVersion @7: UInt32; } +struct ActionListWrapper { + actions @0: List(Action); +} + struct Action { varId @0: Data; union { diff --git a/capnp/transaction.capnp.go b/capnp/transaction.capnp.go index 49ca00f..afb23c5 100644 --- a/capnp/transaction.capnp.go +++ b/capnp/transaction.capnp.go @@ -24,8 +24,8 @@ func (s Txn) SubmitterBootCount() uint32 { return C.Struct(s).Get32(4) } func (s Txn) SetSubmitterBootCount(v uint32) { C.Struct(s).Set32(4, v) } func (s Txn) Retry() bool { return C.Struct(s).Get1(64) } func (s Txn) SetRetry(v bool) { C.Struct(s).Set1(64, v) } -func (s Txn) Actions() Action_List { return Action_List(C.Struct(s).GetObject(1)) } -func (s Txn) SetActions(v Action_List) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s Txn) Actions() []byte { return C.Struct(s).GetObject(1).ToData() } +func (s Txn) SetActions(v []byte) { C.Struct(s).SetObject(1, s.Segment.NewData(v)) } func (s Txn) Allocations() Allocation_List { return Allocation_List(C.Struct(s).GetObject(2)) } func (s Txn) SetAllocations(v Allocation_List) { C.Struct(s).SetObject(2, C.Object(v)) } func (s Txn) FInc() uint8 { return C.Struct(s).Get8(9) } @@ -123,25 +123,11 @@ func (s Txn) WriteJSON(w io.Writer) error { } { s := s.Actions() - { - err = b.WriteByte('[') - if err != nil { - return err - } - for i, 
s := range s.ToArray() { - if i != 0 { - _, err = b.WriteString(", ") - } - if err != nil { - return err - } - err = s.WriteJSON(b) - if err != nil { - return err - } - } - err = b.WriteByte(']') + buf, err = json.Marshal(s) + if err != nil { + return err } + _, err = b.Write(buf) if err != nil { return err } @@ -320,25 +306,11 @@ func (s Txn) WriteCapLit(w io.Writer) error { } { s := s.Actions() - { - err = b.WriteByte('[') - if err != nil { - return err - } - for i, s := range s.ToArray() { - if i != 0 { - _, err = b.WriteString(", ") - } - if err != nil { - return err - } - err = s.WriteCapLit(b) - if err != nil { - return err - } - } - err = b.WriteByte(']') + buf, err = json.Marshal(s) + if err != nil { + return err } + _, err = b.Write(buf) if err != nil { return err } @@ -442,6 +414,142 @@ func (s Txn_List) ToArray() []Txn { } func (s Txn_List) Set(i int, item Txn) { C.PointerList(s).Set(i, C.Object(item)) } +type ActionListWrapper C.Struct + +func NewActionListWrapper(s *C.Segment) ActionListWrapper { return ActionListWrapper(s.NewStruct(0, 1)) } +func NewRootActionListWrapper(s *C.Segment) ActionListWrapper { + return ActionListWrapper(s.NewRootStruct(0, 1)) +} +func AutoNewActionListWrapper(s *C.Segment) ActionListWrapper { + return ActionListWrapper(s.NewStructAR(0, 1)) +} +func ReadRootActionListWrapper(s *C.Segment) ActionListWrapper { + return ActionListWrapper(s.Root(0).ToStruct()) +} +func (s ActionListWrapper) Actions() Action_List { return Action_List(C.Struct(s).GetObject(0)) } +func (s ActionListWrapper) SetActions(v Action_List) { C.Struct(s).SetObject(0, C.Object(v)) } +func (s ActionListWrapper) WriteJSON(w io.Writer) error { + b := bufio.NewWriter(w) + var err error + var buf []byte + _ = buf + err = b.WriteByte('{') + if err != nil { + return err + } + _, err = b.WriteString("\"actions\":") + if err != nil { + return err + } + { + s := s.Actions() + { + err = b.WriteByte('[') + if err != nil { + return err + } + for i, s := range s.ToArray() { + if i != 0 { + _, err = b.WriteString(", ") + } + if err != nil { + return err + } + err = s.WriteJSON(b) + if err != nil { + return err + } + } + err = b.WriteByte(']') + } + if err != nil { + return err + } + } + err = b.WriteByte('}') + if err != nil { + return err + } + err = b.Flush() + return err +} +func (s ActionListWrapper) MarshalJSON() ([]byte, error) { + b := bytes.Buffer{} + err := s.WriteJSON(&b) + return b.Bytes(), err +} +func (s ActionListWrapper) WriteCapLit(w io.Writer) error { + b := bufio.NewWriter(w) + var err error + var buf []byte + _ = buf + err = b.WriteByte('(') + if err != nil { + return err + } + _, err = b.WriteString("actions = ") + if err != nil { + return err + } + { + s := s.Actions() + { + err = b.WriteByte('[') + if err != nil { + return err + } + for i, s := range s.ToArray() { + if i != 0 { + _, err = b.WriteString(", ") + } + if err != nil { + return err + } + err = s.WriteCapLit(b) + if err != nil { + return err + } + } + err = b.WriteByte(']') + } + if err != nil { + return err + } + } + err = b.WriteByte(')') + if err != nil { + return err + } + err = b.Flush() + return err +} +func (s ActionListWrapper) MarshalCapLit() ([]byte, error) { + b := bytes.Buffer{} + err := s.WriteCapLit(&b) + return b.Bytes(), err +} + +type ActionListWrapper_List C.PointerList + +func NewActionListWrapperList(s *C.Segment, sz int) ActionListWrapper_List { + return ActionListWrapper_List(s.NewCompositeList(0, 1, sz)) +} +func (s ActionListWrapper_List) Len() int { return C.PointerList(s).Len() } +func (s 
ActionListWrapper_List) At(i int) ActionListWrapper { + return ActionListWrapper(C.PointerList(s).At(i).ToStruct()) +} +func (s ActionListWrapper_List) ToArray() []ActionListWrapper { + n := s.Len() + a := make([]ActionListWrapper, n) + for i := 0; i < n; i++ { + a[i] = s.At(i) + } + return a +} +func (s ActionListWrapper_List) Set(i int, item ActionListWrapper) { + C.PointerList(s).Set(i, C.Object(item)) +} + type Action C.Struct type ActionRead Action type ActionWrite Action diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index e6ed223..8e9c658 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -9,6 +9,7 @@ import ( "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" "goshawkdb.io/server/paxos" + eng "goshawkdb.io/server/txnengine" "time" ) @@ -55,21 +56,22 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, start := time.Now() var cont TxnCompletionConsumer - cont = func(txnId *common.TxnId, outcome *msgs.Outcome, err error) { + cont = func(txn *eng.TxnReader, outcome *msgs.Outcome, err error) { if outcome == nil || err != nil { // node is shutting down or error cts.txnLive = false continuation(nil, err) return } + txnId := txn.Id end := time.Now() elapsed := end.Sub(start) start = end switch outcome.Which() { case msgs.OUTCOME_COMMIT: - cts.versionCache.UpdateFromCommit(txnId, outcome) + cts.versionCache.UpdateFromCommit(txn, outcome) clientOutcome.SetFinalId(txnId[:]) clientOutcome.SetCommit() - cts.addCreatesToCache(outcome) + cts.addCreatesToCache(txn) cts.txnLive = false cts.initialDelay = delay >> 1 continuation(&clientOutcome, nil) @@ -105,17 +107,17 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, binary.BigEndian.PutUint64(curTxnId[:8], curTxnIdNum) ctxnCap.SetId(curTxnId[:]) - cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, cont, delay, false) + cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, curTxnId, cont, delay, false) } } cts.txnLive = true // fmt.Printf("%v ", delay) - cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, cont, delay, false) + cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, curTxnId, cont, delay, false) } -func (cts *ClientTxnSubmitter) addCreatesToCache(outcome *msgs.Outcome) { - actions := outcome.Txn().Actions() +func (cts *ClientTxnSubmitter) addCreatesToCache(txn *eng.TxnReader) { + actions := txn.Actions(true).Actions() for idx, l := 0, actions.Len(); idx < l; idx++ { action := actions.At(idx) if action.Which() == msgs.ACTION_CREATE { diff --git a/client/localconnection.go b/client/localconnection.go index dfec185..84cc635 100644 --- a/client/localconnection.go +++ b/client/localconnection.go @@ -47,7 +47,7 @@ type localConnectionMsgStatus struct { type localConnectionMsgOutcomeReceived struct { localConnectionMsgBasic sender common.RMId - txnId *common.TxnId + txn *eng.TxnReader outcome *msgs.Outcome } @@ -86,13 +86,14 @@ func (lcmsq *localConnectionMsgSyncQuery) maybeClose() { type localConnectionMsgRunClientTxn struct { localConnectionMsgBasic localConnectionMsgSyncQuery - txn *cmsgs.ClientTxn - varPosMap map[common.VarUUId]*common.Positions - assignTxnId bool - outcome *msgs.Outcome + txn *cmsgs.ClientTxn + varPosMap map[common.VarUUId]*common.Positions + txnReader *eng.TxnReader + outcome *msgs.Outcome } -func (lcmrct *localConnectionMsgRunClientTxn) consumer(txnId *common.TxnId, outcome *msgs.Outcome, err error) { +func (lcmrct *localConnectionMsgRunClientTxn) consumer(txn *eng.TxnReader, outcome 
*msgs.Outcome, err error) { + lcmrct.txnReader = txn lcmrct.outcome = outcome lcmrct.err = err lcmrct.maybeClose() @@ -101,13 +102,15 @@ func (lcmrct *localConnectionMsgRunClientTxn) consumer(txnId *common.TxnId, outc type localConnectionMsgRunTxn struct { localConnectionMsgBasic localConnectionMsgSyncQuery - txn *msgs.Txn - assignTxnId bool - activeRMs []common.RMId - outcome *msgs.Outcome + txnCap *msgs.Txn + txnId *common.TxnId + activeRMs []common.RMId + txnReader *eng.TxnReader + outcome *msgs.Outcome } -func (lcmrt *localConnectionMsgRunTxn) consumer(txnId *common.TxnId, outcome *msgs.Outcome, err error) { +func (lcmrt *localConnectionMsgRunTxn) consumer(txn *eng.TxnReader, outcome *msgs.Outcome, err error) { + lcmrt.txnReader = txn lcmrt.outcome = outcome lcmrt.err = err lcmrt.maybeClose() @@ -153,11 +156,11 @@ func (lc *LocalConnection) Status(sc *server.StatusConsumer) { lc.enqueueQuery(localConnectionMsgStatus{StatusConsumer: sc}) } -func (lc *LocalConnection) SubmissionOutcomeReceived(sender common.RMId, txnId *common.TxnId, outcome *msgs.Outcome) { - server.Log("LC Received submission outcome for", txnId) +func (lc *LocalConnection) SubmissionOutcomeReceived(sender common.RMId, txn *eng.TxnReader, outcome *msgs.Outcome) { + server.Log("LC Received submission outcome for", txn.Id) lc.enqueueQuery(localConnectionMsgOutcomeReceived{ sender: sender, - txnId: txnId, + txn: txn, outcome: outcome, }) } @@ -184,31 +187,31 @@ func (lc *LocalConnection) TopologyChanged(topology *configuration.Topology, don } } -func (lc *LocalConnection) RunClientTransaction(txn *cmsgs.ClientTxn, varPosMap map[common.VarUUId]*common.Positions, assignTxnId bool) (*msgs.Outcome, error) { +func (lc *LocalConnection) RunClientTransaction(txn *cmsgs.ClientTxn, varPosMap map[common.VarUUId]*common.Positions) (*eng.TxnReader, *msgs.Outcome, error) { query := &localConnectionMsgRunClientTxn{ - txn: txn, - varPosMap: varPosMap, - assignTxnId: assignTxnId, + txn: txn, + varPosMap: varPosMap, } query.init() if lc.enqueueQuerySync(query, query.resultChan) { - return query.outcome, query.err + return query.txnReader, query.outcome, query.err } else { - return nil, nil + return nil, nil, nil } } -func (lc *LocalConnection) RunTransaction(txn *msgs.Txn, assignTxnId bool, activeRMs ...common.RMId) (*msgs.Outcome, error) { +// txnCap must be root in its segment +func (lc *LocalConnection) RunTransaction(txnCap *msgs.Txn, txnId *common.TxnId, activeRMs ...common.RMId) (*eng.TxnReader, *msgs.Outcome, error) { query := &localConnectionMsgRunTxn{ - txn: txn, - assignTxnId: assignTxnId, - activeRMs: activeRMs, + txnCap: txnCap, + txnId: txnId, + activeRMs: activeRMs, } query.init() if lc.enqueueQuerySync(query, query.resultChan) { - return query.outcome, query.err + return query.txnReader, query.outcome, query.err } else { - return nil, nil + return nil, nil, nil } } @@ -290,7 +293,7 @@ func (lc *LocalConnection) actorLoop(head *cc.ChanCellHead) { case *localConnectionMsgRunClientTxn: lc.runClientTransaction(msgT) case localConnectionMsgOutcomeReceived: - lc.submitter.SubmissionOutcomeReceived(msgT.sender, msgT.txnId, msgT.outcome) + lc.submitter.SubmissionOutcomeReceived(msgT.sender, msgT.txn, msgT.outcome) case localConnectionMsgServerConnectionsChanged: lc.submitter.ServerConnectionsChanged((map[common.RMId]paxos.Connection)(msgT)) case localConnectionMsgStatus: @@ -312,25 +315,24 @@ func (lc *LocalConnection) actorLoop(head *cc.ChanCellHead) { func (lc *LocalConnection) runClientTransaction(txnQuery 
*localConnectionMsgRunClientTxn) { txn := txnQuery.txn - if txnQuery.assignTxnId { - txnId := lc.getNextTxnId() - txn.SetId(txnId[:]) - server.Log("LC starting client txn", txnId) - } + txnId := lc.getNextTxnId() + txn.SetId(txnId[:]) + server.Log("LC starting client txn", txnId) if varPosMap := txnQuery.varPosMap; varPosMap != nil { lc.submitter.EnsurePositions(varPosMap) } - lc.submitter.SubmitClientTransaction(txn, txnQuery.consumer, 0, true) + lc.submitter.SubmitClientTransaction(txn, txnId, txnQuery.consumer, 0, true) } func (lc *LocalConnection) runTransaction(txnQuery *localConnectionMsgRunTxn) { - txn := txnQuery.txn - if txnQuery.assignTxnId { - txnId := lc.getNextTxnId() - txn.SetId(txnId[:]) + txnId := txnQuery.txnId + txnCap := txnQuery.txnCap + if txnId == nil { + txnId = lc.getNextTxnId() + txnCap.SetId(txnId[:]) server.Log("LC starting txn", txnId) } - lc.submitter.SubmitTransaction(txn, txnQuery.activeRMs, txnQuery.consumer, 0) + lc.submitter.SubmitTransaction(txnCap, txnId, txnQuery.activeRMs, txnQuery.consumer, 0) } func (lc *LocalConnection) getNextTxnId() *common.TxnId { diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index 89d4d14..328b21d 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -31,8 +31,8 @@ type SimpleTxnSubmitter struct { bufferedSubmissions []func() } -type txnOutcomeConsumer func(common.RMId, *common.TxnId, *msgs.Outcome) -type TxnCompletionConsumer func(*common.TxnId, *msgs.Outcome, error) +type txnOutcomeConsumer func(common.RMId, *eng.TxnReader, *msgs.Outcome) +type TxnCompletionConsumer func(*eng.TxnReader, *msgs.Outcome, error) func NewSimpleTxnSubmitter(rmId common.RMId, bootCount uint32, connPub paxos.ServerConnectionPublisher) *SimpleTxnSubmitter { rng := rand.New(rand.NewSource(time.Now().UnixNano())) @@ -71,21 +71,22 @@ func (sts *SimpleTxnSubmitter) EnsurePositions(varPosMap map[common.VarUUId]*com } } -func (sts *SimpleTxnSubmitter) SubmissionOutcomeReceived(sender common.RMId, txnId *common.TxnId, outcome *msgs.Outcome) { +func (sts *SimpleTxnSubmitter) SubmissionOutcomeReceived(sender common.RMId, txn *eng.TxnReader, outcome *msgs.Outcome) { + txnId := txn.Id if consumer, found := sts.outcomeConsumers[*txnId]; found { - consumer(sender, txnId, outcome) + consumer(sender, txn, outcome) } else { // OSS is safe here - it's the default action on receipt of an unknown txnid paxos.NewOneShotSender(paxos.MakeTxnSubmissionCompleteMsg(txnId), sts.connPub, sender) } } -func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, activeRMs []common.RMId, continuation TxnCompletionConsumer, delay time.Duration) { +// txnCap must be a root +func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, txnId *common.TxnId, activeRMs []common.RMId, continuation TxnCompletionConsumer, delay time.Duration) { seg := capn.NewBuffer(nil) msg := msgs.NewRootMessage(seg) - msg.SetTxnSubmission(*txnCap) + msg.SetTxnSubmission(server.SegToBytes(txnCap.Segment)) - txnId := common.MakeTxnId(txnCap.Id()) server.Log(txnId, "Submitting txn") txnSender := paxos.NewRepeatingSender(server.SegToBytes(seg), activeRMs...) 
 var removeSenderCh chan chan server.EmptyStruct
@@ -102,7 +103,7 @@ func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, activeRMs []c
 close(doneChan)
 }()
 }
- acceptors := paxos.GetAcceptorsFromTxn(txnCap)
+ acceptors := paxos.GetAcceptorsFromTxn(*txnCap)
 shutdownFun := func(shutdown bool) {
 delete(sts.outcomeConsumers, *txnId)
@@ -124,28 +125,28 @@ func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, activeRMs []c
 // problem with these msgs getting to the proposers.
 paxos.NewOneShotSender(paxos.MakeTxnSubmissionAbortMsg(txnId), sts.connPub, activeRMs...)
 }
- continuation(txnId, nil, nil)
+ continuation(nil, nil, nil)
 }
 }
 shutdownFunPtr := &shutdownFun
 sts.onShutdown[shutdownFunPtr] = server.EmptyStructVal
 outcomeAccumulator := paxos.NewOutcomeAccumulator(int(txnCap.FInc()), acceptors)
- consumer := func(sender common.RMId, txnId *common.TxnId, outcome *msgs.Outcome) {
+ consumer := func(sender common.RMId, txn *eng.TxnReader, outcome *msgs.Outcome) {
 if outcome, _ = outcomeAccumulator.BallotOutcomeReceived(sender, outcome); outcome != nil {
 delete(sts.onShutdown, shutdownFunPtr)
 shutdownFun(false)
- continuation(txnId, outcome, nil)
+ continuation(txn, outcome, nil)
 }
 }
 sts.outcomeConsumers[*txnId] = consumer
 // fmt.Printf("sts%v ", len(sts.outcomeConsumers))
 }
-func (sts *SimpleTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, continuation TxnCompletionConsumer, delay time.Duration, useNextVersion bool) {
+func (sts *SimpleTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, txnId *common.TxnId, continuation TxnCompletionConsumer, delay time.Duration, useNextVersion bool) {
 // Frames could attempt rolls before we have a topology.
 if sts.topology.IsBlank() || (sts.topology.Next() != nil && (!useNextVersion || !sts.topology.NextBarrierReached1(sts.rmId))) {
- fun := func() { sts.SubmitClientTransaction(ctxnCap, continuation, delay, useNextVersion) }
+ fun := func() { sts.SubmitClientTransaction(ctxnCap, txnId, continuation, delay, useNextVersion) }
 if sts.bufferedSubmissions == nil {
 sts.bufferedSubmissions = []func(){fun}
 } else {
@@ -162,7 +163,7 @@ func (sts *SimpleTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn,
 continuation(nil, nil, err)
 return
 }
- sts.SubmitTransaction(txnCap, activeRMs, continuation, delay)
+ sts.SubmitTransaction(txnCap, txnId, activeRMs, continuation, delay)
 }
 func (sts *SimpleTxnSubmitter) TopologyChanged(topology *configuration.Topology) {
@@ -221,7 +222,7 @@ func (sts *SimpleTxnSubmitter) Shutdown() {
 func (sts *SimpleTxnSubmitter) clientToServerTxn(clientTxnCap *cmsgs.ClientTxn, topologyVersion uint32) (*msgs.Txn, []common.RMId, []common.RMId, error) {
 outgoingSeg := capn.NewBuffer(nil)
- txnCap := msgs.NewTxn(outgoingSeg)
+ txnCap := msgs.NewRootTxn(outgoingSeg)
 txnCap.SetId(clientTxnCap.Id())
 txnCap.SetRetry(clientTxnCap.Retry())
@@ -231,15 +232,18 @@ func (sts *SimpleTxnSubmitter) clientToServerTxn(clientTxnCap *cmsgs.ClientTxn,
 txnCap.SetTopologyVersion(topologyVersion)
 clientActions := clientTxnCap.Actions()
- actions := msgs.NewActionList(outgoingSeg, clientActions.Len())
- txnCap.SetActions(actions)
+ actionsListSeg := capn.NewBuffer(nil)
+ actionsWrapper := msgs.NewRootActionListWrapper(actionsListSeg)
+ actions := msgs.NewActionList(actionsListSeg, clientActions.Len())
+ actionsWrapper.SetActions(actions)
 picker := ch.NewCombinationPicker(int(sts.topology.FInc), sts.disabledHashCodes)
- rmIdToActionIndices, err := sts.translateActions(outgoingSeg, picker, &actions, &clientActions)
+ rmIdToActionIndices, err := sts.translateActions(actionsListSeg, picker, &actions, &clientActions)
 if err != nil {
 return nil, nil, nil, err
 }
+ txnCap.SetActions(server.SegToBytes(actionsListSeg))
 // NB: we're guaranteed that activeRMs and passiveRMs are
 // disjoint. Thus there is no RM that has some active and some
 // passive actions.
diff --git a/client/versioncache.go b/client/versioncache.go
index f516a78..fb6ae88 100644
--- a/client/versioncache.go
+++ b/client/versioncache.go
@@ -18,9 +18,10 @@ func NewVersionCache() versionCache {
 return make(map[common.VarUUId]*cached)
 }
-func (vc versionCache) UpdateFromCommit(txnId *common.TxnId, outcome *msgs.Outcome) {
+func (vc versionCache) UpdateFromCommit(txn *eng.TxnReader, outcome *msgs.Outcome) {
+ txnId := txn.Id
 clock := eng.VectorClockFromData(outcome.Commit(), false)
- actions := outcome.Txn().Actions()
+ actions := txn.Actions(true).Actions()
 for idx, l := 0, actions.Len(); idx < l; idx++ {
 action := actions.At(idx)
 if action.Which() != msgs.ACTION_READ {
@@ -45,7 +46,7 @@ func (vc versionCache) UpdateFromAbort(updates *msgs.Update_List) map[*msgs.Upda
 update := updates.At(idx)
 txnId := common.MakeTxnId(update.TxnId())
 clock := eng.VectorClockFromData(update.Clock(), false)
- actions := update.Actions()
+ actions := eng.TxnActionsFromData(update.Actions(), true).Actions()
 validActions := make([]*msgs.Action, 0, actions.Len())
 for idy, m := 0, actions.Len(); idy < m; idy++ {
diff --git a/cmd/goshawkdb/main.go b/cmd/goshawkdb/main.go
index 7cc59bb..a655286 100644
--- a/cmd/goshawkdb/main.go
+++ b/cmd/goshawkdb/main.go
@@ -20,6 +20,7 @@ import (
 "os"
 "os/signal"
 "runtime"
+ "runtime/debug"
 "runtime/pprof"
 "runtime/trace"
 "sync/atomic"
@@ -255,6 +256,8 @@ func (s *server) SignalShutdown() {
 }
 func (s *server) signalStatus() {
+ runtime.GC()
+ debug.FreeOSMemory()
 sc := goshawk.NewStatusConsumer()
 go sc.Consume(func(str string) {
 log.Printf("System Status for %v\n%v\nStatus End\n", s.rmId, str)
diff --git a/db/transaction.go b/db/transaction.go
index fc44c1c..fe27b6b 100644
--- a/db/transaction.go
+++ b/db/transaction.go
@@ -3,10 +3,7 @@ package db
 import (
 "encoding/binary"
 "goshawkdb.io/common"
- "goshawkdb.io/server"
- msgs "goshawkdb.io/server/capnp"
 // "fmt"
- capn "github.com/glycerine/go-capnproto"
 mdb "github.com/msackman/gomdb"
 mdbs "github.com/msackman/gomdb/server"
 )
@@ -16,21 +13,6 @@ func init() {
 DB.TransactionRefs = &mdbs.DBISettings{Flags: mdb.CREATE}
 }
-func TxnToRootBytes(txn *msgs.Txn) []byte {
- seg := capn.NewBuffer(nil)
- txnCap := msgs.NewRootTxn(seg)
- txnCap.SetId(txn.Id())
- txnCap.SetRetry(txn.Retry())
- txnCap.SetSubmitter(txn.Submitter())
- txnCap.SetSubmitterBootCount(txn.SubmitterBootCount())
- txnCap.SetActions(txn.Actions())
- txnCap.SetAllocations(txn.Allocations())
- txnCap.SetFInc(txn.FInc())
- txnCap.SetTopologyVersion(txn.TopologyVersion())
-
- return server.SegToBytes(seg)
-}
-
 func (db *Databases) WriteTxnToDisk(rwtxn *mdbs.RWTxn, txnId *common.TxnId, txnBites []byte) error {
 bites, err := rwtxn.Get(db.TransactionRefs, txnId[:])
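The recurring move in this patch: a transaction's actions are serialized into their own capnp segment, wrapped in an ActionListWrapper, and embedded in the parent Txn as opaque bytes, so receivers can defer decoding them. A minimal sketch of the producer side, assembled only from calls that appear in the diffs above (populateAction is a hypothetical placeholder):

    // Sketch: build an action list in its own segment and embed it as bytes.
    actionsListSeg := capn.NewBuffer(nil)
    actionsWrapper := msgs.NewRootActionListWrapper(actionsListSeg)
    actions := msgs.NewActionList(actionsListSeg, n)
    actionsWrapper.SetActions(actions)
    for i := 0; i < n; i++ {
        populateAction(actions.At(i)) // hypothetical: fill in each action
    }
    txnCap.SetActions(server.SegToBytes(actionsListSeg)) // the Txn now carries []byte, not a struct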
diff --git a/network/connection.go b/network/connection.go
index 929561e..24e6e48 100644
--- a/network/connection.go
+++ b/network/connection.go
@@ -65,7 +65,7 @@ func (cms connectionMsgSend) witness() connectionMsg { return cms }
 type connectionMsgOutcomeReceived struct {
 connectionMsgBasic
 sender common.RMId
- txnId *common.TxnId
+ txn *eng.TxnReader
 outcome *msgs.Outcome
 }
@@ -98,10 +98,10 @@ func (conn *Connection) Send(msg []byte) {
 conn.enqueueQuery(connectionMsgSend(msg))
 }
-func (conn *Connection) SubmissionOutcomeReceived(sender common.RMId, txnId *common.TxnId, outcome *msgs.Outcome) {
+func (conn *Connection) SubmissionOutcomeReceived(sender common.RMId, txn *eng.TxnReader, outcome *msgs.Outcome) {
 conn.enqueueQuery(connectionMsgOutcomeReceived{
 sender: sender,
- txnId: txnId,
+ txn: txn,
 outcome: outcome,
 })
 }
@@ -745,7 +745,7 @@ func (cr *connectionRun) outcomeReceived(out connectionMsgOutcomeReceived) {
 if cr.currentState != cr {
 return
 }
- cr.submitter.SubmissionOutcomeReceived(out.sender, out.txnId, out.outcome)
+ cr.submitter.SubmissionOutcomeReceived(out.sender, out.txn, out.outcome)
 if cr.submitterIdle != nil && cr.submitter.IsIdle() {
 si := cr.submitterIdle
 cr.submitterIdle = nil
diff --git a/network/connectionmanager.go b/network/connectionmanager.go
index f3e3b42..70d9b7b 100644
--- a/network/connectionmanager.go
+++ b/network/connectionmanager.go
@@ -56,18 +56,19 @@ func (cm *ConnectionManager) DispatchMessage(sender common.RMId, msgType msgs.Me
 d := cm.Dispatchers
 switch msgType {
 case msgs.MESSAGE_TXNSUBMISSION:
- txn := msg.TxnSubmission()
- d.ProposerDispatcher.TxnReceived(sender, &txn)
+ txn := eng.TxnReaderFromData(msg.TxnSubmission())
+ d.ProposerDispatcher.TxnReceived(sender, txn)
 case msgs.MESSAGE_SUBMISSIONOUTCOME:
 outcome := msg.SubmissionOutcome()
- txnId := common.MakeTxnId(outcome.Txn().Id())
+ txn := eng.TxnReaderFromData(outcome.Txn())
+ txnId := txn.Id
 connNumber := binary.BigEndian.Uint32(txnId[8:12])
 bootNumber := binary.BigEndian.Uint32(txnId[12:16])
 if conn := cm.GetClient(bootNumber, connNumber); conn == nil {
 // OSS is safe here - it's the default action on receipt of outcome for unknown client.
 paxos.NewOneShotSender(paxos.MakeTxnSubmissionCompleteMsg(txnId), cm, sender)
 } else {
- conn.SubmissionOutcomeReceived(sender, txnId, &outcome)
+ conn.SubmissionOutcomeReceived(sender, txn, &outcome)
 return
 }
 case msgs.MESSAGE_SUBMISSIONCOMPLETE:
diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go
index ab83122..045abbf 100644
--- a/network/topologytransmogrifier.go
+++ b/network/topologytransmogrifier.go
@@ -507,7 +507,7 @@ type migrationTxnLocalStateChange struct {
 inprogressPtr *int32
 }
-func (mtlsc *migrationTxnLocalStateChange) TxnBallotsComplete(*eng.Txn, ...*eng.Ballot) {
+func (mtlsc *migrationTxnLocalStateChange) TxnBallotsComplete(...*eng.Ballot) {
 panic("TxnBallotsComplete called on migrating txn.")
 }
@@ -1574,12 +1574,14 @@ func (task *targetConfig) createTopologyTransaction(read, write *configuration.T
 }
 seg := capn.NewBuffer(nil)
- txn := msgs.NewTxn(seg)
+ txn := msgs.NewRootTxn(seg)
 txn.SetSubmitter(uint32(task.connectionManager.RMId))
 txn.SetSubmitterBootCount(task.connectionManager.BootCount)
- actions := msgs.NewActionList(seg, 1)
- txn.SetActions(actions)
+ actionsSeg := capn.NewBuffer(nil)
+ actionsWrapper := msgs.NewRootActionListWrapper(actionsSeg)
+ actions := msgs.NewActionList(actionsSeg, 1)
+ actionsWrapper.SetActions(actions)
 action := actions.At(0)
 action.SetVarId(configuration.TopologyVarUUId[:])
@@ -1618,6 +1620,7 @@ func (task *targetConfig) createTopologyTransaction(read, write *configuration.T
 }
 rw.SetReferences(refs)
 }
+ txn.SetActions(server.SegToBytes(actionsSeg))
 allocs := msgs.NewAllocationList(seg, len(active)+len(passive))
 txn.SetAllocations(allocs)
@@ -1663,7 +1666,7 @@ func (task *targetConfig) getTopologyFromLocalDatabase() (*configuration.Topolog
 for {
 txn := task.createTopologyTransaction(nil, nil, []common.RMId{task.connectionManager.RMId}, nil)
- result, err := task.localConnection.RunTransaction(txn, true, task.connectionManager.RMId)
+ _, result, err := task.localConnection.RunTransaction(txn, nil, task.connectionManager.RMId)
 if err != nil {
 return nil, err
 }
@@ -1683,7 +1686,7 @@ func (task *targetConfig) getTopologyFromLocalDatabase() (*configuration.Topolog
 }
 update := abortUpdates.At(0)
 dbversion := common.MakeTxnId(update.TxnId())
- updateActions := update.Actions()
+ updateActions := eng.TxnActionsFromData(update.Actions(), true).Actions()
 if updateActions.Len() != 1 {
 return nil, fmt.Errorf("Internal error: read of topology version 0 gave multiple actions: %v", updateActions.Len())
 }
@@ -1706,7 +1709,7 @@ func (task *targetConfig) createTopologyZero(config *configuration.NextConfigura
 txn := task.createTopologyTransaction(nil, topology, []common.RMId{task.connectionManager.RMId}, nil)
 txnId := topology.DBVersion
 txn.SetId(txnId[:])
- result, err := task.localConnection.RunTransaction(txn, false, task.connectionManager.RMId)
+ _, result, err := task.localConnection.RunTransaction(txn, txnId, task.connectionManager.RMId)
 if err != nil {
 return nil, err
 }
@@ -1723,11 +1726,11 @@ func (task *targetConfig) createTopologyZero(config *configuration.NextConfigura
 func (task *targetConfig) rewriteTopology(read, write *configuration.Topology, active, passive common.RMIds) (*configuration.Topology, bool, error) {
 txn := task.createTopologyTransaction(read, write, active, passive)
- result, err := task.localConnection.RunTransaction(txn, true, active...)
+ txnReader, result, err := task.localConnection.RunTransaction(txn, nil, active...)
 if result == nil || err != nil {
 return nil, false, err
 }
- txnId := common.MakeTxnId(result.Txn().Id())
+ txnId := txnReader.Id
 if result.Which() == msgs.OUTCOME_COMMIT {
 topology := write.Clone()
 topology.DBVersion = txnId
@@ -1748,7 +1751,7 @@ func (task *targetConfig) rewriteTopology(read, write *configuration.Topology, a
 update := abortUpdates.At(0)
 dbversion := common.MakeTxnId(update.TxnId())
- updateActions := update.Actions()
+ updateActions := eng.TxnActionsFromData(update.Actions(), true).Actions()
 if updateActions.Len() != 1 {
 return nil, false, fmt.Errorf("Internal error: readwrite of topology gave update with %v actions instead of 1!",
@@ -1793,7 +1796,7 @@ func (task *targetConfig) attemptCreateRoots(rootCount int) (bool, configuration
 root.VarUUId = vUUId
 }
 ctxn.SetActions(actions)
- result, err := task.localConnection.RunClientTransaction(&ctxn, nil, true)
+ txnReader, result, err := task.localConnection.RunClientTransaction(&ctxn, nil)
 log.Println("Create root result", result, err)
 if err != nil {
 return false, nil, err
@@ -1802,7 +1805,7 @@ func (task *targetConfig) attemptCreateRoots(rootCount int) (bool, configuration
 return false, nil, nil
 }
 if result.Which() == msgs.OUTCOME_COMMIT {
- actions := result.Txn().Actions()
+ actions := txnReader.Actions(true).Actions()
 for idx := range roots {
 root := &roots[idx]
 action := actions.At(idx)
@@ -1819,8 +1822,7 @@ func (task *targetConfig) attemptCreateRoots(rootCount int) (bool, configuration
 server.Log("Topology: Roots created in", roots)
 return false, roots, nil
 }
- abort := result.Abort()
- if abort.Which() == msgs.OUTCOMEABORT_RESUBMIT {
+ if result.Abort().Which() == msgs.OUTCOMEABORT_RESUBMIT {
 return true, nil, nil
 }
 return false, nil, fmt.Errorf("Internal error: creation of root gave rerun outcome")
@@ -1933,12 +1935,7 @@ func (it *dbIterator) iterate() {
 if txnBytes == nil {
 return true
 }
- seg, _, err = capn.ReadFromMemoryZeroCopy(txnBytes)
- if err != nil {
- cursor.Error(err)
- return true
- }
- txnCap := msgs.ReadRootTxn(seg)
+ txn := eng.TxnReaderFromData(txnBytes)
 // So, we only need to send based on the vars that we have
 // (in fact, we require the positions so we can only look
 // at the vars we have). However, the txn var allocations
@@ -1947,8 +1944,8 @@ func (it *dbIterator) iterate() {
 // txn when it changes. So that all just means we must
 // ignore the allocations here, and just work through the
 // actions directly.
- actions := txnCap.Actions()
- varCaps, err := it.filterVars(cursor, vUUIdBytes, txnId[:], &actions)
+ actions := txn.Actions(true).Actions()
+ varCaps, err := it.filterVars(cursor, vUUIdBytes, txnId[:], actions)
 if err != nil {
 return true
 } else if len(varCaps) == 0 {
@@ -1960,7 +1957,7 @@ func (it *dbIterator) iterate() {
 cursor.Error(err)
 return true
 } else if len(matchingVarCaps) != 0 {
- sb.add(&txnCap, matchingVarCaps)
+ sb.add(txn, matchingVarCaps)
 }
 }
 }
@@ -2076,7 +2073,7 @@ type sendBatch struct {
 }
 type migrationElem struct {
- txn *msgs.Txn
+ txn *eng.TxnReader
 vars []*msgs.Var
 }
@@ -2100,7 +2097,7 @@ func (sb *sendBatch) flush() {
 elems := msgs.NewMigrationElementList(seg, len(sb.elems))
 for idx, elem := range sb.elems {
 elemCap := msgs.NewMigrationElement(seg)
- elemCap.SetTxn(*elem.txn)
+ elemCap.SetTxn(elem.txn.Data)
 vars := msgs.NewVarList(seg, len(elem.vars))
 for idy, varCap := range elem.vars {
 vars.Set(idy, *varCap)
@@ -2116,9 +2113,9 @@ func (sb *sendBatch) flush() {
 sb.elems = sb.elems[:0]
 }
-func (sb *sendBatch) add(txnCap *msgs.Txn, varCaps []*msgs.Var) {
+func (sb *sendBatch) add(txn *eng.TxnReader, varCaps []*msgs.Var) {
 elem := &migrationElem{
- txn: txnCap,
+ txn: txn,
 vars: varCaps,
 }
 sb.elems = append(sb.elems, elem)
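On the consuming side, raw bytes from disk or the wire are decoded exactly once into an eng.TxnReader, and everything downstream shares it. A hedged sketch of the pattern the iterator above now follows, using only calls that appear in this patch:

    // Sketch: decode once, then read the id and the lazily-decoded actions.
    txn := eng.TxnReaderFromData(txnBytes)  // panics if the bytes don't parse
    txnId := txn.Id                         // the id is decoded eagerly
    actions := txn.Actions(true).Actions()  // force-decode the action list
    for idx, l := 0, actions.Len(); idx < l; idx++ {
        action := actions.At(idx)
        _ = action // filter or inspect each action here
    }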
diff --git a/paxos/acceptor.go b/paxos/acceptor.go
index b26c65b..945d300 100644
--- a/paxos/acceptor.go
+++ b/paxos/acceptor.go
@@ -8,6 +8,7 @@ import (
 "goshawkdb.io/server"
 msgs "goshawkdb.io/server/capnp"
 "goshawkdb.io/server/configuration"
+ eng "goshawkdb.io/server/txnengine"
 "log"
 )
@@ -21,19 +22,20 @@ type Acceptor struct {
 acceptorDeleteFromDisk
 }
-func NewAcceptor(txnId *common.TxnId, txn *msgs.Txn, am *AcceptorManager) *Acceptor {
+func NewAcceptor(txn *eng.TxnReader, am *AcceptorManager) *Acceptor {
 a := &Acceptor{
- txnId: txnId,
+ txnId: txn.Id,
 acceptorManager: am,
 }
 a.init(txn)
 return a
 }
-func AcceptorFromData(txnId *common.TxnId, txn *msgs.Txn, outcome *msgs.Outcome, sendToAll bool, instances *msgs.InstancesForVar_List, am *AcceptorManager) *Acceptor {
+func AcceptorFromData(txnId *common.TxnId, outcome *msgs.Outcome, sendToAll bool, instances *msgs.InstancesForVar_List, am *AcceptorManager) *Acceptor {
 outcomeEqualId := (*outcomeEqualId)(outcome)
- a := NewAcceptor(txnId, txn, am)
- a.ballotAccumulator = BallotAccumulatorFromData(txnId, txn, outcomeEqualId, instances)
+ txn := eng.TxnReaderFromData(outcome.Txn())
+ a := NewAcceptor(txn, am)
+ a.ballotAccumulator = BallotAccumulatorFromData(txn, outcomeEqualId, instances)
 a.outcome = outcomeEqualId
 a.sendToAll = sendToAll
 a.sendToAllOnDisk = sendToAll
@@ -41,7 +43,7 @@ func AcceptorFromData(txnId *common.TxnId, txn *msgs.Txn, outcome *msgs.Outcome,
 return a
 }
-func (a *Acceptor) init(txn *msgs.Txn) {
+func (a *Acceptor) init(txn *eng.TxnReader) {
 a.acceptorReceiveBallots.init(a, txn)
 a.acceptorWriteToDisk.init(a, txn)
 a.acceptorAwaitLocallyComplete.init(a, txn)
@@ -92,7 +94,7 @@ func (a *Acceptor) nextState(requestedState acceptorStateMachineComponent) {
 }
 type acceptorStateMachineComponent interface {
- init(*Acceptor, *msgs.Txn)
+ init(*Acceptor, *eng.TxnReader)
 start()
 acceptorStateMachineComponentWitness()
 }
@@ -105,9 +107,9 @@ type acceptorReceiveBallots struct {
 outcome *outcomeEqualId
 }
-func (arb *acceptorReceiveBallots) init(a *Acceptor, txn *msgs.Txn) {
+func (arb *acceptorReceiveBallots) init(a *Acceptor, txn *eng.TxnReader) {
 arb.Acceptor = a
- arb.ballotAccumulator = NewBallotAccumulator(arb.txnId, txn)
+ arb.ballotAccumulator = NewBallotAccumulator(txn)
 }
 func (arb *acceptorReceiveBallots) start() {}
@@ -116,7 +118,7 @@ func (arb *acceptorReceiveBallots) String() string {
 return "acceptorReceiveBallots"
 }
-func (arb *acceptorReceiveBallots) BallotAccepted(instanceRMId common.RMId, inst *instance, vUUId *common.VarUUId, txn *msgs.Txn) {
+func (arb *acceptorReceiveBallots) BallotAccepted(instanceRMId common.RMId, inst *instance, vUUId *common.VarUUId, txn *eng.TxnReader) {
 // We can accept a ballot from instanceRMId at any point up until
 // we've received a TLC from instanceRMId (see notes in ALC re
 // retry). Note an acceptor can change its mind!
@@ -139,7 +141,7 @@ type acceptorWriteToDisk struct {
 sendToAllOnDisk bool
 }
-func (awtd *acceptorWriteToDisk) init(a *Acceptor, txn *msgs.Txn) {
+func (awtd *acceptorWriteToDisk) init(a *Acceptor, txn *eng.TxnReader) {
 awtd.Acceptor = a
 }
@@ -150,7 +152,6 @@ func (awtd *acceptorWriteToDisk) start() {
 sendToAll := awtd.sendToAll
 stateSeg := capn.NewBuffer(nil)
 state := msgs.NewRootAcceptorState(stateSeg)
- state.SetTxn(*awtd.ballotAccumulator.Txn)
 state.SetOutcome(*outcomeCap)
 state.SetSendToAll(awtd.sendToAll)
 state.SetInstances(awtd.ballotAccumulator.AddInstancesToSeg(stateSeg))
@@ -203,10 +204,10 @@ type acceptorAwaitLocallyComplete struct {
 txnSubmitter common.RMId
 }
-func (aalc *acceptorAwaitLocallyComplete) init(a *Acceptor, txn *msgs.Txn) {
+func (aalc *acceptorAwaitLocallyComplete) init(a *Acceptor, txn *eng.TxnReader) {
 aalc.Acceptor = a
- aalc.tlcsReceived = make(map[common.RMId]server.EmptyStruct, aalc.ballotAccumulator.Txn.Allocations().Len())
- aalc.txnSubmitter = common.RMId(txn.Submitter())
+ aalc.tlcsReceived = make(map[common.RMId]server.EmptyStruct, aalc.ballotAccumulator.txn.Txn.Allocations().Len())
+ aalc.txnSubmitter = common.RMId(txn.Txn.Submitter())
 }
 func (aalc *acceptorAwaitLocallyComplete) start() {
@@ -230,7 +231,7 @@ func (aalc *acceptorAwaitLocallyComplete) start() {
 // opens the possibility that the acceptors do not arrive at the
 // same outcome and the txn will block.
- allocs := aalc.ballotAccumulator.Txn.Allocations()
+ allocs := aalc.ballotAccumulator.txn.Txn.Allocations()
 aalc.pendingTLC = make(map[common.RMId]server.EmptyStruct, allocs.Len())
 aalc.tgcRecipients = make([]common.RMId, 0, allocs.Len())
@@ -263,7 +264,7 @@ func (aalc *acceptorAwaitLocallyComplete) start() {
 } else {
 server.Log(aalc.txnId, "Adding sender for 2B")
- submitter := common.RMId(aalc.ballotAccumulator.Txn.Submitter())
+ submitter := common.RMId(aalc.ballotAccumulator.txn.Txn.Submitter())
 aalc.twoBSender = newTwoBTxnVotesSender((*msgs.Outcome)(aalc.outcomeOnDisk), aalc.txnId, submitter, aalc.tgcRecipients...)
 aalc.acceptorManager.AddServerConnectionSubscriber(aalc.twoBSender)
 }
@@ -324,7 +325,7 @@ type acceptorDeleteFromDisk struct {
 *Acceptor
 }
-func (adfd *acceptorDeleteFromDisk) init(a *Acceptor, txn *msgs.Txn) {
+func (adfd *acceptorDeleteFromDisk) init(a *Acceptor, txn *eng.TxnReader) {
 adfd.Acceptor = a
 }
diff --git a/paxos/acceptordispatcher.go b/paxos/acceptordispatcher.go
index 9ed70c2..39ae9b7 100644
--- a/paxos/acceptordispatcher.go
+++ b/paxos/acceptordispatcher.go
@@ -9,6 +9,7 @@ import (
 msgs "goshawkdb.io/server/capnp"
 "goshawkdb.io/server/db"
 "goshawkdb.io/server/dispatcher"
+ eng "goshawkdb.io/server/txnengine"
 "log"
 )
@@ -36,8 +37,9 @@ func (ad *AcceptorDispatcher) OneATxnVotesReceived(sender common.RMId, oneATxnVo
 }
 func (ad *AcceptorDispatcher) TwoATxnVotesReceived(sender common.RMId, twoATxnVotes *msgs.TwoATxnVotes) {
- txnId := common.MakeTxnId(twoATxnVotes.Txn().Id())
- ad.withAcceptorManager(txnId, func(am *AcceptorManager) { am.TwoATxnVotesReceived(sender, txnId, twoATxnVotes) })
+ txn := eng.TxnReaderFromData(twoATxnVotes.Txn())
+ txnId := txn.Id
+ ad.withAcceptorManager(txnId, func(am *AcceptorManager) { am.TwoATxnVotesReceived(sender, txn, twoATxnVotes) })
 }
 func (ad *AcceptorDispatcher) TxnLocallyCompleteReceived(sender common.RMId, tlc *msgs.TxnLocallyComplete) {
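The dispatchers now sit at the decode boundary: each wire message's txn bytes become a single *eng.TxnReader which is handed to every layer below, instead of each layer re-deriving ids from capnp structs. A sketch of that shape, with a hypothetical handler name:

    // handleTwoA is a hypothetical boundary handler illustrating the
    // decode-once pattern the dispatchers above now share.
    func handleTwoA(sender common.RMId, twoATxnVotes *msgs.TwoATxnVotes, am *AcceptorManager) {
        txn := eng.TxnReaderFromData(twoATxnVotes.Txn()) // one decode at the edge
        // inner layers key off txn.Id and share its lazily-decoded actions
        am.TwoATxnVotesReceived(sender, txn, twoATxnVotes)
    }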
diff --git a/paxos/acceptormanager.go b/paxos/acceptormanager.go
index 0bfa912..7cd6aa4 100644
--- a/paxos/acceptormanager.go
+++ b/paxos/acceptormanager.go
@@ -62,18 +62,19 @@ func (am *AcceptorManager) ensureInstance(txnId *common.TxnId, instId *instanceI
 }
 }
-func (am *AcceptorManager) ensureAcceptor(txnId *common.TxnId, txnCap *msgs.Txn) *Acceptor {
+func (am *AcceptorManager) ensureAcceptor(txn *eng.TxnReader) *Acceptor {
+ txnId := txn.Id
 aInst, found := am.acceptors[*txnId]
 switch {
 case found && aInst.acceptor != nil:
 return aInst.acceptor
 case found:
- a := NewAcceptor(txnId, txnCap, am)
+ a := NewAcceptor(txn, am)
 aInst.acceptor = a
 a.Start()
 return a
 default:
- a := NewAcceptor(txnId, txnCap, am)
+ a := NewAcceptor(txn, am)
 aInst = &acceptorInstances{acceptor: a}
 am.acceptors[*txnId] = aInst
 a.Start()
@@ -98,7 +99,6 @@ func (am *AcceptorManager) loadFromData(txnId *common.TxnId, data []byte) error
 return err
 }
 state := msgs.ReadRootAcceptorState(seg)
- txn := state.Txn()
 instId := instanceId([instanceIdLen]byte{})
 instIdSlice := instId[:]
@@ -107,7 +107,7 @@ func (am *AcceptorManager) loadFromData(txnId *common.TxnId, data []byte) error
 copy(instIdSlice, txnId[:])
 instances := state.Instances()
- acc := AcceptorFromData(txnId, &txn, &outcome, state.SendToAll(), &instances, am)
+ acc := AcceptorFromData(txnId, &outcome, state.SendToAll(), &instances, am)
 aInst := &acceptorInstances{acceptor: acc}
 am.acceptors[*txnId] = aInst
@@ -197,16 +197,16 @@ func (am *AcceptorManager) OneATxnVotesReceived(sender common.RMId, txnId *commo
 NewOneShotSender(server.SegToBytes(replySeg), am, sender)
 }
-func (am *AcceptorManager) TwoATxnVotesReceived(sender common.RMId, txnId *common.TxnId, twoATxnVotes *msgs.TwoATxnVotes) {
+func (am *AcceptorManager) TwoATxnVotesReceived(sender common.RMId, txn *eng.TxnReader, twoATxnVotes *msgs.TwoATxnVotes) {
 instanceRMId := common.RMId(twoATxnVotes.RmId())
+ txnId := txn.Id
 server.Log(txnId, "2A received from", sender, "; instance:", instanceRMId)
 instId := instanceId([instanceIdLen]byte{})
 instIdSlice := instId[:]
 copy(instIdSlice, txnId[:])
 binary.BigEndian.PutUint32(instIdSlice[common.KeyLen:], uint32(instanceRMId))
- txnCap := twoATxnVotes.Txn()
- a := am.ensureAcceptor(txnId, &txnCap)
+ a := am.ensureAcceptor(txn)
 requests := twoATxnVotes.AcceptRequests()
 failureInstances := make([]*instance, 0, requests.Len())
 failureRequests := make([]*msgs.TxnVoteAcceptRequest, 0, requests.Len())
@@ -219,7 +219,7 @@ func (am *AcceptorManager) TwoATxnVotesReceived(sender common.RMId, txnId *commo
 inst := am.ensureInstance(txnId, &instId, vUUId)
 accepted, rejected := inst.TwoATxnVotesReceived(paxosNumber(request.RoundNumber()), ballot)
 if accepted {
- a.BallotAccepted(instanceRMId, inst, vUUId, &txnCap)
+ a.BallotAccepted(instanceRMId, inst, vUUId, txn)
 } else if rejected {
 failureInstances = append(failureInstances, inst)
 failureRequests = append(failureRequests, &request)
diff --git a/paxos/ballotaccumulator.go b/paxos/ballotaccumulator.go
index 6cf9506..1b07e5c 100644
--- a/paxos/ballotaccumulator.go
+++ b/paxos/ballotaccumulator.go
@@ -12,8 +12,7 @@ import (
 )
 type BallotAccumulator struct {
- Txn *msgs.Txn
- txnId *common.TxnId
+ txn *eng.TxnReader
 vUUIdToBallots map[common.VarUUId]*varBallot
 outcome *outcomeEqualId
 incompleteVars int
@@ -24,11 +23,10 @@ type BallotAccumulator struct {
 // paxos instance namespace is {rmId,varId}. So for each var, we
 // expect to see ballots from fInc distinct rms.
-func NewBallotAccumulator(txnId *common.TxnId, txn *msgs.Txn) *BallotAccumulator {
- actions := txn.Actions()
+func NewBallotAccumulator(txn *eng.TxnReader) *BallotAccumulator {
+ actions := txn.Actions(true).Actions()
 ba := &BallotAccumulator{
- Txn: txn,
- txnId: txnId,
+ txn: txn,
 vUUIdToBallots: make(map[common.VarUUId]*varBallot),
 outcome: nil,
 incompleteVars: actions.Len(),
@@ -44,7 +42,7 @@ func NewBallotAccumulator(txnId *common.TxnId, txn *msgs.Txn) *BallotAccumulator
 ba.vUUIdToBallots[*vUUId] = vBallot
 }
- allocs := txn.Allocations()
+ allocs := txn.Txn.Allocations()
 for idx, l := 0, allocs.Len(); idx < l; idx++ {
 alloc := allocs.At(idx)
 if alloc.Active() == 0 {
@@ -84,8 +82,8 @@ type rmBallot struct {
 roundNumber paxosNumber
 }
-func BallotAccumulatorFromData(txnId *common.TxnId, txn *msgs.Txn, outcome *outcomeEqualId, instances *msgs.InstancesForVar_List) *BallotAccumulator {
- ba := NewBallotAccumulator(txnId, txn)
+func BallotAccumulatorFromData(txn *eng.TxnReader, outcome *outcomeEqualId, instances *msgs.InstancesForVar_List) *BallotAccumulator {
+ ba := NewBallotAccumulator(txn)
 ba.outcome = outcome
 for idx, l := 0, instances.Len(); idx < l; idx++ {
@@ -117,9 +115,9 @@ func BallotAccumulatorFromData(txnId *common.TxnId, txn *msgs.Txn, outcome *outc
 // For every vUUId involved in this txn, we should see fInc * ballots:
 // one from each RM voting for each vUUId. rmId is the paxos
 // instanceRMId.
-func (ba *BallotAccumulator) BallotReceived(instanceRMId common.RMId, inst *instance, vUUId *common.VarUUId, txn *msgs.Txn) *outcomeEqualId {
- if isDeflated(ba.Txn) && !isDeflated(txn) {
- ba.Txn = txn
+func (ba *BallotAccumulator) BallotReceived(instanceRMId common.RMId, inst *instance, vUUId *common.VarUUId, txn *eng.TxnReader) *outcomeEqualId {
+ if ba.txn.IsDeflated() && !txn.HasDeflated() {
+ ba.txn = txn
 }
 vBallot := ba.vUUIdToBallots[*vUUId]
@@ -161,7 +159,7 @@ func (ba *BallotAccumulator) determineOutcome() *outcomeEqualId {
 // being caught up. By waiting for at least F+1 ballots for a var
 // (they don't have to be the same ballot!), we avoid this as there
 // must be at least one voter who isn't in the past.
- if !(ba.dirty && (ba.incompleteVars == 0 || ba.Txn.Retry())) {
+ if !(ba.dirty && (ba.incompleteVars == 0 || ba.txn.Txn.Retry())) {
 return nil
 }
 ba.dirty = false
@@ -171,7 +169,7 @@ func (ba *BallotAccumulator) determineOutcome() *outcomeEqualId {
 vUUIds := common.VarUUIds(make([]*common.VarUUId, 0, len(ba.vUUIdToBallots)))
 br := NewBadReads()
- server.Log(ba.txnId, "Calculating result")
+ server.Log(ba.txn.Id, "Calculating result")
 for _, vBallot := range ba.vUUIdToBallots {
 if len(vBallot.rmToBallot) < vBallot.voters {
 continue
@@ -204,8 +202,7 @@ func (ba *BallotAccumulator) determineOutcome() *outcomeEqualId {
 }
 if aborted {
- deflatedTxn := deflateTxn(ba.Txn, seg)
- outcome.SetTxn(*deflatedTxn)
+ outcome.SetTxn(ba.txn.AsDeflated().Data)
 outcome.SetAbort()
 abort := outcome.Abort()
 if deadlock {
@@ -215,7 +212,7 @@ func (ba *BallotAccumulator) determineOutcome() *outcomeEqualId {
 }
 } else {
- outcome.SetTxn(*ba.Txn)
+ outcome.SetTxn(ba.txn.Data)
 outcome.SetCommit(combinedClock.AsData())
 }
@@ -248,9 +245,9 @@ func (ba *BallotAccumulator) AddInstancesToSeg(seg *capn.Segment) msgs.Instances
 }
 func (ba *BallotAccumulator) Status(sc *server.StatusConsumer) {
- sc.Emit(fmt.Sprintf("Ballot Accumulator for %v", ba.txnId))
+ sc.Emit(fmt.Sprintf("Ballot Accumulator for %v", ba.txn.Id))
 sc.Emit(fmt.Sprintf("- incomplete var count: %v", ba.incompleteVars))
- sc.Emit(fmt.Sprintf("- retry? %v", ba.Txn.Retry()))
+ sc.Emit(fmt.Sprintf("- retry? %v", ba.txn.Txn.Retry()))
 sc.Join()
 }
@@ -340,37 +337,6 @@ func (cur *varBallotReducer) combineVote(rmBal *rmBallot) {
 }
 }
-func deflateTxn(txn *msgs.Txn, seg *capn.Segment) *msgs.Txn {
- if isDeflated(txn) {
- return txn
- }
- deflatedTxn := msgs.NewTxn(seg)
- deflatedTxn.SetId(txn.Id())
- deflatedTxn.SetRetry(txn.Retry())
- deflatedTxn.SetSubmitter(txn.Submitter())
- deflatedTxn.SetSubmitterBootCount(txn.SubmitterBootCount())
- deflatedTxn.SetFInc(txn.FInc())
- deflatedTxn.SetTopologyVersion(txn.TopologyVersion())
-
- deflatedTxn.SetAllocations(txn.Allocations())
-
- actionsList := txn.Actions()
- deflatedActionsList := msgs.NewActionList(seg, actionsList.Len())
- deflatedTxn.SetActions(deflatedActionsList)
- for idx, l := 0, actionsList.Len(); idx < l; idx++ {
- deflatedAction := deflatedActionsList.At(idx)
- deflatedAction.SetVarId(actionsList.At(idx).VarId())
- deflatedAction.SetMissing()
- }
-
- return &deflatedTxn
-}
-
-func isDeflated(txn *msgs.Txn) bool {
- actions := txn.Actions()
- return actions.Len() != 0 && actions.At(0).Which() == msgs.ACTION_MISSING
-}
-
 type badReads map[common.VarUUId]*badReadAction
 func NewBadReads() badReads {
@@ -381,7 +347,7 @@ func (br badReads) combine(rmBal *rmBallot) {
 badRead := rmBal.ballot.VoteCap.AbortBadRead()
 clock := rmBal.ballot.Clock
 txnId := common.MakeTxnId(badRead.TxnId())
- actions := badRead.TxnActions()
+ actions := eng.TxnActionsFromData(badRead.TxnActions(), true).Actions()
 for idx, l := 0, actions.Len(); idx < l; idx++ {
 action := actions.At(idx)
@@ -493,21 +459,23 @@ func (br badReads) AddToSeg(seg *capn.Segment) msgs.Update_List {
 update := updates.At(idx)
 idx++
 update.SetTxnId(txnId[:])
- actionList := msgs.NewActionList(seg, len(*badReadActions))
- update.SetActions(actionList)
+ actionsListSeg := capn.NewBuffer(nil)
+ actionsListWrapper := msgs.NewRootActionListWrapper(actionsListSeg)
+ actionsList := msgs.NewActionList(actionsListSeg, len(*badReadActions))
+ actionsListWrapper.SetActions(actionsList)
 clock := eng.NewVectorClock().AsMutable()
 for idy, bra := range *badReadActions {
 action := bra.action
 switch action.Which() {
 case msgs.ACTION_READ:
- newAction := actionList.At(idy)
+ newAction := actionsList.At(idy)
 newAction.SetVarId(action.VarId())
 newAction.SetMissing()
 case msgs.ACTION_WRITE:
- actionList.Set(idy, *action)
+ actionsList.Set(idy, *action)
 case msgs.ACTION_READWRITE:
 readWrite := action.Readwrite()
- newAction := actionList.At(idy)
+ newAction := actionsList.At(idy)
 newAction.SetVarId(action.VarId())
 newAction.SetWrite()
 newWrite := newAction.Write()
@@ -515,7 +483,7 @@ func (br badReads) AddToSeg(seg *capn.Segment) msgs.Update_List {
 newWrite.SetReferences(readWrite.References())
 case msgs.ACTION_CREATE:
 create := action.Create()
- newAction := actionList.At(idy)
+ newAction := actionsList.At(idy)
 newAction.SetVarId(action.VarId())
 newAction.SetWrite()
 newWrite := newAction.Write()
@@ -523,7 +491,7 @@ func (br badReads) AddToSeg(seg *capn.Segment) msgs.Update_List {
 newWrite.SetReferences(create.References())
 case msgs.ACTION_ROLL:
 roll := action.Roll()
- newAction := actionList.At(idy)
+ newAction := actionsList.At(idy)
 newAction.SetVarId(action.VarId())
 newAction.SetWrite()
 newWrite := newAction.Write()
@@ -535,6 +503,7 @@ func (br badReads) AddToSeg(seg *capn.Segment) msgs.Update_List {
 }
 clock.SetVarIdMax(bra.vUUId, bra.clockElem)
 }
+ update.SetActions(server.SegToBytes(actionsListSeg))
 update.SetClock(clock.AsData())
 }
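Deflating a txn (keeping only var ids, with ACTION_MISSING bodies, so abort outcomes stay small) now lives on the reader itself instead of the package-local deflateTxn/isDeflated helpers deleted above. A hedged sketch of the new call-site shape, using only calls from this patch:

    // Sketch: choose the outcome payload, as determineOutcome now does.
    // AsDeflated memoizes on the reader, so repeated aborts don't re-serialize.
    if aborted {
        outcome.SetTxn(ba.txn.AsDeflated().Data) // actions reduced to id + MISSING
    } else {
        outcome.SetTxn(ba.txn.Data) // commits ship the full txn bytes
    }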
diff --git a/paxos/network.go b/paxos/network.go
index 98f93da..2df9fac 100644
--- a/paxos/network.go
+++ b/paxos/network.go
@@ -46,7 +46,7 @@ type Connection interface {
 type ClientConnection interface {
 Shutdownable
 ServerConnectionSubscriber
- SubmissionOutcomeReceived(common.RMId, *common.TxnId, *msgs.Outcome)
+ SubmissionOutcomeReceived(common.RMId, *eng.TxnReader, *msgs.Outcome)
 }
 type Shutdownable interface {
diff --git a/paxos/proposal.go b/paxos/proposal.go
index 1904193..63329d0 100644
--- a/paxos/proposal.go
+++ b/paxos/proposal.go
@@ -15,8 +15,7 @@ type proposal struct {
 acceptors []common.RMId
 activeRMIds map[common.RMId]uint32
 fInc int
- txn *msgs.Txn
- txnId *common.TxnId
+ txn *eng.TxnReader
 submitter common.RMId
 submitterBootCount uint32
 skipPhase1 bool
@@ -26,8 +25,9 @@ type proposal struct {
 finished bool
 }
-func NewProposal(pm *ProposerManager, txnId *common.TxnId, txn *msgs.Txn, fInc int, ballots []*eng.Ballot, instanceRMId common.RMId, acceptors []common.RMId, skipPhase1 bool) *proposal {
- allocs := txn.Allocations()
+func NewProposal(pm *ProposerManager, txn *eng.TxnReader, fInc int, ballots []*eng.Ballot, instanceRMId common.RMId, acceptors []common.RMId, skipPhase1 bool) *proposal {
+ txnCap := txn.Txn
+ allocs := txnCap.Allocations()
 activeRMIds := make(map[common.RMId]uint32, allocs.Len())
 for idx, l := 0, allocs.Len(); idx < l; idx++ {
 alloc := allocs.At(idx)
@@ -45,9 +45,8 @@ func NewProposal(pm *ProposerManager, txnId *common.TxnId, txn *msgs.Txn, fInc i
 activeRMIds: activeRMIds,
 fInc: fInc,
 txn: txn,
- txnId: txnId,
- submitter: common.RMId(txn.Submitter()),
- submitterBootCount: txn.SubmitterBootCount(),
+ submitter: common.RMId(txnCap.Submitter()),
+ submitterBootCount: txnCap.SubmitterBootCount(),
 skipPhase1: skipPhase1,
 instances: make(map[common.VarUUId]*proposalInstance, len(ballots)),
 pending: make([]*proposalInstance, 0, len(ballots)),
@@ -100,7 +99,8 @@ func (p *proposal) maybeSendOneA() {
 sender := newProposalSender(p, pendingPromises)
 oneACap := msgs.NewOneATxnVotes(seg)
 msg.SetOneATxnVotes(oneACap)
- oneACap.SetTxnId(p.txnId[:])
+ txnId := p.txn.Id
+ oneACap.SetTxnId(txnId[:])
 oneACap.SetRmId(uint32(p.instanceRMId))
 proposals := msgs.NewTxnVoteProposalList(seg, len(pendingPromises))
 oneACap.SetProposals(proposals)
@@ -109,7 +109,7 @@ func (p *proposal) maybeSendOneA() {
 pi.addOneAToProposal(&proposal, sender)
 }
 sender.msg = server.SegToBytes(seg)
- server.Log(p.txnId, "Adding sender for 1A")
+ server.Log(txnId, "Adding sender for 1A")
 p.proposerManager.AddServerConnectionSubscriber(sender)
 }
@@ -149,13 +149,12 @@ func (p *proposal) maybeSendTwoA() {
 deflate = pi.addTwoAToAcceptRequest(seg, &acceptRequest, sender) || deflate
 }
 if deflate {
- deflated := deflateTxn(p.txn, seg)
- twoACap.SetTxn(*deflated)
+ twoACap.SetTxn(p.txn.AsDeflated().Data)
 } else {
- twoACap.SetTxn(*p.txn)
+ twoACap.SetTxn(p.txn.Data)
 }
 sender.msg = server.SegToBytes(seg)
- server.Log(p.txnId, "Adding sender for 2A")
+ server.Log(p.txn.Id, "Adding sender for 2A")
 p.proposerManager.AddServerConnectionSubscriber(sender)
 }
@@ -179,12 +178,12 @@ func (p *proposal) FinishProposing() []common.RMId {
 for _, pi := range p.instances {
 if sender := pi.oneASender; sender != nil {
 pi.oneASender = nil
- server.Log(p.txnId, "finishing sender for 1A")
+ server.Log(p.txn.Id, "finishing sender for 1A")
 sender.finished()
 }
 if sender := pi.twoASender; sender != nil {
 pi.twoASender = nil
- server.Log(p.txnId, pi.ballot.VarUUId, "finishing sender for 2A")
+ server.Log(p.txn.Id, pi.ballot.VarUUId, "finishing sender for 2A")
 sender.finished()
 }
 }
@@ -192,7 +191,7 @@ func (p *proposal) FinishProposing() []common.RMId {
 }
 func (p *proposal) Status(sc *server.StatusConsumer) {
- sc.Emit(fmt.Sprintf("Proposal for %v-%v", p.txnId, p.instanceRMId))
+ sc.Emit(fmt.Sprintf("Proposal for %v-%v", p.txn.Id, p.instanceRMId))
 sc.Emit(fmt.Sprintf("- Acceptors: %v", p.acceptors))
 sc.Emit(fmt.Sprintf("- Instances: %v", len(p.instances)))
 sc.Emit(fmt.Sprintf("- Finished? %v", p.finished))
@@ -496,7 +495,7 @@ func (s *proposalSender) ConnectionLost(lost common.RMId, conns map[common.RMId]
 if s.proposal.finished {
 return
 }
- allocs := s.proposal.txn.Allocations()
+ allocs := s.proposal.txn.Txn.Allocations()
 for idx, l := 0, allocs.Len(); idx < l; idx++ {
 alloc := allocs.At(idx)
 rmId := common.RMId(alloc.RmId())
@@ -516,17 +515,17 @@ func (s *proposalSender) ConnectionLost(lost common.RMId, conns map[common.RMId]
 break
 }
 ballots := MakeAbortBallots(s.proposal.txn, &alloc)
- server.Log(s.proposal.txnId, "Trying to abort", rmId, "due to lost submitter", lost, "Found actions:", len(ballots))
+ server.Log(s.proposal.txn.Id, "Trying to abort", rmId, "due to lost submitter", lost, "Found actions:", len(ballots))
 s.proposal.abortInstances = append(s.proposal.abortInstances, rmId)
 s.proposal.proposerManager.NewPaxosProposals(
- s.txnId, s.txn, s.fInc, ballots, s.proposal.acceptors, rmId, false)
+ s.txn, s.fInc, ballots, s.proposal.acceptors, rmId, false)
 }
 }
 })
 return
 }
- alloc := AllocForRMId(s.proposal.txn, lost)
+ alloc := AllocForRMId(s.proposal.txn.Txn, lost)
 if alloc == nil || alloc.Active() == 0 {
 return
 }
@@ -540,10 +539,10 @@ func (s *proposalSender) ConnectionLost(lost common.RMId, conns map[common.RMId]
 }
 }
 ballots := MakeAbortBallots(s.proposal.txn, alloc)
- server.Log(s.proposal.txnId, "Trying to abort for", lost, "Found actions:", len(ballots))
+ server.Log(s.proposal.txn.Id, "Trying to abort for", lost, "Found actions:", len(ballots))
 s.proposal.abortInstances = append(s.proposal.abortInstances, lost)
 s.proposal.proposerManager.NewPaxosProposals(
- s.txnId, s.txn, s.fInc, ballots, s.proposal.acceptors, lost, false)
+ s.txn, s.fInc, ballots, s.proposal.acceptors, lost, false)
 })
 }
diff --git a/paxos/proposer.go b/paxos/proposer.go
index 5a10845..fa11b68 100644
--- a/paxos/proposer.go
+++ b/paxos/proposer.go
@@ -41,17 +41,18 @@ type Proposer struct {
 // we receive outcomes before the txn itself, we do not vote. So you
 // can be active, but not a voter.
-func NewProposer(pm *ProposerManager, txnId *common.TxnId, txnCap *msgs.Txn, mode ProposerMode, topology *configuration.Topology) *Proposer {
+func NewProposer(pm *ProposerManager, txn *eng.TxnReader, mode ProposerMode, topology *configuration.Topology) *Proposer {
+ txnCap := txn.Txn
 p := &Proposer{
 proposerManager: pm,
 mode: mode,
- txnId: txnId,
+ txnId: txn.Id,
 acceptors: GetAcceptorsFromTxn(txnCap),
 topology: topology,
 fInc: int(txnCap.FInc()),
 }
 if mode == ProposerActiveVoter {
- p.txn = eng.TxnFromCap(pm.Exe, pm.VarDispatcher, p, pm.RMId, txnCap)
+ p.txn = eng.TxnFromReader(pm.Exe, pm.VarDispatcher, p, pm.RMId, txn)
 }
 p.init()
 return p
@@ -204,8 +205,9 @@ func (pab *proposerAwaitBallots) init(proposer *Proposer) {
 func (pab *proposerAwaitBallots) start() {
 pab.txn.Start(true)
- pab.submitter = common.RMId(pab.txn.TxnCap.Submitter())
- pab.submitterBootCount = pab.txn.TxnCap.SubmitterBootCount()
+ txnCap := pab.txn.TxnReader.Txn
+ pab.submitter = common.RMId(txnCap.Submitter())
+ pab.submitterBootCount = txnCap.SubmitterBootCount()
 if pab.txn.Retry {
 pab.proposerManager.AddServerConnectionSubscriber(pab)
 }
@@ -216,11 +218,11 @@ func (pab *proposerAwaitBallots) String() string {
 return "proposerAwaitBallots"
 }
-func (pab *proposerAwaitBallots) TxnBallotsComplete(_ *eng.Txn, ballots ...*eng.Ballot) {
+func (pab *proposerAwaitBallots) TxnBallotsComplete(ballots ...*eng.Ballot) {
 if pab.currentState == pab {
 server.Log(pab.txnId, "TxnBallotsComplete callback. Acceptors:", pab.acceptors)
 if !pab.allAcceptorsAgreed {
- pab.proposerManager.NewPaxosProposals(pab.txnId, pab.txn.TxnCap, pab.fInc, ballots, pab.acceptors, pab.proposerManager.RMId, true)
+ pab.proposerManager.NewPaxosProposals(pab.txn.TxnReader, pab.fInc, ballots, pab.acceptors, pab.proposerManager.RMId, true)
 }
 pab.nextState()
@@ -239,10 +241,10 @@ func (pab *proposerAwaitBallots) TxnBallotsComplete(_ *eng.Txn, ballots ...*eng.
 func (pab *proposerAwaitBallots) Abort() {
 if pab.currentState == pab && !pab.allAcceptorsAgreed {
 server.Log(pab.txnId, "Proposer Aborting")
- txnCap := pab.txn.TxnCap
- alloc := AllocForRMId(txnCap, pab.proposerManager.RMId)
- ballots := MakeAbortBallots(txnCap, alloc)
- pab.TxnBallotsComplete(pab.txn, ballots...)
+ txn := pab.txn.TxnReader
+ alloc := AllocForRMId(txn.Txn, pab.proposerManager.RMId)
+ ballots := MakeAbortBallots(txn, alloc)
+ pab.TxnBallotsComplete(ballots...)
 }
 }
@@ -364,9 +366,9 @@ func (palc *proposerAwaitLocallyComplete) start() {
 if palc.txn == nil && palc.outcome.Which() == msgs.OUTCOME_COMMIT {
 // We are a learner (either active or passive), and the result
 // has turned out to be a commit.
- txnCap := palc.outcome.Txn()
+ txn := eng.TxnReaderFromData(palc.outcome.Txn())
 pm := palc.proposerManager
- palc.txn = eng.TxnFromCap(pm.Exe, pm.VarDispatcher, palc.Proposer, pm.RMId, &txnCap)
+ palc.txn = eng.TxnFromReader(pm.Exe, pm.VarDispatcher, palc.Proposer, pm.RMId, txn)
 palc.txn.Start(false)
 }
 if palc.txn == nil {
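Note that AllocForRMId and GetAcceptorsFromTxn now take msgs.Txn by value rather than by pointer: a capnp struct is only a handle into its segment, so the copy is cheap and callers no longer need to take addresses of temporaries. A hedged sketch of the proposer's abort path under that convention, names as in the diff:

    // Abort-path sketch: derive our allocation from the capnp handle,
    // then build abort ballots from the reader's decoded actions.
    txn := pab.txn.TxnReader
    alloc := AllocForRMId(txn.Txn, pab.proposerManager.RMId) // msgs.Txn by value
    ballots := MakeAbortBallots(txn, alloc)                  // *eng.TxnReader in
    pab.TxnBallotsComplete(ballots...)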
diff --git a/paxos/proposerdispatcher.go b/paxos/proposerdispatcher.go
index 1c4912b..daf2439 100644
--- a/paxos/proposerdispatcher.go
+++ b/paxos/proposerdispatcher.go
@@ -30,9 +30,9 @@ func NewProposerDispatcher(count uint8, rmId common.RMId, cm ConnectionManager,
 return pd
 }
-func (pd *ProposerDispatcher) TxnReceived(sender common.RMId, txn *msgs.Txn) {
- txnId := common.MakeTxnId(txn.Id())
- pd.withProposerManager(txnId, func(pm *ProposerManager) { pm.TxnReceived(sender, txnId, txn) })
+func (pd *ProposerDispatcher) TxnReceived(sender common.RMId, txn *eng.TxnReader) {
+ txnId := txn.Id
+ pd.withProposerManager(txnId, func(pm *ProposerManager) { pm.TxnReceived(sender, txn) })
 }
 func (pd *ProposerDispatcher) OneBTxnVotesReceived(sender common.RMId, oneBTxnVotes *msgs.OneBTxnVotes) {
@@ -42,15 +42,17 @@ func (pd *ProposerDispatcher) OneBTxnVotesReceived(sender common.RMId, oneBTxnVo
 func (pd *ProposerDispatcher) TwoBTxnVotesReceived(sender common.RMId, twoBTxnVotes *msgs.TwoBTxnVotes) {
 var txnId *common.TxnId
+ var txn *eng.TxnReader
 switch twoBTxnVotes.Which() {
 case msgs.TWOBTXNVOTES_FAILURES:
 txnId = common.MakeTxnId(twoBTxnVotes.Failures().TxnId())
 case msgs.TWOBTXNVOTES_OUTCOME:
- txnId = common.MakeTxnId(twoBTxnVotes.Outcome().Txn().Id())
+ txn = eng.TxnReaderFromData(twoBTxnVotes.Outcome().Txn())
+ txnId = txn.Id
 default:
 panic(fmt.Sprintf("Unexpected 2BVotes type: %v", twoBTxnVotes.Which()))
 }
- pd.withProposerManager(txnId, func(pm *ProposerManager) { pm.TwoBTxnVotesReceived(sender, txnId, twoBTxnVotes) })
+ pd.withProposerManager(txnId, func(pm *ProposerManager) { pm.TwoBTxnVotesReceived(sender, txnId, txn, twoBTxnVotes) })
 }
 func (pd *ProposerDispatcher) TxnGloballyCompleteReceived(sender common.RMId, tgc *msgs.TxnGloballyComplete) {
@@ -68,10 +70,10 @@ func (pd *ProposerDispatcher) ImmigrationReceived(migration *msgs.Migration, sta
 elemsCount := elemsList.Len()
 for idx := 0; idx < elemsCount; idx++ {
 elem := elemsList.At(idx)
- txnCap := elem.Txn()
- txnId := common.MakeTxnId(txnCap.Id())
+ txn := eng.TxnReaderFromData(elem.Txn())
+ txnId := txn.Id
 varCaps := elem.Vars()
- pd.withProposerManager(txnId, func(pm *ProposerManager) { pm.ImmigrationReceived(txnId, &txnCap, &varCaps, stateChange) })
+ pd.withProposerManager(txnId, func(pm *ProposerManager) { pm.ImmigrationReceived(txn, &varCaps, stateChange) })
 }
 }
diff --git a/paxos/proposermanager.go b/paxos/proposermanager.go
index f552dd4..75fe1a2 100644
--- a/paxos/proposermanager.go
+++ b/paxos/proposermanager.go
@@ -91,16 +91,18 @@ func (pm *ProposerManager) TopologyChanged(topology *configuration.Topology, don
 }
 }
-func (pm *ProposerManager) ImmigrationReceived(txnId *common.TxnId, txnCap *msgs.Txn, varCaps *msgs.Var_List, stateChange eng.TxnLocalStateChange) {
- eng.ImmigrationTxnFromCap(pm.Exe, pm.VarDispatcher, stateChange, pm.RMId, txnCap, varCaps)
+func (pm *ProposerManager) ImmigrationReceived(txn *eng.TxnReader, varCaps *msgs.Var_List, stateChange eng.TxnLocalStateChange) {
+ eng.ImmigrationTxnFromCap(pm.Exe, pm.VarDispatcher, stateChange, pm.RMId, txn, varCaps)
 }
-func (pm *ProposerManager) TxnReceived(sender common.RMId, txnId *common.TxnId, txnCap *msgs.Txn) {
+func (pm *ProposerManager) TxnReceived(sender common.RMId, txn *eng.TxnReader) {
 // Due to failures, we can actually receive outcomes (2Bs) first,
 // before we get the txn to vote on it - due to failures, other
- // proposers will have created abort proposals, and consensus may
- // have already been reached. If this is the case, it is correct to
- // ignore this message.
+ // proposers will have created abort proposals on our behalf, and
+ // consensus may have already been reached. If this is the case, it
+ // is correct to ignore this message.
+ txnId := txn.Id
+ txnCap := txn.Txn
 if _, found := pm.proposers[*txnId]; !found {
 server.Log(txnId, "Received")
 accept := true
@@ -109,7 +111,7 @@ func (pm *ProposerManager) TxnReceived(sender common.RMId, txnId *common.TxnId,
 // Could also do pm.topology.BarrierReached1(sender), but
 // would need to specialise that to rolls rather than
 // topology txns, and it's enforced on the sending side
- // anyway. One the sender has received the next topology,
+ // anyway. Once the sender has received the next topology,
 // it'll do the right thing and locally block until it's
 // in barrier1.
 (pm.topology.Next() != nil && pm.topology.Next().Version == txnCap.TopologyVersion())
@@ -119,7 +121,7 @@ func (pm *ProposerManager) TxnReceived(sender common.RMId, txnId *common.TxnId,
 }
 }
 if accept {
- proposer := NewProposer(pm, txnId, txnCap, ProposerActiveVoter, pm.topology)
+ proposer := NewProposer(pm, txn, ProposerActiveVoter, pm.topology)
 pm.proposers[*txnId] = proposer
 proposer.Start()
@@ -128,26 +130,27 @@ func (pm *ProposerManager) TxnReceived(sender common.RMId, txnId *common.TxnId,
 acceptors := GetAcceptorsFromTxn(txnCap)
 fInc := int(txnCap.FInc())
 alloc := AllocForRMId(txnCap, pm.RMId)
- ballots := MakeAbortBallots(txnCap, alloc)
- pm.NewPaxosProposals(txnId, txnCap, fInc, ballots, acceptors, pm.RMId, true)
+ ballots := MakeAbortBallots(txn, alloc)
+ pm.NewPaxosProposals(txn, fInc, ballots, acceptors, pm.RMId, true)
 // ActiveLearner is right - we don't want the proposer to
 // vote, but it should exist to collect the 2Bs that should
 // come back.
- proposer := NewProposer(pm, txnId, txnCap, ProposerActiveLearner, pm.topology)
+ proposer := NewProposer(pm, txn, ProposerActiveLearner, pm.topology)
 pm.proposers[*txnId] = proposer
 proposer.Start()
 }
 }
 }
-func (pm *ProposerManager) NewPaxosProposals(txnId *common.TxnId, txn *msgs.Txn, fInc int, ballots []*eng.Ballot, acceptors []common.RMId, rmId common.RMId, skipPhase1 bool) {
+func (pm *ProposerManager) NewPaxosProposals(txn *eng.TxnReader, fInc int, ballots []*eng.Ballot, acceptors []common.RMId, rmId common.RMId, skipPhase1 bool) {
 instId := instanceIdPrefix([instanceIdPrefixLen]byte{})
 instIdSlice := instId[:]
+ txnId := txn.Id
 copy(instIdSlice, txnId[:])
 binary.BigEndian.PutUint32(instIdSlice[common.KeyLen:], uint32(rmId))
 if _, found := pm.proposals[instId]; !found {
 server.Log(txnId, "NewPaxos; acceptors:", acceptors, "; instance:", rmId)
- prop := NewProposal(pm, txnId, txn, fInc, ballots, rmId, acceptors, skipPhase1)
+ prop := NewProposal(pm, txn, fInc, ballots, rmId, acceptors, skipPhase1)
 pm.proposals[instId] = prop
 prop.Start()
 }
@@ -182,7 +185,7 @@ func (pm *ProposerManager) OneBTxnVotesReceived(sender common.RMId, txnId *commo
 }
 // from network
-func (pm *ProposerManager) TwoBTxnVotesReceived(sender common.RMId, txnId *common.TxnId, twoBTxnVotes *msgs.TwoBTxnVotes) {
+func (pm *ProposerManager) TwoBTxnVotesReceived(sender common.RMId, txnId *common.TxnId, txn *eng.TxnReader, twoBTxnVotes *msgs.TwoBTxnVotes) {
 instId := instanceIdPrefix([instanceIdPrefixLen]byte{})
 instIdSlice := instId[:]
 copy(instIdSlice, txnId[:])
@@ -206,9 +209,9 @@ func (pm *ProposerManager) TwoBTxnVotesReceived(sender common.RMId, txnId *commo
 return
 }
- txnCap := outcome.Txn()
+ txnCap := txn.Txn
- alloc := AllocForRMId(&txnCap, pm.RMId)
+ alloc := AllocForRMId(txnCap, pm.RMId)
 if alloc.Active() != 0 {
 // We have no record of this, but we were active - we must
@@ -227,13 +230,13 @@ func (pm *ProposerManager) TwoBTxnVotesReceived(sender common.RMId, txnId *commo
 // do is to start a proposal for our own vars. The proposal
 // itself will detect any further absences and take care of
 // them.
- acceptors := GetAcceptorsFromTxn(&txnCap)
+ acceptors := GetAcceptorsFromTxn(txnCap)
 server.Log(txnId, "Starting abort proposals with acceptors", acceptors)
 fInc := int(txnCap.FInc())
- ballots := MakeAbortBallots(&txnCap, alloc)
- pm.NewPaxosProposals(txnId, &txnCap, fInc, ballots, acceptors, pm.RMId, false)
+ ballots := MakeAbortBallots(txn, alloc)
+ pm.NewPaxosProposals(txn, fInc, ballots, acceptors, pm.RMId, false)
- proposer := NewProposer(pm, txnId, &txnCap, ProposerActiveLearner, pm.topology)
+ proposer := NewProposer(pm, txn, ProposerActiveLearner, pm.topology)
 pm.proposers[*txnId] = proposer
 proposer.Start()
 proposer.BallotOutcomeReceived(sender, &outcome)
@@ -242,7 +245,7 @@ func (pm *ProposerManager) TwoBTxnVotesReceived(sender common.RMId, txnId *commo
 if outcome.Which() == msgs.OUTCOME_COMMIT {
 server.Log(txnId, "2B outcome received from", sender, "(unknown learner)")
 // we must be a learner.
- proposer := NewProposer(pm, txnId, &txnCap, ProposerPassiveLearner, pm.topology)
+ proposer := NewProposer(pm, txn, ProposerPassiveLearner, pm.topology)
 pm.proposers[*txnId] = proposer
 proposer.Start()
 proposer.BallotOutcomeReceived(sender, &outcome)
@@ -322,7 +325,7 @@ func (pm *ProposerManager) Status(sc *server.StatusConsumer) {
 sc.Join()
 }
-func GetAcceptorsFromTxn(txnCap *msgs.Txn) common.RMIds {
+func GetAcceptorsFromTxn(txnCap msgs.Txn) common.RMIds {
 fInc := int(txnCap.FInc())
 twoFInc := fInc + fInc - 1
 acceptors := make([]common.RMId, twoFInc)
@@ -363,7 +366,7 @@ func MakeTxnSubmissionAbortMsg(txnId *common.TxnId) []byte {
 return server.SegToBytes(seg)
 }
-func AllocForRMId(txn *msgs.Txn, rmId common.RMId) *msgs.Allocation {
+func AllocForRMId(txn msgs.Txn, rmId common.RMId) *msgs.Allocation {
 allocs := txn.Allocations()
 for idx, l := 0, allocs.Len(); idx < l; idx++ {
 alloc := allocs.At(idx)
@@ -374,8 +377,8 @@ func AllocForRMId(txn *msgs.Txn, rmId common.RMId) *msgs.Allocation {
 return nil
 }
-func MakeAbortBallots(txn *msgs.Txn, alloc *msgs.Allocation) []*eng.Ballot {
- actions := txn.Actions()
+func MakeAbortBallots(txn *eng.TxnReader, alloc *msgs.Allocation) []*eng.Ballot {
+ actions := txn.Actions(true).Actions()
 actionIndices := alloc.ActionIndices()
 ballots := make([]*eng.Ballot, actionIndices.Len())
 for idx, l := 0, actionIndices.Len(); idx < l; idx++ {
diff --git a/txnengine/ballot.go b/txnengine/ballot.go
index 1f72544..c26d408 100644
--- a/txnengine/ballot.go
+++ b/txnengine/ballot.go
@@ -82,7 +82,7 @@ func (ballot *BallotBuilder) buildSeg() (*capn.Segment, msgs.Ballot) {
 return seg, ballotCap
 }
-func (ballot *BallotBuilder) CreateBadReadBallot(txnId *common.TxnId, actions *msgs.Action_List) *Ballot {
+func (ballot *BallotBuilder) CreateBadReadBallot(txnId *common.TxnId, actions *TxnActions) *Ballot {
 ballot.Vote = AbortBadRead
 seg, ballotCap := ballot.buildSeg()
@@ -91,7 +91,7 @@ func (ballot *BallotBuilder) CreateBadReadBallot(txnId *common.TxnId, actions *m
 voteCap.SetAbortBadRead()
 badReadCap := voteCap.AbortBadRead()
 badReadCap.SetTxnId(txnId[:])
- badReadCap.SetTxnActions(*actions)
+ badReadCap.SetTxnActions(actions.Data)
 ballotCap.SetVote(voteCap)
 ballot.Data = server.SegToBytes(seg)
 return ballot.Ballot
diff --git a/txnengine/frame.go b/txnengine/frame.go
index ab7ba86..1e1efb4 100644
--- a/txnengine/frame.go
+++ b/txnengine/frame.go
@@ -21,7 +21,7 @@ type frame struct {
 child *frame
 v *Var
 frameTxnId *common.TxnId
- frameTxnActions *msgs.Action_List
+ frameTxnActions *TxnActions
 frameTxnClock *VectorClockMutable // the clock (including merge missing) of the frame txn
 frameWritesClock *VectorClockMutable // max elems from all writes of all txns in parent frame
 readVoteClock *VectorClockMutable
@@ -33,7 +33,7 @@ type frame struct {
 currentState frameStateMachineComponent
 }
-func NewFrame(parent *frame, v *Var, txnId *common.TxnId, txnActions *msgs.Action_List, txnClock, writesClock *VectorClockMutable) *frame {
+func NewFrame(parent *frame, v *Var, txnId *common.TxnId, txnActions *TxnActions, txnClock, writesClock *VectorClockMutable) *frame {
 f := &frame{
 parent: parent,
 v: v,
@@ -693,7 +693,7 @@ func (fo *frameOpen) maybeCreateChild() {
 func (fo *frameOpen) maybeScheduleRoll() {
 // do not check vm.RollAllowed here.
 if !fo.rollScheduled && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil &&
- (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) {
+ (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Actions().Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) {
 fo.rollScheduled = true
 fo.v.vm.ScheduleCallback(func() {
 fo.v.applyToVar(func() {
@@ -706,12 +706,12 @@ func (fo *frameOpen) maybeScheduleRoll() {
 func (fo *frameOpen) maybeStartRoll() {
 if fo.v.vm.RollAllowed && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil &&
- (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) {
+ (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Actions().Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) {
 fo.rollActive = true
 go func() {
 server.Log(fo.frame, "Starting roll")
 ctxn, varPosMap := fo.createRollClientTxn()
- outcome, err := fo.v.vm.RunClientTransaction(ctxn, varPosMap, true)
+ _, outcome, err := fo.v.vm.RunClientTransaction(ctxn, varPosMap)
 ow := ""
 if outcome != nil {
 ow = fmt.Sprint(outcome.Which())
@@ -743,8 +743,9 @@ func (fo *frameOpen) createRollClientTxn() (*cmsgs.ClientTxn, map[common.VarUUId
 }
 var origWrite *msgs.Action
 vUUIdBytes := fo.v.UUId[:]
- for idx, l := 0, fo.frameTxnActions.Len(); idx < l; idx++ {
- action := fo.frameTxnActions.At(idx)
+ txnActions := fo.frameTxnActions.Actions()
+ for idx, l := 0, txnActions.Len(); idx < l; idx++ {
+ action := txnActions.At(idx)
 if bytes.Equal(action.VarId(), vUUIdBytes) {
 origWrite = &action
 break
diff --git a/txnengine/transaction.go b/txnengine/transaction.go
index d88f556..0385fe5 100644
--- a/txnengine/transaction.go
+++ b/txnengine/transaction.go
@@ -7,14 +7,12 @@ import (
 "goshawkdb.io/common"
 "goshawkdb.io/server"
 msgs "goshawkdb.io/server/capnp"
- "goshawkdb.io/server/db"
 "goshawkdb.io/server/dispatcher"
- "sync"
 "sync/atomic"
 )
 type TxnLocalStateChange interface {
- TxnBallotsComplete(*Txn, ...*Ballot)
+ TxnBallotsComplete(...*Ballot)
 TxnLocallyComplete(*Txn)
 TxnFinished(*Txn)
 }
@@ -25,14 +23,10 @@ type Txn struct {
 writes []*common.VarUUId
 localActions []localAction
 voter bool
- TxnCap *msgs.Txn
- txnRootBytes struct {
- sync.RWMutex
- bites []byte
- }
- exe *dispatcher.Executor
- vd *VarDispatcher
- stateChange TxnLocalStateChange
+ TxnReader *TxnReader
+ exe *dispatcher.Executor
+ vd *VarDispatcher
+ stateChange TxnLocalStateChange
 txnDetermineLocalBallots
 txnAwaitLocalBallots
 txnReceiveOutcome
@@ -60,7 +54,7 @@ type localAction struct {
 ballot *Ballot
 frame *frame
 readVsn *common.TxnId
- writeTxnActions *msgs.Action_List
+ writeTxnActions *TxnActions
 writeAction *msgs.Action
 createPositions *common.Positions
 roll bool
@@ -91,7 +85,7 @@ func (action *localAction) VoteDeadlock(clock *VectorClockMutable) {
 }
 }
-func (action *localAction) VoteBadRead(clock *VectorClockMutable, txnId *common.TxnId, actions *msgs.Action_List) {
+func (action *localAction) VoteBadRead(clock *VectorClockMutable, txnId *common.TxnId, actions *TxnActions) {
 if action.ballot == nil {
 action.ballot = NewBallotBuilder(action.vUUId, AbortBadRead, clock).CreateBadReadBallot(txnId, actions)
 action.voteCast(action.ballot, true)
@@ -147,9 +141,9 @@ func (action localAction) String() string {
 return fmt.Sprintf("Action from %v for %v: create:%v|read:%v|write:%v|roll:%v%s%s%s", action.Id, action.vUUId, isCreate, action.readVsn, isWrite, action.roll, f, b, i)
 }
-func ImmigrationTxnFromCap(exe *dispatcher.Executor, vd *VarDispatcher, stateChange TxnLocalStateChange, ourRMId common.RMId, txnCap *msgs.Txn, varCaps *msgs.Var_List) {
- txn := TxnFromCap(exe, vd, stateChange, ourRMId, txnCap)
- txnActions := txnCap.Actions()
+func ImmigrationTxnFromCap(exe *dispatcher.Executor, vd *VarDispatcher, stateChange TxnLocalStateChange, ourRMId common.RMId, reader *TxnReader, varCaps *msgs.Var_List) {
+ txn := TxnFromReader(exe, vd, stateChange, ourRMId, reader)
+ txnActions := reader.Actions(true)
 txn.localActions = make([]localAction, varCaps.Len())
 actionsMap := make(map[common.VarUUId]*localAction)
 for idx, l := 0, varCaps.Len(); idx < l; idx++ {
@@ -157,7 +151,7 @@ func ImmigrationTxnFromCap(exe *dispatcher.Executor, vd *VarDispatcher, stateCha
 action := &txn.localActions[idx]
 action.Txn = txn
 action.vUUId = common.MakeVarUUId(varCap.Id())
- action.writeTxnActions = &txnActions
+ action.writeTxnActions = txnActions
 positions := varCap.Positions()
 action.createPositions = (*common.Positions)(&positions)
 action.outcomeClock = VectorClockFromData(varCap.WriteTxnClock(), false)
@@ -165,8 +159,10 @@ func ImmigrationTxnFromCap(exe *dispatcher.Executor, vd *VarDispatcher, stateCha
 actionsMap[*action.vUUId] = action
 }
- for idx, l := 0, txnActions.Len(); idx < l; idx++ {
- actionCap := txnActions.At(idx)
+ txnActionsList := txnActions.Actions()
+
+ for idx, l := 0, txnActionsList.Len(); idx < l; idx++ {
+ actionCap := txnActionsList.At(idx)
 vUUId := common.MakeVarUUId(actionCap.VarId())
 if action, found := actionsMap[*vUUId]; found {
 action.writeAction = &actionCap
@@ -188,14 +184,16 @@ func ImmigrationTxnFromCap(exe *dispatcher.Executor, vd *VarDispatcher, stateCha
 }
 }
-func TxnFromCap(exe *dispatcher.Executor, vd *VarDispatcher, stateChange TxnLocalStateChange, ourRMId common.RMId, txnCap *msgs.Txn) *Txn {
- txnId := common.MakeTxnId(txnCap.Id())
- actions := txnCap.Actions()
+func TxnFromReader(exe *dispatcher.Executor, vd *VarDispatcher, stateChange TxnLocalStateChange, ourRMId common.RMId, reader *TxnReader) *Txn {
+ txnId := reader.Id
+ actions := reader.Actions(true)
+ actionsList := actions.Actions()
+ txnCap := reader.Txn
 txn := &Txn{
 Id: txnId,
 Retry: txnCap.Retry(),
- writes: make([]*common.VarUUId, 0, actions.Len()),
- TxnCap: txnCap,
+ writes: make([]*common.VarUUId, 0, actionsList.Len()),
+ TxnReader: reader,
 exe: exe,
 vd: vd,
 stateChange: stateChange,
@@ -206,7 +204,7 @@ func TxnFromReader(exe *dispatcher.Executor, vd *VarDispatcher, stateChange TxnL
 alloc := allocations.At(idx)
 rmId := common.RMId(alloc.RmId())
 if ourRMId == rmId {
- txn.populate(alloc.ActionIndices(), actions)
+ txn.populate(alloc.ActionIndices(), actionsList, actions)
 break
 }
 }
@@ -214,7 +212,7 @@ func TxnFromReader(exe *dispatcher.Executor, vd *VarDispatcher, stateChange TxnL
 return txn
 }
-func (txn *Txn) populate(actionIndices capn.UInt16List, actions msgs.Action_List) {
+func (txn *Txn) populate(actionIndices capn.UInt16List, actionsList *msgs.Action_List, actions *TxnActions) {
 localActions := make([]localAction, actionIndices.Len())
 txn.localActions = localActions
 var action *localAction
@@ -226,8 +224,8 @@ func (txn *Txn) populate(actionIndices capn.UInt16List, actionsList *msgs.Action
 action = &localActions[actionIndicesIdx]
 }
- for idx, l := 0, actions.Len(); idx < l; idx++ {
- actionCap := actions.At(idx)
+ for idx, l := 0, actionsList.Len(); idx < l; idx++ {
+ actionCap := actionsList.At(idx)
 if idx == actionIndex {
 action.Txn = txn
@@ -244,7 +242,7 @@ func (txn *Txn) populate(actionIndices capn.UInt16List, actionsList *msgs.Action
 case msgs.ACTION_WRITE:
 if idx == actionIndex {
- action.writeTxnActions = &actions
+ action.writeTxnActions = actions
 action.writeAction = &actionCap
 txn.writes = append(txn.writes, action.vUUId)
 } else {
@@ -256,7 +254,7 @@ func (txn *Txn) populate(actionIndices capn.UInt16List, actionsList *msgs.Action
 readWriteCap := actionCap.Readwrite()
 readVsn := common.MakeTxnId(readWriteCap.Version())
 action.readVsn = readVsn
- action.writeTxnActions = &actions
+ action.writeTxnActions = actions
 action.writeAction = &actionCap
 txn.writes = append(txn.writes, action.vUUId)
 } else {
@@ -267,7 +265,7 @@ func (txn *Txn) populate(actionIndices capn.UInt16List, actionsList *msgs.Action
 if idx == actionIndex {
 createCap := actionCap.Create()
 positions := common.Positions(createCap.Positions())
- action.writeTxnActions = &actions
+ action.writeTxnActions = actions
 action.writeAction = &actionCap
 action.createPositions = &positions
 txn.writes = append(txn.writes, action.vUUId)
@@ -280,7 +278,7 @@ func (txn *Txn) populate(actionIndices capn.UInt16List, actionsList *msgs.Action
 rollCap := actionCap.Roll()
 readVsn := common.MakeTxnId(rollCap.Version())
 action.readVsn = readVsn
- action.writeTxnActions = &actions
+ action.writeTxnActions = actions
 action.writeAction = &actionCap
 action.roll = true
 txn.writes = append(txn.writes, action.vUUId)
@@ -305,22 +303,6 @@ func (txn *Txn) populate(actionIndices capn.UInt16List, actionsList *msgs.Action
 }
 }
-func (txn *Txn) TxnRootBytes() []byte {
- trb := &txn.txnRootBytes
- trb.RLock()
- bites := trb.bites
- trb.RUnlock()
- if bites == nil {
- trb.Lock()
- if trb.bites == nil {
- trb.bites = db.TxnToRootBytes(txn.TxnCap)
- }
- bites = trb.bites
- trb.Unlock()
- }
- return bites
-}
-
 func (txn *Txn) Start(voter bool) {
 txn.voter = voter
 if voter {
@@ -486,7 +468,7 @@ func (talb *txnAwaitLocalBallots) allTxnBallotsComplete() {
 action := &talb.localActions[idx]
 ballots[idx] = action.ballot
 }
- talb.stateChange.TxnBallotsComplete(talb.Txn, ballots...)
+ talb.stateChange.TxnBallotsComplete(ballots...)
 } else {
 panic(fmt.Sprintf("%v error: Ballots completed with txn in wrong state: %v\n", talb.Id, talb.currentState))
 }
@@ -499,7 +481,7 @@ func (talb *txnAwaitLocalBallots) retryTxnBallotComplete(ballot *Ballot) {
 // Up until we actually receive the outcome, we should pass on all
 // of these to the proposer.
if talb.currentState == &talb.txnReceiveOutcome { - talb.stateChange.TxnBallotsComplete(talb.Txn, ballot) + talb.stateChange.TxnBallotsComplete(ballot) } } diff --git a/txnengine/utils.go b/txnengine/utils.go new file mode 100644 index 0000000..9f36471 --- /dev/null +++ b/txnengine/utils.go @@ -0,0 +1,137 @@ +package txnengine + +import ( + "fmt" + capn "github.com/glycerine/go-capnproto" + "goshawkdb.io/common" + "goshawkdb.io/server" + msgs "goshawkdb.io/server/capnp" +) + +type TxnReader struct { + Id *common.TxnId + actions *TxnActions + Data []byte + Txn msgs.Txn + deflated *TxnReader +} + +func TxnReaderFromData(data []byte) *TxnReader { + // always force decode + seg, _, err := capn.ReadFromMemoryZeroCopy(data) + if err != nil { + panic(fmt.Sprintf("Error when decoding transaction: %v", err)) + } + txnCap := msgs.ReadRootTxn(seg) + txnId := common.MakeTxnId(txnCap.Id()) + return &TxnReader{ + Data: data, + Txn: txnCap, + Id: txnId, + } +} + +func (tr *TxnReader) Actions(forceDecode bool) *TxnActions { + if tr.actions == nil { + tr.actions = TxnActionsFromData(tr.Txn.Actions(), forceDecode) + } else if forceDecode { + tr.actions.decode() + } + return tr.actions +} + +func (tr *TxnReader) HasDeflated() bool { + return tr.deflated != nil || tr.Actions(true).deflated +} + +func (tr *TxnReader) IsDeflated() bool { + return tr.Actions(true).deflated +} + +func (tr *TxnReader) AsDeflated() *TxnReader { + if tr.deflated == nil { + if tr.IsDeflated() { + tr.deflated = tr + } + + actions := tr.actions.AsDeflated() + cap := tr.Txn + seg := capn.NewBuffer(nil) + root := msgs.NewRootTxn(seg) + root.SetId(cap.Id()) + root.SetSubmitter(cap.Submitter()) + root.SetSubmitterBootCount(cap.SubmitterBootCount()) + root.SetRetry(cap.Retry()) + root.SetActions(actions.Data) + root.SetAllocations(cap.Allocations()) + root.SetFInc(cap.FInc()) + root.SetTopologyVersion(cap.TopologyVersion()) + + tr.deflated = &TxnReader{ + Id: tr.Id, + actions: actions, + Data: server.SegToBytes(seg), + Txn: root, + } + } + return tr.deflated +} + +type TxnActions struct { + Data []byte + deflated bool + decoded bool + actionsCap msgs.Action_List +} + +func TxnActionsFromData(data []byte, forceDecode bool) *TxnActions { + actions := &TxnActions{Data: data} + if forceDecode { + actions.decode() + } + return actions +} + +func (actions *TxnActions) decode() { + if actions.decoded { + return + } + actions.decoded = true + seg, _, err := capn.ReadFromMemoryZeroCopy(actions.Data) + if err != nil { + panic(fmt.Sprintf("Error when decoding actions: %v", err)) + } + actions.actionsCap = msgs.ReadRootActionListWrapper(seg).Actions() + actions.deflated = actions.actionsCap.Len() == 0 || actions.actionsCap.At(0).Which() == msgs.ACTION_MISSING +} + +func (actions *TxnActions) Actions() *msgs.Action_List { + actions.decode() + return &actions.actionsCap +} + +func (actions *TxnActions) AsDeflated() *TxnActions { + actions.decode() + if actions.deflated { + return actions + } + + cap := &actions.actionsCap + seg := capn.NewBuffer(nil) + root := msgs.NewRootActionListWrapper(seg) + l := cap.Len() + list := msgs.NewActionList(seg, l) + root.SetActions(list) + for idx := 0; idx < l; idx++ { + newAction := list.At(idx) + newAction.SetVarId(cap.At(idx).VarId()) + newAction.SetMissing() + } + + return &TxnActions{ + Data: server.SegToBytes(seg), + deflated: true, + decoded: true, + actionsCap: list, + } +} diff --git a/txnengine/var.go b/txnengine/var.go index 540eeb8..da6f922 100644 --- a/txnengine/var.go +++ b/txnengine/var.go @@ -53,17 
+53,11 @@ func VarFromData(data []byte, exe *dispatcher.Executor, db *db.Databases, vm *Va if result, err := db.ReadonlyTransaction(func(rtxn *mdbs.RTxn) interface{} { return db.ReadTxnBytesFromDisk(rtxn, writeTxnId) }).ResultError(); err == nil && result != nil { - bites := result.([]byte) - if seg, _, err := capn.ReadFromMemoryZeroCopy(bites); err == nil { - txn := msgs.ReadRootTxn(seg) - actions := txn.Actions() - v.curFrame = NewFrame(nil, v, writeTxnId, &actions, writeTxnClock, writesClock) - v.curFrameOnDisk = v.curFrame - v.varCap = &varCap - return v, nil - } else { - return nil, err - } + txn := TxnReaderFromData(result.([]byte)) + v.curFrame = NewFrame(nil, v, writeTxnId, txn.Actions(false), writeTxnClock, writesClock) + v.curFrameOnDisk = v.curFrame + v.varCap = &varCap + return v, nil } else { return nil, err } @@ -254,7 +248,7 @@ func (v *Var) maybeWriteFrame(f *frame, action *localAction, positions *common.P varCap.SetWritesClock(f.frameWritesClock.AsData()) varData := server.SegToBytes(varSeg) - txnBytes := action.TxnRootBytes() + txnBytes := action.TxnReader.Data // to ensure correct order of writes, schedule the write from // the current go-routine... diff --git a/txnengine/vardispatcher.go b/txnengine/vardispatcher.go index 94b3fbb..72cfdb9 100644 --- a/txnengine/vardispatcher.go +++ b/txnengine/vardispatcher.go @@ -71,6 +71,6 @@ func (vd *VarDispatcher) withVarManager(vUUId *common.VarUUId, fun func(*VarMana } type LocalConnection interface { - RunClientTransaction(txn *cmsgs.ClientTxn, varPosMap map[common.VarUUId]*common.Positions, assignTxnId bool) (*msgs.Outcome, error) + RunClientTransaction(txn *cmsgs.ClientTxn, varPosMap map[common.VarUUId]*common.Positions) (*TxnReader, *msgs.Outcome, error) Status(*server.StatusConsumer) } diff --git a/txnengine/varmanager.go b/txnengine/varmanager.go index 4d1f476..e8ed891 100644 --- a/txnengine/varmanager.go +++ b/txnengine/varmanager.go @@ -21,7 +21,6 @@ type VarManager struct { active map[common.VarUUId]*Var RollAllowed bool onDisk func(bool) - lc LocalConnection callbacks []func() beaterLive bool exe *dispatcher.Executor @@ -200,7 +199,7 @@ func (vm *VarManager) ScheduleCallback(fun func()) { func (vm *VarManager) beat(terminate chan struct{}) { if len(vm.callbacks) != 0 { callbacks := vm.callbacks - vm.callbacks = make([]func(), 0, len(callbacks)) + vm.callbacks = make([]func(), 0, 1+(len(callbacks)/2)) for _, fun := range callbacks { fun() } From 31bb0bde06aaa0f7aaf5686e6a948abd7b525b1d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 12 Aug 2016 23:18:26 +0100 Subject: [PATCH 31/78] WIP. This is sorta close to being good. Needs more work. Ref T42. 
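In outline, what this patch is heading towards: rather than scheduling a roll after a fixed idle timeout, each var now records the arrival times of recent reads and writes and models them as a Poisson process. A roll is only started once the estimated probability of the var staying quiet for the expected roll duration clears a threshold; otherwise the check is rescheduled on a growing interval. A minimal, runnable sketch of just that gating rule (the names and numbers are illustrative, not this patch's code):

package main

import (
	"fmt"
	"math"
	"time"
)

// probQuiet is P(k=0) for a Poisson process with rate lambda (events
// per nanosecond) over the window t, i.e. e^(-lambda*t).
func probQuiet(lambda float64, t time.Duration) float64 {
	lt := lambda * float64(t)
	if math.IsNaN(lt) {
		return 1 // no samples yet, so treat the var as quiet
	}
	return math.Exp(-lt)
}

func main() {
	// Illustrative figures: 64 events observed over the last second
	// (cf. PoissonSamples), and a roll expected to need about 2ms
	// (cf. VarRollTimeExpectation).
	lambda := 64.0 / float64(time.Second)
	p := probQuiet(lambda, 2*time.Millisecond)
	fmt.Printf("P(quiet) = %.3f; roll now? %v\n", p, p > 0.95) // threshold cf. VarRollPRequirement
}

The real machinery (a ring buffer of PoissonSamples timestamps, the rate estimate, and the general (λt)^k * e^(-λt) / k! form) is in the new txnengine/poisson.go below.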
--HG-- branch : T42 --- consts.go | 8 +-- paxos/ballotaccumulator.go | 4 +- txnengine/ballot.go | 15 +++++ txnengine/frame.go | 125 ++++++++++++++++++++++++------------- txnengine/poisson.go | 82 ++++++++++++++++++++++++ txnengine/utils.go | 22 ++++++- txnengine/var.go | 6 +- txnengine/varmanager.go | 68 +++++++++----------- 8 files changed, 237 insertions(+), 93 deletions(-) create mode 100644 txnengine/poisson.go diff --git a/consts.go b/consts.go index d43d45d..13f9d96 100644 --- a/consts.go +++ b/consts.go @@ -10,12 +10,12 @@ const ( TwoToTheSixtyThree = 9223372036854775808 SubmissionInitialBackoff = 2 * time.Microsecond SubmissionMaxSubmitDelay = 2 * time.Second - VarIdleTimeoutMin = 500 * time.Millisecond - VarIdleTimeoutRange = 250 - FrameLockMinExcessSize = 100 - FrameLockMinRatio = 2 + VarIdleTimeoutMin = 50 * time.Millisecond + VarRollTimeExpectation = 2 * time.Millisecond + VarRollPRequirement = 0.95 ConnectionRestartDelayRangeMS = 5000 ConnectionRestartDelayMin = 3 * time.Second MostRandomByteIndex = 7 // will be the lsb of a big-endian client-n in the txnid. MigrationBatchElemCount = 64 + PoissonSamples = 64 ) diff --git a/paxos/ballotaccumulator.go b/paxos/ballotaccumulator.go index 1b07e5c..6df8077 100644 --- a/paxos/ballotaccumulator.go +++ b/paxos/ballotaccumulator.go @@ -116,9 +116,7 @@ func BallotAccumulatorFromData(txn *eng.TxnReader, outcome *outcomeEqualId, inst // one from each RM voting for each vUUId. rmId is the paxos // instanceRMId. func (ba *BallotAccumulator) BallotReceived(instanceRMId common.RMId, inst *instance, vUUId *common.VarUUId, txn *eng.TxnReader) *outcomeEqualId { - if ba.txn.IsDeflated() && !txn.HasDeflated() { - ba.txn = txn - } + ba.txn = ba.txn.Combine(txn) vBallot := ba.vUUIdToBallots[*vUUId] if vBallot.rmToBallot == nil { diff --git a/txnengine/ballot.go b/txnengine/ballot.go index c26d408..160e119 100644 --- a/txnengine/ballot.go +++ b/txnengine/ballot.go @@ -16,6 +16,17 @@ const ( AbortDeadlock = Vote(msgs.VOTE_ABORTDEADLOCK) ) +func (v Vote) String() string { + switch v { + case AbortBadRead: + return "Abort-badRead" + case AbortDeadlock: + return "Abort-deadlock" + default: + return "Commit" + } +} + func (v Vote) ToVoteEnum() msgs.VoteEnum { switch v { case AbortBadRead: @@ -35,6 +46,10 @@ type Ballot struct { Vote Vote } +func (b *Ballot) String() string { + return fmt.Sprintf("%v %v", b.VarUUId, b.Vote) +} + type BallotBuilder struct { *Ballot Clock *VectorClockMutable diff --git a/txnengine/frame.go b/txnengine/frame.go index 1e1efb4..3d17974 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -11,6 +11,7 @@ import ( "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" "sort" + "time" ) var AbortRollNotFirst = errors.New("AbortRollNotFirst") @@ -27,6 +28,7 @@ type frame struct { readVoteClock *VectorClockMutable positionsFound bool mask *VectorClockMutable + scheduleInterval time.Duration frameOpen frameClosed frameErase @@ -45,12 +47,17 @@ func NewFrame(parent *frame, v *Var, txnId *common.TxnId, txnActions *TxnActions } if parent == nil { f.mask = NewVectorClock().AsMutable() + f.scheduleInterval = server.VarIdleTimeoutMin + time.Duration(v.rng.Intn(int(server.VarIdleTimeoutMin))) } else { f.mask = parent.mask + f.scheduleInterval = parent.scheduleInterval / 2 + if f.scheduleInterval < server.VarIdleTimeoutMin { + f.scheduleInterval = server.VarIdleTimeoutMin + time.Duration(v.rng.Intn(int(server.VarIdleTimeoutMin))) + } } f.init() server.Log(f, "NewFrame") - f.maybeScheduleRoll() + f.maybeStartRoll() return f } @@ -173,6 
+180,7 @@ func (fo *frameOpen) ReadRetry(action *localAction) bool { } func (fo *frameOpen) AddRead(action *localAction) { + fo.v.poisson.AddNow() txn := action.Txn server.Log(fo.frame, "AddRead", txn, action.readVsn) switch { @@ -234,6 +242,7 @@ func (fo *frameOpen) ReadCommitted(action *localAction) { } func (fo *frameOpen) AddWrite(action *localAction) { + fo.v.poisson.AddNow() txn := action.Txn server.Log(fo.frame, "AddWrite", txn) cid := txn.Id.ClientId() @@ -274,7 +283,7 @@ func (fo *frameOpen) WriteAborted(action *localAction, permitInactivate bool) { action.frame = nil if fo.writes.Len() == 0 { fo.writeVoteClock = nil - fo.maybeScheduleRoll() + fo.maybeStartRoll() if permitInactivate { fo.v.maybeMakeInactive() } @@ -303,6 +312,7 @@ func (fo *frameOpen) WriteCommitted(action *localAction) { } func (fo *frameOpen) AddReadWrite(action *localAction) { + fo.v.poisson.AddNow() txn := action.Txn server.Log(fo.frame, "AddReadWrite", txn, action.readVsn) switch { @@ -344,7 +354,7 @@ func (fo *frameOpen) ReadWriteAborted(action *localAction, permitInactivate bool action.frame = nil if fo.writes.Len() == 0 { fo.writeVoteClock = nil - fo.maybeScheduleRoll() + fo.maybeStartRoll() if permitInactivate { fo.v.maybeMakeInactive() } @@ -421,7 +431,7 @@ func (fo *frameOpen) ReadLearnt(action *localAction) bool { return true }) server.Log(fo.frame, "ReadLearnt", txn, "uncommittedReads:", fo.uncommittedReads, "uncommittedWrites:", fo.uncommittedWrites) - fo.maybeScheduleRoll() + fo.maybeStartRoll() return true } else { panic(fmt.Sprintf("%v ReadLearnt called for known txn %v", fo.frame, txn)) @@ -510,7 +520,7 @@ func (fo *frameOpen) maybeFindMaxReadFrom(action *localAction, node *sl.Node) { } func (fo *frameOpen) maybeStartWrites() { - fo.maybeScheduleRoll() + fo.maybeStartRoll() if fo.writes.Len() == 0 || fo.uncommittedReads != 0 { return } @@ -519,10 +529,12 @@ func (fo *frameOpen) maybeStartWrites() { fo.maybeCreateChild() } else { fo.calculateWriteVoteClock() + now := time.Now() for node := fo.writes.First(); node != nil; { next := node.Next() if node.Value == postponed { node.Value = uncommitted + fo.v.poisson.AddThen(now) if action := node.Key.(*localAction); !action.VoteCommit(fo.writeVoteClock) { if action.IsRead() { fo.ReadWriteAborted(action, false) @@ -690,53 +702,80 @@ func (fo *frameOpen) maybeCreateChild() { fo.v.SetCurFrame(fo.child, winner, positions) } -func (fo *frameOpen) maybeScheduleRoll() { - // do not check vm.RollAllowed here. 
- if !fo.rollScheduled && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && - (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Actions().Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { - fo.rollScheduled = true - fo.v.vm.ScheduleCallback(func() { - fo.v.applyToVar(func() { - fo.rollScheduled = false - fo.maybeStartRoll() - }) - }) - } +func (fo *frameOpen) basicRollCondition() bool { + return !fo.rollScheduled && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && fo.v.curFrame == fo.frame && + (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Actions().Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) } func (fo *frameOpen) maybeStartRoll() { - if fo.v.vm.RollAllowed && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && - (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Actions().Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) { - fo.rollActive = true - go func() { - server.Log(fo.frame, "Starting roll") - ctxn, varPosMap := fo.createRollClientTxn() - _, outcome, err := fo.v.vm.RunClientTransaction(ctxn, varPosMap) - ow := "" - if outcome != nil { - ow = fmt.Sprint(outcome.Which()) - if outcome.Which() == msgs.OUTCOME_ABORT { - ow += fmt.Sprintf("-%v", outcome.Abort().Which()) - } + fo.maybeStartRollSchedule(true) +} + +func (fo *frameOpen) maybeStartRollSchedule(forceSchedule bool) { + if fo.basicRollCondition() { + multiplier := 1 + for node := fo.reads.First(); node != nil; node = node.Next() { + if node.Value == committed { + multiplier += node.Key.(*localAction).TxnReader.Actions(true).Actions().Len() } - // fmt.Printf("r%v ", ow) - server.Log(fo.frame, "Roll finished: outcome", ow, "; err:", err) - if (outcome == nil && err != nil) || (outcome != nil && outcome.Which() != msgs.OUTCOME_COMMIT) { + } + quietDuration := server.VarRollTimeExpectation * time.Duration(multiplier) + if quietDuration > 400*time.Millisecond { + quietDuration = 400 * time.Millisecond + } + // fmt.Printf("m%v ", multiplier) + probOfZero := fo.v.poisson.P(quietDuration, 0) + if !forceSchedule && probOfZero > server.VarRollPRequirement && fo.v.vm.RollAllowed { + // fmt.Printf("r%v\n", fo.v.UUId) + fo.startRoll() + } else { + fo.rollScheduled = true + // fmt.Printf("s%v(%v|%v)\n", fo.v.UUId, probOfZero, fo.scheduleInterval) + fo.v.vm.ScheduleCallback(fo.scheduleInterval, func(*time.Time) { fo.v.applyToVar(func() { - fo.rollActive = false - if err == AbortRollNotInPermutation { - fo.v.maybeMakeInactive() - } else { - fo.maybeScheduleRoll() - } + fo.rollScheduled = false + fo.maybeStartRollSchedule(false) }) + }) + fo.scheduleInterval += time.Duration(fo.v.rng.Intn(int(server.VarIdleTimeoutMin))) + if fo.scheduleInterval > 1000*server.VarIdleTimeoutMin { + fo.scheduleInterval = fo.scheduleInterval / 2 } - }() - } else { - fo.maybeScheduleRoll() + } } } +func (fo *frameOpen) startRoll() { + fo.rollActive = true + go func() { + server.Log(fo.frame, "Starting roll") + ctxn, varPosMap := fo.createRollClientTxn() + _, outcome, err := fo.v.vm.RunClientTransaction(ctxn, varPosMap) + ow := "" + if outcome != nil { + ow = fmt.Sprint(outcome.Which()) + if outcome.Which() == msgs.OUTCOME_ABORT { + ow += fmt.Sprintf("-%v", 
outcome.Abort().Which()) + } + } + // fmt.Printf("%v r%v (%v)\n", fo.v.UUId, ow, err == AbortRollNotFirst) + server.Log(fo.frame, "Roll finished: outcome", ow, "; err:", err) + fo.v.applyToVar(func() { + fo.rollActive = false + if fo.v.curFrame != fo.frame { + return + } + if (outcome == nil && err != nil) || (outcome != nil && outcome.Which() != msgs.OUTCOME_COMMIT) { + if err == AbortRollNotInPermutation { + fo.v.maybeMakeInactive() + } else { + fo.maybeStartRoll() + } + } + }) + }() +} + func (fo *frameOpen) createRollClientTxn() (*cmsgs.ClientTxn, map[common.VarUUId]*common.Positions) { if fo.rollTxn != nil { return fo.rollTxn, fo.rollTxnPos @@ -864,7 +903,7 @@ func (fc *frameClosed) MaybeCompleteTxns() { } } } - fc.maybeScheduleRoll() + fc.maybeStartRoll() fc.v.maybeMakeInactive() } diff --git a/txnengine/poisson.go b/txnengine/poisson.go new file mode 100644 index 0000000..6522f6e --- /dev/null +++ b/txnengine/poisson.go @@ -0,0 +1,82 @@ +package txnengine + +import ( + "goshawkdb.io/server" + "math" + "time" +) + +type Poisson struct { + events []time.Time + ptr int +} + +func NewPoisson() *Poisson { + return &Poisson{ + events: make([]time.Time, 0, server.PoissonSamples), + ptr: 0, + } +} + +func (p *Poisson) AddNow() { + p.AddThen(time.Now()) +} + +func (p *Poisson) AddThen(now time.Time) { + l, c := len(p.events), cap(p.events) + switch { + case l < c: + p.events = append(p.events, now) + default: + p.events[p.ptr] = now + p.ptr++ + if p.ptr == l { + p.ptr = 0 + } + } +} + +func (p *Poisson) interval(now time.Time) time.Duration { + l, c := len(p.events), cap(p.events) + switch { + case l == 0: + return 0 + case l < c: + return now.Sub(p.events[0]) + default: + oldest := p.ptr + 1 + if oldest == l { + oldest = 0 + } + return now.Sub(p.events[oldest]) + } +} + +func (p *Poisson) length() float64 { + if l, c := len(p.events), cap(p.events); l < c { + return float64(l) + } else { + return float64(c) + } +} + +func (p *Poisson) λ(now time.Time) float64 { + return p.length() / float64(p.interval(now)) +} + +func (p *Poisson) P(t time.Duration, k int64) float64 { + now := time.Now() + λt := p.λ(now) * float64(t) + if math.IsNaN(λt) { + return 1 + } + return (math.Pow(λt, float64(k)) * math.Exp(-λt)) / float64(fac(k)) +} + +func fac(n int64) int64 { + acc := int64(1) + for ; n > 1; n-- { + acc *= n + } + return acc +} diff --git a/txnengine/utils.go b/txnengine/utils.go index 9f36471..9991cb2 100644 --- a/txnengine/utils.go +++ b/txnengine/utils.go @@ -37,11 +37,29 @@ func (tr *TxnReader) Actions(forceDecode bool) *TxnActions { } else if forceDecode { tr.actions.decode() } + if tr.deflated == nil && tr.actions.decoded && tr.actions.deflated { + tr.deflated = tr + } return tr.actions } -func (tr *TxnReader) HasDeflated() bool { - return tr.deflated != nil || tr.Actions(true).deflated +func (a *TxnReader) Combine(b *TxnReader) *TxnReader { + a.Actions(true) + b.Actions(true) + switch { + case a.deflated != nil && a.deflated != a: // a has both + return a + case b.deflated != nil && b.deflated != b: // b has both + return b + case a.deflated == a && b.deflated == nil: // a is deflated, b is not + b.deflated = a + return b + case a.deflated == nil && b.deflated == b: // b is deflated, a is not + a.deflated = b + return a + default: // a and b must both be the same + return a + } } func (tr *TxnReader) IsDeflated() bool { diff --git a/txnengine/var.go b/txnengine/var.go index da6f922..353650d 100644 --- a/txnengine/var.go +++ b/txnengine/var.go @@ -21,6 +21,7 @@ type VarWriteSubscriber 
struct { type Var struct { UUId *common.VarUUId positions *common.Positions + poisson *Poisson curFrame *frame curFrameOnDisk *frame writeInProgress func() @@ -83,6 +84,7 @@ func newVar(uuid *common.VarUUId, exe *dispatcher.Executor, db *db.Databases, vm return &Var{ UUId: uuid, positions: nil, + poisson: NewPoisson(), curFrame: nil, curFrameOnDisk: nil, writeInProgress: nil, @@ -211,8 +213,8 @@ func (v *Var) SetCurFrame(f *frame, action *localAction, positions *common.Posit } } - // diffLen := len(action.outcomeClock.Clock) - action.TxnCap.Actions().Len() - // fmt.Printf("%v ", diffLen) + diffLen := action.outcomeClock.Len() - action.TxnReader.Actions(true).Actions().Len() + fmt.Printf("d%v ", diffLen) v.maybeWriteFrame(f, action, positions) } diff --git a/txnengine/varmanager.go b/txnengine/varmanager.go index e8ed891..a063f57 100644 --- a/txnengine/varmanager.go +++ b/txnengine/varmanager.go @@ -4,26 +4,26 @@ import ( "fmt" mdb "github.com/msackman/gomdb" mdbs "github.com/msackman/gomdb/server" + tw "github.com/msackman/gotimerwheel" "goshawkdb.io/common" "goshawkdb.io/server" "goshawkdb.io/server/configuration" "goshawkdb.io/server/db" "goshawkdb.io/server/dispatcher" - "math/rand" "time" ) type VarManager struct { LocalConnection - Topology *configuration.Topology - RMId common.RMId - db *db.Databases - active map[common.VarUUId]*Var - RollAllowed bool - onDisk func(bool) - callbacks []func() - beaterLive bool - exe *dispatcher.Executor + Topology *configuration.Topology + RMId common.RMId + db *db.Databases + active map[common.VarUUId]*Var + RollAllowed bool + onDisk func(bool) + tw *tw.TimerWheel + beaterTerminator chan struct{} + exe *dispatcher.Executor } func init() { @@ -37,7 +37,7 @@ func NewVarManager(exe *dispatcher.Executor, rmId common.RMId, tp TopologyPublis db: db, active: make(map[common.VarUUId]*Var), RollAllowed: false, - callbacks: []func(){}, + tw: tw.NewTimerWheel(time.Now(), 25*time.Millisecond), exe: exe, } exe.Enqueue(func() { @@ -178,8 +178,8 @@ func (vm *VarManager) find(uuid *common.VarUUId) (*Var, bool) { func (vm *VarManager) Status(sc *server.StatusConsumer) { sc.Emit(fmt.Sprintf("- Active Vars: %v", len(vm.active))) - sc.Emit(fmt.Sprintf("- Callbacks: %v", len(vm.callbacks))) - sc.Emit(fmt.Sprintf("- Beater live? %v", vm.beaterLive)) + sc.Emit(fmt.Sprintf("- Callbacks: %v", vm.tw.Length())) + sc.Emit(fmt.Sprintf("- Beater live? %v", vm.beaterTerminator != nil)) sc.Emit(fmt.Sprintf("- Roll allowed? 
%v", vm.RollAllowed)) for _, v := range vm.active { v.Status(sc.Fork()) @@ -187,44 +187,34 @@ func (vm *VarManager) Status(sc *server.StatusConsumer) { sc.Join() } -func (vm *VarManager) ScheduleCallback(fun func()) { - vm.callbacks = append(vm.callbacks, fun) - if !vm.beaterLive { - vm.beaterLive = true - terminate := make(chan struct{}) - go vm.beater(terminate) +func (vm *VarManager) ScheduleCallback(interval time.Duration, fun tw.Event) { + if err := vm.tw.ScheduleEventIn(interval, fun); err != nil { + panic(err) + } + if vm.beaterTerminator == nil { + vm.beaterTerminator = make(chan struct{}) + go vm.beater(vm.beaterTerminator) } } -func (vm *VarManager) beat(terminate chan struct{}) { - if len(vm.callbacks) != 0 { - callbacks := vm.callbacks - vm.callbacks = make([]func(), 0, 1+(len(callbacks)/2)) - for _, fun := range callbacks { - fun() - } - } - if len(vm.callbacks) == 0 { - close(terminate) - vm.beaterLive = false +func (vm *VarManager) beat() { + vm.tw.AdvanceTo(time.Now(), 32) + // fmt.Println("done:", ) + if vm.tw.IsEmpty() && vm.beaterTerminator != nil { + close(vm.beaterTerminator) + vm.beaterTerminator = nil } } func (vm *VarManager) beater(terminate chan struct{}) { - barrier := make(chan server.EmptyStruct, 1) - fun := func() { - vm.beat(terminate) - barrier <- server.EmptyStructVal - } - rng := rand.New(rand.NewSource(time.Now().UnixNano())) + sleep := 100 * time.Millisecond for { - time.Sleep(server.VarIdleTimeoutMin + (time.Duration(rng.Intn(server.VarIdleTimeoutRange)) * time.Millisecond)) + time.Sleep(sleep) select { case <-terminate: return default: - vm.exe.Enqueue(fun) - <-barrier + vm.exe.Enqueue(vm.beat) } } } From 088ac4325d01034821a836feee3679dabc8e4b59 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 14 Aug 2016 19:34:40 +0100 Subject: [PATCH 32/78] Hmm. Still not convinced about all of this tbh. The effect of just one var refusing to go idle can be catastrophic. Feels more like a hard limit on roll initiation is a good idea. Ref T42 --HG-- branch : T42 --- consts.go | 2 +- txnengine/frame.go | 16 ++++-------- txnengine/poisson.go | 60 +++++++++++++++++++++++--------------------- 3 files changed, 37 insertions(+), 41 deletions(-) diff --git a/consts.go b/consts.go index 13f9d96..133e352 100644 --- a/consts.go +++ b/consts.go @@ -12,7 +12,7 @@ const ( SubmissionMaxSubmitDelay = 2 * time.Second VarIdleTimeoutMin = 50 * time.Millisecond VarRollTimeExpectation = 2 * time.Millisecond - VarRollPRequirement = 0.95 + VarRollPRequirement = 0.9 ConnectionRestartDelayRangeMS = 5000 ConnectionRestartDelayMin = 3 * time.Second MostRandomByteIndex = 7 // will be the lsb of a big-endian client-n in the txnid. 
diff --git a/txnengine/frame.go b/txnengine/frame.go index 3d17974..ca11d4e 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -10,6 +10,7 @@ import ( cmsgs "goshawkdb.io/common/capnp" "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" + "math" "sort" "time" ) @@ -708,10 +709,6 @@ func (fo *frameOpen) basicRollCondition() bool { } func (fo *frameOpen) maybeStartRoll() { - fo.maybeStartRollSchedule(true) -} - -func (fo *frameOpen) maybeStartRollSchedule(forceSchedule bool) { if fo.basicRollCondition() { multiplier := 1 for node := fo.reads.First(); node != nil; node = node.Next() { @@ -720,21 +717,18 @@ func (fo *frameOpen) maybeStartRollSchedule(forceSchedule bool) { } } quietDuration := server.VarRollTimeExpectation * time.Duration(multiplier) - if quietDuration > 400*time.Millisecond { - quietDuration = 400 * time.Millisecond - } - // fmt.Printf("m%v ", multiplier) probOfZero := fo.v.poisson.P(quietDuration, 0) - if !forceSchedule && probOfZero > server.VarRollPRequirement && fo.v.vm.RollAllowed { + probReq := math.Pow(server.VarRollPRequirement, float64(fo.reads.Len())) + if !(fo.reads.Len() > fo.uncommittedReads) || probOfZero > probReq && fo.v.vm.RollAllowed { // fmt.Printf("r%v\n", fo.v.UUId) fo.startRoll() } else { fo.rollScheduled = true - // fmt.Printf("s%v(%v|%v)\n", fo.v.UUId, probOfZero, fo.scheduleInterval) + // fmt.Printf("s%v(%v|%v|%v)\n", fo.v.UUId, probOfZero, probReq, fo.scheduleInterval) fo.v.vm.ScheduleCallback(fo.scheduleInterval, func(*time.Time) { fo.v.applyToVar(func() { fo.rollScheduled = false - fo.maybeStartRollSchedule(false) + fo.maybeStartRoll() }) }) fo.scheduleInterval += time.Duration(fo.v.rng.Intn(int(server.VarIdleTimeoutMin))) diff --git a/txnengine/poisson.go b/txnengine/poisson.go index 6522f6e..9a32347 100644 --- a/txnengine/poisson.go +++ b/txnengine/poisson.go @@ -8,13 +8,14 @@ import ( type Poisson struct { events []time.Time - ptr int + front int + back int + length int } func NewPoisson() *Poisson { return &Poisson{ - events: make([]time.Time, 0, server.PoissonSamples), - ptr: 0, + events: make([]time.Time, server.PoissonSamples), } } @@ -23,49 +24,50 @@ func (p *Poisson) AddNow() { } func (p *Poisson) AddThen(now time.Time) { - l, c := len(p.events), cap(p.events) - switch { - case l < c: - p.events = append(p.events, now) - default: - p.events[p.ptr] = now - p.ptr++ - if p.ptr == l { - p.ptr = 0 + p.events[p.front] = now + p.front++ + if p.front == server.PoissonSamples { + p.front = 0 + } + if p.front == p.back { + p.back++ + if p.back == server.PoissonSamples { + p.back = 0 } + } else { + p.length++ } } -func (p *Poisson) interval(now time.Time) time.Duration { - l, c := len(p.events), cap(p.events) - switch { - case l == 0: - return 0 - case l < c: - return now.Sub(p.events[0]) - default: - oldest := p.ptr + 1 - if oldest == l { - oldest = 0 +func (p *Poisson) Cull(limit time.Time) { + for p.back != p.front { + if p.events[p.back].Before(limit) { + p.length-- + p.back++ + if p.back == server.PoissonSamples { + p.back = 0 + } + } else { + break } - return now.Sub(p.events[oldest]) } } -func (p *Poisson) length() float64 { - if l, c := len(p.events), cap(p.events); l < c { - return float64(l) +func (p *Poisson) interval(now time.Time) time.Duration { + if p.length == 0 { + return 0 } else { - return float64(c) + return now.Sub(p.events[p.back]) } } func (p *Poisson) λ(now time.Time) float64 { - return p.length() / float64(p.interval(now)) + return float64(p.length) / float64(p.interval(now)) } func (p *Poisson) P(t time.Duration, 
k int64) float64 { now := time.Now() + p.Cull(now.Add(-1 * time.Second)) λt := p.λ(now) * float64(t) if math.IsNaN(λt) { return 1 From 632e683bdac72939bd1ad132450bab3c205599a9 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 19 Aug 2016 14:56:40 +0100 Subject: [PATCH 33/78] Use of -race reveals this mistake. --HG-- branch : T42 --- txnengine/frame.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/txnengine/frame.go b/txnengine/frame.go index ca11d4e..294b7be 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -741,9 +741,10 @@ func (fo *frameOpen) maybeStartRoll() { func (fo *frameOpen) startRoll() { fo.rollActive = true + // must do roll txn creation in the main go-routine + ctxn, varPosMap := fo.createRollClientTxn() go func() { server.Log(fo.frame, "Starting roll") - ctxn, varPosMap := fo.createRollClientTxn() _, outcome, err := fo.v.vm.RunClientTransaction(ctxn, varPosMap) ow := "" if outcome != nil { From bf5ffb80b0a10aedbac3d84c5e4bae08e8ed1ce4 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 20 Aug 2016 23:01:45 +0100 Subject: [PATCH 34/78] Enforce deadline on rolls. This gets normal performance back to where we were, but we need to get rolls much faster. Ref T42. --HG-- branch : T42 --- consts.go | 3 ++- txnengine/frame.go | 40 +++++++++++++++++++++++----------------- txnengine/poisson.go | 5 ++--- 3 files changed, 27 insertions(+), 21 deletions(-) diff --git a/consts.go b/consts.go index 133e352..d341034 100644 --- a/consts.go +++ b/consts.go @@ -10,7 +10,8 @@ const ( TwoToTheSixtyThree = 9223372036854775808 SubmissionInitialBackoff = 2 * time.Microsecond SubmissionMaxSubmitDelay = 2 * time.Second - VarIdleTimeoutMin = 50 * time.Millisecond + VarRollDelayMin = 50 * time.Millisecond + VarRollDelayMax = 500 * time.Millisecond VarRollTimeExpectation = 2 * time.Millisecond VarRollPRequirement = 0.9 ConnectionRestartDelayRangeMS = 5000 diff --git a/txnengine/frame.go b/txnengine/frame.go index 294b7be..09acba5 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -10,7 +10,6 @@ import ( cmsgs "goshawkdb.io/common/capnp" "goshawkdb.io/server" msgs "goshawkdb.io/server/capnp" - "math" "sort" "time" ) @@ -48,12 +47,12 @@ func NewFrame(parent *frame, v *Var, txnId *common.TxnId, txnActions *TxnActions } if parent == nil { f.mask = NewVectorClock().AsMutable() - f.scheduleInterval = server.VarIdleTimeoutMin + time.Duration(v.rng.Intn(int(server.VarIdleTimeoutMin))) + f.scheduleInterval = server.VarRollDelayMin + time.Duration(v.rng.Intn(int(server.VarRollDelayMin))) } else { f.mask = parent.mask f.scheduleInterval = parent.scheduleInterval / 2 - if f.scheduleInterval < server.VarIdleTimeoutMin { - f.scheduleInterval = server.VarIdleTimeoutMin + time.Duration(v.rng.Intn(int(server.VarIdleTimeoutMin))) + if f.scheduleInterval < server.VarRollDelayMin { + f.scheduleInterval = server.VarRollDelayMin + time.Duration(v.rng.Intn(int(server.VarRollDelayMin))) } } f.init() @@ -107,7 +106,7 @@ func (f *frame) Status(sc *server.StatusConsumer) { sc.Emit(fmt.Sprintf("- Mask: %v", f.mask)) sc.Emit(fmt.Sprintf("- Current State: %v", f.currentState)) sc.Emit(fmt.Sprintf("- Locked? %v", f.isLocked())) - sc.Emit(fmt.Sprintf("- Roll scheduled/active? %v/%v", f.rollScheduled, f.rollActive)) + sc.Emit(fmt.Sprintf("- Roll scheduled/active? %v/%v", f.rollScheduled != nil, f.rollActive)) sc.Emit(fmt.Sprintf("- DescendentOnDisk? %v", f.onDisk)) sc.Emit(fmt.Sprintf("- Child == nil? %v", f.child == nil)) sc.Emit(fmt.Sprintf("- Parent == nil? 
%v", f.parent == nil)) @@ -147,7 +146,7 @@ type frameOpen struct { clientWrites map[[common.ClientLen]byte]server.EmptyStruct uncommittedWrites uint rwPresent bool - rollScheduled bool + rollScheduled *time.Time rollActive bool rollTxn *cmsgs.ClientTxn rollTxnPos map[common.VarUUId]*common.Positions @@ -704,11 +703,15 @@ func (fo *frameOpen) maybeCreateChild() { } func (fo *frameOpen) basicRollCondition() bool { - return !fo.rollScheduled && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && fo.v.curFrame == fo.frame && + return fo.rollScheduled == nil && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && fo.v.curFrame == fo.frame && (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Actions().Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) } func (fo *frameOpen) maybeStartRoll() { + fo.maybeStartRollFrom(nil) +} + +func (fo *frameOpen) maybeStartRollFrom(then *time.Time) { if fo.basicRollCondition() { multiplier := 1 for node := fo.reads.First(); node != nil; node = node.Next() { @@ -716,23 +719,26 @@ func (fo *frameOpen) maybeStartRoll() { multiplier += node.Key.(*localAction).TxnReader.Actions(true).Actions().Len() } } + now := time.Now() quietDuration := server.VarRollTimeExpectation * time.Duration(multiplier) - probOfZero := fo.v.poisson.P(quietDuration, 0) - probReq := math.Pow(server.VarRollPRequirement, float64(fo.reads.Len())) - if !(fo.reads.Len() > fo.uncommittedReads) || probOfZero > probReq && fo.v.vm.RollAllowed { + probOfZero := fo.v.poisson.P(quietDuration, 0, now) + if fo.v.vm.RollAllowed && (!(fo.reads.Len() > fo.uncommittedReads) || (then != nil && now.Sub(*then) > server.VarRollDelayMax) || probOfZero > server.VarRollPRequirement) { // fmt.Printf("r%v\n", fo.v.UUId) fo.startRoll() } else { - fo.rollScheduled = true - // fmt.Printf("s%v(%v|%v|%v)\n", fo.v.UUId, probOfZero, probReq, fo.scheduleInterval) + if then == nil { + then = &now + } + fo.rollScheduled = then + // fmt.Printf("s%v(%v|%v)\n", fo.v.UUId, probOfZero, fo.scheduleInterval) fo.v.vm.ScheduleCallback(fo.scheduleInterval, func(*time.Time) { fo.v.applyToVar(func() { - fo.rollScheduled = false - fo.maybeStartRoll() + fo.rollScheduled = nil + fo.maybeStartRollFrom(then) }) }) - fo.scheduleInterval += time.Duration(fo.v.rng.Intn(int(server.VarIdleTimeoutMin))) - if fo.scheduleInterval > 1000*server.VarIdleTimeoutMin { + fo.scheduleInterval += time.Duration(fo.v.rng.Intn(int(server.VarRollDelayMin))) + if fo.scheduleInterval > server.VarRollDelayMax { fo.scheduleInterval = fo.scheduleInterval / 2 } } @@ -842,7 +848,7 @@ func (fo *frameOpen) subtractClock(clock VectorClockInterface) { } func (fo *frameOpen) isIdle() bool { - return fo.parent == nil && !fo.rollScheduled && fo.isEmpty() + return fo.parent == nil && fo.rollScheduled == nil && fo.isEmpty() } func (fo *frameOpen) isEmpty() bool { diff --git a/txnengine/poisson.go b/txnengine/poisson.go index 9a32347..3fc64dd 100644 --- a/txnengine/poisson.go +++ b/txnengine/poisson.go @@ -65,9 +65,8 @@ func (p *Poisson) λ(now time.Time) float64 { return float64(p.length) / float64(p.interval(now)) } -func (p *Poisson) P(t time.Duration, k int64) float64 { - now := time.Now() - p.Cull(now.Add(-1 * time.Second)) +func (p *Poisson) P(t time.Duration, k int64, now time.Time) float64 { + //p.Cull(now.Add(-1 * time.Second)) λt := p.λ(now) * float64(t) if math.IsNaN(λt) { return 1 From 
e2b1c72ad1a919118624fb1dff9449e6542d8665 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 21 Aug 2016 14:19:54 +0100 Subject: [PATCH 35/78] Very minor changes. Ref T42. --HG-- branch : T42 --- consts.go | 2 +- txnengine/frame.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/consts.go b/consts.go index d341034..d49fbe1 100644 --- a/consts.go +++ b/consts.go @@ -12,7 +12,7 @@ const ( SubmissionMaxSubmitDelay = 2 * time.Second VarRollDelayMin = 50 * time.Millisecond VarRollDelayMax = 500 * time.Millisecond - VarRollTimeExpectation = 2 * time.Millisecond + VarRollTimeExpectation = 3 * time.Millisecond VarRollPRequirement = 0.9 ConnectionRestartDelayRangeMS = 5000 ConnectionRestartDelayMin = 3 * time.Second diff --git a/txnengine/frame.go b/txnengine/frame.go index 09acba5..6f42600 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -713,7 +713,7 @@ func (fo *frameOpen) maybeStartRoll() { func (fo *frameOpen) maybeStartRollFrom(then *time.Time) { if fo.basicRollCondition() { - multiplier := 1 + multiplier := 0 for node := fo.reads.First(); node != nil; node = node.Next() { if node.Value == committed { multiplier += node.Key.(*localAction).TxnReader.Actions(true).Actions().Len() @@ -722,7 +722,7 @@ func (fo *frameOpen) maybeStartRollFrom(then *time.Time) { now := time.Now() quietDuration := server.VarRollTimeExpectation * time.Duration(multiplier) probOfZero := fo.v.poisson.P(quietDuration, 0, now) - if fo.v.vm.RollAllowed && (!(fo.reads.Len() > fo.uncommittedReads) || (then != nil && now.Sub(*then) > server.VarRollDelayMax) || probOfZero > server.VarRollPRequirement) { + if fo.v.vm.RollAllowed && (probOfZero > server.VarRollPRequirement || (then != nil && now.Sub(*then) > server.VarRollDelayMax)) { // fmt.Printf("r%v\n", fo.v.UUId) fo.startRoll() } else { From f0ac277e64884e927c89411867cf0095c69cb047 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 21 Aug 2016 23:04:25 +0100 Subject: [PATCH 36/78] Well I think I've actually managed to make it slower today. Not sure how I've managed that... Ref T42. --HG-- branch : T42 --- txnengine/frame.go | 158 ++++++++++++++++++++++++--------------- txnengine/var.go | 4 +- txnengine/vectorclock.go | 2 +- 3 files changed, 99 insertions(+), 65 deletions(-) diff --git a/txnengine/frame.go b/txnengine/frame.go index 6f42600..51e65b4 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -105,7 +105,6 @@ func (f *frame) Status(sc *server.StatusConsumer) { sc.Emit(fmt.Sprintf("- RW Present: %v", f.rwPresent)) sc.Emit(fmt.Sprintf("- Mask: %v", f.mask)) sc.Emit(fmt.Sprintf("- Current State: %v", f.currentState)) - sc.Emit(fmt.Sprintf("- Locked? %v", f.isLocked())) sc.Emit(fmt.Sprintf("- Roll scheduled/active? %v/%v", f.rollScheduled != nil, f.rollActive)) sc.Emit(fmt.Sprintf("- DescendentOnDisk? %v", f.onDisk)) sc.Emit(fmt.Sprintf("- Child == nil? %v", f.child == nil)) @@ -186,7 +185,7 @@ func (fo *frameOpen) AddRead(action *localAction) { switch { case fo.currentState != fo: panic(fmt.Sprintf("%v AddRead called for %v with frame in state %v", fo.v, txn, fo.currentState)) - case fo.writeVoteClock != nil || (fo.writes.Len() != 0 && fo.writes.First().Key.Compare(action) == sl.LT) || fo.frameTxnActions == nil || fo.isLocked(): + case fo.writes.Len() != 0 || (fo.writes.Len() != 0 && fo.writes.First().Key.Compare(action) == sl.LT) || fo.frameTxnActions == nil: // We could have learnt a write at this point but we're still fine to accept smaller reads. 
action.VoteDeadlock(fo.frameTxnClock) case fo.frameTxnId.Compare(action.readVsn) != common.EQ: @@ -250,7 +249,7 @@ func (fo *frameOpen) AddWrite(action *localAction) { switch { case fo.currentState != fo: panic(fmt.Sprintf("%v AddWrite called for %v with frame in state %v", fo.v, txn, fo.currentState)) - case fo.rwPresent || (fo.maxUncommittedRead != nil && action.Compare(fo.maxUncommittedRead) == sl.LT) || found || len(fo.learntFutureReads) != 0 || fo.isLocked(): + case fo.rwPresent || (fo.maxUncommittedRead != nil && action.Compare(fo.maxUncommittedRead) == sl.LT) || found || len(fo.learntFutureReads) != 0: action.VoteDeadlock(fo.frameTxnClock) case fo.writes.Get(action) == nil: fo.uncommittedWrites++ @@ -318,7 +317,7 @@ func (fo *frameOpen) AddReadWrite(action *localAction) { switch { case fo.currentState != fo: panic(fmt.Sprintf("%v AddReadWrite called for %v with frame in state %v", fo.v, txn, fo.currentState)) - case fo.writeVoteClock != nil || fo.writes.Len() != 0 || (fo.maxUncommittedRead != nil && action.Compare(fo.maxUncommittedRead) == sl.LT) || fo.frameTxnActions == nil || len(fo.learntFutureReads) != 0 || (!action.IsRoll() && fo.isLocked()): + case fo.writes.Len() != 0 || fo.writes.Len() != 0 || (fo.maxUncommittedRead != nil && action.Compare(fo.maxUncommittedRead) == sl.LT) || fo.frameTxnActions == nil || len(fo.learntFutureReads) != 0: action.VoteDeadlock(fo.frameTxnClock) case fo.frameTxnId.Compare(action.readVsn) != common.EQ: action.VoteBadRead(fo.frameTxnClock, fo.frameTxnId, fo.frameTxnActions) @@ -387,7 +386,7 @@ func (fo *frameOpen) ReadLearnt(action *localAction) bool { panic(fmt.Sprintf("%v ReadLearnt called for %v with frame in state %v", fo.v, txn, fo.currentState)) } actClockElem := action.outcomeClock.At(fo.v.UUId) - if actClockElem == 0 { + if actClockElem == deleted { panic("Just did 0 - 1 in int64") } actClockElem-- @@ -424,12 +423,14 @@ func (fo *frameOpen) ReadLearnt(action *localAction) bool { // missing some TGCs - essentially we can infer TGCs by // observing the outcome clocks on future txns we learn. fo.calculateReadVoteClock() + mask := NewVectorClock().AsMutable() fo.readVoteClock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if action.outcomeClock.At(vUUId) == 0 { - fo.mask.SetVarIdMax(vUUId, v) + if action.outcomeClock.At(vUUId) == deleted { + mask.SetVarIdMax(vUUId, v) } return true }) + fo.subtractClock(mask) server.Log(fo.frame, "ReadLearnt", txn, "uncommittedReads:", fo.uncommittedReads, "uncommittedWrites:", fo.uncommittedWrites) fo.maybeStartRoll() return true @@ -465,45 +466,30 @@ func (fo *frameOpen) WriteLearnt(action *localAction) bool { fo.writes.Insert(action, committed) action.frame = fo.frame fo.positionsFound = fo.positionsFound || (fo.frameTxnActions == nil && action.createPositions != nil) - // See corresponding comment in ReadLearnt + // See corresponding comment in ReadLearnt. We only force the + // readvoteclock here because we cannot calculate the + // writevoteclock because we may have uncommitted reads. 
clock := fo.writeVoteClock if clock == nil { fo.calculateReadVoteClock() clock = fo.readVoteClock } + mask := NewVectorClock().AsMutable() clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if action.outcomeClock.At(vUUId) == 0 { - fo.mask.SetVarIdMax(vUUId, v) + if action.outcomeClock.At(vUUId) == deleted { + mask.SetVarIdMax(vUUId, v) } return true }) + fo.subtractClock(mask) server.Log(fo.frame, "WriteLearnt", txn, "uncommittedReads:", fo.uncommittedReads, "uncommittedWrites:", fo.uncommittedWrites) - if fo.uncommittedReads == 0 { - fo.maybeCreateChild() - } + fo.maybeCreateChild() return true } else { panic(fmt.Sprintf("%v WriteLearnt called for known txn %v", fo.frame, txn)) } } -func (fo *frameOpen) isLocked() bool { - return false - // Locking is disabled because it's unsafe when there are temporary - // node failures around: with node failures, TGCs don't get issued - // so the clocks don't get tidied up, so the frame can lock itself - // and then we can't make progress. TODO FIXME. - /* - if fo.frameTxnActions == nil || fo.parent == nil { - return false - } - rvcLen := fo.readVoteClock.Len() - actionsLen := fo.frameTxnActions.Len() - excess := rvcLen - actionsLen - return excess > server.FrameLockMinExcessSize && rvcLen > actionsLen*server.FrameLockMinRatio - */ -} - func (fo *frameOpen) maybeFindMaxReadFrom(action *localAction, node *sl.Node) { if fo.uncommittedReads == 0 { fo.maxUncommittedRead = nil @@ -550,22 +536,38 @@ func (fo *frameOpen) maybeStartWrites() { func (fo *frameOpen) calculateReadVoteClock() { if fo.readVoteClock == nil { - if fo.frameWritesClock.At(fo.v.UUId) == 0 { + if fo.frameWritesClock.At(fo.v.UUId) == deleted { panic(fmt.Sprintf("%v no write to self! %v", fo.frame, fo.frameWritesClock)) } + + // see notes below in calculateWriteVoteClock! clock := fo.frameTxnClock.Clone() - clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if fo.mask.At(vUUId) >= v { - clock.Delete(vUUId) - } - return true - }) - fo.frameWritesClock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if fo.mask.At(vUUId) < v || fo.v.UUId.Compare(vUUId) == common.EQ { - clock.SetVarIdMax(vUUId, v+1) - } - return true - }) + if fo.mask.Len()+fo.frameWritesClock.Len() > clock.Len() { + // mask is bigger, so look through clock + clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + if m := fo.mask.At(vUUId); m >= v { + clock.Delete(vUUId) + } else if w := fo.frameWritesClock.At(vUUId); w == v { + clock.Bump(vUUId, 1) + } + return true + }) + + } else { + fo.mask.ForEach(func(vUUId *common.VarUUId, m uint64) bool { + if v := clock.At(vUUId); m >= v { + clock.Delete(vUUId) + } + return true + }) + fo.frameWritesClock.ForEach(func(vUUId *common.VarUUId, w uint64) bool { + if v := clock.At(vUUId); v == w { + clock.Bump(vUUId, 1) + } + return true + }) + } + fo.readVoteClock = clock } } @@ -582,18 +584,43 @@ func (fo *frameOpen) calculateWriteVoteClock() { written.SetVarIdMax(k, action.outcomeClock.At(k)) } } - clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if fo.mask.At(vUUId) >= v { - clock.Delete(vUUId) - } - return true - }) - written.ForEach(func(vUUId *common.VarUUId, v uint64) bool { - if fo.mask.At(vUUId) < v { - clock.SetVarIdMax(vUUId, v+1) - } - return true - }) + + // Everything in written is also in clock. But the value in + // written can be lower than in clock because a txn may have a + // future read of a var with a higher clock elem. 
But if the + // mask is > than the value in clock, then it can't be the case + // that the value in mask is <= the value in written. + + if fo.mask.Len()+written.Len() > clock.Len() { + // mask is bigger, so loop through clock + clock.ForEach(func(vUUId *common.VarUUId, v uint64) bool { + if m := fo.mask.At(vUUId); m >= v { + clock.Delete(vUUId) + } else if w := written.At(vUUId); w == v { + clock.Bump(vUUId, 1) + } + return true + }) + + } else { + // mask is smaller, so loop through mask. But this means we + // have to do written separately + fo.mask.ForEach(func(vUUId *common.VarUUId, m uint64) bool { + if v := clock.At(vUUId); m >= v { + // there is no risk we will add this back in, in the + // written loop (see above) + clock.Delete(vUUId) + } + return true + }) + written.ForEach(func(vUUId *common.VarUUId, w uint64) bool { + if v := clock.At(vUUId); v == w { + clock.Bump(vUUId, 1) + } + return true + }) + } + fo.writeVoteClock = clock } } @@ -666,16 +693,17 @@ func (fo *frameOpen) maybeCreateChild() { for _, localElemVal := range localElemVals { actions := localElemValToTxns[localElemVal] for _, action := range *actions { - outcomeClock := action.outcomeClock.AsMutable() - action.outcomeClock = outcomeClock - outcomeClock.MergeInMissing(clock) - winner = maxTxnByOutcomeClock(winner, action) - if positions == nil && action.createPositions != nil { positions = action.createPositions } + if positions == nil && action.createPositions != nil { positions = action.createPositions } + outcomeClock := action.outcomeClock.AsMutable() + action.outcomeClock = outcomeClock + clock.MergeInMax(outcomeClock) + outcomeClock.MergeInMissing(clock) + winner = maxTxnByOutcomeClock(winner, action) + if action.writesClock == nil { for _, k := range action.writes { written.SetVarIdMax(k, outcomeClock.At(k)) } @@ -842,8 +870,14 @@ func (fo *frameOpen) subtractClock(clock VectorClockInterface) { if fo.currentState != fo { panic(fmt.Sprintf("%v subtractClock called with frame in state %v", fo.v, fo.currentState)) } - if changed := fo.mask.MergeInMax(clock); changed && fo.reads.Len() == 0 && fo.writeVoteClock == nil { - fo.readVoteClock = nil + if changed := fo.mask.MergeInMax(clock); changed { + fo.mask.Delete(fo.v.UUId) + if fo.writes.Len() == 0 && len(fo.learntFutureReads) == 0 { + fo.writeVoteClock = nil + if fo.reads.Len() == 0 { + fo.readVoteClock = nil + } + } } } diff --git a/txnengine/var.go b/txnengine/var.go index 353650d..1f225ea 100644 --- a/txnengine/var.go +++ b/txnengine/var.go @@ -213,8 +213,8 @@ func (v *Var) SetCurFrame(f *frame, action *localAction, positions *common.Posit } } - diffLen := action.outcomeClock.Len() - action.TxnReader.Actions(true).Actions().Len() - fmt.Printf("d%v ", diffLen) + // diffLen := action.outcomeClock.Len() - action.TxnReader.Actions(true).Actions().Len() + // fmt.Printf("d%v ", diffLen) v.maybeWriteFrame(f, action, positions) } diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index 597d476..848ffd5 100644 --- a/txnengine/vectorclock.go +++ b/txnengine/vectorclock.go @@ -362,7 +362,7 @@ func (vcA *VectorClockMutable) LessThan(vcB VectorClockInterface) bool { } func (vcA *VectorClockMutable) MergeInMax(vcB VectorClockInterface) bool { - if vcB.Len() == 0 { + if vcB == nil || vcB.Len() == 0 { return false } changed := false From 238ed2b55fb7d10c95198da8310bfed6680d3256 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 26 Aug 2016 16:18:05 +0100 Subject: [PATCH 37/78] So this turns out to be a good idea too - given we can guess approximate sizes, avoid expensive allocs with copies.
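The trick, in both hunks below: go-capnproto segments grow by allocate-and-copy as they fill, so seeding capn.NewBuffer with a backing slice of roughly the right capacity (double the estimate, to be safe) sidesteps those copies. A minimal sketch, assuming only the glycerine/go-capnproto API already used throughout this series; the sizes are illustrative:

package main

import (
	"fmt"

	capn "github.com/glycerine/go-capnproto"
)

func main() {
	const n = 32
	// Pre-size the segment's backing slice (2 bytes per element is the
	// same safety margin the patch uses for positions) so the buffer
	// rarely needs to grow; capn.NewBuffer(nil) would start empty and
	// reallocate repeatedly as the list is filled in.
	seg := capn.NewBuffer(make([]byte, 0, n*2))
	list := seg.NewUInt8List(n)
	for i := 0; i < n; i++ {
		list.Set(i, uint8(i))
	}
	fmt.Println("list length:", list.Len())
}

The vectorclock hunk applies the same idea, sizing from the clock's length at KeyLen+8 bytes per entry.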
--HG-- branch : dev --- consistenthash/cache.go | 2 +- txnengine/vectorclock.go | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/consistenthash/cache.go b/consistenthash/cache.go index fa1dda9..3130643 100644 --- a/consistenthash/cache.go +++ b/consistenthash/cache.go @@ -82,7 +82,7 @@ func (chc *ConsistentHashCache) Remove(vUUId *common.VarUUId) { // In here, we don't actually add to the cache because we don't know // if the corresponding txn is going to commit or not. func (chc *ConsistentHashCache) CreatePositions(vUUId *common.VarUUId, positionsLength int) (*common.Positions, []common.RMId, error) { - positionsCap := capn.NewBuffer(nil).NewUInt8List(positionsLength) + positionsCap := capn.NewBuffer(make([]byte, 0, positionsLength*2)).NewUInt8List(positionsLength) positionsSlice := make([]uint8, positionsLength) n, entropy := uint64(chc.rng.Int63()), uint64(server.TwoToTheSixtyThree) for idx := range positionsSlice { diff --git a/txnengine/vectorclock.go b/txnengine/vectorclock.go index 848ffd5..a92ffbe 100644 --- a/txnengine/vectorclock.go +++ b/txnengine/vectorclock.go @@ -414,7 +414,9 @@ func (vc *VectorClockMutable) AsData() []byte { vc.data = vc.VectorClock.data } else { - seg := capn.NewBuffer(nil) + // for each pair, we need KeyLen bytes for the vUUIds, and 8 + // bytes for value. Then double it to be safe. + seg := capn.NewBuffer(make([]byte, 0, vc.length*(common.KeyLen+8)*2)) vcCap := msgs.NewRootVectorClock(seg) vUUIds := seg.NewDataList(vc.length) values := seg.NewUInt64List(vc.length) From f23bb15793140490a5e333eb4a8d7b98c0af690f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 27 Aug 2016 17:43:54 +0100 Subject: [PATCH 38/78] Minor tidyings. Ref T34. --HG-- branch : T34 --- client/clienttxnsubmitter.go | 2 -- network/connection.go | 32 ++++++++++++++++---------------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index a85b6e9..536329f 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -157,8 +157,6 @@ func (cts *ClientTxnSubmitter) translateUpdates(seg *capn.Segment, updates map[c referencesMask = referencesMask[1:] varIdPos.SetVarId(ref.Id()) varIdPos.SetCapabilities(ref.Capabilities()) - } else { - varIdPos.SetVarId([]byte{}) } positions := common.Positions(ref.Positions()) cts.hashCache.AddPosition(common.MakeVarUUId(ref.Id()), &positions) diff --git a/network/connection.go b/network/connection.go index 3f24fa0..5c0dbaa 100644 --- a/network/connection.go +++ b/network/connection.go @@ -583,14 +583,14 @@ func (cash *connectionAwaitServerHandshake) start() (bool, error) { } } - helloFromServer := cash.makeHelloServerFromServer(cash.topology) + helloFromServer := cash.makeHelloServerFromServer() if err := cash.send(server.SegToBytes(helloFromServer)); err != nil { return cash.connectionAwaitHandshake.maybeRestartConnection(err) } if seg, err := cash.readOne(); err == nil { hello := msgs.ReadRootHelloServerFromServer(seg) - if cash.verifyTopology(cash.topology, &hello) { + if cash.verifyTopology(&hello) { cash.remoteHost = hello.LocalHost() cash.remoteRMId = common.RMId(hello.RmId()) @@ -612,10 +612,10 @@ func (cash *connectionAwaitServerHandshake) start() (bool, error) { } } -func (cash *connectionAwaitServerHandshake) verifyTopology(topology *configuration.Topology, remote *msgs.HelloServerFromServer) bool { - if topology.ClusterId == remote.ClusterId() { +func (cash *connectionAwaitServerHandshake) verifyTopology(remote 
*msgs.HelloServerFromServer) bool { + if cash.topology.ClusterId == remote.ClusterId() { remoteUUId := remote.ClusterUUId() - localUUId := topology.ClusterUUId() + localUUId := cash.topology.ClusterUUId() if remoteUUId == 0 || localUUId == 0 { return true } else { @@ -625,7 +625,7 @@ func (cash *connectionAwaitServerHandshake) verifyTopology(topology *configurati return false } -func (cash *connectionAwaitServerHandshake) makeHelloServerFromServer(topology *configuration.Topology) *capn.Segment { +func (cash *connectionAwaitServerHandshake) makeHelloServerFromServer() *capn.Segment { seg := capn.NewBuffer(nil) hello := msgs.NewRootHelloServerFromServer(seg) localHost := cash.connectionManager.LocalHost() @@ -635,8 +635,8 @@ func (cash *connectionAwaitServerHandshake) makeHelloServerFromServer(topology * tieBreak := cash.rng.Uint32() cash.combinedTieBreak = tieBreak hello.SetTieBreak(tieBreak) - hello.SetClusterId(topology.ClusterId) - hello.SetClusterUUId(topology.ClusterUUId()) + hello.SetClusterId(cash.topology.ClusterId) + hello.SetClusterUUId(cash.topology.ClusterUUId()) return seg } @@ -672,11 +672,11 @@ func (cach *connectionAwaitClientHandshake) start() (bool, error) { } peerCerts := socket.ConnectionState().PeerCertificates - if authenticated, hashsum, roots := cach.verifyPeerCerts(cach.topology, peerCerts); authenticated { + if authenticated, hashsum, roots := cach.verifyPeerCerts(peerCerts); authenticated { cach.peerCerts = peerCerts cach.roots = roots log.Printf("User '%s' authenticated", hex.EncodeToString(hashsum[:])) - helloFromServer := cach.makeHelloClientFromServer(cach.topology) + helloFromServer := cach.makeHelloClientFromServer() if err := cach.send(server.SegToBytes(helloFromServer)); err != nil { return false, err } @@ -688,8 +688,8 @@ func (cach *connectionAwaitClientHandshake) start() (bool, error) { } } -func (cach *connectionAwaitClientHandshake) verifyPeerCerts(topology *configuration.Topology, peerCerts []*x509.Certificate) (authenticated bool, hashsum [sha256.Size]byte, roots map[string]*cmsgs.Capabilities) { - fingerprints := topology.Fingerprints() +func (cach *connectionAwaitClientHandshake) verifyPeerCerts(peerCerts []*x509.Certificate) (authenticated bool, hashsum [sha256.Size]byte, roots map[string]*cmsgs.Capabilities) { + fingerprints := cach.topology.Fingerprints() for _, cert := range peerCerts { hashsum = sha256.Sum256(cert.Raw) if roots, found := fingerprints[hashsum]; found { @@ -699,7 +699,7 @@ func (cach *connectionAwaitClientHandshake) verifyPeerCerts(topology *configurat return false, hashsum, nil } -func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology *configuration.Topology) *capn.Segment { +func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer() *capn.Segment { seg := capn.NewBuffer(nil) hello := cmsgs.NewRootHelloClientFromServer(seg) namespace := make([]byte, common.KeyLen-8) @@ -710,11 +710,11 @@ func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer(topology * rootsCap := cmsgs.NewRootList(seg, len(cach.roots)) idy := 0 rootsVar := make(map[common.VarUUId]*cmsgs.Capabilities, len(cach.roots)) - for idx, name := range topology.RootNames() { + for idx, name := range cach.topology.RootNames() { if capabilities, found := cach.roots[name]; found { rootCap := rootsCap.At(idy) idy++ - vUUId := topology.Roots[idx].VarUUId + vUUId := cach.topology.Roots[idx].VarUUId rootCap.SetName(name) rootCap.SetVarId(vUUId[:]) rootCap.SetCapabilities(*capabilities) @@ -815,7 +815,7 @@ func (cr 
*connectionRun) topologyChanged(tc *connectionMsgTopologyChanged) error } if cr.isClient { if topology != nil { - if authenticated, _, roots := cr.verifyPeerCerts(topology, cr.peerCerts); !authenticated { + if authenticated, _, roots := cr.verifyPeerCerts(cr.peerCerts); !authenticated { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(client unauthed)") tc.maybeClose() return errors.New("Client connection closed: No client certificate known") From 04743a8e225e4f8f0e4463d2a7c2906ddecdaef2 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 31 Aug 2016 18:10:28 +0100 Subject: [PATCH 39/78] Minor refactorings. Ref T34. --HG-- branch : T34 --- client/versioncache.go | 59 ++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index 79de51b..f2b6c34 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -54,27 +54,28 @@ func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { if cTxn.Retry() { for idx, l := 0, actions.Len(); idx < l; idx++ { action := actions.At(idx) + vUUId := common.MakeVarUUId(action.VarId()) if which := action.Which(); which != cmsgs.CLIENTACTION_READ { return fmt.Errorf("Retry transaction should only include reads. Found %v", which) + } else if _, found := vc[*vUUId]; !found { + return fmt.Errorf("Retry transaction has attempted to read from unknown object: %v", vUUId) } } } else { for idx, l := 0, actions.Len(); idx < l; idx++ { action := actions.At(idx) + vUUId := common.MakeVarUUId(action.VarId()) + _, found := vc[*vUUId] switch action.Which() { - case cmsgs.CLIENTACTION_READ: - // do nothing - case cmsgs.CLIENTACTION_WRITE, cmsgs.CLIENTACTION_READWRITE: - vUUId := common.MakeVarUUId(action.VarId()) - if _, found := vc[*vUUId]; !found { - return fmt.Errorf("Transaction manipulates unknown object %v", vUUId) + case cmsgs.CLIENTACTION_READ, cmsgs.CLIENTACTION_WRITE, cmsgs.CLIENTACTION_READWRITE: + if !found { + return fmt.Errorf("Transaction manipulates unknown object: %v", vUUId) } case cmsgs.CLIENTACTION_CREATE: - vUUId := common.MakeVarUUId(action.VarId()) - if _, found := vc[*vUUId]; found { - return fmt.Errorf("Transaction tries to create known object %v", vUUId) + if found { + return fmt.Errorf("Transaction tries to create existing object %v", vUUId) } default: @@ -333,20 +334,24 @@ func mergeCaps(a, b *cmsgs.Capabilities) *cmsgs.Capabilities { aValue := a.Value() aRefsRead := a.References().Read() aRefsWrite := a.References().Write() - if aValue == cmsgs.VALUECAPABILITY_READWRITE && - aRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL && - aRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL { - return a - } - - seg := capn.NewBuffer(nil) - cap := cmsgs.NewCapabilities(seg) bValue := b.Value() + bRefsRead := b.References().Read() + bRefsWrite := b.References().Write() + valueRead := aValue == cmsgs.VALUECAPABILITY_READWRITE || aValue == cmsgs.VALUECAPABILITY_READ || bValue == cmsgs.VALUECAPABILITY_READWRITE || bValue == cmsgs.VALUECAPABILITY_READ valueWrite := aValue == cmsgs.VALUECAPABILITY_READWRITE || aValue == cmsgs.VALUECAPABILITY_WRITE || bValue == cmsgs.VALUECAPABILITY_READWRITE || bValue == cmsgs.VALUECAPABILITY_WRITE + refsReadAll := aRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL || bRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL + refsWriteAll := aRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL || bRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL + +
if valueRead && valueWrite && refsReadAll && refsWriteAll { + return maxCapsCap + } + + seg := capn.NewBuffer(nil) + cap := cmsgs.NewCapabilities(seg) switch { case valueRead && valueWrite: cap.SetValue(cmsgs.VALUECAPABILITY_READWRITE) @@ -358,35 +363,21 @@ func mergeCaps(a, b *cmsgs.Capabilities) *cmsgs.Capabilities { cap.SetValue(cmsgs.VALUECAPABILITY_NONE) } - isMax := valueRead && valueWrite - - bRefsRead := b.References().Read() - readAll := aRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL || - aRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL - if readAll { + if refsReadAll { cap.References().Read().SetAll() } else { - isMax = false aOnly, bOnly := aRefsRead.Only().ToArray(), bRefsRead.Only().ToArray() cap.References().Read().SetOnly(mergeOnlies(seg, aOnly, bOnly)) } - bRefsWrite := b.References().Write() - writeAll := aRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL || - aRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL - if writeAll { + if refsWriteAll { cap.References().Write().SetAll() } else { - isMax = false aOnly, bOnly := aRefsWrite.Only().ToArray(), bRefsWrite.Only().ToArray() cap.References().Write().SetOnly(mergeOnlies(seg, aOnly, bOnly)) } - if isMax { - return maxCapsCap - } else { - return &cap - } + return &cap } func mergeOnlies(seg *capn.Segment, a, b []uint32) capn.UInt32List { From d492c8189ec4ffa57fd5d59f9cd2e53da4ccb709 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 31 Aug 2016 22:31:11 +0100 Subject: [PATCH 40/78] WIP. Fixed some issues. Big refactoring still needed. Ref T34. --HG-- branch : T34 --- client/versioncache.go | 125 ++++++++++++++++++++++++++++++----------- 1 file changed, 93 insertions(+), 32 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index f2b6c34..b38cecd 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -200,17 +200,35 @@ func (vc versionCache) UpdateFromCommit(txn *eng.TxnReader, outcome *msgs.Outcom action := actions.At(idx) if act := action.Which(); act != msgs.ACTION_READ { vUUId := common.MakeVarUUId(action.VarId()) - if c, found := vc[*vUUId]; found { - c.txnId = txnId - c.clockElem = clock.At(vUUId) - } else if act == msgs.ACTION_CREATE { - vc[*vUUId] = &cached{ - txnId: txnId, - clockElem: clock.At(vUUId), - caps: maxCapsCap, + c, found := vc[*vUUId] + if act == msgs.ACTION_CREATE && !found { + create := action.Create() + c = &cached{ + txnId: txnId, + clockElem: clock.At(vUUId), + caps: maxCapsCap, + value: create.Value(), + references: create.References().ToArray(), } + vc[*vUUId] = c } else { - panic(fmt.Sprintf("%v contained action (%v) for unknown %v", txnId, act, vUUId)) + panic(fmt.Sprintf("%v contained illegal action (%v) for %v", txnId, act, vUUId)) + } + + c.txnId = txnId + c.clockElem = clock.At(vUUId) + + switch act { + case msgs.ACTION_WRITE: + write := action.Write() + c.value = write.Value() + c.references = write.References().ToArray() + case msgs.ACTION_READWRITE: + rw := action.Readwrite() + c.value = rw.Value() + c.references = rw.References().ToArray() + default: + panic(fmt.Sprintf("Unexpected action type on txn commit! %v %v", txnId, act)) } } } @@ -262,35 +280,78 @@ func (vc versionCache) UpdateFromAbort(updatesCap *msgs.Update_List) map[common. 
case msgs.ACTION_WRITE: write := actionCap.Write() if c, found := vc[*vUUId]; found { - cmp := c.txnId.Compare(txnId) - if cmp == common.EQ && clockElem != c.clockElem { - panic(fmt.Sprintf("Clock version changed on write for %v@%v (new:%v != old:%v)", vUUId, txnId, clockElem, c.clockElem)) + // If it's in vc then we can either reach it currently + // or we have been able to in the past. + valid := c.txnId == nil + if !valid { + cmp := c.txnId.Compare(txnId) + if cmp == common.EQ && clockElem != c.clockElem { + panic(fmt.Sprintf("Clock version changed on write for %v@%v (new:%v != old:%v)", vUUId, txnId, clockElem, c.clockElem)) + } + valid = clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) } - if c.txnId == nil || clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) { + // If it's not valid then we're not going to send it to + // the client in which case the capabilities can't + // widen: we already know everything the client knows + // and we're not extending that. + if valid { c.txnId = txnId c.clockElem = clockElem c.value = write.Value() refs := write.References().ToArray() c.references = refs - *updatesListPtr = append(*updatesListPtr, &update{ - cached: c, - varUUId: vUUId, - }) - for ; len(refs) > 0; refs = refs[1:] { - ref := refs[0] - caps := ref.Capabilities() - vUUId := common.MakeVarUUId(ref.Id()) - if c, found := vc[*vUUId]; found { - c.caps = mergeCaps(c.caps, &caps) - } else if ur, found := unreachedMap[*vUUId]; found { - delete(unreachedMap, *vUUId) - c := ur.update.cached - c.caps = &caps - vc[*vUUId] = c - *ur.updates = append(*ur.updates, ur.update) - refs = append(refs, ur.update.references...) - } else { - vc[*vUUId] = &cached{caps: &caps} + // This update could be here because the client txn + // had the capability to read this var and did so, + // but got the wrong version. In which case just + // updating this c is fine. But, this update could + // equally be here as a side effect of some other + // failed read. Just because c exists doesn't mean + // the client actually has the capability to read + // this var, so we need to be careful here. + valueCap := c.caps.Value() + refsReadCap := c.caps.References().Read() + refsReadCapAll := refsReadCap.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL + var refsReadCapOnly []uint32 + if !refsReadCapAll { + refsReadCapOnly = refsReadCap.Only().ToArray() + } + needsUpdate := valueCap == cmsgs.VALUECAPABILITY_READ || + valueCap == cmsgs.VALUECAPABILITY_READWRITE || + refsReadCapAll || len(refsReadCapOnly) > 0 + if needsUpdate { + *updatesListPtr = append(*updatesListPtr, &update{ + cached: c, + varUUId: vUUId, + }) + } + if refsReadCapAll || len(refsReadCapOnly) > 0 { + for idy, ref := range refs { + switch { + case refsReadCapAll: + case len(refsReadCapOnly) > 0 && refsReadCapOnly[0] == uint32(idy): + refsReadCapOnly = refsReadCapOnly[1:] + default: + continue + } + caps := ref.Capabilities() + vUUId := common.MakeVarUUId(ref.Id()) + if c, found := vc[*vUUId]; found { + // Even if c.txnId == nil, the fact is the + // client is going to have some capability + // to interact with this var, so we need to + // keep track of that. + c.caps = mergeCaps(c.caps, &caps) + } else if ur, found := unreachedMap[*vUUId]; found { + delete(unreachedMap, *vUUId) + c := ur.update.cached + c.caps = &caps + vc[*vUUId] = c + *ur.updates = append(*ur.updates, ur.update) + // FIXME err, this is a big problem now + refs = append(refs, ur.update.references...) 
+ } else { + vc[*vUUId] = &cached{caps: &caps} + } } } } From 9feb5b1ff50f075507ccae68152ffa9915e0e243 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 1 Sep 2016 13:41:07 +0100 Subject: [PATCH 41/78] Adding comments. Ref T34 --HG-- branch : T34 --- client/simpletxnsubmitter.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index 5fa9c37..c40edbf 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -290,6 +290,7 @@ func (sts *SimpleTxnSubmitter) setAllocations(allocIdx int, rmIdToActionIndices } } +// translate from client representation to server representation func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picker *ch.CombinationPicker, actions *msgs.Action_List, clientActions *cmsgs.ClientAction_List, vc versionCache) (map[common.RMId]*[]int, error) { referencesInNeedOfPositions := []*msgs.VarIdPos{} @@ -445,6 +446,9 @@ func (sts *SimpleTxnSubmitter) translateRoll(outgoingSeg *capn.Segment, referenc roll.SetReferences(copyReferences(&clientReferences, outgoingSeg, referencesInNeedOfPositions, nil, nil)) } +// so the challenge here is that we need to merge the references which +// the client may have rewritten with the 'actual' references taking +// into account masks and such from capabilities func copyReferences(clientReferences *cmsgs.ClientVarIdPos_List, seg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, vUUId *common.VarUUId, vc versionCache) msgs.VarIdPos_List { all, mask, existingRefs := vc.ReferencesWriteMask(vUUId) if all { From 2a62cd54947a4278b8c32ebb09115d1b2e45a45c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 1 Sep 2016 18:08:18 +0100 Subject: [PATCH 42/78] Lots of reworking all around the updatefromabort case. There's a chance that that stuff is now correct. It's far more subtle and involved than I'd previously realised. Ref T34. --HG-- branch : T34 --- client/versioncache.go | 281 ++++++++++++++++++++++++++--------------- 1 file changed, 176 insertions(+), 105 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index b38cecd..4175c88 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -24,9 +24,11 @@ type update struct { varUUId *common.VarUUId } -type unreached struct { - update *update - updates *[]*update +type cacheOverlay struct { + *cached + // we only duplicate the txnId here for the MISSING case + txnId *common.TxnId + stored bool } var maxCapsCap *cmsgs.Capabilities @@ -235,18 +237,38 @@ func (vc versionCache) UpdateFromCommit(txn *eng.TxnReader, outcome *msgs.Outcom } func (vc versionCache) UpdateFromAbort(updatesCap *msgs.Update_List) map[common.TxnId]*[]*update { - l := updatesCap.Len() - validUpdates := make(map[common.TxnId]*[]*update) - unreachedMap := make(map[common.VarUUId]unreached, l) + updateGraph := make(map[common.VarUUId]*cacheOverlay) + + // 1. update everything we know we can already reach, and filter out erroneous updates + vc.updateExisting(updatesCap, updateGraph) + + // 2. figure out what we can now reach, and propagate through extended caps + vc.updateReachable(updateGraph) + + // 3. 
populate results + validUpdates := make(map[common.TxnId]*[]*update, len(updateGraph)) + for vUUId, overlay := range updateGraph { + if !overlay.stored { + continue + } + updateListPtr, found := validUpdates[*overlay.txnId] + if !found { + updateList := []*update{} + validUpdates[*overlay.txnId] = &updateList + } + vUUIdCopy := vUUId + *updateListPtr = append(*updateListPtr, &update{cached: overlay.cached, varUUId: &vUUIdCopy}) + } - for idx := 0; idx < l; idx++ { + return validUpdates +} + +func (vc versionCache) updateExisting(updatesCap *msgs.Update_List, updateGraph map[common.VarUUId]*cacheOverlay) { + for idx, l := 0, updatesCap.Len(); idx < l; idx++ { updateCap := updatesCap.At(idx) txnId := common.MakeTxnId(updateCap.TxnId()) clock := eng.VectorClockFromData(updateCap.Clock(), true) actionsCap := eng.TxnActionsFromData(updateCap.Actions(), true).Actions() - updatesList := make([]*update, 0, actionsCap.Len()) - updatesListPtr := &updatesList - validUpdates[*txnId] = updatesListPtr for idy, m := 0, actionsCap.Len(); idy < m; idy++ { actionCap := actionsCap.At(idy) @@ -270,10 +292,11 @@ func (vc versionCache) UpdateFromAbort(updatesCap *msgs.Update_List) map[common. c.clockElem = 0 c.value = nil c.references = nil - *updatesListPtr = append(*updatesListPtr, &update{ - cached: c, - varUUId: vUUId, - }) + updateGraph[*vUUId] = &cacheOverlay{ + cached: c, + txnId: txnId, + stored: true, + } } } @@ -282,94 +305,44 @@ func (vc versionCache) UpdateFromAbort(updatesCap *msgs.Update_List) map[common. if c, found := vc[*vUUId]; found { // If it's in vc then we can either reach it currently // or we have been able to in the past. - valid := c.txnId == nil - if !valid { + updating := c.txnId == nil + if !updating { cmp := c.txnId.Compare(txnId) if cmp == common.EQ && clockElem != c.clockElem { panic(fmt.Sprintf("Clock version changed on write for %v@%v (new:%v != old:%v)", vUUId, txnId, clockElem, c.clockElem)) } - valid = clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) + updating = clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) } - // If it's not valid then we're not going to send it to - // the client in which case the capabilities can't + // If we're not updating then the update must predate + // our current knowledge of vUUId. So we're not going + // to send it to the client in which case the + // capabilities vUUId grants via its own refs can't // widen: we already know everything the client knows - // and we're not extending that. - if valid { + // and we're not extending that. So it's safe to + // totally ignore it. + if updating { c.txnId = txnId c.clockElem = clockElem c.value = write.Value() - refs := write.References().ToArray() - c.references = refs - // This update could be here because the client txn - // had the capability to read this var and did so, - // but got the wrong version. In which case just - // updating this c is fine. But, this update could - // equally be here as a side effect of some other - // failed read. Just because c exists doesn't mean - // the client actually has the capability to read - // this var, so we need to be careful here. 
- valueCap := c.caps.Value() - refsReadCap := c.caps.References().Read() - refsReadCapAll := refsReadCap.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL - var refsReadCapOnly []uint32 - if !refsReadCapAll { - refsReadCapOnly = refsReadCap.Only().ToArray() - } - needsUpdate := valueCap == cmsgs.VALUECAPABILITY_READ || - valueCap == cmsgs.VALUECAPABILITY_READWRITE || - refsReadCapAll || len(refsReadCapOnly) > 0 - if needsUpdate { - *updatesListPtr = append(*updatesListPtr, &update{ - cached: c, - varUUId: vUUId, - }) - } - if refsReadCapAll || len(refsReadCapOnly) > 0 { - for idy, ref := range refs { - switch { - case refsReadCapAll: - case len(refsReadCapOnly) > 0 && refsReadCapOnly[0] == uint32(idy): - refsReadCapOnly = refsReadCapOnly[1:] - default: - continue - } - caps := ref.Capabilities() - vUUId := common.MakeVarUUId(ref.Id()) - if c, found := vc[*vUUId]; found { - // Even if c.txnId == nil, the fact is the - // client is going to have some capability - // to interact with this var, so we need to - // keep track of that. - c.caps = mergeCaps(c.caps, &caps) - } else if ur, found := unreachedMap[*vUUId]; found { - delete(unreachedMap, *vUUId) - c := ur.update.cached - c.caps = &caps - vc[*vUUId] = c - *ur.updates = append(*ur.updates, ur.update) - // FIXME err, this is a big problem now - refs = append(refs, ur.update.references...) - } else { - vc[*vUUId] = &cached{caps: &caps} - } - } + c.references = write.References().ToArray() + updateGraph[*vUUId] = &cacheOverlay{ + cached: c, + txnId: txnId, + stored: true, } } - } else if _, found := unreachedMap[*vUUId]; found { - panic(fmt.Sprintf("%v reported twice in same update (and appeared in unreachedMap twice!)", vUUId)) + } else { //log.Printf("%v contains write action of %v\n", txnId, vUUId) - unreachedMap[*vUUId] = unreached{ - update: &update{ - cached: &cached{ - txnId: txnId, - clockElem: clockElem, - value: write.Value(), - references: write.References().ToArray(), - }, - varUUId: vUUId, + updateGraph[*vUUId] = &cacheOverlay{ + cached: &cached{ + txnId: txnId, + clockElem: clockElem, + value: write.Value(), + references: write.References().ToArray(), }, - updates: updatesListPtr, + txnId: txnId, + stored: false, } } @@ -378,18 +351,77 @@ func (vc versionCache) UpdateFromAbort(updatesCap *msgs.Update_List) map[common. } } } +} + +func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOverlay) { + reaches := make(map[common.VarUUId][]*msgs.VarIdPos) + worklist := make([]common.VarUUId, 0, len(updateGraph)) - for txnId, updates := range validUpdates { - if len(*updates) == 0 { - delete(validUpdates, txnId) + for vUUId, overlay := range updateGraph { + if overlay.stored { + reaches[vUUId] = overlay.reachableReferences() + worklist = append(worklist, vUUId) + } + } + + for len(worklist) > 0 { + vUUId := worklist[0] + worklist = worklist[1:] + for _, ref := range reaches[vUUId] { + // Given the current vUUId.caps, we're looking at what we + // can reach from there. + vUUIdRef := common.MakeVarUUId(ref.Id()) + caps := ref.Capabilities() + var c *cached + if overlay, found := updateGraph[*vUUIdRef]; found { + if !overlay.stored { + overlay.stored = true + vc[*vUUIdRef] = overlay.cached + } + c = overlay.cached + } else { + // There's no update for vUUIdRef, but it's possible we're + // adding to the capabilities the client now has on + // vUUIdRef so we need to record that. That in turn can + // mean we now have access to extra vars. 
+ var found bool + c, found = vc[*vUUIdRef] + if !found { + // We have no idea though what this var actually points + // to. caps is just our capabilities to act on this + // var, so there's no extra work to do + // (c.reachableReferences will return []). + c = &cached{caps: &caps} + vc[*vUUIdRef] = c + } + } + // We have two questions to answer: 1. Have we already + // processed vUUIdRef? 2. If we have, do we have wider caps + // now than before? + before := reaches[*vUUIdRef] + c.mergeCaps(&caps) + after := c.reachableReferences() + if len(after) > len(before) { + reaches[*vUUIdRef] = after + worklist = append(worklist, *vUUIdRef) + } } } - return validUpdates } -func mergeCaps(a, b *cmsgs.Capabilities) *cmsgs.Capabilities { - if a == maxCapsCap || b == maxCapsCap { - return maxCapsCap +func (c *cached) mergeCaps(b *cmsgs.Capabilities) { + a := c.caps + switch { + case a == b: + return + case a == maxCapsCap || b == maxCapsCap: + c.caps = maxCapsCap + return + case a == nil: + c.caps = b + return + case b == nil: + return } aValue := a.Value() @@ -408,7 +440,8 @@ func mergeCaps(a, b *cmsgs.Capabilities) *cmsgs.Capabilities { refsWriteAll := aRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL || bRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL if valueRead && valueWrite && refsReadAll && refsWriteAll { - return maxCapsCap + c.caps = maxCapsCap + return } seg := capn.NewBuffer(nil) @@ -428,20 +461,30 @@ func mergeCaps(a, b *cmsgs.Capabilities) *cmsgs.Capabilities { cap.References().Read().SetAll() } else { aOnly, bOnly := aRefsRead.Only().ToArray(), bRefsRead.Only().ToArray() - cap.References().Read().SetOnly(mergeOnlies(seg, aOnly, bOnly)) + cap.References().Read().SetOnly(mergeOnliesSeg(seg, aOnly, bOnly)) } if refsWriteAll { cap.References().Write().SetAll() } else { aOnly, bOnly := aRefsWrite.Only().ToArray(), bRefsWrite.Only().ToArray() - cap.References().Write().SetOnly(mergeOnlies(seg, aOnly, bOnly)) + cap.References().Write().SetOnly(mergeOnliesSeg(seg, aOnly, bOnly)) } - return &cap + c.caps = &cap } -func mergeOnlies(seg *capn.Segment, a, b []uint32) capn.UInt32List { +func mergeOnliesSeg(seg *capn.Segment, a, b []uint32) capn.UInt32List { + only := mergeOnlies(a, b) + + cap := seg.NewUInt32List(len(only)) + for idx, index := range only { + cap.Set(idx, index) + } + return cap +} + +func mergeOnlies(a, b []uint32) []uint32 { only := make([]uint32, 0, len(a)+len(b)) for len(a) > 0 && len(b) > 0 { aIndex, bIndex := a[0], b[0] @@ -453,7 +496,7 @@ func mergeOnlies(seg *capn.Segment, a, b []uint32) capn.UInt32List { only = append(only, bIndex) b = b[1:] default: - only = append(only, bIndex) + only = append(only, aIndex) a = a[1:] b = b[1:] } @@ -464,11 +507,39 @@ func mergeOnlies(seg *capn.Segment, a, b []uint32) capn.UInt32List { only = append(only, b...) } - cap := seg.NewUInt32List(len(only)) - for idx, index := range only { - cap.Set(idx, index) + return only +} + +// does not leave holes in the result - compacted. 
+func (c *cached) reachableReferences() []*msgs.VarIdPos { + if c.caps == nil || len(c.references) == 0 { + return nil } - return cap + + refsReadCap := c.caps.References().Read() + refsWriteCap := c.caps.References().Write() + all := refsReadCap.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL || + refsWriteCap.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL + var only []uint32 + if !all { + refsReadOnly := c.caps.References().Read().Only().ToArray() + refsWriteOnly := c.caps.References().Write().Only().ToArray() + only = mergeOnlies(refsReadOnly, refsWriteOnly) + } + + result := make([]*msgs.VarIdPos, 0, len(c.references)) + for index, ref := range c.references { + if all { + result = append(result, &ref) + } else if len(only) > 0 && uint32(index) == only[0] { + result = append(result, &ref) + only = only[1:] + if len(only) == 0 { + break + } + } + } + return result } func (u *update) Value() []byte { From e38378410e86530f9d7f140019fbd12e681f108b Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 1 Sep 2016 20:46:02 +0100 Subject: [PATCH 43/78] Glad I realised this one... Ref T34. --HG-- branch : T34 --- client/versioncache.go | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index 4175c88..02dac20 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -373,7 +373,8 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver vUUIdRef := common.MakeVarUUId(ref.Id()) caps := ref.Capabilities() var c *cached - if overlay, found := updateGraph[*vUUIdRef]; found { + overlay, found := updateGraph[*vUUIdRef] + if found { if !overlay.stored { overlay.stored = true vc[*vUUIdRef] = overlay.cached @@ -384,12 +385,11 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver // adding to the capabilities the client now has on // vUUIdRef so we need to record that. That in turn can // mean we now have access to extra vars. - var found bool c, found = vc[*vUUIdRef] if !found { - // We have no idea though what this var actually points - // to. caps is just our capabilities to act on this - // var, so there's no extra work to do + // We have no idea though what this var (vUUIdRef) + // actually points to. caps is just our capabilities to + // act on this var, so there's no extra work to do // (c.reachableReferences will return []). c = &cached{caps: &caps} vc[*vUUIdRef] = c @@ -399,29 +399,44 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver // processed vUUIdRef? 2. If we have, do we have wider caps // now than before? before := reaches[*vUUIdRef] - c.mergeCaps(&caps) + ensureUpdate := c.mergeCaps(&caps) after := c.reachableReferences() if len(after) > len(before) { reaches[*vUUIdRef] = after worklist = append(worklist, *vUUIdRef) + ensureUpdate = true + } + if ensureUpdate && overlay == nil && c.txnId != nil { + // Our access to vUUIdRef has expanded to the extent that + // we can now see more of the refs from vUUIdRef, or we + // can now see the value of vUUIdRef. So even though there + // wasn't an actual update for vUUIdRef, we need to create + // one. 
+ updateGraph[*vUUIdRef] = &cacheOverlay{ + cached: c, + txnId: c.txnId, + stored: true, + } } } } } -func (c *cached) mergeCaps(b *cmsgs.Capabilities) { +// returns true iff we couldn't read the value before merge, but we +// can after +func (c *cached) mergeCaps(b *cmsgs.Capabilities) (gainedRead bool) { a := c.caps switch { case a == b: - return + return false case a == maxCapsCap || b == maxCapsCap: c.caps = maxCapsCap - return + return a != maxCapsCap case a == nil: c.caps = b - return + return b.Value() == cmsgs.VALUECAPABILITY_READ || b.Value() == cmsgs.VALUECAPABILITY_READWRITE case b == nil: - return + return false } aValue := a.Value() @@ -439,6 +454,8 @@ func (c *cached) mergeCaps(b *cmsgs.Capabilities) (gainedRead bool) { refsReadAll := aRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL || bRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL refsWriteAll := aRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL || bRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL + gainedRead = valueRead && aValue != cmsgs.VALUECAPABILITY_READ && aValue != cmsgs.VALUECAPABILITY_READWRITE + if valueRead && valueWrite && refsReadAll && refsWriteAll { c.caps = maxCapsCap return @@ -472,6 +489,7 @@ } c.caps = &cap + return } From f2da2ac7a5440511319192894822e37f79894da1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 2 Sep 2016 14:16:15 +0100 Subject: [PATCH 44/78] Hopefully, that's all the update-from-abort stuff sorted out. Which leaves the translate-from-client-txn stuff to do. Ref T34. --HG-- branch : T34 --- client/clienttxnsubmitter.go | 23 +------------ client/versioncache.go | 67 ++++++++++++++++++++++-------------- 2 files changed, 43 insertions(+), 47 deletions(-) diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index 536329f..5b1c553 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -140,28 +140,7 @@ func (cts *ClientTxnSubmitter) translateUpdates(seg *capn.Segment, updates map[c for idy, action := range *actions { clientAction := clientActions.At(idy) - clientAction.SetVarId(action.varUUId[:]) - if value := action.Value(); value == nil { - clientAction.SetDelete() - } else { - clientAction.SetWrite() - clientWrite := clientAction.Write() - clientWrite.SetValue(value) - references := action.references - clientReferences := cmsgs.NewClientVarIdPosList(seg, len(references)) - clientWrite.SetReferences(clientReferences) - referencesMask := action.ReferencesReadMask() - for idz, ref := range references { - varIdPos := clientReferences.At(idz) - if len(referencesMask) != 0 && referencesMask[0] == uint32(idz) { - referencesMask = referencesMask[1:] - varIdPos.SetVarId(ref.Id()) - varIdPos.SetCapabilities(ref.Capabilities()) - } - positions := common.Positions(ref.Positions()) - cts.hashCache.AddPosition(common.MakeVarUUId(ref.Id()), &positions) - } - } + action.AddToClientAction(cts.hashCache, seg, &clientAction) } } return clientUpdates diff --git a/client/versioncache.go b/client/versioncache.go index 02dac20..ffb88cc 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -6,6 +6,7 @@ import ( "goshawkdb.io/common" cmsgs "goshawkdb.io/common/capnp" msgs "goshawkdb.io/server/capnp" + ch "goshawkdb.io/server/consistenthash" eng "goshawkdb.io/server/txnengine" ) @@ -246,6 +247,7 @@ func (vc versionCache) UpdateFromAbort(updatesCap *msgs.Update_List) map[common.
vc.updateReachable(updateGraph) // 3. populate results + updates := make([]update, len(updateGraph)) validUpdates := make(map[common.TxnId]*[]*update, len(updateGraph)) for vUUId, overlay := range updateGraph { if !overlay.stored { @@ -257,7 +259,11 @@ func (vc versionCache) UpdateFromAbort(updatesCap *msgs.Update_List) map[common. validUpdates[*overlay.txnId] = &updateList } vUUIdCopy := vUUId - *updateListPtr = append(*updateListPtr, &update{cached: overlay.cached, varUUId: &vUUIdCopy}) + update := &updates[0] + updates = updates[1:] + update.cached = overlay.cached + update.varUUId = &vUUIdCopy + *updateListPtr = append(*updateListPtr, update) } return validUpdates @@ -560,31 +566,42 @@ func (c *cached) reachableReferences() []*msgs.VarIdPos { return result } -func (u *update) Value() []byte { - if u.value == nil { - return nil - } - switch u.caps.Value() { - case cmsgs.VALUECAPABILITY_READ, cmsgs.VALUECAPABILITY_READWRITE: - return u.value - default: - return []byte{} - } -} +func (u *update) AddToClientAction(hashCache *ch.ConsistentHashCache, seg *capn.Segment, clientAction *cmsgs.ClientAction) { + clientAction.SetVarId(u.varUUId[:]) + if u.cached.txnId == nil { + clientAction.SetDelete() + } else { + clientAction.SetWrite() + clientWrite := clientAction.Write() -func (u *update) ReferencesReadMask() []uint32 { - if u.value == nil { - return nil - } - read := u.caps.References().Read() - switch read.Which() { - case cmsgs.CAPABILITIESREFERENCESREAD_ALL: - mask := make([]uint32, len(u.references)) - for idx := range mask { - mask[idx] = uint32(idx) + switch u.caps.Value() { + case cmsgs.VALUECAPABILITY_READ, cmsgs.VALUECAPABILITY_READWRITE: + clientWrite.SetValue(u.value) + default: + clientWrite.SetValue([]byte{}) } - return mask - default: - return read.Only().ToArray() + + refsReadCaps := u.caps.References().Read() + all := refsReadCaps.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL + var only []uint32 + if !all { + only = refsReadCaps.Only().ToArray() + } + clientReferences := cmsgs.NewClientVarIdPosList(seg, len(u.references)) + for idx, ref := range u.references { + switch { + case all: + case len(only) > 0 && only[0] == uint32(idx): + only = only[1:] + default: + continue + } + varIdPos := clientReferences.At(idx) + varIdPos.SetVarId(ref.Id()) + varIdPos.SetCapabilities(ref.Capabilities()) + positions := common.Positions(ref.Positions()) + hashCache.AddPosition(common.MakeVarUUId(ref.Id()), &positions) + } + clientWrite.SetReferences(clientReferences) } } From cae57e16a232e911ef52b4cda398126223a417f3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 2 Sep 2016 21:08:42 +0100 Subject: [PATCH 45/78] Well that might be done on the incoming side too then. Ref T34. 
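Roughly, the incoming-side idea is the same hole-filling trick as on the outgoing side: where the client only holds write capabilities on some reference indices, those indices become holes for the client's references to fill, every other slot keeps the cached server-side reference, and the write gets promoted to a readwrite. A minimal standalone sketch of just that merge (hypothetical mergeWithHoles helper, plain strings standing in for the real capnp types; the actual logic lives in ReferencesForWrite and copyReferences below):

    package main

    import "fmt"

    // mergeWithHoles leaves nil holes at the indices the client may (and
    // therefore must) supply, taken from the sorted ascending only list,
    // and fills every other slot from the cached references.
    func mergeWithHoles(cached []string, only []uint32, clientLen int) []*string {
        results := make([]*string, clientLen)
        for idx := 0; idx < clientLen; idx++ {
            if len(only) > 0 && uint32(idx) == only[0] {
                only = only[1:] // hole: the client's reference goes here
            } else if idx < len(cached) {
                results[idx] = &cached[idx] // keep the cached reference
            }
        }
        return results
    }

    func main() {
        for idx, ref := range mergeWithHoles([]string{"r0", "r1", "r2"}, []uint32{1}, 3) {
            if ref == nil {
                fmt.Println(idx, "hole: client-supplied")
            } else {
                fmt.Println(idx, *ref)
            }
        }
    }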
--HG-- branch : T34 --- client/simpletxnsubmitter.go | 256 +++++++++++++++++------------------ client/versioncache.go | 91 ++++++++----- 2 files changed, 178 insertions(+), 169 deletions(-) diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index c40edbf..dbf7202 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -308,29 +308,30 @@ func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picke switch clientAction.Which() { case cmsgs.CLIENTACTION_READ: - sts.translateRead(&action, &clientAction) + err = sts.translateRead(&action, clientAction.Read()) case cmsgs.CLIENTACTION_WRITE: - sts.translateWrite(outgoingSeg, &referencesInNeedOfPositions, &action, &clientAction, vUUId, vc) + err = sts.translateWrite(vc, outgoingSeg, &referencesInNeedOfPositions, vUUId, &action, clientAction.Write()) case cmsgs.CLIENTACTION_READWRITE: - sts.translateReadWrite(outgoingSeg, &referencesInNeedOfPositions, &action, &clientAction, vUUId, vc) + err = sts.translateReadWrite(vc, outgoingSeg, &referencesInNeedOfPositions, vUUId, &action, clientAction.Readwrite()) case cmsgs.CLIENTACTION_CREATE: var positions *common.Positions - positions, hashCodes, err = sts.translateCreate(outgoingSeg, &referencesInNeedOfPositions, &action, &clientAction, vUUId, vc) - if err != nil { - return nil, err - } + positions, hashCodes, err = sts.translateCreate(vc, outgoingSeg, &referencesInNeedOfPositions, vUUId, &action, clientAction.Create()) createdPositions[*vUUId] = positions case cmsgs.CLIENTACTION_ROLL: - sts.translateRoll(outgoingSeg, &referencesInNeedOfPositions, &action, &clientAction) + err = sts.translateRoll(vc, outgoingSeg, &referencesInNeedOfPositions, &action, clientAction.Roll()) default: panic(fmt.Sprintf("Unexpected action type: %v", clientAction.Which())) } + if err != nil { + return nil, err + } + if hashCodes == nil { hashCodes, err = sts.hashCache.GetHashCodes(common.MakeVarUUId(action.VarId())) if err != nil { @@ -383,9 +384,6 @@ func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picke positions, found := createdPositions[*vUUId] if !found { positions = sts.hashCache.GetPositions(vUUId) - if !vc.EnsureSubset(vUUId, vUUIdPos.Capabilities()) { - return nil, fmt.Errorf("Reference created to %v attempts to extend known capabilities.", vUUId) - } } if positions == nil { return nil, fmt.Errorf("Txn contains reference to unknown var %v", vUUId) @@ -395,35 +393,72 @@ func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picke return rmIdToActionIndices, nil } -func (sts *SimpleTxnSubmitter) translateRead(action *msgs.Action, clientAction *cmsgs.ClientAction) { +func (sts *SimpleTxnSubmitter) translateRead(action *msgs.Action, clientRead cmsgs.ClientActionRead) error { action.SetRead() - clientRead := clientAction.Read() read := action.Read() read.SetVersion(clientRead.Version()) + return nil } -func (sts *SimpleTxnSubmitter) translateWrite(outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientAction *cmsgs.ClientAction, vUUId *common.VarUUId, vc versionCache) { - action.SetWrite() - clientWrite := clientAction.Write() - write := action.Write() - write.SetValue(vc.ValueForWrite(vUUId, clientWrite.Value())) +func (sts *SimpleTxnSubmitter) translateWrite(vc versionCache, outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, vUUId *common.VarUUId, action *msgs.Action, clientWrite cmsgs.ClientActionWrite) error { + writeValue, err := 
vc.ValueForWrite(vUUId, clientWrite.Value()) + if err != nil { + return err + } clientReferences := clientWrite.References() - write.SetReferences(copyReferences(&clientReferences, outgoingSeg, referencesInNeedOfPositions, vUUId, vc)) + refsWithHoles, c, err := vc.ReferencesForWrite(vUUId, &clientReferences) + if err != nil { + return err + } + if refsWithHoles == nil { + // it really is just a write + action.SetWrite() + write := action.Write() + write.SetValue(writeValue) + refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, refsWithHoles, &clientReferences) + if err != nil { + return err + } + write.SetReferences(*refs) + } else { + // it actually needs to be a read-write + action.SetReadwrite() + readWrite := action.Readwrite() + readWrite.SetVersion(c.txnId[:]) + readWrite.SetValue(writeValue) + refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, refsWithHoles, &clientReferences) + if err != nil { + return err + } + readWrite.SetReferences(*refs) + } + return nil } -func (sts *SimpleTxnSubmitter) translateReadWrite(outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientAction *cmsgs.ClientAction, vUUId *common.VarUUId, vc versionCache) { +func (sts *SimpleTxnSubmitter) translateReadWrite(vc versionCache, outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, vUUId *common.VarUUId, action *msgs.Action, clientReadWrite cmsgs.ClientActionReadwrite) error { + writeValue, err := vc.ValueForWrite(vUUId, clientReadWrite.Value()) + if err != nil { + return err + } + clientReferences := clientReadWrite.References() + refsWithHoles, _, err := vc.ReferencesForWrite(vUUId, &clientReferences) + if err != nil { + return err + } action.SetReadwrite() - clientReadWrite := clientAction.Readwrite() readWrite := action.Readwrite() readWrite.SetVersion(clientReadWrite.Version()) - readWrite.SetValue(vc.ValueForWrite(vUUId, clientReadWrite.Value())) - clientReferences := clientReadWrite.References() - readWrite.SetReferences(copyReferences(&clientReferences, outgoingSeg, referencesInNeedOfPositions, vUUId, vc)) + readWrite.SetValue(writeValue) + refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, refsWithHoles, &clientReferences) + if err != nil { + return err + } + readWrite.SetReferences(*refs) + return nil } -func (sts *SimpleTxnSubmitter) translateCreate(outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientAction *cmsgs.ClientAction, vUUId *common.VarUUId, vc versionCache) (*common.Positions, []common.RMId, error) { +func (sts *SimpleTxnSubmitter) translateCreate(vc versionCache, outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, vUUId *common.VarUUId, action *msgs.Action, clientCreate cmsgs.ClientActionCreate) (*common.Positions, []common.RMId, error) { action.SetCreate() - clientCreate := clientAction.Create() create := action.Create() create.SetValue(clientCreate.Value()) positions, hashCodes, err := sts.hashCache.CreatePositions(vUUId, int(sts.topology.MaxRMCount)) @@ -432,149 +467,102 @@ func (sts *SimpleTxnSubmitter) translateCreate(outgoingSeg *capn.Segment, refere } create.SetPositions((capn.UInt8List)(*positions)) clientReferences := clientCreate.References() - create.SetReferences(copyReferences(&clientReferences, outgoingSeg, referencesInNeedOfPositions, nil, vc)) + refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, nil, &clientReferences) + if err != nil { + return 
nil, nil, err + } + create.SetReferences(*refs) return positions, hashCodes, nil } -func (sts *SimpleTxnSubmitter) translateRoll(outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientAction *cmsgs.ClientAction) { +func (sts *SimpleTxnSubmitter) translateRoll(vc versionCache, outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, action *msgs.Action, clientRoll cmsgs.ClientActionRoll) error { action.SetRoll() - clientRoll := clientAction.Roll() roll := action.Roll() roll.SetVersion(clientRoll.Version()) roll.SetValue(clientRoll.Value()) clientReferences := clientRoll.References() - roll.SetReferences(copyReferences(&clientReferences, outgoingSeg, referencesInNeedOfPositions, nil, nil)) + refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, nil, &clientReferences) + if err != nil { + return err + } + roll.SetReferences(*refs) + return nil } // so the challenge here is that we need to merge the references which // the client may have rewritten with the 'actual' references taking // into account masks and such from capabilities -func copyReferences(clientReferences *cmsgs.ClientVarIdPos_List, seg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, vUUId *common.VarUUId, vc versionCache) msgs.VarIdPos_List { - all, mask, existingRefs := vc.ReferencesWriteMask(vUUId) - if all { +func copyReferences(vc versionCache, seg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, refsWithHoles []*msgs.VarIdPos, clientReferences *cmsgs.ClientVarIdPos_List) (*msgs.VarIdPos_List, error) { + if refsWithHoles == nil { refs := msgs.NewVarIdPosList(seg, clientReferences.Len()) for idx, l := 0, clientReferences.Len(); idx < l; idx++ { clientRef := clientReferences.At(idx) vUUIdPos := refs.At(idx) - vUUIdPos.SetId(clientRef.VarId()) - vUUIdPos.SetCapabilities(translateCapabilities(seg, clientRef.Capabilities())) + target := common.MakeVarUUId(clientRef.VarId()) + vUUIdPos.SetId(target[:]) + caps := clientRef.Capabilities() + if err := validateCapabilities(vc, target, caps); err != nil { + return nil, err + } + vUUIdPos.SetCapabilities(caps) *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) } - return refs + return &refs, nil } else { - refs := msgs.NewVarIdPosList(seg, len(existingRefs)) - clientRefLen := clientReferences.Len() - if clientRefLen > len(existingRefs) { - clientRefLen = len(existingRefs) - } - idx := 0 - for ; idx < clientRefLen; idx++ { + refs := msgs.NewVarIdPosList(seg, len(refsWithHoles)) + for idx, ref := range refsWithHoles { vUUIdPos := refs.At(idx) - if len(mask) > 0 && mask[0] == uint32(idx) { - mask = mask[1:] + if ref == nil { clientRef := clientReferences.At(idx) - vUUIdPos.SetId(clientRef.VarId()) - vUUIdPos.SetCapabilities(translateCapabilities(seg, clientRef.Capabilities())) + target := common.MakeVarUUId(clientRef.VarId()) + vUUIdPos.SetId(target[:]) + caps := clientRef.Capabilities() + if err := validateCapabilities(vc, target, caps); err != nil { + return nil, err + } + vUUIdPos.SetCapabilities(caps) } else { - existing := existingRefs[idx] - vUUIdPos.SetId(existing.Id()) - vUUIdPos.SetCapabilities(existing.Capabilities()) + vUUIdPos.SetId(ref.Id()) + vUUIdPos.SetCapabilities(ref.Capabilities()) } *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) } - for ; idx < len(existingRefs); idx++ { - vUUIdPos := refs.At(idx) - existing := existingRefs[idx] - vUUIdPos.SetId(existing.Id()) - 
vUUIdPos.SetCapabilities(existing.Capabilities()) - } - return refs + return &refs, nil } } -func translateCapabilities(seg *capn.Segment, cap cmsgs.Capabilities) cmsgs.Capabilities { - readWhich, writeWhich := cap.References().Read().Which(), cap.References().Write().Which() - if readWhich == cmsgs.CAPABILITIESREFERENCESREAD_ALL && - writeWhich == cmsgs.CAPABILITIESREFERENCESWRITE_ALL { - return cap - } - rebuild := false - if readWhich == cmsgs.CAPABILITIESREFERENCESREAD_ONLY { - only := cap.References().Read().Only().ToArray() - if len(only) > 1 { - old := only[0] - for _, index := range only[1:] { - if old >= index { - rebuild = true - break - } - old = index - } +func validateCapabilities(vc versionCache, target *common.VarUUId, cap cmsgs.Capabilities) error { + refsReadCap := cap.References().Read() + refsWriteCap := cap.References().Write() + if refsReadCap.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ONLY { + // just enforce that they unique and ascending + if !isUniqueAndAscending(refsReadCap.Only()) { + return fmt.Errorf("Invalid reference read capabilities: indices must be unique and ascending (ref target: %v)", target) } } - if !rebuild && writeWhich == cmsgs.CAPABILITIESREFERENCESWRITE_ONLY { - only := cap.References().Write().Only().ToArray() - if len(only) > 1 { - old := only[0] - for _, index := range only[1:] { - if old >= index { - rebuild = true - break - } - old = index - } + if refsWriteCap.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ONLY { + // just enforce that they unique and ascending + if !isUniqueAndAscending(refsWriteCap.Only()) { + return fmt.Errorf("Invalid reference write capabilities: indices must be unique and ascending (ref target: %v)", target) } } - if !rebuild { - return cap - } - capNew := cmsgs.NewCapabilities(seg) - capNew.SetValue(cap.Value()) - readNew := capNew.References().Read() - if readWhich == cmsgs.CAPABILITIESREFERENCESREAD_ALL { - readNew.SetAll() - } else { - only := cap.References().Read().Only().ToArray() - common.SortUInt32(only).Sort() - if len(only) > 1 { - old := only[0] - for idx := 1; idx < len(only); idx++ { - cur := only[idx] - if cur == old { - only = append(only[:idx], only[idx+1:]...) - idx-- - } - old = cur - } - } - onlyNew := seg.NewUInt32List(len(only)) - for idx, index := range only { - onlyNew.Set(idx, index) - } - readNew.SetOnly(onlyNew) + if !vc.EnsureSubset(target, cap) { + return fmt.Errorf("Attempt made to grant wider capabilities on %v than acceptable", target) } - writeNew := capNew.References().Write() - if writeWhich == cmsgs.CAPABILITIESREFERENCESWRITE_ALL { - writeNew.SetAll() - } else { - only := cap.References().Write().Only().ToArray() - common.SortUInt32(only).Sort() - if len(only) > 1 { - old := only[0] - for idx := 1; idx < len(only); idx++ { - cur := only[idx] - if cur == old { - only = append(only[:idx], only[idx+1:]...) 
- idx-- - } - old = cur + return nil +} + +func isUniqueAndAscending(onlyCap capn.UInt32List) bool { + only := onlyCap.ToArray() + if len(only) > 0 { + old := only[0] + only = only[1:] + for _, index := range only { + if index <= old { + return false } + old = index } - onlyNew := seg.NewUInt32List(len(only)) - for idx, index := range only { - onlyNew.Set(idx, index) - } - writeNew.SetOnly(onlyNew) } - return capNew + return true } diff --git a/client/versioncache.go b/client/versioncache.go index ffb88cc..0ee92f5 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -60,7 +60,7 @@ func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { vUUId := common.MakeVarUUId(action.VarId()) if which := action.Which(); which != cmsgs.CLIENTACTION_READ { return fmt.Errorf("Retry transaction should only include reads. Found %v", which) - } else if _, found := vc[*vUUId]; !found { + } else if c, found := vc[*vUUId]; !found || c.txnId == nil { return fmt.Errorf("Retry transaction has attempted to read from unknown object: %v", vUUId) } } @@ -69,10 +69,10 @@ func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { for idx, l := 0, actions.Len(); idx < l; idx++ { action := actions.At(idx) vUUId := common.MakeVarUUId(action.VarId()) - _, found := vc[*vUUId] + c, found := vc[*vUUId] switch action.Which() { case cmsgs.CLIENTACTION_READ, cmsgs.CLIENTACTION_WRITE, cmsgs.CLIENTACTION_READWRITE: - if !found { + if !found || c.txnId == nil { return fmt.Errorf("Transaction manipulates unknown object: %v", vUUId) } @@ -89,35 +89,58 @@ func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { return nil } -func (vc versionCache) ValueForWrite(vUUId *common.VarUUId, value []byte) []byte { +func (vc versionCache) ValueForWrite(vUUId *common.VarUUId, value []byte) ([]byte, error) { if vc == nil { - return value + return value, nil } - if c, found := vc[*vUUId]; !found { - panic(fmt.Errorf("ValueForWrite called for unknown %v", vUUId)) + if c, found := vc[*vUUId]; !found || c.txnId == nil { + return nil, fmt.Errorf("ValueForWrite called for unknown %v", vUUId) } else { switch c.caps.Value() { case cmsgs.VALUECAPABILITY_WRITE, cmsgs.VALUECAPABILITY_READWRITE: - return value + return value, nil default: - return c.value + return c.value, nil } } } -func (vc versionCache) ReferencesWriteMask(vUUId *common.VarUUId) (bool, []uint32, []msgs.VarIdPos) { - if vc == nil || vUUId == nil { - return true, nil, nil +// returns the 'extra' refs, with holes in the list for valid client refs +func (vc versionCache) ReferencesForWrite(vUUId *common.VarUUId, clientRefs *cmsgs.ClientVarIdPos_List) ([]*msgs.VarIdPos, *cached, error) { + if vc == nil { + return nil, nil, nil } - if c, found := vc[*vUUId]; !found { - panic(fmt.Errorf("ReferencesWriteMask called for unknown %v", vUUId)) + if c, found := vc[*vUUId]; !found || c.txnId == nil { + return nil, nil, fmt.Errorf("ReferencesForWrite called for unknown %v", vUUId) } else { - write := c.caps.References().Write() - switch write.Which() { + refsWriteCap := c.caps.References().Write() + switch refsWriteCap.Which() { case cmsgs.CAPABILITIESREFERENCESWRITE_ALL: - return true, nil, c.references + return nil, c, nil default: - return false, write.Only().ToArray(), c.references + clientRefsLen := clientRefs.Len() + // If the client _can_ write it then it _must_ write it. 
+ if clientRefsLen != len(c.references) { + return nil, nil, fmt.Errorf("Wrong number of references provided for write of %v", vUUId) + } + only := refsWriteCap.Only().ToArray() + results := make([]*msgs.VarIdPos, 0, len(c.references)) + nonNilAppended := false + for idx, ref := range c.references { + refCopy := ref + if len(only) > 0 && uint32(idx) == only[0] { + only = only[1:] + results = append(results, nil) + } else { + nonNilAppended = true + results = append(results, &refCopy) + } + } + if nonNilAppended { + return results, c, nil + } else { + return nil, c, nil + } } } } @@ -131,7 +154,11 @@ func (vc versionCache) EnsureSubset(vUUId *common.VarUUId, cap cmsgs.Capabilitie return true } valueNew, valueOld := cap.Value(), c.caps.Value() - if valueNew > valueOld { + switch { + case valueNew == valueOld: + case valueNew == cmsgs.VALUECAPABILITY_NONE: // new is bottom, always fine + case valueOld == cmsgs.VALUECAPABILITY_READWRITE: // old is top, always fine + default: return false } @@ -144,14 +171,12 @@ func (vc versionCache) EnsureSubset(vUUId *common.VarUUId, cap cmsgs.Capabilitie if len(readNewOnly) > len(readOldOnly) { return false } - common.SortUInt32(readNewOnly).Sort() - common.SortUInt32(readOldOnly).Sort() for idx, indexNew := range readNewOnly { indexOld := readOldOnly[0] readOldOnly = readOldOnly[1:] if indexNew < indexOld { return false - } else { + } else if indexNew > indexOld { for ; indexNew > indexOld && len(readOldOnly) > 0; readOldOnly = readOldOnly[1:] { indexOld = readOldOnly[0] } @@ -171,14 +196,12 @@ func (vc versionCache) EnsureSubset(vUUId *common.VarUUId, cap cmsgs.Capabilitie if len(writeNewOnly) > len(writeOldOnly) { return false } - common.SortUInt32(writeNewOnly).Sort() - common.SortUInt32(writeOldOnly).Sort() for idx, indexNew := range writeNewOnly { indexOld := writeOldOnly[0] writeOldOnly = writeOldOnly[1:] if indexNew < indexOld { return false - } else { + } else if indexNew > indexOld { for ; indexNew > indexOld && len(writeOldOnly) > 0; writeOldOnly = writeOldOnly[1:] { indexOld = writeOldOnly[0] } @@ -188,11 +211,8 @@ func (vc versionCache) EnsureSubset(vUUId *common.VarUUId, cap cmsgs.Capabilitie } } } - - return true - } else { - return true } + return true } func (vc versionCache) UpdateFromCommit(txn *eng.TxnReader, outcome *msgs.Outcome) { @@ -568,27 +588,28 @@ func (c *cached) reachableReferences() []*msgs.VarIdPos { func (u *update) AddToClientAction(hashCache *ch.ConsistentHashCache, seg *capn.Segment, clientAction *cmsgs.ClientAction) { clientAction.SetVarId(u.varUUId[:]) - if u.cached.txnId == nil { + c := u.cached + if c.txnId == nil { clientAction.SetDelete() } else { clientAction.SetWrite() clientWrite := clientAction.Write() - switch u.caps.Value() { + switch c.caps.Value() { case cmsgs.VALUECAPABILITY_READ, cmsgs.VALUECAPABILITY_READWRITE: - clientWrite.SetValue(u.value) + clientWrite.SetValue(c.value) default: clientWrite.SetValue([]byte{}) } - refsReadCaps := u.caps.References().Read() + refsReadCaps := c.caps.References().Read() all := refsReadCaps.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL var only []uint32 if !all { only = refsReadCaps.Only().ToArray() } - clientReferences := cmsgs.NewClientVarIdPosList(seg, len(u.references)) - for idx, ref := range u.references { + clientReferences := cmsgs.NewClientVarIdPosList(seg, len(c.references)) + for idx, ref := range c.references { switch { case all: case len(only) > 0 && only[0] == uint32(idx): From 46bf8437f5332accb92cee87ddded1e94dc1013f Mon Sep 17 00:00:00 2001 From: Matthew 
Sackman Date: Sat, 3 Sep 2016 11:49:11 +0100 Subject: [PATCH 46/78] Better understanding of what can happen with more onlies than you have refs. Ref T34. --HG-- branch : T34 --- client/simpletxnsubmitter.go | 14 +++--- client/versioncache.go | 83 +++++++++++++++++++++++------------- 2 files changed, 63 insertions(+), 34 deletions(-) diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index dbf7202..e96cd5c 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -512,7 +512,12 @@ func copyReferences(vc versionCache, seg *capn.Segment, referencesInNeedOfPositi refs := msgs.NewVarIdPosList(seg, len(refsWithHoles)) for idx, ref := range refsWithHoles { vUUIdPos := refs.At(idx) - if ref == nil { + switch { + case ref != nil: + vUUIdPos.SetId(ref.Id()) + vUUIdPos.SetCapabilities(ref.Capabilities()) + *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) + case idx < clientReferences.Len(): clientRef := clientReferences.At(idx) target := common.MakeVarUUId(clientRef.VarId()) vUUIdPos.SetId(target[:]) @@ -521,11 +526,10 @@ func copyReferences(vc versionCache, seg *capn.Segment, referencesInNeedOfPositi return nil, err } vUUIdPos.SetCapabilities(caps) - } else { - vUUIdPos.SetId(ref.Id()) - vUUIdPos.SetCapabilities(ref.Capabilities()) + *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) + default: + vUUIdPos.SetId([]byte{}) } - *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) } return &refs, nil } diff --git a/client/versioncache.go b/client/versioncache.go index 0ee92f5..cccf8ac 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -89,16 +89,19 @@ func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { return nil } +// the problem is that we can't distinguish whether or not a client is trying to write an empty value. func (vc versionCache) ValueForWrite(vUUId *common.VarUUId, value []byte) ([]byte, error) { if vc == nil { return value, nil } if c, found := vc[*vUUId]; !found || c.txnId == nil { - return nil, fmt.Errorf("ValueForWrite called for unknown %v", vUUId) + return nil, fmt.Errorf("Write attempted on unknown %v", vUUId) } else { - switch c.caps.Value() { - case cmsgs.VALUECAPABILITY_WRITE, cmsgs.VALUECAPABILITY_READWRITE: + switch valueCap := c.caps.Value(); { + case valueCap == cmsgs.VALUECAPABILITY_WRITE || valueCap == cmsgs.VALUECAPABILITY_READWRITE: return value, nil + case len(value) > 0: // fuzzy. The client could be attempting to write an empty value illegally too. + return nil, fmt.Errorf("Transaction illegally attempts to write the value of an object: %v", vUUId) default: return c.value, nil } } } @@ -119,23 +122,42 @@ func (vc versionCache) ReferencesForWrite(vUUId *common.VarUUId, clientRefs *cms return nil, c, nil default: clientRefsLen := clientRefs.Len() - // If the client _can_ write it then it _must_ write it. - if clientRefsLen != len(c.references) { - return nil, nil, fmt.Errorf("Wrong number of references provided for write of %v", vUUId) - } only := refsWriteCap.Only().ToArray() - results := make([]*msgs.VarIdPos, 0, len(c.references)) + // The client must provide refs for every index in only. + reqLen := 0 + if l := len(only); l > 0 { + reqLen = int(only[l-1]) + 1 + } + if clientRefsLen != reqLen { + return nil, nil, fmt.Errorf("Incorrect number of references provided for write of %v", vUUId) + } + // Where possible, we fill in the gaps in only with + // c.references.
Keep in mind that the client may have onlies + // that are longer than the current number of + // references. This can happen when a capability in a ref + // includes writes to n refs, and then the object itself is + // updated to only include m refs, where m < n. We change a + // write to a readwrite iff c.references - onlies is not the + // empty set. + resultsLen := clientRefsLen + if l := len(c.references); l > resultsLen { + resultsLen = l + } + results := make([]*msgs.VarIdPos, resultsLen) nonNilAppended := false - for idx, ref := range c.references { - refCopy := ref + for idx := 0; idx < clientRefsLen; idx++ { if len(only) > 0 && uint32(idx) == only[0] { only = only[1:] - results = append(results, nil) - } else { + } else if idx < len(c.references) { nonNilAppended = true - results = append(results, &refCopy) + results[idx] = &c.references[idx] } } + // add on anything in c.references that's left over + for idx := clientRefsLen; idx < resultsLen; idx++ { + nonNilAppended = true + results[idx] = &c.references[idx] + } if nonNilAppended { return results, c, nil } else { @@ -561,26 +583,27 @@ func (c *cached) reachableReferences() []*msgs.VarIdPos { } refsReadCap := c.caps.References().Read() - refsWriteCap := c.caps.References().Write() - all := refsReadCap.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL || - refsWriteCap.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL + all := refsReadCap.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL var only []uint32 if !all { - refsReadOnly := c.caps.References().Read().Only().ToArray() - refsWriteOnly := c.caps.References().Write().Only().ToArray() - only = mergeOnlies(refsReadOnly, refsWriteOnly) + only = c.caps.References().Read().Only().ToArray() } result := make([]*msgs.VarIdPos, 0, len(c.references)) +LOOP: for index, ref := range c.references { - if all { - result = append(result, &ref) - } else if len(only) > 0 && uint32(index) == only[0] { - result = append(result, &ref) + refCopy := ref + switch { + case all: + case len(only) == 0: + break LOOP + case uint32(index) == only[0]: only = only[1:] - if len(only) == 0 { - break - } + default: + continue + } + if len(ref.Id()) == common.KeyLen { + result = append(result, &refCopy) } } return result @@ -619,9 +642,11 @@ func (u *update) AddToClientAction(hashCache *ch.ConsistentHashCache, seg *capn. } varIdPos := clientReferences.At(idx) varIdPos.SetVarId(ref.Id()) - varIdPos.SetCapabilities(ref.Capabilities()) - positions := common.Positions(ref.Positions()) - hashCache.AddPosition(common.MakeVarUUId(ref.Id()), &positions) + if len(ref.Id()) == common.KeyLen { + varIdPos.SetCapabilities(ref.Capabilities()) + positions := common.Positions(ref.Positions()) + hashCache.AddPosition(common.MakeVarUUId(ref.Id()), &positions) + } } clientWrite.SetReferences(clientReferences) } From 126fac1f8b2fbe007e3a3f5efe38e413dc7634ce Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 4 Sep 2016 17:00:13 +0100 Subject: [PATCH 47/78] Extract some functionality to common, and then a few bug fixes too. Ref T34. 
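For orientation, a sketch of the shape the extracted code plausibly takes in common (assumed from the call sites below, namely common.Capabilities, common.NewCapabilities and common.MaxCapsCap; the authoritative definition lives in the common repository and may differ, and the capnp import path is assumed). MaxCapsCap mirrors the init() removed from client/versioncache.go, and pointer comparison against it keeps working because there is a single shared instance:

    // Package common: hypothetical shape, not the committed code.
    package common

    import (
        capn "github.com/glycerine/go-capnproto"
        cmsgs "goshawkdb.io/common/capnp"
    )

    // Capabilities wraps the capnp struct so callers can compare against
    // the MaxCapsCap singleton by pointer.
    type Capabilities struct {
        cmsgs.Capabilities
    }

    func NewCapabilities(c cmsgs.Capabilities) *Capabilities {
        return &Capabilities{Capabilities: c}
    }

    // MaxCapsCap grants read-write on the value and on all references.
    var MaxCapsCap = func() *Capabilities {
        seg := capn.NewBuffer(nil)
        cap := cmsgs.NewCapabilities(seg)
        cap.SetValue(cmsgs.VALUECAPABILITY_READWRITE)
        ref := cap.References()
        ref.Read().SetAll()
        ref.Write().SetAll()
        return NewCapabilities(cap)
    }()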
--HG-- branch : T34 --- client/clienttxnsubmitter.go | 2 +- client/versioncache.go | 147 +++++---------------------------- configuration/configuration.go | 22 ++--- network/connection.go | 12 +-- 4 files changed, 40 insertions(+), 143 deletions(-) diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index 5b1c553..e434ed2 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -22,7 +22,7 @@ type ClientTxnSubmitter struct { initialDelay time.Duration } -func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, roots map[common.VarUUId]*cmsgs.Capabilities, cm paxos.ConnectionManager) *ClientTxnSubmitter { +func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, roots map[common.VarUUId]*common.Capabilities, cm paxos.ConnectionManager) *ClientTxnSubmitter { return &ClientTxnSubmitter{ SimpleTxnSubmitter: NewSimpleTxnSubmitter(rmId, bootCount, cm), versionCache: NewVersionCache(roots), diff --git a/client/versioncache.go b/client/versioncache.go index cccf8ac..2aeaa6d 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -15,7 +15,7 @@ type versionCache map[common.VarUUId]*cached type cached struct { txnId *common.TxnId clockElem uint64 - caps *cmsgs.Capabilities + caps *common.Capabilities value []byte references []msgs.VarIdPos } @@ -32,19 +32,7 @@ type cacheOverlay struct { stored bool } -var maxCapsCap *cmsgs.Capabilities - -func init() { - seg := capn.NewBuffer(nil) - cap := cmsgs.NewCapabilities(seg) - cap.SetValue(cmsgs.VALUECAPABILITY_READWRITE) - ref := cap.References() - ref.Read().SetAll() - ref.Write().SetAll() - maxCapsCap = &cap -} - -func NewVersionCache(roots map[common.VarUUId]*cmsgs.Capabilities) versionCache { +func NewVersionCache(roots map[common.VarUUId]*common.Capabilities) versionCache { cache := make(map[common.VarUUId]*cached) for vUUId, caps := range roots { cache[vUUId] = &cached{caps: caps} @@ -60,7 +48,7 @@ func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { vUUId := common.MakeVarUUId(action.VarId()) if which := action.Which(); which != cmsgs.CLIENTACTION_READ { return fmt.Errorf("Retry transaction should only include reads. 
Found %v", which) - } else if c, found := vc[*vUUId]; !found || c.txnId == nil { + } else if _, found := vc[*vUUId]; !found { return fmt.Errorf("Retry transaction has attempted to read from unknown object: %v", vUUId) } } @@ -69,10 +57,10 @@ func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { for idx, l := 0, actions.Len(); idx < l; idx++ { action := actions.At(idx) vUUId := common.MakeVarUUId(action.VarId()) - c, found := vc[*vUUId] + _, found := vc[*vUUId] switch action.Which() { case cmsgs.CLIENTACTION_READ, cmsgs.CLIENTACTION_WRITE, cmsgs.CLIENTACTION_READWRITE: - if !found || c.txnId == nil { + if !found { return fmt.Errorf("Transaction manipulates unknown object: %v", vUUId) } @@ -94,7 +82,7 @@ func (vc versionCache) ValueForWrite(vUUId *common.VarUUId, value []byte) ([]byt if vc == nil { return value, nil } - if c, found := vc[*vUUId]; !found || c.txnId == nil { + if c, found := vc[*vUUId]; !found { return nil, fmt.Errorf("Write attempted on unknown %v", vUUId) } else { switch valueCap := c.caps.Value(); { @@ -113,7 +101,7 @@ func (vc versionCache) ReferencesForWrite(vUUId *common.VarUUId, clientRefs *cms if vc == nil { return nil, nil, nil } - if c, found := vc[*vUUId]; !found || c.txnId == nil { + if c, found := vc[*vUUId]; !found { return nil, nil, fmt.Errorf("ReferencesForWrite called for unknown %v", vUUId) } else { refsWriteCap := c.caps.References().Write() @@ -172,7 +160,7 @@ func (vc versionCache) EnsureSubset(vUUId *common.VarUUId, cap cmsgs.Capabilitie return true } if c, found := vc[*vUUId]; found { - if c.caps == maxCapsCap { + if c.caps == common.MaxCapsCap { return true } valueNew, valueOld := cap.Value(), c.caps.Value() @@ -251,7 +239,7 @@ func (vc versionCache) UpdateFromCommit(txn *eng.TxnReader, outcome *msgs.Outcom c = &cached{ txnId: txnId, clockElem: clock.At(vUUId), - caps: maxCapsCap, + caps: common.MaxCapsCap, value: create.Value(), references: create.References().ToArray(), } @@ -298,7 +286,8 @@ func (vc versionCache) UpdateFromAbort(updatesCap *msgs.Update_List) map[common. updateListPtr, found := validUpdates[*overlay.txnId] if !found { updateList := []*update{} - validUpdates[*overlay.txnId] = &updateList + updateListPtr = &updateList + validUpdates[*overlay.txnId] = updateListPtr } vUUIdCopy := vUUId update := &updates[0] @@ -419,7 +408,7 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver // Given the current vUUId.caps, we're looking at what we // can reach from there. vUUIdRef := common.MakeVarUUId(ref.Id()) - caps := ref.Capabilities() + caps := common.NewCapabilities(ref.Capabilities()) var c *cached overlay, found := updateGraph[*vUUIdRef] if found { @@ -439,7 +428,7 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver // actually points to. caps is just our capabilities to // act on this var, so there's no extra work to do // (c.reachableReferences will return []). - c = &cached{caps: &caps} + c = &cached{caps: caps} vc[*vUUIdRef] = c } } @@ -447,7 +436,7 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver // processed vUUIdRef? 2. If we have, do we have wider caps // now than before? 
before := reaches[*vUUIdRef] - ensureUpdate := c.mergeCaps(&caps) + ensureUpdate := c.mergeCaps(caps) after := c.reachableReferences() if len(after) > len(before) { reaches[*vUUIdRef] = after @@ -472,108 +461,16 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver // returns true iff we couldn't read the value before merge, but we // can after -func (c *cached) mergeCaps(b *cmsgs.Capabilities) (gainedRead bool) { +func (c *cached) mergeCaps(b *common.Capabilities) (gainedRead bool) { a := c.caps - switch { - case a == b: - return false - case a == maxCapsCap || b == maxCapsCap: - c.caps = maxCapsCap - return a != maxCapsCap - case a == nil: - c.caps = b - return b.Value() == cmsgs.VALUECAPABILITY_READ || b.Value() == cmsgs.VALUECAPABILITY_READWRITE - case b == nil: - return false + c.caps = a.Union(b) + if a != c.caps { + aValue := a.Value() + nValue := c.caps.Value() + return (aValue != cmsgs.VALUECAPABILITY_READ && aValue != cmsgs.VALUECAPABILITY_READWRITE) && + (nValue == cmsgs.VALUECAPABILITY_READ || nValue == cmsgs.VALUECAPABILITY_READWRITE) } - - aValue := a.Value() - aRefsRead := a.References().Read() - aRefsWrite := a.References().Write() - - bValue := b.Value() - bRefsRead := b.References().Read() - bRefsWrite := b.References().Write() - - valueRead := aValue == cmsgs.VALUECAPABILITY_READWRITE || aValue == cmsgs.VALUECAPABILITY_READ || - bValue == cmsgs.VALUECAPABILITY_READWRITE || bValue == cmsgs.VALUECAPABILITY_READ - valueWrite := aValue == cmsgs.VALUECAPABILITY_READWRITE || aValue == cmsgs.VALUECAPABILITY_WRITE || - bValue == cmsgs.VALUECAPABILITY_READWRITE || bValue == cmsgs.VALUECAPABILITY_WRITE - refsReadAll := aRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL || bRefsRead.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ONLY - refsWriteAll := aRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL || bRefsWrite.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ALL - - gainedRead = valueRead && aValue != cmsgs.VALUECAPABILITY_READ && aValue != cmsgs.VALUECAPABILITY_READWRITE - - if valueRead && valueWrite && refsReadAll && refsWriteAll { - c.caps = maxCapsCap - return - } - - seg := capn.NewBuffer(nil) - cap := cmsgs.NewCapabilities(seg) - switch { - case valueRead && valueWrite: - cap.SetValue(cmsgs.VALUECAPABILITY_READWRITE) - case valueWrite: - cap.SetValue(cmsgs.VALUECAPABILITY_WRITE) - case valueRead: - cap.SetValue(cmsgs.VALUECAPABILITY_WRITE) - default: - cap.SetValue(cmsgs.VALUECAPABILITY_NONE) - } - - if refsReadAll { - cap.References().Read().SetAll() - } else { - aOnly, bOnly := aRefsRead.Only().ToArray(), bRefsRead.Only().ToArray() - cap.References().Read().SetOnly(mergeOnliesSeg(seg, aOnly, bOnly)) - } - - if refsWriteAll { - cap.References().Write().SetAll() - } else { - aOnly, bOnly := aRefsWrite.Only().ToArray(), bRefsWrite.Only().ToArray() - cap.References().Write().SetOnly(mergeOnliesSeg(seg, aOnly, bOnly)) - } - - c.caps = &cap - return -} - -func mergeOnliesSeg(seg *capn.Segment, a, b []uint32) capn.UInt32List { - only := mergeOnlies(a, b) - - cap := seg.NewUInt32List(len(only)) - for idx, index := range only { - cap.Set(idx, index) - } - return cap -} - -func mergeOnlies(a, b []uint32) []uint32 { - only := make([]uint32, 0, len(a)+len(b)) - for len(a) > 0 && len(b) > 0 { - aIndex, bIndex := a[0], b[0] - switch { - case aIndex < bIndex: - only = append(only, aIndex) - a = a[1:] - case aIndex > bIndex: - only = append(only, bIndex) - b = b[1:] - default: - only = append(only, aIndex) - a = a[1:] - b = b[1:] - } - } - if 
len(a) > 0 { - only = append(only, a...) - } else { - only = append(only, b...) - } - - return only + return false } // does not leave holes in the result - compacted. diff --git a/configuration/configuration.go b/configuration/configuration.go index 789d4d6..14f2240 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -32,7 +32,7 @@ type Configuration struct { roots []string rms common.RMIds rmsRemoved map[common.RMId]server.EmptyStruct - fingerprints map[[sha256.Size]byte]map[string]*cmsgs.Capabilities + fingerprints map[[sha256.Size]byte]map[string]*common.Capabilities nextConfiguration *NextConfiguration } @@ -207,7 +207,7 @@ func decodeConfiguration(decoder *json.Decoder) (*Configuration, error) { } else { rootsMap := make(map[string]server.EmptyStruct) rootsName := []string{} - fingerprints := make(map[[sha256.Size]byte]map[string]*cmsgs.Capabilities, len(config.ClientCertificateFingerprints)) + fingerprints := make(map[[sha256.Size]byte]map[string]*common.Capabilities, len(config.ClientCertificateFingerprints)) seg := capn.NewBuffer(nil) for fingerprint, rootsCapabilities := range config.ClientCertificateFingerprints { fingerprintBytes, err := hex.DecodeString(fingerprint) @@ -219,7 +219,7 @@ func decodeConfiguration(decoder *json.Decoder) (*Configuration, error) { if len(rootsCapabilities) == 0 { return nil, fmt.Errorf("No roots configured for client fingerprint %v; at least 1 needed", fingerprint) } - roots := make(map[string]*cmsgs.Capabilities, len(rootsCapabilities)) + roots := make(map[string]*common.Capabilities, len(rootsCapabilities)) for name, rootCapabilities := range rootsCapabilities { if _, found := rootsMap[name]; !found { rootsMap[name] = server.EmptyStructVal @@ -285,7 +285,7 @@ func decodeConfiguration(decoder *json.Decoder) (*Configuration, error) { } capRefs.Write().SetOnly(only) } - roots[name] = &cap + roots[name] = common.NewCapabilities(cap) } ary := [sha256.Size]byte{} copy(ary[:], fingerprintBytes) @@ -325,18 +325,18 @@ func ConfigurationFromCap(config *msgs.Configuration) *Configuration { rootsName := []string{} rootsMap := make(map[string]server.EmptyStruct) fingerprints := config.Fingerprints() - fingerprintsMap := make(map[[sha256.Size]byte]map[string]*cmsgs.Capabilities, fingerprints.Len()) + fingerprintsMap := make(map[[sha256.Size]byte]map[string]*common.Capabilities, fingerprints.Len()) for idx, l := 0, fingerprints.Len(); idx < l; idx++ { fingerprint := fingerprints.At(idx) ary := [sha256.Size]byte{} copy(ary[:], fingerprint.Sha256()) rootsCap := fingerprint.Roots() - roots := make(map[string]*cmsgs.Capabilities, rootsCap.Len()) + roots := make(map[string]*common.Capabilities, rootsCap.Len()) for idy, m := 0, rootsCap.Len(); idy < m; idy++ { rootCap := rootsCap.At(idy) name := rootCap.Name() capabilities := rootCap.Capabilities() - roots[name] = &capabilities + roots[name] = common.NewCapabilities(capabilities) if _, found := rootsMap[name]; !found { rootsName = append(rootsName, name) rootsMap[name] = server.EmptyStructVal @@ -430,7 +430,7 @@ func (a *Configuration) Equal(b *Configuration) bool { return false } else { for name, aRootCaps := range aRoots { - if bRootCaps, found := bRoots[name]; !found || !common.EqualCapabilities(aRootCaps, bRootCaps) { + if bRootCaps, found := bRoots[name]; !found || !aRootCaps.Equal(bRootCaps) { return false } } @@ -463,7 +463,7 @@ func (config *Configuration) SetClusterUUId(uuid uint64) { } } -func (config *Configuration) Fingerprints() 
map[[sha256.Size]byte]map[string]*cmsgs.Capabilities { +func (config *Configuration) Fingerprints() map[[sha256.Size]byte]map[string]*common.Capabilities { return config.fingerprints } @@ -530,7 +530,7 @@ func (config *Configuration) Clone() *Configuration { roots: make([]string, len(config.roots)), rms: make([]common.RMId, len(config.rms)), rmsRemoved: make(map[common.RMId]server.EmptyStruct, len(config.rmsRemoved)), - fingerprints: make(map[[sha256.Size]byte]map[string]*cmsgs.Capabilities, len(config.fingerprints)), + fingerprints: make(map[[sha256.Size]byte]map[string]*common.Capabilities, len(config.fingerprints)), nextConfiguration: config.nextConfiguration.Clone(), } @@ -593,7 +593,7 @@ func (config *Configuration) AddToSegAutoRoot(seg *capn.Segment) msgs.Configurat for name, capabilities := range roots { rootCap := msgs.NewRoot(seg) rootCap.SetName(name) - rootCap.SetCapabilities(*capabilities) + rootCap.SetCapabilities(capabilities.Capabilities) rootsCap.Set(idy, rootCap) idy++ } diff --git a/network/connection.go b/network/connection.go index 5c0dbaa..3cc5af5 100644 --- a/network/connection.go +++ b/network/connection.go @@ -645,8 +645,8 @@ func (cash *connectionAwaitServerHandshake) makeHelloServerFromServer() *capn.Se type connectionAwaitClientHandshake struct { *Connection peerCerts []*x509.Certificate - roots map[string]*cmsgs.Capabilities - rootsVar map[common.VarUUId]*cmsgs.Capabilities + roots map[string]*common.Capabilities + rootsVar map[common.VarUUId]*common.Capabilities } func (cach *connectionAwaitClientHandshake) connectionStateMachineComponentWitness() {} @@ -688,7 +688,7 @@ func (cach *connectionAwaitClientHandshake) start() (bool, error) { } } -func (cach *connectionAwaitClientHandshake) verifyPeerCerts(peerCerts []*x509.Certificate) (authenticated bool, hashsum [sha256.Size]byte, roots map[string]*cmsgs.Capabilities) { +func (cach *connectionAwaitClientHandshake) verifyPeerCerts(peerCerts []*x509.Certificate) (authenticated bool, hashsum [sha256.Size]byte, roots map[string]*common.Capabilities) { fingerprints := cach.topology.Fingerprints() for _, cert := range peerCerts { hashsum = sha256.Sum256(cert.Raw) @@ -709,7 +709,7 @@ func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer() *capn.Se hello.SetNamespace(namespace) rootsCap := cmsgs.NewRootList(seg, len(cach.roots)) idy := 0 - rootsVar := make(map[common.VarUUId]*cmsgs.Capabilities, len(cach.roots)) + rootsVar := make(map[common.VarUUId]*common.Capabilities, len(cach.roots)) for idx, name := range cach.topology.RootNames() { if capabilities, found := cach.roots[name]; found { rootCap := rootsCap.At(idy) @@ -717,7 +717,7 @@ func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer() *capn.Se vUUId := cach.topology.Roots[idx].VarUUId rootCap.SetName(name) rootCap.SetVarId(vUUId[:]) - rootCap.SetCapabilities(*capabilities) + rootCap.SetCapabilities(capabilities.Capabilities) rootsVar[*vUUId] = capabilities } } @@ -821,7 +821,7 @@ func (cr *connectionRun) topologyChanged(tc *connectionMsgTopologyChanged) error return errors.New("Client connection closed: No client certificate known") } else if len(roots) == len(cr.roots) { for name, capsOld := range cr.roots { - if capsNew, found := roots[name]; !found || !common.EqualCapabilities(capsNew, capsOld) { + if capsNew, found := roots[name]; !found || !capsNew.Equal(capsOld) { server.Log("Connection", cr.Connection, "topologyChanged", tc, "(roots changed)") tc.maybeClose() return errors.New("Client connection closed: roots have changed") 
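The patch above replaces the hand-rolled capability merge in mergeCaps with a single call to a.Union(b) on the new common.Capabilities type. As a reference point for what that union is expected to compute, here is a minimal, self-contained sketch. The names in it (ValueCap, unionValue, and the standalone mergeOnlies) are hypothetical stand-ins rather than the real common package API; the semantics are inferred from the mergeCaps and mergeOnlies code removed in this patch.

package main

import "fmt"

// The value-capability lattice: None < Read, Write < ReadWrite.
type ValueCap int

const (
	None ValueCap = iota
	Read
	Write
	ReadWrite
)

func (v ValueCap) canRead() bool  { return v == Read || v == ReadWrite }
func (v ValueCap) canWrite() bool { return v == Write || v == ReadWrite }

// unionValue returns the least upper bound of a and b: the result is
// readable iff either argument is, and writable iff either argument is.
func unionValue(a, b ValueCap) ValueCap {
	read := a.canRead() || b.canRead()
	write := a.canWrite() || b.canWrite()
	switch {
	case read && write:
		return ReadWrite
	case read:
		return Read
	case write:
		return Write
	default:
		return None
	}
}

// mergeOnlies merges two sorted, duplicate-free lists of reference
// indices into one, mirroring the mergeOnlies helper deleted above.
func mergeOnlies(a, b []uint32) []uint32 {
	only := make([]uint32, 0, len(a)+len(b))
	for len(a) > 0 && len(b) > 0 {
		switch {
		case a[0] < b[0]:
			only = append(only, a[0])
			a = a[1:]
		case a[0] > b[0]:
			only = append(only, b[0])
			b = b[1:]
		default: // equal index: keep one copy, advance both lists
			only = append(only, a[0])
			a, b = a[1:], b[1:]
		}
	}
	only = append(only, a...) // at most one of a, b is still non-empty
	return append(only, b...)
}

func main() {
	fmt.Println(unionValue(Read, Write) == ReadWrite)        // true
	fmt.Println(mergeOnlies([]uint32{0, 2}, []uint32{2, 3})) // [0 2 3]
}

Under this reading, the gainedRead result of mergeCaps is true exactly when the union is readable but the old capability was not.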
From 7923f517d08b941d2c74153cc9cb976c6b4513d6 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 9 Sep 2016 12:18:23 +0100 Subject: [PATCH 48/78] Massive simplification of capabilities. Ref T34. --HG-- branch : T34 --- capnp/configuration.capnp | 4 +- capnp/configuration.capnp.go | 14 +- capnp/var.capnp | 6 +- capnp/var.capnp.go | 14 +- client/clienttxnsubmitter.go | 2 +- client/simpletxnsubmitter.go | 134 ++++-------------- client/versioncache.go | 240 ++++++--------------------------- configuration/configuration.go | 115 +++++----------- network/connection.go | 14 +- txnengine/frame.go | 2 +- 10 files changed, 127 insertions(+), 418 deletions(-) diff --git a/capnp/configuration.capnp b/capnp/configuration.capnp index 33471b2..506cd71 100644 --- a/capnp/configuration.capnp +++ b/capnp/configuration.capnp @@ -41,8 +41,8 @@ struct Fingerprint { } struct Root { - name @0: Text; - capabilities @1: Common.Capabilities; + name @0: Text; + capability @1: Common.Capability; } struct ConditionPair { diff --git a/capnp/configuration.capnp.go b/capnp/configuration.capnp.go index 2c67f2d..5cb19d2 100644 --- a/capnp/configuration.capnp.go +++ b/capnp/configuration.capnp.go @@ -1552,10 +1552,10 @@ func ReadRootRoot(s *C.Segment) Root { return Root(s.Root(0).ToStruct()) } func (s Root) Name() string { return C.Struct(s).GetObject(0).ToText() } func (s Root) NameBytes() []byte { return C.Struct(s).GetObject(0).ToDataTrimLastByte() } func (s Root) SetName(v string) { C.Struct(s).SetObject(0, s.Segment.NewText(v)) } -func (s Root) Capabilities() capnp.Capabilities { - return capnp.Capabilities(C.Struct(s).GetObject(1).ToStruct()) +func (s Root) Capability() capnp.Capability { + return capnp.Capability(C.Struct(s).GetObject(1).ToStruct()) } -func (s Root) SetCapabilities(v capnp.Capabilities) { C.Struct(s).SetObject(1, C.Object(v)) } +func (s Root) SetCapability(v capnp.Capability) { C.Struct(s).SetObject(1, C.Object(v)) } func (s Root) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -1584,12 +1584,12 @@ func (s Root) WriteJSON(w io.Writer) error { if err != nil { return err } - _, err = b.WriteString("\"capabilities\":") + _, err = b.WriteString("\"capability\":") if err != nil { return err } { - s := s.Capabilities() + s := s.Capability() err = s.WriteJSON(b) if err != nil { return err @@ -1635,12 +1635,12 @@ func (s Root) WriteCapLit(w io.Writer) error { if err != nil { return err } - _, err = b.WriteString("capabilities = ") + _, err = b.WriteString("capability = ") if err != nil { return err } { - s := s.Capabilities() + s := s.Capability() err = s.WriteCapLit(b) if err != nil { return err diff --git a/capnp/var.capnp b/capnp/var.capnp index 0a0d4a7..0a45f73 100644 --- a/capnp/var.capnp +++ b/capnp/var.capnp @@ -16,7 +16,7 @@ struct Var { } struct VarIdPos { - id @0: Data; - positions @1: List(UInt8); - capabilities @2: Common.Capabilities; + id @0: Data; + positions @1: List(UInt8); + capability @2: Common.Capability; } diff --git a/capnp/var.capnp.go b/capnp/var.capnp.go index d4734b4..5929f28 100644 --- a/capnp/var.capnp.go +++ b/capnp/var.capnp.go @@ -313,10 +313,10 @@ func (s VarIdPos) Id() []byte { return C.Struct(s).GetObject(0). 
func (s VarIdPos) SetId(v []byte) { C.Struct(s).SetObject(0, s.Segment.NewData(v)) } func (s VarIdPos) Positions() C.UInt8List { return C.UInt8List(C.Struct(s).GetObject(1)) } func (s VarIdPos) SetPositions(v C.UInt8List) { C.Struct(s).SetObject(1, C.Object(v)) } -func (s VarIdPos) Capabilities() capnp.Capabilities { - return capnp.Capabilities(C.Struct(s).GetObject(2).ToStruct()) +func (s VarIdPos) Capability() capnp.Capability { + return capnp.Capability(C.Struct(s).GetObject(2).ToStruct()) } -func (s VarIdPos) SetCapabilities(v capnp.Capabilities) { C.Struct(s).SetObject(2, C.Object(v)) } +func (s VarIdPos) SetCapability(v capnp.Capability) { C.Struct(s).SetObject(2, C.Object(v)) } func (s VarIdPos) WriteJSON(w io.Writer) error { b := bufio.NewWriter(w) var err error @@ -382,12 +382,12 @@ func (s VarIdPos) WriteJSON(w io.Writer) error { if err != nil { return err } - _, err = b.WriteString("\"capabilities\":") + _, err = b.WriteString("\"capability\":") if err != nil { return err } { - s := s.Capabilities() + s := s.Capability() err = s.WriteJSON(b) if err != nil { return err @@ -470,12 +470,12 @@ func (s VarIdPos) WriteCapLit(w io.Writer) error { if err != nil { return err } - _, err = b.WriteString("capabilities = ") + _, err = b.WriteString("capability = ") if err != nil { return err } { - s := s.Capabilities() + s := s.Capability() err = s.WriteCapLit(b) if err != nil { return err diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index e434ed2..de546a5 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -22,7 +22,7 @@ type ClientTxnSubmitter struct { initialDelay time.Duration } -func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, roots map[common.VarUUId]*common.Capabilities, cm paxos.ConnectionManager) *ClientTxnSubmitter { +func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, roots map[common.VarUUId]*common.Capability, cm paxos.ConnectionManager) *ClientTxnSubmitter { return &ClientTxnSubmitter{ SimpleTxnSubmitter: NewSimpleTxnSubmitter(rmId, bootCount, cm), versionCache: NewVersionCache(roots), diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index e96cd5c..28e2135 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -401,55 +401,25 @@ func (sts *SimpleTxnSubmitter) translateRead(action *msgs.Action, clientRead cms } func (sts *SimpleTxnSubmitter) translateWrite(vc versionCache, outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, vUUId *common.VarUUId, action *msgs.Action, clientWrite cmsgs.ClientActionWrite) error { - writeValue, err := vc.ValueForWrite(vUUId, clientWrite.Value()) - if err != nil { - return err - } + action.SetWrite() + write := action.Write() + write.SetValue(clientWrite.Value()) clientReferences := clientWrite.References() - refsWithHoles, c, err := vc.ReferencesForWrite(vUUId, &clientReferences) + refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, &clientReferences) if err != nil { return err } - if refsWithHoles == nil { - // it really is just a write - action.SetWrite() - write := action.Write() - write.SetValue(writeValue) - refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, refsWithHoles, &clientReferences) - if err != nil { - return err - } - write.SetReferences(*refs) - } else { - // it actually needs to be a read-write - action.SetReadwrite() - readWrite := action.Readwrite() - readWrite.SetVersion(c.txnId[:]) - readWrite.SetValue(writeValue) - refs, err := 
copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, refsWithHoles, &clientReferences) - if err != nil { - return err - } - readWrite.SetReferences(*refs) - } + write.SetReferences(*refs) return nil } func (sts *SimpleTxnSubmitter) translateReadWrite(vc versionCache, outgoingSeg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, vUUId *common.VarUUId, action *msgs.Action, clientReadWrite cmsgs.ClientActionReadwrite) error { - writeValue, err := vc.ValueForWrite(vUUId, clientReadWrite.Value()) - if err != nil { - return err - } clientReferences := clientReadWrite.References() - refsWithHoles, _, err := vc.ReferencesForWrite(vUUId, &clientReferences) - if err != nil { - return err - } action.SetReadwrite() readWrite := action.Readwrite() readWrite.SetVersion(clientReadWrite.Version()) - readWrite.SetValue(writeValue) - refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, refsWithHoles, &clientReferences) + readWrite.SetValue(clientReadWrite.Value()) + refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, &clientReferences) if err != nil { return err } @@ -467,7 +437,7 @@ func (sts *SimpleTxnSubmitter) translateCreate(vc versionCache, outgoingSeg *cap } create.SetPositions((capn.UInt8List)(*positions)) clientReferences := clientCreate.References() - refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, nil, &clientReferences) + refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, &clientReferences) if err != nil { return nil, nil, err } @@ -481,7 +451,7 @@ func (sts *SimpleTxnSubmitter) translateRoll(vc versionCache, outgoingSeg *capn. roll.SetVersion(clientRoll.Version()) roll.SetValue(clientRoll.Value()) clientReferences := clientRoll.References() - refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, nil, &clientReferences) + refs, err := copyReferences(vc, outgoingSeg, referencesInNeedOfPositions, &clientReferences) if err != nil { return err } @@ -489,84 +459,26 @@ func (sts *SimpleTxnSubmitter) translateRoll(vc versionCache, outgoingSeg *capn. 
return nil } -// so the challenge here is that we need to merge the references which -// the client may have rewritten with the 'actual' references taking -// into account masks and such from capabilities -func copyReferences(vc versionCache, seg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, refsWithHoles []*msgs.VarIdPos, clientReferences *cmsgs.ClientVarIdPos_List) (*msgs.VarIdPos_List, error) { - if refsWithHoles == nil { - refs := msgs.NewVarIdPosList(seg, clientReferences.Len()) - for idx, l := 0, clientReferences.Len(); idx < l; idx++ { - clientRef := clientReferences.At(idx) - vUUIdPos := refs.At(idx) - target := common.MakeVarUUId(clientRef.VarId()) - vUUIdPos.SetId(target[:]) - caps := clientRef.Capabilities() - if err := validateCapabilities(vc, target, caps); err != nil { - return nil, err - } - vUUIdPos.SetCapabilities(caps) - *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) - } - return &refs, nil - } else { - refs := msgs.NewVarIdPosList(seg, len(refsWithHoles)) - for idx, ref := range refsWithHoles { - vUUIdPos := refs.At(idx) - switch { - case ref != nil: - vUUIdPos.SetId(ref.Id()) - vUUIdPos.SetCapabilities(ref.Capabilities()) - *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) - case idx < clientReferences.Len(): - clientRef := clientReferences.At(idx) - target := common.MakeVarUUId(clientRef.VarId()) - vUUIdPos.SetId(target[:]) - caps := clientRef.Capabilities() - if err := validateCapabilities(vc, target, caps); err != nil { - return nil, err - } - vUUIdPos.SetCapabilities(caps) - *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) - default: - vUUIdPos.SetId([]byte{}) - } +func copyReferences(vc versionCache, seg *capn.Segment, referencesInNeedOfPositions *[]*msgs.VarIdPos, clientReferences *cmsgs.ClientVarIdPos_List) (*msgs.VarIdPos_List, error) { + refs := msgs.NewVarIdPosList(seg, clientReferences.Len()) + for idx, l := 0, clientReferences.Len(); idx < l; idx++ { + clientRef := clientReferences.At(idx) + vUUIdPos := refs.At(idx) + target := common.MakeVarUUId(clientRef.VarId()) + vUUIdPos.SetId(target[:]) + caps := clientRef.Capability() + if err := validateCapability(vc, target, caps); err != nil { + return nil, err } - return &refs, nil + vUUIdPos.SetCapability(caps) + *referencesInNeedOfPositions = append(*referencesInNeedOfPositions, &vUUIdPos) } + return &refs, nil } -func validateCapabilities(vc versionCache, target *common.VarUUId, cap cmsgs.Capabilities) error { - refsReadCap := cap.References().Read() - refsWriteCap := cap.References().Write() - if refsReadCap.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ONLY { - // just enforce that they unique and ascending - if !isUniqueAndAscending(refsReadCap.Only()) { - return fmt.Errorf("Invalid reference read capabilities: indices must be unique and ascending (ref target: %v)", target) - } - } - if refsWriteCap.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ONLY { - // just enforce that they unique and ascending - if !isUniqueAndAscending(refsWriteCap.Only()) { - return fmt.Errorf("Invalid reference write capabilities: indices must be unique and ascending (ref target: %v)", target) - } - } +func validateCapability(vc versionCache, target *common.VarUUId, cap cmsgs.Capability) error { if !vc.EnsureSubset(target, cap) { return fmt.Errorf("Attempt made to grant wider capabilities on %v than acceptable", target) } return nil } - -func isUniqueAndAscending(onlyCap capn.UInt32List) bool { - only := onlyCap.ToArray() - if 
len(only) > 0 { - old := only[0] - only = only[1:] - for _, index := range only { - if index <= old { - return false - } - old = index - } - } - return true -} diff --git a/client/versioncache.go b/client/versioncache.go index 2aeaa6d..28bf1e3 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -15,7 +15,7 @@ type versionCache map[common.VarUUId]*cached type cached struct { txnId *common.TxnId clockElem uint64 - caps *common.Capabilities + caps *common.Capability value []byte references []msgs.VarIdPos } @@ -32,7 +32,7 @@ type cacheOverlay struct { stored bool } -func NewVersionCache(roots map[common.VarUUId]*common.Capabilities) versionCache { +func NewVersionCache(roots map[common.VarUUId]*common.Capability) versionCache { cache := make(map[common.VarUUId]*cached) for vUUId, caps := range roots { cache[vUUId] = &cached{caps: caps} @@ -77,150 +77,22 @@ func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { return nil } -// The problem is that we can't distinguish between a client deliberately writing an empty value and a client not writing a value at all. -func (vc versionCache) ValueForWrite(vUUId *common.VarUUId, value []byte) ([]byte, error) { - if vc == nil { - return value, nil - } - if c, found := vc[*vUUId]; !found { - return nil, fmt.Errorf("Write attempted on unknown %v", vUUId) - } else { - switch valueCap := c.caps.Value(); { - case valueCap == cmsgs.VALUECAPABILITY_WRITE || valueCap == cmsgs.VALUECAPABILITY_READWRITE: - return value, nil - case len(value) > 0: // fuzzy. The client could be attempting to write an empty value illegally too. - return nil, fmt.Errorf("Transaction illegally attempted to write the value of an object: %v", vUUId) - default: - return c.value, nil - } - } -} - -// returns the 'extra' refs, with holes in the list for valid client refs -func (vc versionCache) ReferencesForWrite(vUUId *common.VarUUId, clientRefs *cmsgs.ClientVarIdPos_List) ([]*msgs.VarIdPos, *cached, error) { - if vc == nil { - return nil, nil, nil - } - if c, found := vc[*vUUId]; !found { - return nil, nil, fmt.Errorf("ReferencesForWrite called for unknown %v", vUUId) - } else { - refsWriteCap := c.caps.References().Write() - switch refsWriteCap.Which() { - case cmsgs.CAPABILITIESREFERENCESWRITE_ALL: - return nil, c, nil - default: - clientRefsLen := clientRefs.Len() - only := refsWriteCap.Only().ToArray() - // The client must provide refs for every index in only. - reqLen := 0 - if l := len(only); l > 0 { - reqLen = int(only[l-1]) + 1 - } - if clientRefsLen != reqLen { - return nil, nil, fmt.Errorf("Incorrect number of references provided for write of %v", vUUId) - } - // Where possible, we fill in the gaps in only with - // c.references. Keep in mind that the client may have onlies - // that are longer than the current number of - // references. This can happen when a capability in a ref - // includes writes to n refs, and then the object itself is - // updated to only include m refs, where m < n. We change a - // write to a readwrite iff c.references - onlies is not the - // empty set.
- resultsLen := clientRefsLen - if l := len(c.references); l > resultsLen { - resultsLen = l - } - results := make([]*msgs.VarIdPos, resultsLen) - nonNilAppended := false - for idx := 0; idx < clientRefsLen; idx++ { - if len(only) > 0 && uint32(idx) == only[0] { - only = only[1:] - } else if idx < len(c.references) { - nonNilAppended = true - results[idx] = &c.references[idx] - } - } - // add on anything in c.references that's left over - for idx := clientRefsLen; idx < resultsLen; idx++ { - nonNilAppended = true - results[idx] = &c.references[idx] - } - if nonNilAppended { - return results, c, nil - } else { - return nil, c, nil - } - } - } -} - -func (vc versionCache) EnsureSubset(vUUId *common.VarUUId, cap cmsgs.Capabilities) bool { +func (vc versionCache) EnsureSubset(vUUId *common.VarUUId, cap cmsgs.Capability) bool { if vc == nil { return true } if c, found := vc[*vUUId]; found { - if c.caps == common.MaxCapsCap { + if c.caps == common.MaxCapability { return true } - valueNew, valueOld := cap.Value(), c.caps.Value() + capNew, capOld := cap.Which(), c.caps.Which() switch { - case valueNew == valueOld: - case valueNew == cmsgs.VALUECAPABILITY_NONE: // new is bottom, always fine - case valueOld == cmsgs.VALUECAPABILITY_READWRITE: // old is top, always fine + case capNew == capOld: + case capNew == cmsgs.CAPABILITY_NONE: // new is bottom, always fine + case capOld == cmsgs.CAPABILITY_READWRITE: // old is top, always fine default: return false } - - readNew, readOld := cap.References().Read(), c.caps.References().Read() - if readOld.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ONLY { - if readNew.Which() != cmsgs.CAPABILITIESREFERENCESREAD_ONLY { - return false - } - readNewOnly, readOldOnly := readNew.Only().ToArray(), readOld.Only().ToArray() - if len(readNewOnly) > len(readOldOnly) { - return false - } - for idx, indexNew := range readNewOnly { - indexOld := readOldOnly[0] - readOldOnly = readOldOnly[1:] - if indexNew < indexOld { - return false - } else if indexNew > indexOld { - for ; indexNew > indexOld && len(readOldOnly) > 0; readOldOnly = readOldOnly[1:] { - indexOld = readOldOnly[0] - } - if len(readNewOnly)-idx > len(readOldOnly) { - return false - } - } - } - } - - writeNew, writeOld := cap.References().Write(), c.caps.References().Write() - if writeOld.Which() == cmsgs.CAPABILITIESREFERENCESWRITE_ONLY { - if writeNew.Which() != cmsgs.CAPABILITIESREFERENCESWRITE_ONLY { - return false - } - writeNewOnly, writeOldOnly := writeNew.Only().ToArray(), writeOld.Only().ToArray() - if len(writeNewOnly) > len(writeOldOnly) { - return false - } - for idx, indexNew := range writeNewOnly { - indexOld := writeOldOnly[0] - writeOldOnly = writeOldOnly[1:] - if indexNew < indexOld { - return false - } else if indexNew > indexOld { - for ; indexNew > indexOld && len(writeOldOnly) > 0; writeOldOnly = writeOldOnly[1:] { - indexOld = writeOldOnly[0] - } - if len(writeNewOnly)-idx > len(writeOldOnly) { - return false - } - } - } - } } return true } @@ -234,17 +106,18 @@ func (vc versionCache) UpdateFromCommit(txn *eng.TxnReader, outcome *msgs.Outcom if act := action.Which(); act != msgs.ACTION_READ { vUUId := common.MakeVarUUId(action.VarId()) c, found := vc[*vUUId] - if act == msgs.ACTION_CREATE && !found { + switch { + case !found && act == msgs.ACTION_CREATE: create := action.Create() c = &cached{ txnId: txnId, clockElem: clock.At(vUUId), - caps: common.MaxCapsCap, + caps: common.MaxCapability, value: create.Value(), references: create.References().ToArray(), } vc[*vUUId] = c - } else { + case 
!found, act == msgs.ACTION_CREATE: panic(fmt.Sprintf("%v contained illegal action (%v) for %v", txnId, act, vUUId)) } @@ -260,6 +133,7 @@ func (vc versionCache) UpdateFromCommit(txn *eng.TxnReader, outcome *msgs.Outcom rw := action.Readwrite() c.value = rw.Value() c.references = rw.References().ToArray() + case msgs.ACTION_CREATE: default: panic(fmt.Sprintf("Unexpected action type on txn commit! %v %v", txnId, act)) } @@ -325,6 +199,7 @@ func (vc versionCache) updateExisting(updatesCap *msgs.Update_List, updateGraph panic(fmt.Sprintf("Clock version changed on missing for %v@%v (new:%v != old:%v)", vUUId, txnId, clockElem, c.clockElem)) } if clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) { + // do not blank out c.caps here c.txnId = nil c.clockElem = 0 c.value = nil @@ -350,7 +225,7 @@ func (vc versionCache) updateExisting(updatesCap *msgs.Update_List, updateGraph } updating = clockElem > c.clockElem || (clockElem == c.clockElem && cmp == common.LT) } - // If we're not updating then the update must predate + // If we're not updating then the update must pre-date // our current knowledge of vUUId. So we're not going // to send it to the client in which case the // capabilities vUUId grants via its own refs can't @@ -391,7 +266,7 @@ func (vc versionCache) updateExisting(updatesCap *msgs.Update_List, updateGraph } func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOverlay) { - reaches := make(map[common.VarUUId][]*msgs.VarIdPos) + reaches := make(map[common.VarUUId][]msgs.VarIdPos) worklist := make([]common.VarUUId, 0, len(updateGraph)) for vUUId, overlay := range updateGraph { @@ -408,7 +283,7 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver // Given the current vUUId.caps, we're looking at what we // can reach from there. vUUIdRef := common.MakeVarUUId(ref.Id()) - caps := common.NewCapabilities(ref.Capabilities()) + caps := common.NewCapability(ref.Capability()) var c *cached overlay, found := updateGraph[*vUUIdRef] if found { @@ -461,49 +336,29 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver // returns true iff we couldn't read the value before merge, but we // can after -func (c *cached) mergeCaps(b *common.Capabilities) (gainedRead bool) { +func (c *cached) mergeCaps(b *common.Capability) (gainedRead bool) { a := c.caps c.caps = a.Union(b) - if a != c.caps { - aValue := a.Value() - nValue := c.caps.Value() - return (aValue != cmsgs.VALUECAPABILITY_READ && aValue != cmsgs.VALUECAPABILITY_READWRITE) && - (nValue == cmsgs.VALUECAPABILITY_READ || nValue == cmsgs.VALUECAPABILITY_READWRITE) + if a != c.caps { // change has happened + aCap := a.Which() + nCap := c.caps.Which() + return (aCap != cmsgs.CAPABILITY_READ && aCap != cmsgs.CAPABILITY_READWRITE) && + (nCap == cmsgs.CAPABILITY_READ || nCap == cmsgs.CAPABILITY_READWRITE) } return false } -// does not leave holes in the result - compacted. 
-func (c *cached) reachableReferences() []*msgs.VarIdPos { +func (c *cached) reachableReferences() []msgs.VarIdPos { if c.caps == nil || len(c.references) == 0 { return nil } - refsReadCap := c.caps.References().Read() - all := refsReadCap.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL - var only []uint32 - if !all { - only = c.caps.References().Read().Only().ToArray() - } - - result := make([]*msgs.VarIdPos, 0, len(c.references)) -LOOP: - for index, ref := range c.references { - refCopy := ref - switch { - case all: - case len(only) == 0: - break LOOP - case uint32(index) == only[0]: - only = only[1:] - default: - continue - } - if len(ref.Id()) == common.KeyLen { - result = append(result, &refCopy) - } + switch c.caps.Which() { + case cmsgs.CAPABILITY_READ, cmsgs.CAPABILITY_READWRITE: + return c.references + default: + return nil } - return result } func (u *update) AddToClientAction(hashCache *ch.ConsistentHashCache, seg *capn.Segment, clientAction *cmsgs.ClientAction) { @@ -515,36 +370,21 @@ func (u *update) AddToClientAction(hashCache *ch.ConsistentHashCache, seg *capn. clientAction.SetWrite() clientWrite := clientAction.Write() - switch c.caps.Value() { - case cmsgs.VALUECAPABILITY_READ, cmsgs.VALUECAPABILITY_READWRITE: + switch c.caps.Which() { + case cmsgs.CAPABILITY_READ, cmsgs.CAPABILITY_READWRITE: clientWrite.SetValue(c.value) - default: - clientWrite.SetValue([]byte{}) - } - - refsReadCaps := c.caps.References().Read() - all := refsReadCaps.Which() == cmsgs.CAPABILITIESREFERENCESREAD_ALL - var only []uint32 - if !all { - only = refsReadCaps.Only().ToArray() - } - clientReferences := cmsgs.NewClientVarIdPosList(seg, len(c.references)) - for idx, ref := range c.references { - switch { - case all: - case len(only) > 0 && only[0] == uint32(idx): - only = only[1:] - default: - continue - } - varIdPos := clientReferences.At(idx) - varIdPos.SetVarId(ref.Id()) - if len(ref.Id()) == common.KeyLen { - varIdPos.SetCapabilities(ref.Capabilities()) + clientReferences := cmsgs.NewClientVarIdPosList(seg, len(c.references)) + for idx, ref := range c.references { + varIdPos := clientReferences.At(idx) + varIdPos.SetVarId(ref.Id()) + varIdPos.SetCapability(ref.Capability()) positions := common.Positions(ref.Positions()) hashCache.AddPosition(common.MakeVarUUId(ref.Id()), &positions) } + clientWrite.SetReferences(clientReferences) + default: + clientWrite.SetValue([]byte{}) + clientWrite.SetReferences(cmsgs.NewClientVarIdPosList(seg, 0)) } - clientWrite.SetReferences(clientReferences) } } diff --git a/configuration/configuration.go b/configuration/configuration.go index 14f2240..8fca82b 100644 --- a/configuration/configuration.go +++ b/configuration/configuration.go @@ -27,22 +27,18 @@ type Configuration struct { F uint8 MaxRMCount uint16 NoSync bool - ClientCertificateFingerprints map[string]map[string]*RootCapabilities + ClientCertificateFingerprints map[string]map[string]*RootCapability clusterUUId uint64 roots []string rms common.RMIds rmsRemoved map[common.RMId]server.EmptyStruct - fingerprints map[[sha256.Size]byte]map[string]*common.Capabilities + fingerprints map[[sha256.Size]byte]map[string]*common.Capability nextConfiguration *NextConfiguration } -type RootCapabilities struct { - ValueRead bool - ValueWrite bool - ReferencesReadAll bool - ReferencesReadOnly []uint32 - ReferencesWriteAll bool - ReferencesWriteOnly []uint32 +type RootCapability struct { + Read bool + Write bool } type NextConfiguration struct { @@ -207,85 +203,46 @@ func decodeConfiguration(decoder *json.Decoder) 
(*Configuration, error) { } else { rootsMap := make(map[string]server.EmptyStruct) rootsName := []string{} - fingerprints := make(map[[sha256.Size]byte]map[string]*common.Capabilities, len(config.ClientCertificateFingerprints)) + fingerprints := make(map[[sha256.Size]byte]map[string]*common.Capability, len(config.ClientCertificateFingerprints)) seg := capn.NewBuffer(nil) - for fingerprint, rootsCapabilities := range config.ClientCertificateFingerprints { + for fingerprint, rootsCapability := range config.ClientCertificateFingerprints { fingerprintBytes, err := hex.DecodeString(fingerprint) if err != nil { return nil, err } else if l := len(fingerprintBytes); l != sha256.Size { return nil, fmt.Errorf("Invalid fingerprint: expected %v bytes, and found %v", sha256.Size, l) } - if len(rootsCapabilities) == 0 { + if len(rootsCapability) == 0 { return nil, fmt.Errorf("No roots configured for client fingerprint %v; at least 1 needed", fingerprint) } - roots := make(map[string]*common.Capabilities, len(rootsCapabilities)) - for name, rootCapabilities := range rootsCapabilities { + roots := make(map[string]*common.Capability, len(rootsCapability)) + for name, rootCapability := range rootsCapability { if _, found := rootsMap[name]; !found { rootsMap[name] = server.EmptyStructVal rootsName = append(rootsName, name) } - common.SortUInt32(rootCapabilities.ReferencesReadOnly).Sort() - common.SortUInt32(rootCapabilities.ReferencesWriteOnly).Sort() - if rootCapabilities.ReferencesReadAll && len(rootCapabilities.ReferencesReadOnly) != 0 { - return nil, fmt.Errorf("ReferencesReadAll and ReferencesReadOnly must be mutually exclusive for client fingerprint %v, root %s", fingerprint, name) - } - if rootCapabilities.ReferencesWriteAll && len(rootCapabilities.ReferencesWriteOnly) != 0 { - return nil, fmt.Errorf("ReferencesWriteAll and ReferencesWriteOnly must be mutually exclusive for client fingerprint %v, root %s", fingerprint, name) - } - old := uint32(0) - for idx, index := range rootCapabilities.ReferencesReadOnly { - if index == old && idx > 0 { - return nil, fmt.Errorf("Client fingerprint %v, root %s: Duplicate read only reference index %v", - fingerprint, name, index) - } - old = index - } - old = uint32(0) - for idx, index := range rootCapabilities.ReferencesWriteOnly { - if index == old && idx > 0 { - return nil, fmt.Errorf("Client fingerprint %v, root %s: Duplicate write only reference index %v", - fingerprint, name, index) - } - old = index - } - if !rootCapabilities.ValueRead && !rootCapabilities.ValueWrite && - !rootCapabilities.ReferencesReadAll && !rootCapabilities.ReferencesWriteAll && - len(rootCapabilities.ReferencesReadOnly) == 0 && len(rootCapabilities.ReferencesWriteOnly) == 0 { - return nil, fmt.Errorf("Client fingerprint %v, root %s: no capabilities have been granted.", + if !rootCapability.Read && !rootCapability.Write { + return nil, fmt.Errorf("Client fingerprint %v, root %s: no capability has been granted.", fingerprint, name) } - cap := cmsgs.NewCapabilities(seg) - switch { - case rootCapabilities.ValueRead && rootCapabilities.ValueWrite: - cap.SetValue(cmsgs.VALUECAPABILITY_READWRITE) - case rootCapabilities.ValueRead: - cap.SetValue(cmsgs.VALUECAPABILITY_READ) - case rootCapabilities.ValueWrite: - cap.SetValue(cmsgs.VALUECAPABILITY_WRITE) - default: - cap.SetValue(cmsgs.VALUECAPABILITY_NONE) - } - capRefs := cap.References() - if rootCapabilities.ReferencesReadAll { - capRefs.Read().SetAll() - } else { - only := seg.NewUInt32List(len(rootCapabilities.ReferencesReadOnly)) - for idx, 
index := range rootCapabilities.ReferencesReadOnly { - only.Set(idx, index) - } - capRefs.Read().SetOnly(only) - } - if rootCapabilities.ReferencesWriteAll { - capRefs.Write().SetAll() + var capability *common.Capability + if rootCapability.Read && rootCapability.Write { + capability = common.MaxCapability } else { - only := seg.NewUInt32List(len(rootCapabilities.ReferencesWriteOnly)) - for idx, index := range rootCapabilities.ReferencesWriteOnly { - only.Set(idx, index) + cap := cmsgs.NewCapability(seg) + switch { + case rootCapability.Read && rootCapability.Write: + cap.SetReadWrite() + case rootCapability.Read: + cap.SetRead() + case rootCapability.Write: + cap.SetWrite() + default: + cap.SetNone() } - capRefs.Write().SetOnly(only) + capability = common.NewCapability(cap) } - roots[name] = common.NewCapabilities(cap) + roots[name] = capability } ary := [sha256.Size]byte{} copy(ary[:], fingerprintBytes) @@ -325,18 +282,18 @@ func ConfigurationFromCap(config *msgs.Configuration) *Configuration { rootsName := []string{} rootsMap := make(map[string]server.EmptyStruct) fingerprints := config.Fingerprints() - fingerprintsMap := make(map[[sha256.Size]byte]map[string]*common.Capabilities, fingerprints.Len()) + fingerprintsMap := make(map[[sha256.Size]byte]map[string]*common.Capability, fingerprints.Len()) for idx, l := 0, fingerprints.Len(); idx < l; idx++ { fingerprint := fingerprints.At(idx) ary := [sha256.Size]byte{} copy(ary[:], fingerprint.Sha256()) rootsCap := fingerprint.Roots() - roots := make(map[string]*common.Capabilities, rootsCap.Len()) + roots := make(map[string]*common.Capability, rootsCap.Len()) for idy, m := 0, rootsCap.Len(); idy < m; idy++ { rootCap := rootsCap.At(idy) name := rootCap.Name() - capabilities := rootCap.Capabilities() - roots[name] = common.NewCapabilities(capabilities) + capability := rootCap.Capability() + roots[name] = common.NewCapability(capability) if _, found := rootsMap[name]; !found { rootsName = append(rootsName, name) rootsMap[name] = server.EmptyStructVal @@ -463,7 +420,7 @@ func (config *Configuration) SetClusterUUId(uuid uint64) { } } -func (config *Configuration) Fingerprints() map[[sha256.Size]byte]map[string]*common.Capabilities { +func (config *Configuration) Fingerprints() map[[sha256.Size]byte]map[string]*common.Capability { return config.fingerprints } @@ -530,13 +487,13 @@ func (config *Configuration) Clone() *Configuration { roots: make([]string, len(config.roots)), rms: make([]common.RMId, len(config.rms)), rmsRemoved: make(map[common.RMId]server.EmptyStruct, len(config.rmsRemoved)), - fingerprints: make(map[[sha256.Size]byte]map[string]*common.Capabilities, len(config.fingerprints)), + fingerprints: make(map[[sha256.Size]byte]map[string]*common.Capability, len(config.fingerprints)), nextConfiguration: config.nextConfiguration.Clone(), } copy(clone.Hosts, config.Hosts) if config.ClientCertificateFingerprints != nil { - clone.ClientCertificateFingerprints = make(map[string]map[string]*RootCapabilities, len(config.ClientCertificateFingerprints)) + clone.ClientCertificateFingerprints = make(map[string]map[string]*RootCapability, len(config.ClientCertificateFingerprints)) for k, v := range config.ClientCertificateFingerprints { clone.ClientCertificateFingerprints[k] = v } @@ -590,10 +547,10 @@ func (config *Configuration) AddToSegAutoRoot(seg *capn.Segment) msgs.Configurat fingerprintCap.SetSha256(fingerprint[:]) rootsCap := msgs.NewRootList(seg, len(roots)) idy := 0 - for name, capabilities := range roots { + for name, capability := range 
roots { rootCap := msgs.NewRoot(seg) rootCap.SetName(name) - rootCap.SetCapabilities(capabilities.Capabilities) + rootCap.SetCapability(capability.Capability) rootsCap.Set(idy, rootCap) idy++ } diff --git a/network/connection.go b/network/connection.go index 3cc5af5..d7f3ab8 100644 --- a/network/connection.go +++ b/network/connection.go @@ -645,8 +645,8 @@ func (cash *connectionAwaitServerHandshake) makeHelloServerFromServer() *capn.Se type connectionAwaitClientHandshake struct { *Connection peerCerts []*x509.Certificate - roots map[string]*common.Capabilities - rootsVar map[common.VarUUId]*common.Capabilities + roots map[string]*common.Capability + rootsVar map[common.VarUUId]*common.Capability } func (cach *connectionAwaitClientHandshake) connectionStateMachineComponentWitness() {} @@ -688,7 +688,7 @@ func (cach *connectionAwaitClientHandshake) start() (bool, error) { } } -func (cach *connectionAwaitClientHandshake) verifyPeerCerts(peerCerts []*x509.Certificate) (authenticated bool, hashsum [sha256.Size]byte, roots map[string]*common.Capabilities) { +func (cach *connectionAwaitClientHandshake) verifyPeerCerts(peerCerts []*x509.Certificate) (authenticated bool, hashsum [sha256.Size]byte, roots map[string]*common.Capability) { fingerprints := cach.topology.Fingerprints() for _, cert := range peerCerts { hashsum = sha256.Sum256(cert.Raw) @@ -709,16 +709,16 @@ func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer() *capn.Se hello.SetNamespace(namespace) rootsCap := cmsgs.NewRootList(seg, len(cach.roots)) idy := 0 - rootsVar := make(map[common.VarUUId]*common.Capabilities, len(cach.roots)) + rootsVar := make(map[common.VarUUId]*common.Capability, len(cach.roots)) for idx, name := range cach.topology.RootNames() { - if capabilities, found := cach.roots[name]; found { + if capability, found := cach.roots[name]; found { rootCap := rootsCap.At(idy) idy++ vUUId := cach.topology.Roots[idx].VarUUId rootCap.SetName(name) rootCap.SetVarId(vUUId[:]) - rootCap.SetCapabilities(capabilities.Capabilities) - rootsVar[*vUUId] = capabilities + rootCap.SetCapability(capability.Capability) + rootsVar[*vUUId] = capability } } hello.SetRoots(rootsCap) diff --git a/txnengine/frame.go b/txnengine/frame.go index 0c56552..9669585 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -861,7 +861,7 @@ func (fo *frameOpen) createRollClientTxn() (*cmsgs.ClientTxn, map[common.VarUUId posMap[*vUUId] = &pos varIdPos := refVarList.At(idx) varIdPos.SetVarId(vUUId[:]) - varIdPos.SetCapabilities(ref.Capabilities()) + varIdPos.SetCapability(ref.Capability()) } fo.rollTxn = &ctxn fo.rollTxnPos = posMap From a8ca0abdaadd75a3dd1cd0cd79abfdddc85e86b5 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 10 Sep 2016 15:09:30 +0100 Subject: [PATCH 49/78] Bug fix. Ref T34. --HG-- branch : T34 --- client/versioncache.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/client/versioncache.go b/client/versioncache.go index 28bf1e3..c506b34 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -310,7 +310,11 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver // We have two questions to answer: 1. Have we already // processed vUUIdRef? 2. If we have, do we have wider caps // now than before? 
- before := reaches[*vUUIdRef] + before, found := reaches[*vUUIdRef] + if !found { + before = c.reachableReferences() + reaches[*vUUIdRef] = before + } ensureUpdate := c.mergeCaps(caps) after := c.reachableReferences() if len(after) > len(before) { From 7064069708a100e09cd48769209f0832b88634f3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 10 Sep 2016 15:27:53 +0100 Subject: [PATCH 50/78] bug fix. Ref T34 --HG-- branch : T34 --- client/versioncache.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index c506b34..d990878 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -344,10 +344,14 @@ func (c *cached) mergeCaps(b *common.Capability) (gainedRead bool) { a := c.caps c.caps = a.Union(b) if a != c.caps { // change has happened - aCap := a.Which() nCap := c.caps.Which() - return (aCap != cmsgs.CAPABILITY_READ && aCap != cmsgs.CAPABILITY_READWRITE) && - (nCap == cmsgs.CAPABILITY_READ || nCap == cmsgs.CAPABILITY_READWRITE) + nRead := nCap == cmsgs.CAPABILITY_READ || nCap == cmsgs.CAPABILITY_READWRITE + if a == nil { + return nRead + } else { + aCap := a.Which() + return nRead && aCap != cmsgs.CAPABILITY_READ && aCap != cmsgs.CAPABILITY_READWRITE + } } return false } From 1190b66918e28732ab520f7df8c7f52cffca229b Mon Sep 17 00:00:00 2001 From: Ashley Hewson Date: Fri, 16 Sep 2016 11:30:09 +0100 Subject: [PATCH 51/78] don't clobber capnp struct from client Copyright: This patch is Copyright (c) 2016, LShift Ltd. opensource@lshift.net --HG-- branch : dev --- client/clienttxnsubmitter.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index 8e9c658..d7887a6 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -105,9 +105,13 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, curTxnIdNum := binary.BigEndian.Uint64(txnId[:8]) curTxnIdNum += 1 + uint64(cts.rng.Intn(8)) binary.BigEndian.PutUint64(curTxnId[:8], curTxnIdNum) - ctxnCap.SetId(curTxnId[:]) + newSeg := capn.NewBuffer(nil) + newCtxnCap := cmsgs.NewClientTxn(newSeg) + newCtxnCap.SetId(curTxnId[:]) + newCtxnCap.SetRetry(ctxnCap.Retry()) + newCtxnCap.SetActions(ctxnCap.Actions()) - cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, curTxnId, cont, delay, false) + cts.SimpleTxnSubmitter.SubmitClientTransaction(&newCtxnCap, curTxnId, cont, delay, false) } } From 645ca55ba547a76aa2cc67018016941b8cb475c7 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Sep 2016 11:35:51 +0100 Subject: [PATCH 52/78] Update contributors. --HG-- branch : dev --- CONTRIBUTORS | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 0b23574..1684c7d 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -2,3 +2,17 @@ This file lists all individuals having contributed content to the repository. Matthew Sackman +Ashley Hewson + +By default, code in this repository is: +Copyright (C) 2015-2016 Matthew Sackman + +Some patches have different copyright assignments, and on each release +of GoshawkDB server, this file will be adjusted to contain those +details in full. 
+ +Where the copyright of a patch differs from the default, the patch and +its copyright can be found by running the following command in this +repository: + +$ hg log -k copyright -v From b7bd60dba03f55c890bc3348e95550347df11042 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 16 Sep 2016 12:44:52 +0100 Subject: [PATCH 53/78] Turns out it's useful to actually enforce the capabilities... /me whistles quietly to himself. Ref T34. --HG-- branch : T34 --- client/versioncache.go | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index d990878..21e2a2b 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -48,8 +48,10 @@ func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { vUUId := common.MakeVarUUId(action.VarId()) if which := action.Which(); which != cmsgs.CLIENTACTION_READ { return fmt.Errorf("Retry transaction should only include reads. Found %v", which) - } else if _, found := vc[*vUUId]; !found { + } else if vc, found := vc[*vUUId]; !found { return fmt.Errorf("Retry transaction has attempted to read from unknown object: %v", vUUId) + } else if cap := vc.caps.Which(); !(cap == cmsgs.CAPABILITY_READ || cap == cmsgs.CAPABILITY_READWRITE) { + return fmt.Errorf("Retry transaction has attempted illegal read from object: %v", vUUId) } } @@ -57,11 +59,23 @@ func (vc versionCache) ValidateTransaction(cTxn *cmsgs.ClientTxn) error { for idx, l := 0, actions.Len(); idx < l; idx++ { action := actions.At(idx) vUUId := common.MakeVarUUId(action.VarId()) - _, found := vc[*vUUId] - switch action.Which() { + vc, found := vc[*vUUId] + switch act := action.Which(); act { case cmsgs.CLIENTACTION_READ, cmsgs.CLIENTACTION_WRITE, cmsgs.CLIENTACTION_READWRITE: if !found { return fmt.Errorf("Transaction manipulates unknown object: %v", vUUId) + } else { + cap := vc.caps.Which() + canRead := cap == cmsgs.CAPABILITY_READ || cap == cmsgs.CAPABILITY_READWRITE + canWrite := cap == cmsgs.CAPABILITY_WRITE || cap == cmsgs.CAPABILITY_READWRITE + switch { + case act == cmsgs.CLIENTACTION_READ && !canRead: + return fmt.Errorf("Transaction has illegal read action on object: %v", vUUId) + case act == cmsgs.CLIENTACTION_WRITE && !canWrite: + return fmt.Errorf("Transaction has illegal write action on object: %v", vUUId) + case act == cmsgs.CLIENTACTION_READWRITE && cap != cmsgs.CAPABILITY_READWRITE: + return fmt.Errorf("Transaction has illegal readwrite action on object: %v", vUUId) + } } case cmsgs.CLIENTACTION_CREATE: From 2c63a6bbcdf1303df206a4d1851d1c2223f12b4f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 24 Sep 2016 14:18:44 +0100 Subject: [PATCH 54/78] Bug fix / optimisation. Ref T34. 
--HG-- branch : T34 --- client/versioncache.go | 88 ++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 30 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index 21e2a2b..e8366fa 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -28,8 +28,13 @@ type update struct { type cacheOverlay struct { *cached // we only duplicate the txnId here for the MISSING case - txnId *common.TxnId - stored bool + txnId *common.TxnId + inCache bool + updateClient bool +} + +func (co cacheOverlay) String() string { + return fmt.Sprintf("@%v (%v) (inCache: %v, updateClient %v)", co.txnId, co.caps, co.inCache, co.updateClient) } func NewVersionCache(roots map[common.VarUUId]*common.Capability) versionCache { @@ -160,15 +165,17 @@ func (vc versionCache) UpdateFromAbort(updatesCap *msgs.Update_List) map[common. // 1. update everything we know we can already reach, and filter out erroneous updates vc.updateExisting(updatesCap, updateGraph) + // fmt.Printf("updateGraph after updateExisting:\n %v\n", updateGraph) // 2. figure out what we can now reach, and propagate through extended caps vc.updateReachable(updateGraph) + // fmt.Printf("updateGraph after updateReachable:\n %v\n", updateGraph) // 3. populate results updates := make([]update, len(updateGraph)) validUpdates := make(map[common.TxnId]*[]*update, len(updateGraph)) for vUUId, overlay := range updateGraph { - if !overlay.stored { + if !overlay.updateClient { continue } updateListPtr, found := validUpdates[*overlay.txnId] @@ -219,9 +226,10 @@ func (vc versionCache) updateExisting(updatesCap *msgs.Update_List, updateGraph c.value = nil c.references = nil updateGraph[*vUUId] = &cacheOverlay{ - cached: c, - txnId: txnId, - stored: true, + cached: c, + txnId: txnId, + inCache: true, + updateClient: true, } } } @@ -252,9 +260,10 @@ func (vc versionCache) updateExisting(updatesCap *msgs.Update_List, updateGraph c.value = write.Value() c.references = write.References().ToArray() updateGraph[*vUUId] = &cacheOverlay{ - cached: c, - txnId: txnId, - stored: true, + cached: c, + txnId: txnId, + inCache: true, + updateClient: true, } } @@ -267,8 +276,9 @@ func (vc versionCache) updateExisting(updatesCap *msgs.Update_List, updateGraph value: write.Value(), references: write.References().ToArray(), }, - txnId: txnId, - stored: false, + txnId: txnId, + inCache: false, + updateClient: false, } } @@ -284,7 +294,7 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver worklist := make([]common.VarUUId, 0, len(updateGraph)) for vUUId, overlay := range updateGraph { - if overlay.stored { + if overlay.updateClient { reaches[vUUId] = overlay.reachableReferences() worklist = append(worklist, vUUId) } @@ -294,17 +304,16 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver vUUId := worklist[0] worklist = worklist[1:] for _, ref := range reaches[vUUId] { - // Given the current vUUId.caps, we're looking at what we - // can reach from there. + // Given the current vUUId.caps, we're looking at what we can + // reach from there. However, just because we can reach + // something doesn't mean we should actually be sending that + // thing down to the client - we should only do that if the + // client has enough capabilities on it (i.e. can read it). 
vUUIdRef := common.MakeVarUUId(ref.Id()) caps := common.NewCapability(ref.Capability()) var c *cached overlay, found := updateGraph[*vUUIdRef] if found { - if !overlay.stored { - overlay.stored = true - vc[*vUUIdRef] = overlay.cached - } c = overlay.cached } else { // There's no update for vUUIdRef, but it's possible we're @@ -329,24 +338,43 @@ func (vc versionCache) updateReachable(updateGraph map[common.VarUUId]*cacheOver before = c.reachableReferences() reaches[*vUUIdRef] = before } - ensureUpdate := c.mergeCaps(caps) + valueOrRefsUpdated := c.mergeCaps(caps) after := c.reachableReferences() if len(after) > len(before) { reaches[*vUUIdRef] = after worklist = append(worklist, *vUUIdRef) - ensureUpdate = true + valueOrRefsUpdated = true } - if ensureUpdate && overlay == nil && c.txnId != nil { - // Our access to vUUIdRef has expanded to the extent that - // we can now see more of the refs from vUUIdRef, or we - // can now see the value of vUUIdRef. So even though there - // wasn't an actual update for vUUIdRef, we need to create - // one. - updateGraph[*vUUIdRef] = &cacheOverlay{ - cached: c, - txnId: c.txnId, - stored: true, + if overlay == nil { + // vUUIdRef is for a var that was not in any of the + // updates, and we know it is in the cache. But it may not + // yet be on the client. Its value in the cache has not + // changed: if c.txnId == nil then it's already nil on the + // client (if it exists at all on the client). So we only + // want to send this down to the client if the client has + // _gained_ the ability to read this vUUIdRef as a result + // of these updates. + if valueOrRefsUpdated && c.txnId != nil { + updateGraph[*vUUIdRef] = &cacheOverlay{ + cached: c, + txnId: c.txnId, + inCache: true, + updateClient: true, + } + } + } else { + if !overlay.inCache { + // There was an update for vUUIdRef, which we didn't + // know of before, and we've proven we can now reach + // vUUIdRef. Therefore we must store vUUIdRef in the + // cache to record the capability. + overlay.inCache = true + vc[*vUUIdRef] = overlay.cached } + // If !updateClient then we know there has not yet been + // any evidence the client can read this var. Is there + // now? + overlay.updateClient = overlay.updateClient || valueOrRefsUpdated } } } From 25b56738fe85275ce1121f9f31eb8a5d2fe0918e Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 25 Sep 2016 22:35:16 +0100 Subject: [PATCH 55/78] I think this shouldn't be possible any more, so panic if it turns out it is! Ref T34. --HG-- branch : T34 --- client/versioncache.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/client/versioncache.go b/client/versioncache.go index e8366fa..2f8e674 100644 --- a/client/versioncache.go +++ b/client/versioncache.go @@ -433,8 +433,7 @@ func (u *update) AddToClientAction(hashCache *ch.ConsistentHashCache, seg *capn. } clientWrite.SetReferences(clientReferences) default: - clientWrite.SetValue([]byte{}) - clientWrite.SetReferences(cmsgs.NewClientVarIdPosList(seg, 0)) + panic(fmt.Sprintf("Internal logic error: attempted to send client update with non-read capability (%v)", u.varUUId)) } } } From 29541f0ba060ab3aba4b4da99b9191e1b6bc06b0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 9 Oct 2016 17:25:30 +0100 Subject: [PATCH 56/78] Well I can't quite figure out what I was thinking the day I refactored this... 
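
The missing line below: a ballot was counted into tOut and the acceptor
recorded in tOut.acceptors, but the acceptor's own back-pointer was never
set, so a later change of vote (or the acceptor's removal at a topology
change) could not retract the stale count. A sketch of the invariant the
fix restores (the checking helper is illustrative, not in the repo):

    // After an outcome from acceptorId has been counted into tOut, the
    // acceptor's entry must point back at tOut; without the back-pointer
    // the count can never be retracted for this acceptor again.
    func checkBackPointer(oa *OutcomeAccumulator, acceptorId common.RMId, tOut *txnOutcome) {
        entry := oa.acceptorOutcomes[acceptorId]
        if tOut.acceptors[entry.idx] == acceptorId && entry.tOut != tOut {
            panic("ballot counted but acceptor back-pointer not recorded")
        }
    }
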
--HG-- branch : dev --- paxos/outcomeaccumulator.go | 1 + 1 file changed, 1 insertion(+) diff --git a/paxos/outcomeaccumulator.go b/paxos/outcomeaccumulator.go index 6ebf327..c8ebc0a 100644 --- a/paxos/outcomeaccumulator.go +++ b/paxos/outcomeaccumulator.go @@ -111,6 +111,7 @@ func (oa *OutcomeAccumulator) BallotOutcomeReceived(acceptorId common.RMId, outc // worry about that here. tOut.outcomeReceivedCount++ tOut.acceptors[acceptorOutcome.idx] = acceptorId + acceptorOutcome.tOut = tOut allAgreed := tOut.outcomeReceivedCount == len(oa.acceptorOutcomes) if oa.winningOutcome == nil && oa.fInc == tOut.outcomeReceivedCount { From 17f1a141f49ef655fccdb80d07b9b8959e25a767 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 15 Oct 2016 15:44:03 +0100 Subject: [PATCH 57/78] Maybe start roll in a bunch more places, which solves some recovery bugs. --HG-- branch : dev --- txnengine/frame.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/txnengine/frame.go b/txnengine/frame.go index 9669585..063da58 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -401,6 +401,7 @@ func (fo *frameOpen) ReadLearnt(action *localAction) bool { // frame write clock elem. if actClockElem < reqClockElem { server.Log(fo.frame, "ReadLearnt", txn, "ignored, too old") + fo.maybeStartRoll() return false } else { server.Log(fo.frame, "ReadLearnt", txn, "of future frame") @@ -448,10 +449,12 @@ func (fo *frameOpen) WriteLearnt(action *localAction) bool { reqClockElem := fo.frameTxnClock.At(fo.v.UUId) if actClockElem < reqClockElem || (actClockElem == reqClockElem && action.Id.Compare(fo.frameTxnId) == common.LT) { server.Log(fo.frame, "WriteLearnt", txn, "ignored, too old") + fo.maybeStartRoll() return false } if action.Id.Compare(fo.frameTxnId) == common.EQ { server.Log(fo.frame, "WriteLearnt", txn, "is duplicate of current frame") + fo.maybeStartRoll() return false } if actClockElem == reqClockElem { @@ -715,8 +718,10 @@ func (fo *frameOpen) maybeCreateChild() { } fo.child = NewFrame(fo.frame, fo.v, winner.Id, winner.writeTxnActions, winner.outcomeClock.AsMutable(), written) + fo.v.SetCurFrame(fo.child, winner, positions) for _, action := range fo.learntFutureReads { action.frame = nil + server.Log(fo.frame, "new frame learning future reads") if !fo.child.ReadLearnt(action) { action.LocallyComplete() } @@ -727,7 +732,6 @@ func (fo *frameOpen) maybeCreateChild() { fo.writeVoteClock = nil fo.clientWrites = nil fo.rollTxn = nil - fo.v.SetCurFrame(fo.child, winner, positions) } func (fo *frameOpen) basicRollCondition() bool { @@ -754,6 +758,7 @@ func (fo *frameOpen) maybeStartRollFrom(then *time.Time) { // fmt.Printf("r%v\n", fo.v.UUId) fo.startRoll() } else { + server.Log(fo.frame, "Roll callback scheduled") if then == nil { then = &now } From 2fc308c59d2b9f190152e5db07b44a0b68bbc06c Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 Oct 2016 14:53:23 +0100 Subject: [PATCH 58/78] Well... it compiles again now. Needs testing. 
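
The consistency checker below moves off hand-decoding capnp Txn segments
and onto the TxnReader API. The new decode path, as a sketch (bites is a
serialised txn as fetched from the database; eng is
goshawkdb.io/server/txnengine and msgs the server capnp messages):

    // txnActions walks the actions of a serialised txn via TxnReader.
    func txnActions(bites []byte) []msgs.Action {
        txnReader := eng.TxnReaderFromData(bites)
        list := txnReader.Actions(true).Actions()
        result := make([]msgs.Action, 0, list.Len())
        for idx, l := 0, list.Len(); idx < l; idx++ {
            result = append(result, list.At(idx))
        }
        return result
    }
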
--HG-- branch : dev --- cmd/consistencychecker/main.go | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/cmd/consistencychecker/main.go b/cmd/consistencychecker/main.go index 1196a1f..d0730c3 100644 --- a/cmd/consistencychecker/main.go +++ b/cmd/consistencychecker/main.go @@ -13,7 +13,7 @@ import ( "goshawkdb.io/server/configuration" ch "goshawkdb.io/server/consistenthash" "goshawkdb.io/server/db" - _ "goshawkdb.io/server/txnengine" + eng "goshawkdb.io/server/txnengine" "io/ioutil" "log" "os" @@ -107,12 +107,9 @@ func (lc *locationChecker) locationCheck(cell *varWrapperCell) error { if res == nil || (ok && txnBites == nil) { return fmt.Errorf("Failed to find %v from %v in %v", txnId, vUUId, foundIn) } - seg, _, err := capn.ReadFromMemoryZeroCopy(txnBites) - if err != nil { + if _, _, err = capn.ReadFromMemoryZeroCopy(txnBites); err != nil { return err } - txnCap := msgs.ReadRootTxn(seg) - positions := varCap.Positions().ToArray() rmIds, err := lc.resolver.ResolveHashCodes(positions) if err != nil { @@ -147,14 +144,7 @@ func (lc *locationChecker) locationCheck(cell *varWrapperCell) error { } varBites, ok := res.([]byte) if res == nil || (ok && varBites == nil) { - if vUUId.BootCount() == 1 && vUUId.ConnectionCount() == 0 && - (txnId == nil || - (txnId.BootCount() == 1 && txnId.ConnectionCount() == 0 && - txnCap.Actions().Len() == 1 && txnCap.Actions().At(0).Which() == msgs.ACTION_CREATE)) { - fmt.Printf("Failed to find %v in %v (%v, %v, %v) but it looks like it's a bad root.\n", vUUId, remote, rmIds, positions, foundIn) - } else { - return fmt.Errorf("Failed to find %v in %v (%v, %v, %v)", vUUId, remote, rmIds, positions, foundIn) - } + return fmt.Errorf("Failed to find %v in %v (%v, %v, %v)", vUUId, remote, rmIds, positions, foundIn) } else { seg, _, err := capn.ReadFromMemoryZeroCopy(varBites) if err != nil { @@ -251,18 +241,13 @@ func (s *store) LoadTopology() error { rtxn.Error(fmt.Errorf("Unable to find txn for topology: %v", txnId)) return nil } - seg, _, err = capn.ReadFromMemoryZeroCopy(bites) - if err != nil { - rtxn.Error(err) - return nil - } - txnCap := msgs.ReadRootTxn(seg) - actions := txnCap.Actions() - if actions.Len() != 1 { - rtxn.Error(fmt.Errorf("Topology txn has %v actions; expected 1", actions.Len())) + txnReader := eng.TxnReaderFromData(bites) + actions := txnReader.Actions(true) + if l := actions.Actions().Len(); l != 1 { + rtxn.Error(fmt.Errorf("Topology txn has %v actions; expected 1", l)) return nil } - action := actions.At(0) + action := actions.Actions().At(0) var refs msgs.VarIdPos_List switch action.Which() { case msgs.ACTION_WRITE: @@ -286,14 +271,13 @@ func (s *store) LoadTopology() error { rtxn.Error(fmt.Errorf("Topology txn action has %v references; expected 1", refs.Len())) return nil } - rootRef := refs.At(0) seg, _, err = capn.ReadFromMemoryZeroCopy(bites) if err != nil { rtxn.Error(err) return nil } - topology, err := configuration.TopologyFromCap(txnId, &rootRef, bites) + topology, err := configuration.TopologyFromCap(txnId, &refs, bites) if err != nil { rtxn.Error(err) return nil From 37eef0ab1017d60d22e0e77d8666963a3f3f95e1 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Tue, 18 Oct 2016 17:18:50 +0100 Subject: [PATCH 59/78] Untested. Introduce a translation callback. This is probably about as nice as this can get: it gets the logic for rolls all back into one place, which is vastly preferable. Ref T45. 
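
The hook is the TranslationCallback type declared in
txnengine/vardispatcher.go below: for each client action the submitter
hands back the action, its server-side translation, the RMs the var hashes
to, and the set of currently-connected RMs, and the callback may veto the
whole submission by returning an error - which is exactly how the roll
checks in frame.go are now expressed. For illustration, a callback that
vetoes nothing (passing a nil callback behaves identically):

    var noVeto eng.TranslationCallback = func(cAction *cmsgs.ClientAction,
        action *msgs.Action, hashCodes []common.RMId,
        connected map[common.RMId]bool) error {
        return nil
    }
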
--HG-- branch : T45 --- client/clienttxnsubmitter.go | 4 +-- client/localconnection.go | 18 ++++++++------ client/simpletxnsubmitter.go | 37 +++++++++++----------------- consts.go | 1 + network/topologytransmogrifier.go | 2 +- txnengine/frame.go | 41 ++++++++++++++++++++++++++++--- txnengine/vardispatcher.go | 3 ++- 7 files changed, 67 insertions(+), 39 deletions(-) diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index 3f1d4a9..98a5c37 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -111,13 +111,13 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, newCtxnCap.SetRetry(ctxnCap.Retry()) newCtxnCap.SetActions(ctxnCap.Actions()) - return cts.SimpleTxnSubmitter.SubmitClientTransaction(&newCtxnCap, curTxnId, cont, delay, false, cts.versionCache) + return cts.SimpleTxnSubmitter.SubmitClientTransaction(nil, &newCtxnCap, curTxnId, cont, delay, false, cts.versionCache) } } cts.txnLive = true // fmt.Printf("%v ", delay) - return cts.SimpleTxnSubmitter.SubmitClientTransaction(ctxnCap, curTxnId, cont, delay, false, cts.versionCache) + return cts.SimpleTxnSubmitter.SubmitClientTransaction(nil, ctxnCap, curTxnId, cont, delay, false, cts.versionCache) } func (cts *ClientTxnSubmitter) addCreatesToCache(txn *eng.TxnReader) { diff --git a/client/localconnection.go b/client/localconnection.go index 5f4a906..5dda66c 100644 --- a/client/localconnection.go +++ b/client/localconnection.go @@ -86,10 +86,11 @@ func (lcmsq *localConnectionMsgSyncQuery) maybeClose() { type localConnectionMsgRunClientTxn struct { localConnectionMsgBasic localConnectionMsgSyncQuery - txn *cmsgs.ClientTxn - varPosMap map[common.VarUUId]*common.Positions - txnReader *eng.TxnReader - outcome *msgs.Outcome + txn *cmsgs.ClientTxn + varPosMap map[common.VarUUId]*common.Positions + translationCallback eng.TranslationCallback + txnReader *eng.TxnReader + outcome *msgs.Outcome } func (lcmrct *localConnectionMsgRunClientTxn) consumer(txn *eng.TxnReader, outcome *msgs.Outcome, err error) error { @@ -189,10 +190,11 @@ func (lc *LocalConnection) TopologyChanged(topology *configuration.Topology, don } } -func (lc *LocalConnection) RunClientTransaction(txn *cmsgs.ClientTxn, varPosMap map[common.VarUUId]*common.Positions) (*eng.TxnReader, *msgs.Outcome, error) { +func (lc *LocalConnection) RunClientTransaction(txn *cmsgs.ClientTxn, varPosMap map[common.VarUUId]*common.Positions, translationCallback eng.TranslationCallback) (*eng.TxnReader, *msgs.Outcome, error) { query := &localConnectionMsgRunClientTxn{ - txn: txn, - varPosMap: varPosMap, + txn: txn, + varPosMap: varPosMap, + translationCallback: translationCallback, } query.init() if lc.enqueueQuerySync(query, query.resultChan) { @@ -323,7 +325,7 @@ func (lc *LocalConnection) runClientTransaction(txnQuery *localConnectionMsgRunC if varPosMap := txnQuery.varPosMap; varPosMap != nil { lc.submitter.EnsurePositions(varPosMap) } - return lc.submitter.SubmitClientTransaction(txn, txnId, txnQuery.consumer, 0, true, nil) + return lc.submitter.SubmitClientTransaction(txnQuery.translationCallback, txn, txnId, txnQuery.consumer, 0, true, nil) } func (lc *LocalConnection) runTransaction(txnQuery *localConnectionMsgRunTxn) { diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index 28e2135..d00671e 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -21,6 +21,7 @@ type SimpleTxnSubmitter struct { bootCount uint32 disabledHashCodes map[common.RMId]server.EmptyStruct 
connections map[common.RMId]paxos.Connection + connectionsBool map[common.RMId]bool connPub paxos.ServerConnectionPublisher outcomeConsumers map[common.TxnId]txnOutcomeConsumer onShutdown map[*func(bool) error]server.EmptyStruct @@ -150,11 +151,11 @@ func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, txnId *common // fmt.Printf("sts%v ", len(sts.outcomeConsumers)) } -func (sts *SimpleTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, txnId *common.TxnId, continuation TxnCompletionConsumer, delay time.Duration, useNextVersion bool, vc versionCache) error { +func (sts *SimpleTxnSubmitter) SubmitClientTransaction(translationCallback eng.TranslationCallback, ctxnCap *cmsgs.ClientTxn, txnId *common.TxnId, continuation TxnCompletionConsumer, delay time.Duration, useNextVersion bool, vc versionCache) error { // Frames could attempt rolls before we have a topology. if sts.topology.IsBlank() || (sts.topology.Next() != nil && (!useNextVersion || !sts.topology.NextBarrierReached1(sts.rmId))) { fun := func() error { - return sts.SubmitClientTransaction(ctxnCap, txnId, continuation, delay, useNextVersion, vc) + return sts.SubmitClientTransaction(translationCallback, ctxnCap, txnId, continuation, delay, useNextVersion, vc) } if sts.bufferedSubmissions == nil { sts.bufferedSubmissions = []func() error{fun} @@ -167,7 +168,7 @@ func (sts *SimpleTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, if next := sts.topology.Next(); next != nil && useNextVersion { version = next.Version } - txnCap, activeRMs, _, err := sts.clientToServerTxn(ctxnCap, version, vc) + txnCap, activeRMs, _, err := sts.clientToServerTxn(translationCallback, ctxnCap, version, vc) if err != nil { return continuation(nil, nil, err) } @@ -196,6 +197,10 @@ func (sts *SimpleTxnSubmitter) TopologyChanged(topology *configuration.Topology) func (sts *SimpleTxnSubmitter) ServerConnectionsChanged(servers map[common.RMId]paxos.Connection) error { server.Log("STS ServerConnectionsChanged", servers) sts.connections = servers + sts.connectionsBool = make(map[common.RMId]bool, len(servers)) + for k := range servers { + sts.connectionsBool[k] = true + } return sts.calculateDisabledHashcodes() } @@ -232,7 +237,7 @@ func (sts *SimpleTxnSubmitter) Shutdown() { } } -func (sts *SimpleTxnSubmitter) clientToServerTxn(clientTxnCap *cmsgs.ClientTxn, topologyVersion uint32, vc versionCache) (*msgs.Txn, []common.RMId, []common.RMId, error) { +func (sts *SimpleTxnSubmitter) clientToServerTxn(translationCallback eng.TranslationCallback, clientTxnCap *cmsgs.ClientTxn, topologyVersion uint32, vc versionCache) (*msgs.Txn, []common.RMId, []common.RMId, error) { outgoingSeg := capn.NewBuffer(nil) txnCap := msgs.NewRootTxn(outgoingSeg) @@ -250,7 +255,7 @@ func (sts *SimpleTxnSubmitter) clientToServerTxn(clientTxnCap *cmsgs.ClientTxn, actionsWrapper.SetActions(actions) picker := ch.NewCombinationPicker(int(sts.topology.FInc), sts.disabledHashCodes) - rmIdToActionIndices, err := sts.translateActions(actionsListSeg, picker, &actions, &clientActions, vc) + rmIdToActionIndices, err := sts.translateActions(translationCallback, actionsListSeg, picker, &actions, &clientActions, vc) if err != nil { return nil, nil, nil, err } @@ -291,7 +296,7 @@ func (sts *SimpleTxnSubmitter) setAllocations(allocIdx int, rmIdToActionIndices } // translate from client representation to server representation -func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picker *ch.CombinationPicker, actions *msgs.Action_List, clientActions 
*cmsgs.ClientAction_List, vc versionCache) (map[common.RMId]*[]int, error) { +func (sts *SimpleTxnSubmitter) translateActions(translationCallback eng.TranslationCallback, outgoingSeg *capn.Segment, picker *ch.CombinationPicker, actions *msgs.Action_List, clientActions *cmsgs.ClientAction_List, vc versionCache) (map[common.RMId]*[]int, error) { referencesInNeedOfPositions := []*msgs.VarIdPos{} rmIdToActionIndices := make(map[common.RMId]*[]int) @@ -338,24 +343,10 @@ func (sts *SimpleTxnSubmitter) translateActions(outgoingSeg *capn.Segment, picke return nil, err } } - if clientAction.Which() == cmsgs.CLIENTACTION_ROLL { - // We cannot roll for anyone else. This could try to happen - // during immigration. - found := false - for _, rmId := range hashCodes { - if found = rmId == sts.rmId; found { - break - } - } - if !found { - return nil, eng.AbortRollNotInPermutation - } - // If we're not first then first must not be active - if hashCodes[0] != sts.rmId { - if _, found := sts.connections[hashCodes[0]]; found { - return nil, eng.AbortRollNotFirst - } + if translationCallback != nil { + if err = translationCallback(&clientAction, &action, hashCodes, sts.connectionsBool); err != nil { + return nil, err } } diff --git a/consts.go b/consts.go index d49fbe1..47b335f 100644 --- a/consts.go +++ b/consts.go @@ -14,6 +14,7 @@ const ( VarRollDelayMax = 500 * time.Millisecond VarRollTimeExpectation = 3 * time.Millisecond VarRollPRequirement = 0.9 + VarRollForceNotFirstAfter = time.Second ConnectionRestartDelayRangeMS = 5000 ConnectionRestartDelayMin = 3 * time.Second MostRandomByteIndex = 7 // will be the lsb of a big-endian client-n in the txnid. diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go index f2331cc..3e6ad9e 100644 --- a/network/topologytransmogrifier.go +++ b/network/topologytransmogrifier.go @@ -1796,7 +1796,7 @@ func (task *targetConfig) attemptCreateRoots(rootCount int) (bool, configuration root.VarUUId = vUUId } ctxn.SetActions(actions) - txnReader, result, err := task.localConnection.RunClientTransaction(&ctxn, nil) + txnReader, result, err := task.localConnection.RunClientTransaction(&ctxn, nil, nil) log.Println("Create root result", result, err) if err != nil { return false, nil, err diff --git a/txnengine/frame.go b/txnengine/frame.go index 063da58..e5022c0 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -754,9 +754,13 @@ func (fo *frameOpen) maybeStartRollFrom(then *time.Time) { now := time.Now() quietDuration := server.VarRollTimeExpectation * time.Duration(multiplier) probOfZero := fo.v.poisson.P(quietDuration, 0, now) - if fo.v.vm.RollAllowed && (probOfZero > server.VarRollPRequirement || (then != nil && now.Sub(*then) > server.VarRollDelayMax)) { + elapsed := now.Sub(*then) + if fo.v.vm.RollAllowed && (probOfZero > server.VarRollPRequirement || (then != nil && elapsed > server.VarRollDelayMax)) { // fmt.Printf("r%v\n", fo.v.UUId) - fo.startRoll() + fo.startRoll(rollCallback{ + frameOpen: fo, + forceRoll: elapsed > server.VarRollForceNotFirstAfter, + }) } else { server.Log(fo.frame, "Roll callback scheduled") if then == nil { @@ -778,13 +782,13 @@ func (fo *frameOpen) maybeStartRollFrom(then *time.Time) { } } -func (fo *frameOpen) startRoll() { +func (fo *frameOpen) startRoll(rollCB rollCallback) { fo.rollActive = true // must do roll txn creation in the main go-routine ctxn, varPosMap := fo.createRollClientTxn() go func() { server.Log(fo.frame, "Starting roll") - _, outcome, err := fo.v.vm.RunClientTransaction(ctxn, varPosMap) + _, 
outcome, err := fo.v.vm.RunClientTransaction(ctxn, varPosMap, rollCB.rollTranslationCallback) ow := "" if outcome != nil { ow = fmt.Sprint(outcome.Which()) @@ -810,6 +814,35 @@ func (fo *frameOpen) startRoll() { }() } +type rollCallback struct { + *frameOpen + forceRoll bool +} + +// careful in here: we'll be running this inside localConnection's actor. +func (rc rollCallback) rollTranslationCallback(cAction *cmsgs.ClientAction, action *msgs.Action, hashCodes []common.RMId, connections map[common.RMId]bool) error { + // We cannot roll for anyone else. This could try to happen during + // immigration, which is very bad because we will probably have the + // wrong hashcodes so could cause divergence. + found := false + for _, rmId := range hashCodes { + if found = rmId == rc.v.vm.RMId; found { + break + } + } + if !found { + return AbortRollNotInPermutation + } + + // If we're not first then first must not be active + if !rc.forceRoll && hashCodes[0] != rc.v.vm.RMId { + if connections[hashCodes[0]] { + return AbortRollNotFirst + } + } + return nil +} + func (fo *frameOpen) createRollClientTxn() (*cmsgs.ClientTxn, map[common.VarUUId]*common.Positions) { if fo.rollTxn != nil { return fo.rollTxn, fo.rollTxnPos diff --git a/txnengine/vardispatcher.go b/txnengine/vardispatcher.go index 72cfdb9..2113ba4 100644 --- a/txnengine/vardispatcher.go +++ b/txnengine/vardispatcher.go @@ -70,7 +70,8 @@ func (vd *VarDispatcher) withVarManager(vUUId *common.VarUUId, fun func(*VarMana return executor.Enqueue(func() { fun(manager) }) } +type TranslationCallback func(*cmsgs.ClientAction, *msgs.Action, []common.RMId, map[common.RMId]bool) error type LocalConnection interface { - RunClientTransaction(txn *cmsgs.ClientTxn, varPosMap map[common.VarUUId]*common.Positions) (*TxnReader, *msgs.Outcome, error) + RunClientTransaction(*cmsgs.ClientTxn, map[common.VarUUId]*common.Positions, TranslationCallback) (*TxnReader, *msgs.Outcome, error) Status(*server.StatusConsumer) } From a8a28577ab7409afd9286f4c7a073342e556b8fd Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 19 Oct 2016 14:39:03 +0100 Subject: [PATCH 60/78] Rather than make the transaction code cope with the result arriving whilst we're still trying to vote on it, instead reject transactions that don't have us at the right boot count. This may lead to more reruns, but only when nodes bounce and it's much easier and safer to do this. This code currently untested. Ref T46. 
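
Concretely: every txn carries per-RM allocations, and an allocation
records the boot count the submitter believed each active RM to have. The
test added to ProposerManager.TxnReceived below, pulled out as an
illustrative helper (not in the patch; types from goshawkdb.io/common, the
txnengine TxnReader and the server capnp messages):

    // A txn is only acceptable if the boot count recorded in our
    // allocation matches our current boot count; otherwise it was
    // addressed to an earlier incarnation of this node, which may
    // already have voted on it.
    func submittedForCurrentBoot(txn *eng.TxnReader, rmId common.RMId, bootCount uint32) bool {
        allocations := txn.Txn.Allocations()
        for idx, l := 0, allocations.Len(); idx < l; idx++ {
            alloc := allocations.At(idx)
            if common.RMId(alloc.RmId()) == rmId {
                return alloc.Active() == bootCount
            }
        }
        return false
    }
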
--HG-- branch : T46 --- network/connection.go | 6 +++--- network/connectionmanager.go | 12 ++++++++---- network/topologytransmogrifier.go | 2 +- paxos/network.go | 1 + paxos/proposermanager.go | 22 +++++++++++++++++++++- 5 files changed, 34 insertions(+), 9 deletions(-) diff --git a/network/connection.go b/network/connection.go index d7f3ab8..e8fda37 100644 --- a/network/connection.go +++ b/network/connection.go @@ -631,7 +631,7 @@ func (cash *connectionAwaitServerHandshake) makeHelloServerFromServer() *capn.Se localHost := cash.connectionManager.LocalHost() hello.SetLocalHost(localHost) hello.SetRmId(uint32(cash.connectionManager.RMId)) - hello.SetBootCount(cash.connectionManager.BootCount) + hello.SetBootCount(cash.connectionManager.BootCount()) tieBreak := cash.rng.Uint32() cash.combinedTieBreak = tieBreak hello.SetTieBreak(tieBreak) @@ -704,7 +704,7 @@ func (cach *connectionAwaitClientHandshake) makeHelloClientFromServer() *capn.Se hello := cmsgs.NewRootHelloClientFromServer(seg) namespace := make([]byte, common.KeyLen-8) binary.BigEndian.PutUint32(namespace[0:4], cach.ConnectionNumber) - binary.BigEndian.PutUint32(namespace[4:8], cach.connectionManager.BootCount) + binary.BigEndian.PutUint32(namespace[4:8], cach.connectionManager.BootCount()) binary.BigEndian.PutUint32(namespace[8:], uint32(cach.connectionManager.RMId)) hello.SetNamespace(namespace) rootsCap := cmsgs.NewRootList(seg, len(cach.roots)) @@ -780,7 +780,7 @@ func (cr *connectionRun) start() (bool, error) { } if cr.isClient { servers := cr.connectionManager.ClientEstablished(cr.ConnectionNumber, cr.Connection) - cr.submitter = client.NewClientTxnSubmitter(cr.connectionManager.RMId, cr.connectionManager.BootCount, cr.rootsVar, cr.connectionManager) + cr.submitter = client.NewClientTxnSubmitter(cr.connectionManager.RMId, cr.connectionManager.BootCount(), cr.rootsVar, cr.connectionManager) cr.submitter.TopologyChanged(cr.topology) cr.submitter.ServerConnectionsChanged(servers) } diff --git a/network/connectionmanager.go b/network/connectionmanager.go index 70d9b7b..5376f13 100644 --- a/network/connectionmanager.go +++ b/network/connectionmanager.go @@ -26,7 +26,7 @@ type ConnectionManager struct { sync.RWMutex localHost string RMId common.RMId - BootCount uint32 + bootcount uint32 NodeCertificatePrivateKeyPair *certs.NodeCertificatePrivateKeyPair Transmogrifier *TopologyTransmogrifier topology *configuration.Topology @@ -52,6 +52,10 @@ type topologySubscribers struct { subscribers []map[eng.TopologySubscriber]server.EmptyStruct } +func (cm *ConnectionManager) BootCount() uint32 { + return cm.bootcount +} + func (cm *ConnectionManager) DispatchMessage(sender common.RMId, msgType msgs.Message_Which, msg msgs.Message) { d := cm.Dispatchers switch msgType { @@ -252,7 +256,7 @@ func (cm *ConnectionManager) ClientLost(connNumber uint32, conn paxos.ClientConn } func (cm *ConnectionManager) GetClient(bootNumber, connNumber uint32) paxos.ClientConnection { - if bootNumber != cm.BootCount && bootNumber != 0 { + if bootNumber != cm.bootcount && bootNumber != 0 { return nil } cm.RLock() @@ -332,7 +336,7 @@ func (cm *ConnectionManager) enqueueSyncQuery(msg connectionManagerMsg, resultCh func NewConnectionManager(rmId common.RMId, bootCount uint32, procs int, db *db.Databases, nodeCertPrivKeyPair *certs.NodeCertificatePrivateKeyPair, port uint16, ss ShutdownSignaller, config *configuration.Configuration) (*ConnectionManager, *TopologyTransmogrifier) { cm := &ConnectionManager{ RMId: rmId, - BootCount: bootCount, + bootcount: bootCount, 
NodeCertificatePrivateKeyPair: nodeCertPrivKeyPair, servers: make(map[string]*connectionManagerMsgServerEstablished), rmToServer: make(map[common.RMId]*connectionManagerMsgServerEstablished), @@ -624,7 +628,7 @@ func (cm *ConnectionManager) cloneRMToServer() map[common.RMId]paxos.Connection func (cm *ConnectionManager) status(sc *server.StatusConsumer) { sc.Emit(fmt.Sprintf("Address: %v", cm.localHost)) - sc.Emit(fmt.Sprintf("Boot Count: %v", cm.BootCount)) + sc.Emit(fmt.Sprintf("Boot Count: %v", cm.bootcount)) sc.Emit(fmt.Sprintf("Current Topology: %v", cm.topology)) if cm.topology != nil && cm.topology.Next() != nil { sc.Emit(fmt.Sprintf("Next Topology: %v", cm.topology.Next())) diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go index f2331cc..55aa1c6 100644 --- a/network/topologytransmogrifier.go +++ b/network/topologytransmogrifier.go @@ -1576,7 +1576,7 @@ func (task *targetConfig) createTopologyTransaction(read, write *configuration.T seg := capn.NewBuffer(nil) txn := msgs.NewRootTxn(seg) txn.SetSubmitter(uint32(task.connectionManager.RMId)) - txn.SetSubmitterBootCount(task.connectionManager.BootCount) + txn.SetSubmitterBootCount(task.connectionManager.BootCount()) actionsSeg := capn.NewBuffer(nil) actionsWrapper := msgs.NewRootActionListWrapper(actionsSeg) diff --git a/paxos/network.go b/paxos/network.go index 2df9fac..85c3015 100644 --- a/paxos/network.go +++ b/paxos/network.go @@ -21,6 +21,7 @@ type ConnectionManager interface { ClientEstablished(connNumber uint32, conn ClientConnection) map[common.RMId]Connection ClientLost(connNumber uint32, conn ClientConnection) GetClient(bootNumber, connNumber uint32) ClientConnection + BootCount() uint32 } type ServerConnectionPublisher interface { diff --git a/paxos/proposermanager.go b/paxos/proposermanager.go index 75fe1a2..2752f3a 100644 --- a/paxos/proposermanager.go +++ b/paxos/proposermanager.go @@ -29,6 +29,7 @@ type instanceIdPrefix [instanceIdPrefixLen]byte type ProposerManager struct { ServerConnectionPublisher RMId common.RMId + BootCount uint32 VarDispatcher *eng.VarDispatcher Exe *dispatcher.Executor DB *db.Databases @@ -41,6 +42,7 @@ func NewProposerManager(exe *dispatcher.Executor, rmId common.RMId, cm Connectio pm := &ProposerManager{ ServerConnectionPublisher: NewServerConnectionPublisherProxy(exe, cm), RMId: rmId, + BootCount: cm.BootCount(), proposals: make(map[instanceIdPrefix]*proposal), proposers: make(map[common.TxnId]*Proposer), VarDispatcher: varDispatcher, @@ -118,6 +120,25 @@ func (pm *ProposerManager) TxnReceived(sender common.RMId, txn *eng.TxnReader) { if accept { _, found := pm.topology.RMsRemoved()[sender] accept = !found + if accept { + accept = false + allocations := txn.Txn.Allocations() + for idx, l := 0, allocations.Len(); idx < l; idx++ { + alloc := allocations.At(idx) + rmId := common.RMId(alloc.RmId()) + if rmId == pm.RMId { + accept = alloc.Active() == pm.BootCount + break + } + } + if !accept { + server.Log(txnId, "Aborting received txn as it was submitted for an older version of us so we may have already voted on it.", pm.BootCount) + } + } else { + server.Log(txnId, "Aborting received txn as sender has been removed from topology.", sender) + } + } else { + server.Log(txnId, "Aborting received txn due to non-matching topology.", txnCap.TopologyVersion()) } } if accept { @@ -126,7 +147,6 @@ func (pm *ProposerManager) TxnReceived(sender common.RMId, txn *eng.TxnReader) { proposer.Start() } else { - server.Log(txnId, "Aborting received txn due to non-matching topology.", 
txnCap.TopologyVersion()) acceptors := GetAcceptorsFromTxn(txnCap) fInc := int(txnCap.FInc()) alloc := AllocForRMId(txnCap, pm.RMId) From 7f807cb287574ea8a31efe086ddea93007547247 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 19 Oct 2016 22:45:00 +0100 Subject: [PATCH 61/78] Yeah, rework all that as it was rubbish. This is much better though. Also spotted that if a roll failed, we would immediately retry rather than waiting, causing much thrashing. So don't do that. Ref T45 --HG-- branch : T45 --- txnengine/frame.go | 59 +++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/txnengine/frame.go b/txnengine/frame.go index e5022c0..eff43e4 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -734,17 +734,17 @@ func (fo *frameOpen) maybeCreateChild() { fo.rollTxn = nil } -func (fo *frameOpen) basicRollCondition() bool { - return fo.rollScheduled == nil && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && fo.v.curFrame == fo.frame && +func (fo *frameOpen) basicRollCondition(rescheduling bool) bool { + return (rescheduling || fo.rollScheduled == nil) && !fo.rollActive && fo.currentState == fo && fo.child == nil && fo.writes.Len() == 0 && fo.v.positions != nil && fo.v.curFrame == fo.frame && (fo.reads.Len() > fo.uncommittedReads || (fo.frameTxnClock.Len() > fo.frameTxnActions.Actions().Len() && fo.parent == nil && fo.reads.Len() == 0 && len(fo.learntFutureReads) == 0)) } func (fo *frameOpen) maybeStartRoll() { - fo.maybeStartRollFrom(nil) + fo.maybeStartRollFrom(false) } -func (fo *frameOpen) maybeStartRollFrom(then *time.Time) { - if fo.basicRollCondition() { +func (fo *frameOpen) maybeStartRollFrom(rescheduling bool) { + if fo.basicRollCondition(rescheduling) { multiplier := 0 for node := fo.reads.First(); node != nil; node = node.Next() { if node.Value == committed { @@ -754,34 +754,38 @@ func (fo *frameOpen) maybeStartRollFrom(then *time.Time) { now := time.Now() quietDuration := server.VarRollTimeExpectation * time.Duration(multiplier) probOfZero := fo.v.poisson.P(quietDuration, 0, now) - elapsed := now.Sub(*then) - if fo.v.vm.RollAllowed && (probOfZero > server.VarRollPRequirement || (then != nil && elapsed > server.VarRollDelayMax)) { - // fmt.Printf("r%v\n", fo.v.UUId) + elapsed := time.Duration(0) + if fo.rollScheduled == nil { + fo.rollScheduled = &now + } else { + elapsed = now.Sub(*fo.rollScheduled) + } + if fo.v.vm.RollAllowed && (probOfZero > server.VarRollPRequirement || (elapsed > server.VarRollDelayMax)) { + // fmt.Printf("%v r%v %v\n", now, fo.v.UUId, elapsed) fo.startRoll(rollCallback{ frameOpen: fo, forceRoll: elapsed > server.VarRollForceNotFirstAfter, }) } else { - server.Log(fo.frame, "Roll callback scheduled") - if then == nil { - then = &now - } - fo.rollScheduled = then - // fmt.Printf("s%v(%v|%v)\n", fo.v.UUId, probOfZero, fo.scheduleInterval) - fo.v.vm.ScheduleCallback(fo.scheduleInterval, func(*time.Time) { - fo.v.applyToVar(func() { - fo.rollScheduled = nil - fo.maybeStartRollFrom(then) - }) - }) - fo.scheduleInterval += time.Duration(fo.v.rng.Intn(int(server.VarRollDelayMin))) - if fo.scheduleInterval > server.VarRollDelayMax { - fo.scheduleInterval = fo.scheduleInterval / 2 - } + fo.scheduleRoll() } } } +func (fo *frameOpen) scheduleRoll() { + server.Log(fo.frame, "Roll callback scheduled") + // fmt.Printf("s%v(%v|%v)\n", fo.v.UUId, probOfZero, fo.scheduleInterval) + fo.v.vm.ScheduleCallback(fo.scheduleInterval, func(*time.Time) { + 
fo.v.applyToVar(func() { + fo.maybeStartRollFrom(true) + }) + }) + fo.scheduleInterval += time.Duration(fo.v.rng.Intn(int(server.VarRollDelayMin))) + if fo.scheduleInterval > server.VarRollDelayMax { + fo.scheduleInterval = fo.scheduleInterval / 2 + } +} + func (fo *frameOpen) startRoll(rollCB rollCallback) { fo.rollActive = true // must do roll txn creation in the main go-routine @@ -799,15 +803,17 @@ func (fo *frameOpen) startRoll(rollCB rollCallback) { // fmt.Printf("%v r%v (%v)\n", fo.v.UUId, ow, err == AbortRollNotFirst) server.Log(fo.frame, "Roll finished: outcome", ow, "; err:", err) fo.v.applyToVar(func() { - fo.rollActive = false if fo.v.curFrame != fo.frame { return } + fo.rollActive = false if (outcome == nil && err != nil) || (outcome != nil && outcome.Which() != msgs.OUTCOME_COMMIT) { if err == AbortRollNotInPermutation { + // we need to go to sleep - this var has been removed from this RM + fo.rollScheduled = nil fo.v.maybeMakeInactive() } else { - fo.maybeStartRoll() + fo.scheduleRoll() } } }) @@ -833,7 +839,6 @@ func (rc rollCallback) rollTranslationCallback(cAction *cmsgs.ClientAction, acti if !found { return AbortRollNotInPermutation } - // If we're not first then first must not be active if !rc.forceRoll && hashCodes[0] != rc.v.vm.RMId { if connections[hashCodes[0]] { From 0f01907c79c5eb84fd935c3a01da8c322aff9997 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Oct 2016 09:40:12 +0100 Subject: [PATCH 62/78] This may be helpful. Or it may make no difference. --HG-- branch : dev --- txnengine/frame.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/txnengine/frame.go b/txnengine/frame.go index eff43e4..e2d2e4f 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -769,6 +769,8 @@ func (fo *frameOpen) maybeStartRollFrom(rescheduling bool) { } else { fo.scheduleRoll() } + } else if rescheduling { + fo.rollScheduled = nil } } From 2ae1c4424c5531903bff36006241b0814f29cce8 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Thu, 20 Oct 2016 16:04:28 +0100 Subject: [PATCH 63/78] Move some server.logs about so that they don't cause concurrent modification issues when activated! 
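
When server.Log is activated, evaluating its arguments reads frame and var
state, so the calls must sit where that state is accessed safely: in the
main go-routine before the worker goroutine is spawned, or inside the
var's applyToVar callback - never on the bare goroutine in between. The
pattern, sketched:

    server.Log(fo.frame, "Starting roll") // main go-routine: serialised
    go func() {
        // run the roll txn; don't log shared state from here
        fo.v.applyToVar(func() {
            server.Log(fo.frame, "Roll finished") // inside the var actor
        })
    }()
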
--HG--
branch : dev
---
 txnengine/frame.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/txnengine/frame.go b/txnengine/frame.go
index e2d2e4f..b38a72b 100644
--- a/txnengine/frame.go
+++ b/txnengine/frame.go
@@ -792,8 +792,8 @@ func (fo *frameOpen) startRoll(rollCB rollCallback) {
 	fo.rollActive = true
 	// must do roll txn creation in the main go-routine
 	ctxn, varPosMap := fo.createRollClientTxn()
+	server.Log(fo.frame, "Starting roll")
 	go func() {
-		server.Log(fo.frame, "Starting roll")
 		_, outcome, err := fo.v.vm.RunClientTransaction(ctxn, varPosMap, rollCB.rollTranslationCallback)
 		ow := ""
 		if outcome != nil {
@@ -803,8 +803,8 @@ func (fo *frameOpen) startRoll(rollCB rollCallback) {
 		}
 		// fmt.Printf("%v r%v (%v)\n", fo.v.UUId, ow, err == AbortRollNotFirst)
-		server.Log(fo.frame, "Roll finished: outcome", ow, "; err:", err)
 		fo.v.applyToVar(func() {
+			server.Log(fo.frame, "Roll finished: outcome", ow, "; err:", err)
 			if fo.v.curFrame != fo.frame {
 				return
 			}

From 57c820d8538f1401ebc94114cdd0e4ea9071152c Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Fri, 21 Oct 2016 22:33:28 +0100
Subject: [PATCH 64/78] Add extra panic

Ref T46

--HG--
branch : T46
---
 paxos/ballotaccumulator.go | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/paxos/ballotaccumulator.go b/paxos/ballotaccumulator.go
index 6df8077..582586e 100644
--- a/paxos/ballotaccumulator.go
+++ b/paxos/ballotaccumulator.go
@@ -212,6 +212,9 @@ func (ba *BallotAccumulator) determineOutcome() *outcomeEqualId {
 	} else {
 		outcome.SetTxn(ba.txn.Data)
 		outcome.SetCommit(combinedClock.AsData())
+		if len(ba.vUUIdToBallots) > combinedClock.Len() {
+			panic(fmt.Sprintf("wha... %v, %v, %v", ba.txn.Id, ba.vUUIdToBallots, combinedClock))
+		}
 	}
 
 	ba.outcome = (*outcomeEqualId)(&outcome)

From 9b8795b26d80ed46cc734acc1436e7169f39e3a2 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Sat, 22 Oct 2016 09:29:16 +0100
Subject: [PATCH 65/78] Well this is a fairly embarrassing bug... Ref T46.

--HG--
branch : T46
---
 paxos/ballotaccumulator.go | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/paxos/ballotaccumulator.go b/paxos/ballotaccumulator.go
index 582586e..50b1525 100644
--- a/paxos/ballotaccumulator.go
+++ b/paxos/ballotaccumulator.go
@@ -113,8 +113,7 @@ func BallotAccumulatorFromData(txn *eng.TxnReader, outcome *outcomeEqualId, inst
 }
 
 // For every vUUId involved in this txn, we should see fInc * ballots:
-// one from each RM voting for each vUUId. rmId is the paxos
-// instanceRMId.
+// one from each RM voting for each vUUId.
 func (ba *BallotAccumulator) BallotReceived(instanceRMId common.RMId, inst *instance, vUUId *common.VarUUId, txn *eng.TxnReader) *outcomeEqualId {
 	ba.txn = ba.txn.Combine(txn)
 
@@ -175,6 +174,8 @@ func (ba *BallotAccumulator) determineOutcome() *outcomeEqualId {
 		vUUIds = append(vUUIds, vBallot.vUUId)
 		if vBallot.result == nil {
 			vBallot.CalculateResult(br, combinedClock)
+		} else if !vBallot.result.Aborted() {
+			combinedClock.MergeInMax(vBallot.result.Clock)
 		}
 		aborted = aborted || vBallot.result.Aborted()
 		deadlock = deadlock || vBallot.result.Vote == eng.AbortDeadlock
@@ -213,7 +214,7 @@ func (ba *BallotAccumulator) determineOutcome() *outcomeEqualId {
 		outcome.SetTxn(ba.txn.Data)
 		outcome.SetCommit(combinedClock.AsData())
 		if len(ba.vUUIdToBallots) > combinedClock.Len() {
-			panic(fmt.Sprintf("wha... %v, %v, %v", ba.txn.Id, ba.vUUIdToBallots, combinedClock))
+			panic(fmt.Sprintf("Ballot outcome clock too short! 
%v, %v, %v", ba.txn.Id, ba.vUUIdToBallots, combinedClock)) } } From f0a67e921b404579b580f4727a1c50ac9cda0119 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 5 Nov 2016 10:21:20 +0000 Subject: [PATCH 66/78] Report LMDB version on startup. --HG-- branch : dev --- cmd/goshawkdb/main.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/goshawkdb/main.go b/cmd/goshawkdb/main.go index 7cc59bb..a5c188b 100644 --- a/cmd/goshawkdb/main.go +++ b/cmd/goshawkdb/main.go @@ -6,6 +6,7 @@ import ( "encoding/hex" "flag" "fmt" + mdb "github.com/msackman/gomdb" mdbs "github.com/msackman/gomdb/server" "goshawkdb.io/common" "goshawkdb.io/common/certs" @@ -30,7 +31,7 @@ import ( func main() { log.SetPrefix(common.ProductName + " ") log.SetFlags(log.Ldate | log.Ltime | log.Lmicroseconds) - log.Printf("Version %s; %v", goshawk.ServerVersion, os.Args) + log.Printf("GoshawkDB Version %s with %s; %v", goshawk.ServerVersion, mdb.Version(), os.Args) if s, err := newServer(); err != nil { fmt.Printf("\n%v\n\n", err) From 4af1a0695abb46da4b8eb9e420670ef03ace26a3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sat, 5 Nov 2016 18:58:20 +0000 Subject: [PATCH 67/78] Factor out random binary backoff engines, and add into topology transmogrifier, which now makes it possible to start up bigger clusters from nothing. --HG-- branch : dev --- client/clienttxnsubmitter.go | 26 ++++------ client/localconnection.go | 18 +++---- client/simpletxnsubmitter.go | 24 +++++----- consts.go | 1 - network/topologytransmogrifier.go | 79 +++++++++++++++++++++++++------ txnengine/frame.go | 18 +++---- utils.go | 56 ++++++++++++++++++++++ 7 files changed, 157 insertions(+), 65 deletions(-) diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index 98a5c37..be0f2ad 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -19,15 +19,16 @@ type ClientTxnSubmitter struct { *SimpleTxnSubmitter versionCache versionCache txnLive bool - initialDelay time.Duration + backoff *server.BinaryBackoffEngine } func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, roots map[common.VarUUId]*common.Capability, cm paxos.ConnectionManager) *ClientTxnSubmitter { + sts := NewSimpleTxnSubmitter(rmId, bootCount, cm) return &ClientTxnSubmitter{ - SimpleTxnSubmitter: NewSimpleTxnSubmitter(rmId, bootCount, cm), + SimpleTxnSubmitter: sts, versionCache: NewVersionCache(roots), txnLive: false, - initialDelay: time.Duration(0), + backoff: server.NewBinaryBackoffEngine(sts.rng, 0, server.SubmissionMaxSubmitDelay), } } @@ -51,11 +52,7 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, clientOutcome.SetId(ctxnCap.Id()) curTxnId := common.MakeTxnId(ctxnCap.Id()) - - delay := cts.initialDelay - if delay < time.Millisecond { - delay = time.Duration(0) - } + cts.backoff.Shrink(time.Millisecond) start := time.Now() var cont TxnCompletionConsumer @@ -75,7 +72,6 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, clientOutcome.SetCommit() cts.addCreatesToCache(txn) cts.txnLive = false - cts.initialDelay = delay >> 1 return continuation(&clientOutcome, nil) default: @@ -90,17 +86,13 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, clientOutcome.SetFinalId(txnId[:]) clientOutcome.SetAbort(cts.translateUpdates(seg, validUpdates)) cts.txnLive = false - cts.initialDelay = delay >> 1 return continuation(&clientOutcome, nil) } } server.Log("Resubmitting", txnId, "; orig resubmit?", abort.Which() == 
msgs.OUTCOMEABORT_RESUBMIT) - delay = delay + time.Duration(cts.rng.Intn(int(elapsed))) - if delay > server.SubmissionMaxSubmitDelay { - delay = server.SubmissionMaxSubmitDelay + time.Duration(cts.rng.Intn(int(server.SubmissionMaxSubmitDelay))) - } - //fmt.Printf("%v ", delay) + cts.backoff.AdvanceBy(elapsed) + //fmt.Printf("%v ", cts.backoff.Cur) curTxnIdNum := binary.BigEndian.Uint64(txnId[:8]) curTxnIdNum += 1 + uint64(cts.rng.Intn(8)) @@ -111,13 +103,13 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn, newCtxnCap.SetRetry(ctxnCap.Retry()) newCtxnCap.SetActions(ctxnCap.Actions()) - return cts.SimpleTxnSubmitter.SubmitClientTransaction(nil, &newCtxnCap, curTxnId, cont, delay, false, cts.versionCache) + return cts.SimpleTxnSubmitter.SubmitClientTransaction(nil, &newCtxnCap, curTxnId, cont, cts.backoff, false, cts.versionCache) } } cts.txnLive = true // fmt.Printf("%v ", delay) - return cts.SimpleTxnSubmitter.SubmitClientTransaction(nil, ctxnCap, curTxnId, cont, delay, false, cts.versionCache) + return cts.SimpleTxnSubmitter.SubmitClientTransaction(nil, ctxnCap, curTxnId, cont, cts.backoff, false, cts.versionCache) } func (cts *ClientTxnSubmitter) addCreatesToCache(txn *eng.TxnReader) { diff --git a/client/localconnection.go b/client/localconnection.go index 5dda66c..e6b5138 100644 --- a/client/localconnection.go +++ b/client/localconnection.go @@ -104,9 +104,10 @@ func (lcmrct *localConnectionMsgRunClientTxn) consumer(txn *eng.TxnReader, outco type localConnectionMsgRunTxn struct { localConnectionMsgBasic localConnectionMsgSyncQuery - txnCap *msgs.Txn + txn *msgs.Txn txnId *common.TxnId activeRMs []common.RMId + backoff *server.BinaryBackoffEngine txnReader *eng.TxnReader outcome *msgs.Outcome } @@ -204,11 +205,12 @@ func (lc *LocalConnection) RunClientTransaction(txn *cmsgs.ClientTxn, varPosMap } } -// txnCap must be root in its segment -func (lc *LocalConnection) RunTransaction(txnCap *msgs.Txn, txnId *common.TxnId, activeRMs ...common.RMId) (*eng.TxnReader, *msgs.Outcome, error) { +// txn must be root in its segment +func (lc *LocalConnection) RunTransaction(txn *msgs.Txn, txnId *common.TxnId, backoff *server.BinaryBackoffEngine, activeRMs ...common.RMId) (*eng.TxnReader, *msgs.Outcome, error) { query := &localConnectionMsgRunTxn{ - txnCap: txnCap, + txn: txn, txnId: txnId, + backoff: backoff, activeRMs: activeRMs, } query.init() @@ -325,18 +327,18 @@ func (lc *LocalConnection) runClientTransaction(txnQuery *localConnectionMsgRunC if varPosMap := txnQuery.varPosMap; varPosMap != nil { lc.submitter.EnsurePositions(varPosMap) } - return lc.submitter.SubmitClientTransaction(txnQuery.translationCallback, txn, txnId, txnQuery.consumer, 0, true, nil) + return lc.submitter.SubmitClientTransaction(txnQuery.translationCallback, txn, txnId, txnQuery.consumer, nil, true, nil) } func (lc *LocalConnection) runTransaction(txnQuery *localConnectionMsgRunTxn) { txnId := txnQuery.txnId - txnCap := txnQuery.txnCap + txn := txnQuery.txn if txnId == nil { txnId = lc.getNextTxnId() - txnCap.SetId(txnId[:]) + txn.SetId(txnId[:]) server.Log("LC starting txn", txnId) } - lc.submitter.SubmitTransaction(txnCap, txnId, txnQuery.activeRMs, txnQuery.consumer, 0) + lc.submitter.SubmitTransaction(txn, txnId, txnQuery.activeRMs, txnQuery.consumer, txnQuery.backoff) } func (lc *LocalConnection) getNextTxnId() *common.TxnId { diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index d00671e..a20d558 100644 --- a/client/simpletxnsubmitter.go +++ 
b/client/simpletxnsubmitter.go @@ -84,38 +84,38 @@ func (sts *SimpleTxnSubmitter) SubmissionOutcomeReceived(sender common.RMId, txn } // txnCap must be a root -func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, txnId *common.TxnId, activeRMs []common.RMId, continuation TxnCompletionConsumer, delay time.Duration) { +func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, txnId *common.TxnId, activeRMs []common.RMId, continuation TxnCompletionConsumer, delay *server.BinaryBackoffEngine) { seg := capn.NewBuffer(nil) msg := msgs.NewRootMessage(seg) msg.SetTxnSubmission(server.SegToBytes(txnCap.Segment)) server.Log(txnId, "Submitting txn") txnSender := paxos.NewRepeatingSender(server.SegToBytes(seg), activeRMs...) + sleeping := delay != nil && delay.Cur > 0 var removeSenderCh chan chan server.EmptyStruct - if delay == 0 { - sts.connPub.AddServerConnectionSubscriber(txnSender) - } else { + if sleeping { removeSenderCh = make(chan chan server.EmptyStruct) - go func() { - // fmt.Printf("%v ", delay) - time.Sleep(delay) + // fmt.Printf("%v ", delay.Cur) + delay.After(func() { sts.connPub.AddServerConnectionSubscriber(txnSender) doneChan := <-removeSenderCh sts.connPub.RemoveServerConnectionSubscriber(txnSender) close(doneChan) - }() + }) + } else { + sts.connPub.AddServerConnectionSubscriber(txnSender) } acceptors := paxos.GetAcceptorsFromTxn(*txnCap) shutdownFun := func(shutdown bool) error { delete(sts.outcomeConsumers, *txnId) // fmt.Printf("sts%v ", len(sts.outcomeConsumers)) - if delay == 0 { - sts.connPub.RemoveServerConnectionSubscriber(txnSender) - } else { + if sleeping { txnSenderRemovedChan := make(chan server.EmptyStruct) removeSenderCh <- txnSenderRemovedChan <-txnSenderRemovedChan + } else { + sts.connPub.RemoveServerConnectionSubscriber(txnSender) } // OSS is safe here - see above. paxos.NewOneShotSender(paxos.MakeTxnSubmissionCompleteMsg(txnId), sts.connPub, acceptors...) @@ -151,7 +151,7 @@ func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, txnId *common // fmt.Printf("sts%v ", len(sts.outcomeConsumers)) } -func (sts *SimpleTxnSubmitter) SubmitClientTransaction(translationCallback eng.TranslationCallback, ctxnCap *cmsgs.ClientTxn, txnId *common.TxnId, continuation TxnCompletionConsumer, delay time.Duration, useNextVersion bool, vc versionCache) error { +func (sts *SimpleTxnSubmitter) SubmitClientTransaction(translationCallback eng.TranslationCallback, ctxnCap *cmsgs.ClientTxn, txnId *common.TxnId, continuation TxnCompletionConsumer, delay *server.BinaryBackoffEngine, useNextVersion bool, vc versionCache) error { // Frames could attempt rolls before we have a topology. 
if sts.topology.IsBlank() || (sts.topology.Next() != nil && (!useNextVersion || !sts.topology.NextBarrierReached1(sts.rmId))) { fun := func() error { diff --git a/consts.go b/consts.go index 47b335f..3e0fc95 100644 --- a/consts.go +++ b/consts.go @@ -8,7 +8,6 @@ const ( ServerVersion = "dev" MDBInitialSize = 1048576 TwoToTheSixtyThree = 9223372036854775808 - SubmissionInitialBackoff = 2 * time.Microsecond SubmissionMaxSubmitDelay = 2 * time.Second VarRollDelayMin = 50 * time.Millisecond VarRollDelayMax = 500 * time.Millisecond diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go index 5d80b8b..69b3935 100644 --- a/network/topologytransmogrifier.go +++ b/network/topologytransmogrifier.go @@ -416,17 +416,15 @@ func (tt *TopologyTransmogrifier) selectGoal(goal *configuration.NextConfigurati } } -func (tt *TopologyTransmogrifier) enqueueTick(task topologyTask) { - sleep := time.Duration(tt.rng.Intn(int(server.SubmissionMaxSubmitDelay))) - go func() { - time.Sleep(sleep) +func (tt *TopologyTransmogrifier) enqueueTick(task topologyTask, backoff *server.BinaryBackoffEngine) { + backoff.After(func() { tt.enqueueQuery(topologyTransmogrifierMsgExe(func() error { if tt.task == task { return tt.task.tick() } return nil })) - }() + }) } func (tt *TopologyTransmogrifier) migrationReceived(migration topologyTransmogrifierMsgMigration) error { @@ -846,6 +844,7 @@ func (task *joinCluster) allJoining(allRMIds common.RMIds) error { type installTargetOld struct { *targetConfig + backoff *server.BinaryBackoffEngine } func (task *installTargetOld) tick() error { @@ -883,24 +882,32 @@ func (task *installTargetOld) tick() error { log.Printf("Topology: Calculated target topology: %v (new rootsRequired: %v, active: %v, passive: %v)", targetTopology.Next(), rootsRequired, active, passive) + if task.backoff == nil { + task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay) + } + if rootsRequired != 0 { + start := time.Now() resubmit, roots, err := task.attemptCreateRoots(rootsRequired) if err != nil { return task.fatal(err) } if resubmit { - task.enqueueTick(task) + task.backoff.AdvanceBy(time.Now().Sub(start)) + task.enqueueTick(task, task.backoff) return nil } targetTopology.Roots = append(targetTopology.Roots, roots...) 
} + start := time.Now() _, resubmit, err := task.rewriteTopology(task.active, targetTopology, active, passive) if err != nil { return task.fatal(err) } if resubmit { - task.enqueueTick(task) + task.backoff.AdvanceBy(time.Now().Sub(start)) + task.enqueueTick(task, task.backoff) return nil } // Must be badread, which means again we should receive the @@ -1137,6 +1144,7 @@ func calculateMigrationConditions(added, lost, survived []common.RMId, from, to type installTargetNew struct { *targetConfig + backoff *server.BinaryBackoffEngine } func (task *installTargetNew) tick() error { @@ -1191,13 +1199,19 @@ func (task *installTargetNew) tick() error { topology := task.active.Clone() topology.Next().InstalledOnNew = true + if task.backoff == nil { + task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay) + } + + start := time.Now() _, resubmit, err := task.rewriteTopology(task.active, topology, active, passive) if err != nil { return task.fatal(err) } if resubmit { server.Log("Topology: Topology extension requires resubmit.") - task.enqueueTick(task) + task.backoff.AdvanceBy(time.Now().Sub(start)) + task.enqueueTick(task, task.backoff) } return nil } @@ -1206,6 +1220,7 @@ func (task *installTargetNew) tick() error { type awaitBarrier1 struct { *targetConfig + backoff *server.BinaryBackoffEngine varBarrierReached *configuration.Configuration proposerBarrierReached *configuration.Configuration connectionBarrierReached *configuration.Configuration @@ -1257,13 +1272,19 @@ func (task *awaitBarrier1) tick() error { next = topology.Next() next.BarrierReached1 = append(next.BarrierReached1, task.connectionManager.RMId) + if task.backoff == nil { + task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay) + } + + start := time.Now() _, resubmit, err := task.rewriteTopology(task.active, topology, active, passive) if err != nil { return task.fatal(err) } if resubmit { server.Log("Topology: Barrier1 reached. Requires resubmit.") - task.enqueueTick(task) + task.backoff.AdvanceBy(time.Now().Sub(start)) + task.enqueueTick(task, task.backoff) } } else if activeNextConfig != task.installing { @@ -1322,6 +1343,7 @@ func (task *awaitBarrier1) tick() error { type awaitBarrier2 struct { *targetConfig + backoff *server.BinaryBackoffEngine varBarrierReached *configuration.Configuration installing *configuration.Configuration } @@ -1366,13 +1388,19 @@ func (task *awaitBarrier2) tick() error { next = topology.Next() next.BarrierReached2 = append(next.BarrierReached2, task.connectionManager.RMId) + if task.backoff == nil { + task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay) + } + + start := time.Now() _, resubmit, err := task.rewriteTopology(task.active, topology, active, passive) if err != nil { return task.fatal(err) } if resubmit { server.Log("Topology: Barrier2 reached. Requires resubmit.") - task.enqueueTick(task) + task.backoff.AdvanceBy(time.Now().Sub(start)) + task.enqueueTick(task, task.backoff) } } else if activeNextConfig != task.installing { @@ -1401,6 +1429,7 @@ func (task *awaitBarrier2) tick() error { type migrate struct { *targetConfig + backoff *server.BinaryBackoffEngine emigrator *emigrator } @@ -1468,12 +1497,18 @@ func (task *migrate) tick() error { log.Printf("Topology: Recording local immigration progress (%v). 
Active: %v, Passive: %v", next.Pending, active, passive) + if task.backoff == nil { + task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay) + } + + start := time.Now() _, resubmit, err := task.rewriteTopology(task.active, topology, active, passive) if err != nil { return task.fatal(err) } if resubmit { - task.enqueueTick(task) + task.backoff.AdvanceBy(time.Now().Sub(start)) + task.enqueueTick(task, task.backoff) return nil } // Must be badread, which means again we should receive the @@ -1508,6 +1543,7 @@ func (task *migrate) ensureStopEmigrator() { type installCompletion struct { *targetConfig + backoff *server.BinaryBackoffEngine } func (task *installCompletion) tick() error { @@ -1553,12 +1589,18 @@ func (task *installCompletion) tick() error { } topology.Roots = newRoots + if task.backoff == nil { + task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay) + } + + start := time.Now() _, resubmit, err := task.rewriteTopology(task.active, topology, active, passive) if err != nil { return task.fatal(err) } if resubmit { - task.enqueueTick(task) + task.backoff.AdvanceBy(time.Now().Sub(start)) + task.enqueueTick(task, task.backoff) return nil } // Must be badread, which means again we should receive the @@ -1663,10 +1705,12 @@ func (task *targetConfig) getTopologyFromLocalDatabase() (*configuration.Topolog return nil, err } + backoff := server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay) + start := time.Now() for { txn := task.createTopologyTransaction(nil, nil, []common.RMId{task.connectionManager.RMId}, nil) - _, result, err := task.localConnection.RunTransaction(txn, nil, task.connectionManager.RMId) + _, result, err := task.localConnection.RunTransaction(txn, nil, backoff, task.connectionManager.RMId) if err != nil { return nil, err } @@ -1678,6 +1722,9 @@ func (task *targetConfig) getTopologyFromLocalDatabase() (*configuration.Topolog } abort := result.Abort() if abort.Which() == msgs.OUTCOMEABORT_RESUBMIT { + end := time.Now() + backoff.AdvanceBy(end.Sub(start)) + start = end continue } abortUpdates := abort.Rerun() @@ -1709,7 +1756,8 @@ func (task *targetConfig) createTopologyZero(config *configuration.NextConfigura txn := task.createTopologyTransaction(nil, topology, []common.RMId{task.connectionManager.RMId}, nil) txnId := topology.DBVersion txn.SetId(txnId[:]) - _, result, err := task.localConnection.RunTransaction(txn, txnId, task.connectionManager.RMId) + // in general, we do backoff locally, so don't pass backoff through here + _, result, err := task.localConnection.RunTransaction(txn, txnId, nil, task.connectionManager.RMId) if err != nil { return nil, err } @@ -1726,7 +1774,8 @@ func (task *targetConfig) createTopologyZero(config *configuration.NextConfigura func (task *targetConfig) rewriteTopology(read, write *configuration.Topology, active, passive common.RMIds) (*configuration.Topology, bool, error) { txn := task.createTopologyTransaction(read, write, active, passive) - txnReader, result, err := task.localConnection.RunTransaction(txn, nil, active...) + // in general, we do backoff locally, so don't pass backoff through here + txnReader, result, err := task.localConnection.RunTransaction(txn, nil, nil, active...) 
if result == nil || err != nil { return nil, false, err } diff --git a/txnengine/frame.go b/txnengine/frame.go index b38a72b..e73900e 100644 --- a/txnengine/frame.go +++ b/txnengine/frame.go @@ -28,7 +28,7 @@ type frame struct { readVoteClock *VectorClockMutable positionsFound bool mask *VectorClockMutable - scheduleInterval time.Duration + scheduleBackoff *server.BinaryBackoffEngine frameOpen frameClosed frameErase @@ -47,13 +47,11 @@ func NewFrame(parent *frame, v *Var, txnId *common.TxnId, txnActions *TxnActions } if parent == nil { f.mask = NewVectorClock().AsMutable() - f.scheduleInterval = server.VarRollDelayMin + time.Duration(v.rng.Intn(int(server.VarRollDelayMin))) + f.scheduleBackoff = server.NewBinaryBackoffEngine(v.rng, server.VarRollDelayMin, server.VarRollDelayMax) } else { f.mask = parent.mask - f.scheduleInterval = parent.scheduleInterval / 2 - if f.scheduleInterval < server.VarRollDelayMin { - f.scheduleInterval = server.VarRollDelayMin + time.Duration(v.rng.Intn(int(server.VarRollDelayMin))) - } + f.scheduleBackoff = parent.scheduleBackoff + f.scheduleBackoff.Shrink(0) } f.init() server.Log(f, "NewFrame") @@ -776,16 +774,12 @@ func (fo *frameOpen) maybeStartRollFrom(rescheduling bool) { func (fo *frameOpen) scheduleRoll() { server.Log(fo.frame, "Roll callback scheduled") - // fmt.Printf("s%v(%v|%v)\n", fo.v.UUId, probOfZero, fo.scheduleInterval) - fo.v.vm.ScheduleCallback(fo.scheduleInterval, func(*time.Time) { + // fmt.Printf("s%v(%v|%v)\n", fo.v.UUId, probOfZero, fo.scheduleBackoff.Cur) + fo.v.vm.ScheduleCallback(fo.scheduleBackoff.Advance(), func(*time.Time) { fo.v.applyToVar(func() { fo.maybeStartRollFrom(true) }) }) - fo.scheduleInterval += time.Duration(fo.v.rng.Intn(int(server.VarRollDelayMin))) - if fo.scheduleInterval > server.VarRollDelayMax { - fo.scheduleInterval = fo.scheduleInterval / 2 - } } func (fo *frameOpen) startRoll(rollCB rollCallback) { diff --git a/utils.go b/utils.go index 42e8f6c..218abb1 100644 --- a/utils.go +++ b/utils.go @@ -4,6 +4,8 @@ import ( "bytes" capn "github.com/glycerine/go-capnproto" "log" + "math/rand" + "time" ) func CheckFatal(e error) { @@ -40,3 +42,57 @@ type EmptyStruct struct{} var EmptyStructVal = EmptyStruct{} func (es EmptyStruct) String() string { return "" } + +type BinaryBackoffEngine struct { + rng *rand.Rand + min time.Duration + max time.Duration + Cur time.Duration +} + +func NewBinaryBackoffEngine(rng *rand.Rand, min, max time.Duration) *BinaryBackoffEngine { + cur := time.Duration(0) + if min > 0 { + cur = min + time.Duration(rng.Intn(int(min))) + } + return &BinaryBackoffEngine{ + rng: rng, + min: min, + max: max, + Cur: cur, + } +} + +// returns the old delay, prior to change +func (bbe *BinaryBackoffEngine) Advance() time.Duration { + return bbe.AdvanceBy(bbe.Cur) +} + +// returns the old delay, prior to change +func (bbe *BinaryBackoffEngine) AdvanceBy(d time.Duration) time.Duration { + oldCur := bbe.Cur + bbe.Cur += time.Duration(bbe.rng.Intn(int(d))) + for bbe.max > bbe.min && bbe.Cur > bbe.max { + bbe.Cur = bbe.Cur / 2 + } + return oldCur +} + +func (bbe *BinaryBackoffEngine) After(fun func()) { + duration := bbe.Cur + go func() { + if duration > 0 { + time.Sleep(duration) + } + fun() + }() +} + +func (bbe *BinaryBackoffEngine) Shrink(roundToMin time.Duration) { + bbe.Cur = bbe.Cur / 2 + if bbe.Cur < bbe.min { + bbe.Cur = bbe.min + time.Duration(bbe.rng.Intn(int(bbe.min))) + } else if bbe.Cur < bbe.min+roundToMin { + bbe.Cur = bbe.min + } +} From d38736da05eff9bc267f9fea8bb05cb93671a9a5 Mon Sep 17 00:00:00 
2001 From: Matthew Sackman Date: Sun, 6 Nov 2016 15:48:10 +0000 Subject: [PATCH 68/78] Introduce new flushed message. Introduce callback mech for server ConnectionEstablished observer. So when connection is established, wait for it to go through all the observers, then send the flushed message back over. Only allow client connections once we've got N-F flushed connections (including the local RM). Ref T47. --HG-- branch : T47 --- capnp/connection.capnp | 29 +++---- capnp/connection.capnp.go | 80 ++++++++++++------- client/localconnection.go | 24 ++++-- network/connection.go | 30 ++++++-- network/connectionmanager.go | 124 ++++++++++++++++++++++-------- network/topologytransmogrifier.go | 27 +++++-- paxos/acceptor.go | 3 +- paxos/network.go | 26 +++++-- paxos/proposal.go | 3 +- paxos/proposer.go | 3 +- 10 files changed, 245 insertions(+), 104 deletions(-) diff --git a/capnp/connection.capnp b/capnp/connection.capnp index dd83cdf..8a63bf4 100644 --- a/capnp/connection.capnp +++ b/capnp/connection.capnp @@ -23,19 +23,20 @@ struct HelloServerFromServer { struct Message { union { heartbeat @0: Void; - connectionError @1: Text; - txnSubmission @2: Data; - submissionOutcome @3: Outcome.Outcome; - submissionComplete @4: TxnCompletion.TxnSubmissionComplete; - submissionAbort @5: TxnCompletion.TxnSubmissionAbort; - oneATxnVotes @6: PTV.OneATxnVotes; - oneBTxnVotes @7: PTV.OneBTxnVotes; - twoATxnVotes @8: PTV.TwoATxnVotes; - twoBTxnVotes @9: PTV.TwoBTxnVotes; - txnLocallyComplete @10: TxnCompletion.TxnLocallyComplete; - txnGloballyComplete @11: TxnCompletion.TxnGloballyComplete; - topologyChangeRequest @12: Config.Configuration; - migration @13: Migration.Migration; - migrationComplete @14: Migration.MigrationComplete; + flushed @1: Void; + connectionError @2: Text; + txnSubmission @3: Data; + submissionOutcome @4: Outcome.Outcome; + submissionComplete @5: TxnCompletion.TxnSubmissionComplete; + submissionAbort @6: TxnCompletion.TxnSubmissionAbort; + oneATxnVotes @7: PTV.OneATxnVotes; + oneBTxnVotes @8: PTV.OneBTxnVotes; + twoATxnVotes @9: PTV.TwoATxnVotes; + twoBTxnVotes @10: PTV.TwoBTxnVotes; + txnLocallyComplete @11: TxnCompletion.TxnLocallyComplete; + txnGloballyComplete @12: TxnCompletion.TxnGloballyComplete; + topologyChangeRequest @13: Config.Configuration; + migration @14: Migration.Migration; + migrationComplete @15: Migration.MigrationComplete; } } diff --git a/capnp/connection.capnp.go b/capnp/connection.capnp.go index 1335c56..ff46efd 100644 --- a/capnp/connection.capnp.go +++ b/capnp/connection.capnp.go @@ -331,20 +331,21 @@ type Message_Which uint16 const ( MESSAGE_HEARTBEAT Message_Which = 0 - MESSAGE_CONNECTIONERROR Message_Which = 1 - MESSAGE_TXNSUBMISSION Message_Which = 2 - MESSAGE_SUBMISSIONOUTCOME Message_Which = 3 - MESSAGE_SUBMISSIONCOMPLETE Message_Which = 4 - MESSAGE_SUBMISSIONABORT Message_Which = 5 - MESSAGE_ONEATXNVOTES Message_Which = 6 - MESSAGE_ONEBTXNVOTES Message_Which = 7 - MESSAGE_TWOATXNVOTES Message_Which = 8 - MESSAGE_TWOBTXNVOTES Message_Which = 9 - MESSAGE_TXNLOCALLYCOMPLETE Message_Which = 10 - MESSAGE_TXNGLOBALLYCOMPLETE Message_Which = 11 - MESSAGE_TOPOLOGYCHANGEREQUEST Message_Which = 12 - MESSAGE_MIGRATION Message_Which = 13 - MESSAGE_MIGRATIONCOMPLETE Message_Which = 14 + MESSAGE_FLUSHED Message_Which = 1 + MESSAGE_CONNECTIONERROR Message_Which = 2 + MESSAGE_TXNSUBMISSION Message_Which = 3 + MESSAGE_SUBMISSIONOUTCOME Message_Which = 4 + MESSAGE_SUBMISSIONCOMPLETE Message_Which = 5 + MESSAGE_SUBMISSIONABORT Message_Which = 6 + MESSAGE_ONEATXNVOTES 
Message_Which = 7 + MESSAGE_ONEBTXNVOTES Message_Which = 8 + MESSAGE_TWOATXNVOTES Message_Which = 9 + MESSAGE_TWOBTXNVOTES Message_Which = 10 + MESSAGE_TXNLOCALLYCOMPLETE Message_Which = 11 + MESSAGE_TXNGLOBALLYCOMPLETE Message_Which = 12 + MESSAGE_TOPOLOGYCHANGEREQUEST Message_Which = 13 + MESSAGE_MIGRATION Message_Which = 14 + MESSAGE_MIGRATIONCOMPLETE Message_Which = 15 ) func NewMessage(s *C.Segment) Message { return Message(s.NewStruct(8, 1)) } @@ -353,87 +354,88 @@ func AutoNewMessage(s *C.Segment) Message { return Message(s.NewStructAR(8, func ReadRootMessage(s *C.Segment) Message { return Message(s.Root(0).ToStruct()) } func (s Message) Which() Message_Which { return Message_Which(C.Struct(s).Get16(0)) } func (s Message) SetHeartbeat() { C.Struct(s).Set16(0, 0) } +func (s Message) SetFlushed() { C.Struct(s).Set16(0, 1) } func (s Message) ConnectionError() string { return C.Struct(s).GetObject(0).ToText() } func (s Message) ConnectionErrorBytes() []byte { return C.Struct(s).GetObject(0).ToDataTrimLastByte() } func (s Message) SetConnectionError(v string) { - C.Struct(s).Set16(0, 1) + C.Struct(s).Set16(0, 2) C.Struct(s).SetObject(0, s.Segment.NewText(v)) } func (s Message) TxnSubmission() []byte { return C.Struct(s).GetObject(0).ToData() } func (s Message) SetTxnSubmission(v []byte) { - C.Struct(s).Set16(0, 2) + C.Struct(s).Set16(0, 3) C.Struct(s).SetObject(0, s.Segment.NewData(v)) } func (s Message) SubmissionOutcome() Outcome { return Outcome(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetSubmissionOutcome(v Outcome) { - C.Struct(s).Set16(0, 3) + C.Struct(s).Set16(0, 4) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) SubmissionComplete() TxnSubmissionComplete { return TxnSubmissionComplete(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetSubmissionComplete(v TxnSubmissionComplete) { - C.Struct(s).Set16(0, 4) + C.Struct(s).Set16(0, 5) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) SubmissionAbort() TxnSubmissionAbort { return TxnSubmissionAbort(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetSubmissionAbort(v TxnSubmissionAbort) { - C.Struct(s).Set16(0, 5) + C.Struct(s).Set16(0, 6) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) OneATxnVotes() OneATxnVotes { return OneATxnVotes(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetOneATxnVotes(v OneATxnVotes) { - C.Struct(s).Set16(0, 6) + C.Struct(s).Set16(0, 7) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) OneBTxnVotes() OneBTxnVotes { return OneBTxnVotes(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetOneBTxnVotes(v OneBTxnVotes) { - C.Struct(s).Set16(0, 7) + C.Struct(s).Set16(0, 8) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) TwoATxnVotes() TwoATxnVotes { return TwoATxnVotes(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetTwoATxnVotes(v TwoATxnVotes) { - C.Struct(s).Set16(0, 8) + C.Struct(s).Set16(0, 9) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) TwoBTxnVotes() TwoBTxnVotes { return TwoBTxnVotes(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetTwoBTxnVotes(v TwoBTxnVotes) { - C.Struct(s).Set16(0, 9) + C.Struct(s).Set16(0, 10) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) TxnLocallyComplete() TxnLocallyComplete { return TxnLocallyComplete(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetTxnLocallyComplete(v TxnLocallyComplete) { - C.Struct(s).Set16(0, 10) + C.Struct(s).Set16(0, 11) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) TxnGloballyComplete() TxnGloballyComplete 
{ return TxnGloballyComplete(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetTxnGloballyComplete(v TxnGloballyComplete) { - C.Struct(s).Set16(0, 11) + C.Struct(s).Set16(0, 12) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) TopologyChangeRequest() Configuration { return Configuration(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetTopologyChangeRequest(v Configuration) { - C.Struct(s).Set16(0, 12) + C.Struct(s).Set16(0, 13) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) Migration() Migration { return Migration(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetMigration(v Migration) { - C.Struct(s).Set16(0, 13) + C.Struct(s).Set16(0, 14) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) MigrationComplete() MigrationComplete { return MigrationComplete(C.Struct(s).GetObject(0).ToStruct()) } func (s Message) SetMigrationComplete(v MigrationComplete) { - C.Struct(s).Set16(0, 14) + C.Struct(s).Set16(0, 15) C.Struct(s).SetObject(0, C.Object(v)) } func (s Message) WriteJSON(w io.Writer) error { @@ -456,6 +458,17 @@ func (s Message) WriteJSON(w io.Writer) error { return err } } + if s.Which() == MESSAGE_FLUSHED { + _, err = b.WriteString("\"flushed\":") + if err != nil { + return err + } + _ = s + _, err = b.WriteString("null") + if err != nil { + return err + } + } if s.Which() == MESSAGE_CONNECTIONERROR { _, err = b.WriteString("\"connectionError\":") if err != nil { @@ -678,6 +691,17 @@ func (s Message) WriteCapLit(w io.Writer) error { return err } } + if s.Which() == MESSAGE_FLUSHED { + _, err = b.WriteString("flushed = ") + if err != nil { + return err + } + _ = s + _, err = b.WriteString("null") + if err != nil { + return err + } + } if s.Which() == MESSAGE_CONNECTIONERROR { _, err = b.WriteString("connectionError = ") if err != nil { diff --git a/client/localconnection.go b/client/localconnection.go index e6b5138..f5aec9b 100644 --- a/client/localconnection.go +++ b/client/localconnection.go @@ -221,18 +221,24 @@ func (lc *LocalConnection) RunTransaction(txn *msgs.Txn, txnId *common.TxnId, ba } } -type localConnectionMsgServerConnectionsChanged map[common.RMId]paxos.Connection +type localConnectionMsgServerConnectionsChanged struct { + servers map[common.RMId]paxos.Connection + done func() +} func (lcmscc localConnectionMsgServerConnectionsChanged) witness() localConnectionMsg { return lcmscc } func (lc *LocalConnection) ConnectedRMs(servers map[common.RMId]paxos.Connection) { - lc.enqueueQuery(localConnectionMsgServerConnectionsChanged(servers)) + lc.enqueueQuery(localConnectionMsgServerConnectionsChanged{servers: servers}) } func (lc *LocalConnection) ConnectionLost(rmId common.RMId, servers map[common.RMId]paxos.Connection) { - lc.enqueueQuery(localConnectionMsgServerConnectionsChanged(servers)) + lc.enqueueQuery(localConnectionMsgServerConnectionsChanged{servers: servers}) } -func (lc *LocalConnection) ConnectionEstablished(rmId common.RMId, conn paxos.Connection, servers map[common.RMId]paxos.Connection) { - lc.enqueueQuery(localConnectionMsgServerConnectionsChanged(servers)) +func (lc *LocalConnection) ConnectionEstablished(rmId common.RMId, conn paxos.Connection, servers map[common.RMId]paxos.Connection, done func()) { + lc.enqueueQuery(localConnectionMsgServerConnectionsChanged{ + servers: servers, + done: done, + }) } func NewLocalConnection(rmId common.RMId, bootCount uint32, cm paxos.ConnectionManager) *LocalConnection { @@ -275,6 +281,9 @@ func (lc *LocalConnection) actorLoop(head *cc.ChanCellHead) { topology := 
lc.connectionManager.AddTopologySubscriber(eng.ConnectionSubscriber, lc) defer lc.connectionManager.RemoveTopologySubscriberAsync(eng.ConnectionSubscriber, lc) servers := lc.connectionManager.ClientEstablished(0, lc) + if servers == nil { + panic("LocalConnection failed to register with ConnectionManager!") + } defer lc.connectionManager.ClientLost(0, lc) lc.submitter.TopologyChanged(topology) lc.submitter.ServerConnectionsChanged(servers) @@ -301,7 +310,10 @@ func (lc *LocalConnection) actorLoop(head *cc.ChanCellHead) { case localConnectionMsgOutcomeReceived: err = lc.submitter.SubmissionOutcomeReceived(msgT.sender, msgT.txn, msgT.outcome) case localConnectionMsgServerConnectionsChanged: - err = lc.submitter.ServerConnectionsChanged((map[common.RMId]paxos.Connection)(msgT)) + err = lc.submitter.ServerConnectionsChanged(msgT.servers) + if msgT.done != nil { + msgT.done() + } case localConnectionMsgStatus: lc.status(msgT.StatusConsumer) default: diff --git a/network/connection.go b/network/connection.go index e8fda37..1a9e509 100644 --- a/network/connection.go +++ b/network/connection.go @@ -128,18 +128,24 @@ func (conn *Connection) Status(sc *server.StatusConsumer) { conn.enqueueQuery(connectionMsgStatus{StatusConsumer: sc}) } -type connectionMsgServerConnectionsChanged map[common.RMId]paxos.Connection +type connectionMsgServerConnectionsChanged struct { + servers map[common.RMId]paxos.Connection + done func() +} func (cmdhc connectionMsgServerConnectionsChanged) witness() connectionMsg { return cmdhc } func (conn *Connection) ConnectedRMs(servers map[common.RMId]paxos.Connection) { - conn.enqueueQuery(connectionMsgServerConnectionsChanged(servers)) + conn.enqueueQuery(connectionMsgServerConnectionsChanged{servers: servers}) } func (conn *Connection) ConnectionLost(rmId common.RMId, servers map[common.RMId]paxos.Connection) { - conn.enqueueQuery(connectionMsgServerConnectionsChanged(servers)) + conn.enqueueQuery(connectionMsgServerConnectionsChanged{servers: servers}) } -func (conn *Connection) ConnectionEstablished(rmId common.RMId, c paxos.Connection, servers map[common.RMId]paxos.Connection) { - conn.enqueueQuery(connectionMsgServerConnectionsChanged(servers)) +func (conn *Connection) ConnectionEstablished(rmId common.RMId, c paxos.Connection, servers map[common.RMId]paxos.Connection, done func()) { + conn.enqueueQuery(connectionMsgServerConnectionsChanged{ + servers: servers, + done: done, + }) } func (conn *Connection) enqueueQuery(msg connectionMsg) bool { @@ -267,7 +273,10 @@ func (conn *Connection) handleMsg(msg connectionMsg) (terminate bool, err error) case *connectionMsgTopologyChanged: err = conn.topologyChanged(msgT) case connectionMsgServerConnectionsChanged: - err = conn.serverConnectionsChanged(msgT) + err = conn.serverConnectionsChanged(msgT.servers) + if msgT.done != nil { + msgT.done() + } case connectionMsgStatus: conn.status(msgT.StatusConsumer) default: @@ -776,10 +785,17 @@ func (cr *connectionRun) start() (bool, error) { cr.beatBytes = server.SegToBytes(seg) if cr.isServer { - cr.connectionManager.ServerEstablished(cr.Connection, cr.remoteHost, cr.remoteRMId, cr.remoteBootCount, cr.combinedTieBreak, cr.remoteClusterUUId) + flushSeg := capn.NewBuffer(nil) + flushMsg := msgs.NewRootMessage(flushSeg) + flushMsg.SetFlushed() + flushBytes := server.SegToBytes(flushSeg) + cr.connectionManager.ServerEstablished(cr.Connection, cr.remoteHost, cr.remoteRMId, cr.remoteBootCount, cr.combinedTieBreak, cr.remoteClusterUUId, func() { cr.Send(flushBytes) }) } if cr.isClient { 
servers := cr.connectionManager.ClientEstablished(cr.ConnectionNumber, cr.Connection) + if servers == nil { + return false, fmt.Errorf("Not ready for client connections") + } cr.submitter = client.NewClientTxnSubmitter(cr.connectionManager.RMId, cr.connectionManager.BootCount(), cr.rootsVar, cr.connectionManager) cr.submitter.TopologyChanged(cr.topology) cr.submitter.ServerConnectionsChanged(servers) diff --git a/network/connectionmanager.go b/network/connectionmanager.go index 5376f13..618fd04 100644 --- a/network/connectionmanager.go +++ b/network/connectionmanager.go @@ -35,6 +35,7 @@ type ConnectionManager struct { queryChan <-chan connectionManagerMsg servers map[string]*connectionManagerMsgServerEstablished rmToServer map[common.RMId]*connectionManagerMsgServerEstablished + flushedServers map[common.RMId]server.EmptyStruct connCountToClient map[uint32]paxos.ClientConnection desired []string serverConnSubscribers serverConnSubscribers @@ -107,6 +108,8 @@ func (cm *ConnectionManager) DispatchMessage(sender common.RMId, msgType msgs.Me case msgs.MESSAGE_MIGRATIONCOMPLETE: migrationComplete := msg.MigrationComplete() cm.Transmogrifier.MigrationCompleteReceived(sender, &migrationComplete) + case msgs.MESSAGE_FLUSHED: + cm.ServerConnectionFlushed(sender) default: panic(fmt.Sprintf("Unexpected message received from %v (%v)", sender, msgType)) } @@ -133,13 +136,14 @@ type connectionManagerMsgSetDesired struct { type connectionManagerMsgServerEstablished struct { connectionManagerMsgBasic *Connection - send func([]byte) - established bool - host string - rmId common.RMId - bootCount uint32 - tieBreak uint32 - clusterUUId uint64 + send func([]byte) + established bool + host string + rmId common.RMId + bootCount uint32 + tieBreak uint32 + clusterUUId uint64 + flushCallback func() } type connectionManagerMsgServerLost struct { @@ -149,6 +153,11 @@ type connectionManagerMsgServerLost struct { restarting bool } +type connectionManagerMsgServerFlushed struct { + connectionManagerMsgBasic + rmId common.RMId +} + type connectionManagerMsgClientEstablished struct { connectionManagerMsgBasic connNumber uint32 @@ -212,16 +221,17 @@ func (cm *ConnectionManager) SetDesiredServers(localhost string, remotehosts []s }) } -func (cm *ConnectionManager) ServerEstablished(conn *Connection, host string, rmId common.RMId, bootCount uint32, tieBreak uint32, clusterUUId uint64) { +func (cm *ConnectionManager) ServerEstablished(conn *Connection, host string, rmId common.RMId, bootCount uint32, tieBreak uint32, clusterUUId uint64, flushCallback func()) { cm.enqueueQuery(&connectionManagerMsgServerEstablished{ - Connection: conn, - send: conn.Send, - established: true, - host: host, - rmId: rmId, - bootCount: bootCount, - tieBreak: tieBreak, - clusterUUId: clusterUUId, + Connection: conn, + send: conn.Send, + established: true, + host: host, + rmId: rmId, + bootCount: bootCount, + tieBreak: tieBreak, + clusterUUId: clusterUUId, + flushCallback: flushCallback, }) } @@ -233,6 +243,12 @@ func (cm *ConnectionManager) ServerLost(conn *Connection, rmId common.RMId, rest }) } +func (cm *ConnectionManager) ServerConnectionFlushed(rmId common.RMId) { + cm.enqueueQuery(connectionManagerMsgServerFlushed{ + rmId: rmId, + }) +} + // NB client established gets you server connection subscriber too. It // does not get you a topology subscriber. 
func (cm *ConnectionManager) ClientEstablished(connNumber uint32, conn paxos.ClientConnection) map[common.RMId]paxos.Connection { @@ -340,6 +356,7 @@ func NewConnectionManager(rmId common.RMId, bootCount uint32, procs int, db *db. NodeCertificatePrivateKeyPair: nodeCertPrivKeyPair, servers: make(map[string]*connectionManagerMsgServerEstablished), rmToServer: make(map[common.RMId]*connectionManagerMsgServerEstablished), + flushedServers: make(map[common.RMId]server.EmptyStruct), connCountToClient: make(map[uint32]paxos.ClientConnection), desired: nil, } @@ -412,6 +429,8 @@ func (cm *ConnectionManager) actorLoop(head *cc.ChanCellHead) { cm.serverEstablished(msgT) case connectionManagerMsgServerLost: cm.serverLost(msgT) + case connectionManagerMsgServerFlushed: + cm.serverFlushed(msgT.rmId) case *connectionManagerMsgClientEstablished: cm.clientEstablished(msgT) case connectionManagerMsgSetTopology: @@ -471,7 +490,7 @@ func (cm *ConnectionManager) setDesiredServers(hosts connectionManagerMsgSetDesi cd.host = hosts.local cm.rmToServer[cd.rmId] = cd cm.servers[cd.host] = cd - cm.serverConnSubscribers.ServerConnEstablished(cd) + cm.serverConnSubscribers.ServerConnEstablished(cd, func() { cm.ServerConnectionFlushed(cd.rmId) }) } desiredMap := make(map[string]server.EmptyStruct, len(hosts.remote)) @@ -561,7 +580,7 @@ func (cm *ConnectionManager) serverEstablished(connEst *connectionManagerMsgServ } else { cm.servers[connEst.host] = connEst cm.rmToServer[connEst.rmId] = connEst - cm.serverConnSubscribers.ServerConnEstablished(connEst) + cm.serverConnSubscribers.ServerConnEstablished(connEst, connEst.flushCallback) } } @@ -589,13 +608,24 @@ func (cm *ConnectionManager) serverLost(connLost connectionManagerMsgServerLost) } } +func (cm *ConnectionManager) serverFlushed(rmId common.RMId) { + if cm.flushedServers != nil { + cm.flushedServers[rmId] = server.EmptyStructVal + cm.checkFlushed(cm.topology) + } +} + func (cm *ConnectionManager) clientEstablished(msg *connectionManagerMsgClientEstablished) { - cm.Lock() - cm.connCountToClient[msg.connNumber] = msg.conn - cm.Unlock() - msg.servers = cm.cloneRMToServer() - close(msg.resultChan) - cm.serverConnSubscribers.AddSubscriber(msg.conn) + if cm.flushedServers == nil || msg.connNumber == 0 { // must always allow localconnection through! 
+ cm.Lock() + cm.connCountToClient[msg.connNumber] = msg.conn + cm.Unlock() + msg.servers = cm.cloneRMToServer() + close(msg.resultChan) + cm.serverConnSubscribers.AddSubscriber(msg.conn) + } else { + close(msg.resultChan) + } } func (cm *ConnectionManager) setTopology(topology *configuration.Topology, callbacks map[eng.TopologyChangeSubscriberType]func()) { @@ -610,14 +640,30 @@ func (cm *ConnectionManager) setTopology(topology *configuration.Topology, callb cd.clusterUUId = clusterUUId cm.rmToServer[cm.RMId] = cd cm.servers[cd.host] = cd - cm.serverConnSubscribers.ServerConnEstablished(cd) + cm.serverConnSubscribers.ServerConnEstablished(cd, func() { cm.ServerConnectionFlushed(cd.rmId) }) } } func (cm *ConnectionManager) TopologyChanged(topology *configuration.Topology, done func(bool)) { + cm.checkFlushed(topology) done(true) } +func (cm *ConnectionManager) checkFlushed(topology *configuration.Topology) { + if cm.flushedServers != nil && topology != nil { + requiredFlushed := len(topology.Hosts) - int(topology.F) + for _, rmId := range topology.RMs() { + if _, found := cm.flushedServers[rmId]; found { + requiredFlushed-- + } + } + if requiredFlushed <= 0 { + log.Printf("%v Ready for client connections.", cm.RMId) + cm.flushedServers = nil + } + } +} + func (cm *ConnectionManager) cloneRMToServer() map[common.RMId]paxos.Connection { rmToServerCopy := make(map[common.RMId]paxos.Connection, len(cm.rmToServer)) for rmId, server := range cm.rmToServer { @@ -679,11 +725,23 @@ func (cm *ConnectionManager) Send(b []byte) { } // serverConnSubscribers -func (subs serverConnSubscribers) ServerConnEstablished(cd *connectionManagerMsgServerEstablished) { +func (subs serverConnSubscribers) ServerConnEstablished(cd *connectionManagerMsgServerEstablished, callback func()) { rmToServerCopy := subs.cloneRMToServer() + // we cope with the possibility that subscribers can change during iteration + resultChan := make(chan server.EmptyStruct, len(subs.subscribers)) + done := func() { resultChan <- server.EmptyStructVal } + expected := 0 for ob := range subs.subscribers { - ob.ConnectionEstablished(cd.rmId, cd, rmToServerCopy) + expected++ + ob.ConnectionEstablished(cd.rmId, cd, rmToServerCopy, done) } + go func() { + for expected > 0 { + <-resultChan + expected-- + } + callback() + }() } func (subs serverConnSubscribers) ServerConnLost(rmId common.RMId) { @@ -708,21 +766,23 @@ func (subs serverConnSubscribers) RemoveSubscriber(ob paxos.ServerConnectionSubs // topologySubscribers func (subs topologySubscribers) TopologyChanged(topology *configuration.Topology, callbacks map[eng.TopologyChangeSubscriberType]func()) { + // again, we try to cope with the possibility that subsMap changes during iteration for subType, subsMap := range subs.subscribers { subTypeCopy := subType - subCount := len(subsMap) - resultChan := make(chan bool, subCount) + resultChan := make(chan bool, len(subsMap)) done := func(success bool) { resultChan <- success } + expected := 0 for sub := range subsMap { + expected++ sub.TopologyChanged(topology, done) } if cb, found := callbacks[eng.TopologyChangeSubscriberType(subType)]; found { cbCopy := cb go func() { - server.Log("CM TopologyChanged", subTypeCopy, "expects", subCount, "Dones") - for subCount > 0 { + server.Log("CM TopologyChanged", subTypeCopy, "expects", expected, "Dones") + for expected > 0 { if result := <-resultChan; result { - subCount-- + expected-- } else { server.Log("CM TopologyChanged", subTypeCopy, "failed") return diff --git a/network/topologytransmogrifier.go 
b/network/topologytransmogrifier.go index 69b3935..e5b3d3f 100644 --- a/network/topologytransmogrifier.go +++ b/network/topologytransmogrifier.go @@ -67,7 +67,10 @@ func (tt *TopologyTransmogrifier) RequestConfigurationChange(config *configurati tt.enqueueQuery(topologyTransmogrifierMsgRequestConfigChange{config: config}) } -type topologyTransmogrifierMsgSetActiveConnections map[common.RMId]paxos.Connection +type topologyTransmogrifierMsgSetActiveConnections struct { + servers map[common.RMId]paxos.Connection + done func() +} func (ttmsac topologyTransmogrifierMsgSetActiveConnections) witness() topologyTransmogrifierMsg { return ttmsac @@ -159,15 +162,18 @@ func NewTopologyTransmogrifier(db *db.Databases, cm *ConnectionManager, lc *clie } func (tt *TopologyTransmogrifier) ConnectedRMs(conns map[common.RMId]paxos.Connection) { - tt.enqueueQuery(topologyTransmogrifierMsgSetActiveConnections(conns)) + tt.enqueueQuery(topologyTransmogrifierMsgSetActiveConnections{servers: conns}) } func (tt *TopologyTransmogrifier) ConnectionLost(rmId common.RMId, conns map[common.RMId]paxos.Connection) { - tt.enqueueQuery(topologyTransmogrifierMsgSetActiveConnections(conns)) + tt.enqueueQuery(topologyTransmogrifierMsgSetActiveConnections{servers: conns}) } -func (tt *TopologyTransmogrifier) ConnectionEstablished(rmId common.RMId, conn paxos.Connection, conns map[common.RMId]paxos.Connection) { - tt.enqueueQuery(topologyTransmogrifierMsgSetActiveConnections(conns)) +func (tt *TopologyTransmogrifier) ConnectionEstablished(rmId common.RMId, conn paxos.Connection, conns map[common.RMId]paxos.Connection, done func()) { + tt.enqueueQuery(topologyTransmogrifierMsgSetActiveConnections{ + servers: conns, + done: done, + }) } func (tt *TopologyTransmogrifier) actorLoop(head *cc.ChanCellHead) { @@ -215,7 +221,10 @@ func (tt *TopologyTransmogrifier) actorLoop(head *cc.ChanCellHead) { case topologyTransmogrifierMsgShutdown: terminate = true case topologyTransmogrifierMsgSetActiveConnections: - err = tt.activeConnectionsChange(msgT) + err = tt.activeConnectionsChange(msgT.servers) + if msgT.done != nil { + msgT.done() + } case topologyTransmogrifierMsgTopologyObserved: server.Log("Topology: New topology observed:", msgT.topology) err = tt.setActive(msgT.topology) @@ -1920,7 +1929,8 @@ func (e *emigrator) ConnectionLost(rmId common.RMId, conns map[common.RMId]paxos delete(e.activeBatches, rmId) } -func (e *emigrator) ConnectionEstablished(rmId common.RMId, conn paxos.Connection, conns map[common.RMId]paxos.Connection) { +func (e *emigrator) ConnectionEstablished(rmId common.RMId, conn paxos.Connection, conns map[common.RMId]paxos.Connection, done func()) { + defer done() if rmId == e.connectionManager.RMId { return } @@ -2111,7 +2121,8 @@ func (it *dbIterator) ConnectedRMs(conns map[common.RMId]paxos.Connection) { } } func (it *dbIterator) ConnectionLost(common.RMId, map[common.RMId]paxos.Connection) {} -func (it *dbIterator) ConnectionEstablished(common.RMId, paxos.Connection, map[common.RMId]paxos.Connection) { +func (it *dbIterator) ConnectionEstablished(rmId common.RMId, conn paxos.Connection, servers map[common.RMId]paxos.Connection, done func()) { + done() } type sendBatch struct { diff --git a/paxos/acceptor.go b/paxos/acceptor.go index 945d300..22b326c 100644 --- a/paxos/acceptor.go +++ b/paxos/acceptor.go @@ -418,7 +418,7 @@ func (s *twoBTxnVotesSender) ConnectedRMs(conns map[common.RMId]Connection) { func (s *twoBTxnVotesSender) ConnectionLost(common.RMId, map[common.RMId]Connection) {} -func (s 
*twoBTxnVotesSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection) { +func (s *twoBTxnVotesSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection, done func()) { for _, recipient := range s.recipients { if recipient == rmId { conn.Send(s.msg) @@ -428,4 +428,5 @@ func (s *twoBTxnVotesSender) ConnectionEstablished(rmId common.RMId, conn Connec if s.submitter == rmId { conn.Send(s.submitterMsg) } + done() } diff --git a/paxos/network.go b/paxos/network.go index 85c3015..d80503c 100644 --- a/paxos/network.go +++ b/paxos/network.go @@ -32,7 +32,7 @@ type ServerConnectionPublisher interface { type ServerConnectionSubscriber interface { ConnectedRMs(map[common.RMId]Connection) ConnectionLost(common.RMId, map[common.RMId]Connection) - ConnectionEstablished(common.RMId, Connection, map[common.RMId]Connection) + ConnectionEstablished(common.RMId, Connection, map[common.RMId]Connection, func()) } type Connection interface { @@ -100,12 +100,23 @@ func (pub *serverConnectionPublisherProxy) ConnectionLost(lost common.RMId, serv }) } -func (pub *serverConnectionPublisherProxy) ConnectionEstablished(gained common.RMId, conn Connection, servers map[common.RMId]Connection) { +func (pub *serverConnectionPublisherProxy) ConnectionEstablished(gained common.RMId, conn Connection, servers map[common.RMId]Connection, callback func()) { pub.exe.Enqueue(func() { pub.servers = servers + resultChan := make(chan server.EmptyStruct, len(pub.subs)) + done := func() { resultChan <- server.EmptyStructVal } + expected := 0 for sub := range pub.subs { - sub.ConnectionEstablished(gained, conn, servers) + expected++ + sub.ConnectionEstablished(gained, conn, servers, done) } + go func() { + for expected > 0 { + <-resultChan + expected-- + } + callback() + }() }) } @@ -145,7 +156,7 @@ func (s *OneShotSender) ConnectedRMs(conns map[common.RMId]Connection) { func (s *OneShotSender) ConnectionLost(common.RMId, map[common.RMId]Connection) {} -func (s *OneShotSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection) { +func (s *OneShotSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection, done func()) { if _, found := s.remaining[rmId]; found { delete(s.remaining, rmId) conn.Send(s.msg) @@ -154,6 +165,7 @@ func (s *OneShotSender) ConnectionEstablished(rmId common.RMId, conn Connection, s.connPub.RemoveServerConnectionSubscriber(s) } } + done() } type RepeatingSender struct { @@ -178,7 +190,8 @@ func (s *RepeatingSender) ConnectedRMs(conns map[common.RMId]Connection) { func (s *RepeatingSender) ConnectionLost(common.RMId, map[common.RMId]Connection) {} -func (s *RepeatingSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection) { +func (s *RepeatingSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection, done func()) { + defer done() for _, recipient := range s.recipients { if recipient == rmId { conn.Send(s.msg) @@ -205,6 +218,7 @@ func (s *RepeatingAllSender) ConnectedRMs(conns map[common.RMId]Connection) { func (s *RepeatingAllSender) ConnectionLost(common.RMId, map[common.RMId]Connection) {} -func (s *RepeatingAllSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection) { +func (s *RepeatingAllSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection, done func()) { conn.Send(s.msg) + done() } diff 
--git a/paxos/proposal.go b/paxos/proposal.go index 63329d0..ae72799 100644 --- a/paxos/proposal.go +++ b/paxos/proposal.go @@ -546,7 +546,7 @@ func (s *proposalSender) ConnectionLost(lost common.RMId, conns map[common.RMId] }) } -func (s *proposalSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection) { +func (s *proposalSender) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection, done func()) { for _, acc := range s.proposal.acceptors { if acc == rmId { conn.Send(s.msg) @@ -556,4 +556,5 @@ func (s *proposalSender) ConnectionEstablished(rmId common.RMId, conn Connection if bootCount, found := s.proposal.activeRMIds[rmId]; found && bootCount != conn.BootCount() { s.ConnectionLost(rmId, conns) } + done() } diff --git a/paxos/proposer.go b/paxos/proposer.go index fa11b68..daa42c3 100644 --- a/paxos/proposer.go +++ b/paxos/proposer.go @@ -258,10 +258,11 @@ func (pab *proposerAwaitBallots) ConnectionLost(rmId common.RMId, conns map[comm pab.maybeAbortRetry() } } -func (pab *proposerAwaitBallots) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection) { +func (pab *proposerAwaitBallots) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection, done func()) { if rmId == pab.submitter && conn.BootCount() != pab.submitterBootCount { pab.maybeAbortRetry() } + done() } func (pab *proposerAwaitBallots) maybeAbortRetry() { From c541356dad5475ebebdf20e826e321f50f2de2ca Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Wed, 9 Nov 2016 14:11:41 +0000 Subject: [PATCH 69/78] bug fix: see the rest of this comment in acceptor: We need to watch to see if the submitter dies. If it does, there is a chance that we might be the only remaining record of this txn and so we need to ensure progress somehow. To see how this happens, consider the following scenario: 1. Provided the submitter stays up, its repeating sender will make sure that the txn gets to all proposers, and progress continues to be made. 2. But consider what happens if the submitter and a proposer are on the same node which fails: That proposer has local votes and has sent those votes to us, so we now contain state. But that node now goes down. The txn never made it to any other node (we must be an acceptor, and a learner), so when the node comes back up, there is no record of it anywhere, other than in any such acceptor. Once we've gone to disk, we will then have a repeating 2B sender which will ensure progress, so we have no risk once we've started going to disk. However, if we are a learner, then we cannot start an abort proposer as we're not allowed to vote. So our response in this scenario is actually to start a repeating sender of the txn itself to the other active RMs, thus taking the role of the submitter. The other thing this issue has revealed is that the proposer cannot send deflated txns because we might need the full txn as an acceptor to send on in the above scenario. So we can only deflate after we have a result in the acceptor.
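As a minimal sketch (illustrative names only: watchSubmitter and takeover do not exist in this tree, though Connection.BootCount() does), the liveness check described above amounts to:

    // If the submitter's connection is missing, or the submitter has
    // restarted (its boot count changed), then its repeating sender is
    // gone and this acceptor must itself resend the full txn to the
    // active RMs.
    func watchSubmitter(submitter common.RMId, submitterBootCount uint32,
        conns map[common.RMId]Connection, takeover func()) {
        if conn, found := conns[submitter]; !found || conn.BootCount() != submitterBootCount {
            takeover() // e.g. start a RepeatingSender carrying the undeflated txn
        }
    }

This is exactly the comparison acceptorReceiveBallots performs in its ConnectedRMs, ConnectionLost and ConnectionEstablished handlers below.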
--HG-- branch : dev --- client/simpletxnsubmitter.go | 2 +- network/connection.go | 9 +++- paxos/acceptor.go | 89 ++++++++++++++++++++++++++++++++++-- paxos/proposal.go | 12 ++--- paxos/proposer.go | 3 ++ 5 files changed, 100 insertions(+), 15 deletions(-) diff --git a/client/simpletxnsubmitter.go b/client/simpletxnsubmitter.go index a20d558..61eaa6f 100644 --- a/client/simpletxnsubmitter.go +++ b/client/simpletxnsubmitter.go @@ -89,7 +89,7 @@ func (sts *SimpleTxnSubmitter) SubmitTransaction(txnCap *msgs.Txn, txnId *common msg := msgs.NewRootMessage(seg) msg.SetTxnSubmission(server.SegToBytes(txnCap.Segment)) - server.Log(txnId, "Submitting txn") + server.Log(txnId, "Submitting txn with actives:", activeRMs) txnSender := paxos.NewRepeatingSender(server.SegToBytes(seg), activeRMs...) sleeping := delay != nil && delay.Cur > 0 var removeSenderCh chan chan server.EmptyStruct diff --git a/network/connection.go b/network/connection.go index 1a9e509..1e8a1b5 100644 --- a/network/connection.go +++ b/network/connection.go @@ -233,7 +233,12 @@ func (conn *Connection) actorLoop(head *cc.ChanCellHead) { ) chanFun := func(cell *cc.ChanCell) { queryChan, queryCell = conn.queryChan, cell } head.WithCell(chanFun) - terminate := false + if conn.topology == nil { + panic("Nil topology on connection start!") + err = errors.New("No local topology, not ready for any connections") + } + + terminate := err != nil for !terminate { if oldState != conn.currentState { oldState = conn.currentState @@ -794,7 +799,7 @@ func (cr *connectionRun) start() (bool, error) { if cr.isClient { servers := cr.connectionManager.ClientEstablished(cr.ConnectionNumber, cr.Connection) if servers == nil { - return false, fmt.Errorf("Not ready for client connections") + return false, errors.New("Not ready for client connections") } cr.submitter = client.NewClientTxnSubmitter(cr.connectionManager.RMId, cr.connectionManager.BootCount(), cr.rootsVar, cr.connectionManager) cr.submitter.TopologyChanged(cr.topology) diff --git a/paxos/acceptor.go b/paxos/acceptor.go index 22b326c..96d3157 100644 --- a/paxos/acceptor.go +++ b/paxos/acceptor.go @@ -103,16 +103,52 @@ type acceptorStateMachineComponent interface { type acceptorReceiveBallots struct { *Acceptor - ballotAccumulator *BallotAccumulator - outcome *outcomeEqualId + ballotAccumulator *BallotAccumulator + outcome *outcomeEqualId + txn *eng.TxnReader + txnSubmitter common.RMId + txnSubmitterBootCount uint32 + txnSender *RepeatingSender } func (arb *acceptorReceiveBallots) init(a *Acceptor, txn *eng.TxnReader) { arb.Acceptor = a arb.ballotAccumulator = NewBallotAccumulator(txn) + arb.txn = txn + arb.txnSubmitter = common.RMId(txn.Txn.Submitter()) + arb.txnSubmitterBootCount = txn.Txn.SubmitterBootCount() +} + +func (arb *acceptorReceiveBallots) start() { + // We need to watch to see if the submitter dies. If it does, there + // is a chance that we might be the only remaining record of this + // txn and so we need to ensure progress somehow. To see how this + // happens, consider the following scenario: + // + // 1. Provided the submitter stays up, its repeating sender will + // make sure that the txn gets to all proposers, and progress + // continues to be made. + // + // 2. But consider what happens if the submitter and a proposer are + // on the same node which fails: That proposer has local votes and + // has sent those votes to us, so we now contain state. But that + // node now goes down.
The txn never made it to any other node (we + // must be an acceptor, and a learner), so when the node comes back + // up, there is no record of it anywhere, other than in any such + // acceptor. + // + // Once we've gone to disk, we will then have a repeating 2B sender + // which will ensure progress, so we have no risk once we've + // started going to disk. + // + // However, if we are a learner, then we cannot start an abort + // proposer as we're not allowed to vote. So our response in this + // scenario is actually to start a repeating sender of the txn + // itself to the other active RMs, thus taking the role of the + // submitter. + arb.acceptorManager.AddServerConnectionSubscriber(arb) } -func (arb *acceptorReceiveBallots) start() {} func (arb *acceptorReceiveBallots) acceptorStateMachineComponentWitness() {} func (arb *acceptorReceiveBallots) String() string { return "acceptorReceiveBallots" @@ -132,6 +168,49 @@ func (arb *acceptorReceiveBallots) BallotAccepted(instanceRMId common.RMId, inst } } +func (arb *acceptorReceiveBallots) ConnectedRMs(conns map[common.RMId]Connection) { + if conn, found := conns[arb.txnSubmitter]; !found || conn.BootCount() != arb.txnSubmitterBootCount { + arb.enqueueCreateTxnSender() + } +} +func (arb *acceptorReceiveBallots) ConnectionLost(rmId common.RMId, conns map[common.RMId]Connection) { + if rmId == arb.txnSubmitter { + arb.enqueueCreateTxnSender() + } +} +func (arb *acceptorReceiveBallots) ConnectionEstablished(rmId common.RMId, conn Connection, conns map[common.RMId]Connection, done func()) { + if rmId == arb.txnSubmitter && conn.BootCount() != arb.txnSubmitterBootCount { + arb.enqueueCreateTxnSender() + } + done() +} + +func (arb *acceptorReceiveBallots) enqueueCreateTxnSender() { + arb.acceptorManager.Exe.Enqueue(arb.createTxnSender) +} + +func (arb *acceptorReceiveBallots) createTxnSender() { + if arb.currentState == arb && arb.txnSender == nil { + arb.acceptorManager.RemoveServerConnectionSubscriber(arb) + seg := capn.NewBuffer(nil) + msg := msgs.NewRootMessage(seg) + msg.SetTxnSubmission(arb.txn.Data) + activeRMs := make([]common.RMId, 0, arb.txn.Txn.FInc()*2-1) + allocs := arb.txn.Txn.Allocations() + for idx := 0; idx < allocs.Len(); idx++ { + alloc := allocs.At(idx) + if alloc.Active() == 0 { + break + } else { + activeRMs = append(activeRMs, common.RMId(alloc.RmId())) + } + } + server.Log(arb.txnId, "Starting extra txn sender with actives:", activeRMs) + arb.txnSender = NewRepeatingSender(server.SegToBytes(seg), activeRMs...) 
+ arb.acceptorManager.AddServerConnectionSubscriber(arb.txnSender) + } +} + // write to disk type acceptorWriteToDisk struct { @@ -146,6 +225,10 @@ func (awtd *acceptorWriteToDisk) init(a *Acceptor, txn *eng.TxnReader) { } func (awtd *acceptorWriteToDisk) start() { + awtd.acceptorManager.RemoveServerConnectionSubscriber(&awtd.acceptorReceiveBallots) + if awtd.txnSender != nil { + awtd.acceptorManager.RemoveServerConnectionSubscriber(awtd.txnSender) + } outcome := awtd.outcome outcomeCap := (*msgs.Outcome)(outcome) awtd.sendToAll = awtd.sendToAll || outcomeCap.Which() == msgs.OUTCOME_COMMIT diff --git a/paxos/proposal.go b/paxos/proposal.go index ae72799..3ce7f60 100644 --- a/paxos/proposal.go +++ b/paxos/proposal.go @@ -143,16 +143,11 @@ func (p *proposal) maybeSendTwoA() { twoACap.SetRmId(uint32(p.instanceRMId)) acceptRequests := msgs.NewTxnVoteAcceptRequestList(seg, len(pendingAccepts)) twoACap.SetAcceptRequests(acceptRequests) - deflate := false for idx, pi := range pendingAccepts { acceptRequest := acceptRequests.At(idx) - deflate = pi.addTwoAToAcceptRequest(seg, &acceptRequest, sender) || deflate - } - if deflate { - twoACap.SetTxn(p.txn.AsDeflated().Data) - } else { - twoACap.SetTxn(p.txn.Data) + pi.addTwoAToAcceptRequest(seg, &acceptRequest, sender) } + twoACap.SetTxn(p.txn.Data) sender.msg = server.SegToBytes(seg) server.Log(p.txn.Id, "Adding sender for 2A") p.proposerManager.AddServerConnectionSubscriber(sender) @@ -363,7 +358,7 @@ func (twoA *proposalTwoA) init(pi *proposalInstance) { func (twoA *proposalTwoA) start() {} -func (twoA *proposalTwoA) addTwoAToAcceptRequest(seg *capn.Segment, acceptRequest *msgs.TxnVoteAcceptRequest, sender *proposalSender) bool { +func (twoA *proposalTwoA) addTwoAToAcceptRequest(seg *capn.Segment, acceptRequest *msgs.TxnVoteAcceptRequest, sender *proposalSender) { var ballotData []byte if twoA.winningBallot == nil { // free choice from everyone ballotData = twoA.ballot.Data @@ -375,7 +370,6 @@ func (twoA *proposalTwoA) addTwoAToAcceptReques acceptRequest.SetRoundNumber(uint64(twoA.currentRoundNumber)) twoA.twoASender = sender twoA.nextState(nil) - return eng.BallotFromData(ballotData).Vote != eng.Commit } // twoB diff --git a/paxos/proposer.go b/paxos/proposer.go index daa42c3..5b32c4e 100644 --- a/paxos/proposer.go +++ b/paxos/proposer.go @@ -209,6 +209,9 @@ func (pab *proposerAwaitBallots) start() { pab.submitter = common.RMId(txnCap.Submitter()) pab.submitterBootCount = txnCap.SubmitterBootCount() if pab.txn.Retry { + // We need to observe whether or not the submitter dies. If it + // does die, we should tidy up (abort) asap otherwise we have a + // leak which may never trigger. pab.proposerManager.AddServerConnectionSubscriber(pab) } } From 54600af5dbb60ed968648d64f1f7d47c4b05ab3d Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 11 Nov 2016 17:25:01 +0000 Subject: [PATCH 70/78] The conditions for setting non-0 cluster uuid are the same for creating the roots, so don't create them too early, and only propagate forwards if the active is non-0.
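As a minimal sketch of the rule (the helper name is illustrative; the non-0 guard and SetClusterUUId call appear verbatim in the diff below):

    // A cluster UUId of 0 means "not yet assigned" and acts as a
    // wildcard during server handshake verification, so only propagate
    // the UUId forwards once the active topology holds a real value.
    func maybePropagateClusterUUId(goal *configuration.NextConfiguration, activeClusterUUId uint64) {
        if activeClusterUUId != 0 {
            goal.SetClusterUUId(activeClusterUUId)
        }
    }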
--HG-- branch : dev --- network/topologytransmogrifier.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go index e5b3d3f..28e8003 100644 --- a/network/topologytransmogrifier.go +++ b/network/topologytransmogrifier.go @@ -389,7 +389,10 @@ func (tt *TopologyTransmogrifier) selectGoal(goal *configuration.NextConfigurati log.Printf("Topology: Config transition to version %v completed.", goal.Version) return } - goal.SetClusterUUId(activeClusterUUId) + + if activeClusterUUId != 0 { + goal.SetClusterUUId(activeClusterUUId) + } } if tt.task != nil { @@ -909,6 +912,9 @@ func (task *installTargetOld) tick() error { targetTopology.Roots = append(targetTopology.Roots, roots...) } + targetTopology.SetClusterUUId(task.active.ClusterUUId()) + log.Println("Set cluster uuid", targetTopology.ClusterUUId()) + start := time.Now() _, resubmit, err := task.rewriteTopology(task.active, targetTopology, active, passive) if err != nil { From e64cdac555521c4e7a6cbe80983fdd4edc8e090f Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Fri, 11 Nov 2016 18:39:23 +0000 Subject: [PATCH 71/78] Rewrite stupid code. --HG-- branch : dev --- network/connection.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/network/connection.go b/network/connection.go index 1e8a1b5..a792af7 100644 --- a/network/connection.go +++ b/network/connection.go @@ -630,11 +630,7 @@ func (cash *connectionAwaitServerHandshake) verifyTopology(remote *msgs.HelloSer if cash.topology.ClusterId == remote.ClusterId() { remoteUUId := remote.ClusterUUId() localUUId := cash.topology.ClusterUUId() - if remoteUUId == 0 || localUUId == 0 { - return true - } else { - return remoteUUId == localUUId - } + return remoteUUId == 0 || localUUId == 0 || remoteUUId == localUUId } return false } From 34936196a73cf47229244052437006428a3468fe Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 13 Nov 2016 10:11:16 +0000 Subject: [PATCH 72/78] Remove a couple of incorrect panics and extend some comments dealing with the state of txns during topology changes. --HG-- branch : dev --- network/connection.go | 8 ++++---- paxos/outcomeaccumulator.go | 24 +++++++++++++++++++++--- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/network/connection.go b/network/connection.go index a792af7..925eb61 100644 --- a/network/connection.go +++ b/network/connection.go @@ -235,7 +235,7 @@ func (conn *Connection) actorLoop(head *cc.ChanCellHead) { head.WithCell(chanFun) if conn.topology == nil { panic("Nil topology on connection start!") - err = errors.New("No local topology, not ready for any connections") + // err = errors.New("No local topology, not ready for any connections") } terminate := err != nil @@ -506,7 +506,7 @@ func (cah *connectionAwaitHandshake) verifyHello(hello *cmsgs.Hello) bool { } func (cah *connectionAwaitHandshake) maybeRestartConnection(err error) (bool, error) { - if cah.remoteHost == "" { + if len(cah.remoteHost) == 0 { // we came from the listener and don't know who the remote is, so have to shutdown return false, err } else { @@ -554,7 +554,7 @@ func (cash *connectionAwaitServerHandshake) start() (bool, error) { // end as the server even though in a server-server connection we // really don't care which is which. config := cash.commonTLSConfig() - if cash.remoteHost == "" { + if len(cash.remoteHost) == 0 { // We came from the listener, so we're going to act as the server. 
config.ClientAuth = tls.RequireAndVerifyClientCert socket := tls.Server(cash.socket, config) @@ -619,7 +619,7 @@ func (cash *connectionAwaitServerHandshake) start() (bool, error) { cash.nextState(nil) return false, nil } else { - return cash.connectionAwaitHandshake.maybeRestartConnection(fmt.Errorf("Unequal remote topology")) + return cash.connectionAwaitHandshake.maybeRestartConnection(fmt.Errorf("Unequal remote topology (%v, %v)", cash.remoteHost, cash.remoteRMId)) } } else { return cash.connectionAwaitHandshake.maybeRestartConnection(err) diff --git a/paxos/outcomeaccumulator.go b/paxos/outcomeaccumulator.go index c8ebc0a..fb7d582 100644 --- a/paxos/outcomeaccumulator.go +++ b/paxos/outcomeaccumulator.go @@ -58,9 +58,19 @@ func (oa *OutcomeAccumulator) TopologyChange(topology *configuration.Topology) b // live transaction must have its outcome known. Therefore by this // point we should not have to deal with the removal of nodes // causing winningOutcome needing to go from nil to non-nil. + + // The above holds for user txns, but not for txns which are + // actually involved in a topology change. For example, a node + // which is being removed could start a topology txn, and then + // observe that the topology has changed and it has been + // removed. It then shuts down. This could result in a loss of + // acceptors and proposers. It's the loss of acceptors that's the + // biggest problem because we have no way to replace them. + for rmId := range topology.RMsRemoved() { if acceptorOutcome, found := oa.acceptorOutcomes[rmId]; found { delete(oa.acceptorOutcomes, rmId) + server.Log("OutcomeAccumulator deleting acceptor", rmId) oa.acceptors[acceptorOutcome.idx] = common.RMIdEmpty if l := oa.acceptors.NonEmptyLen(); l < oa.fInc { oa.fInc = l @@ -87,7 +97,13 @@ func (oa *OutcomeAccumulator) BallotOutcomeReceived(acceptorId common.RMId, outc outcomeEq := (*outcomeEqualId)(outcome) acceptorOutcome, found := oa.acceptorOutcomes[acceptorId] if !found { - panic(fmt.Sprintf("BallotOutcomeReceived: Unable to find precreated acceptorIndexWithTxnOutcome for %v", acceptorId)) + // It must have been removed due to a topology change. See notes + // in TopologyChange + if oa.winningOutcome == nil { + return nil, false + } else { + return (*msgs.Outcome)(oa.winningOutcome.outcome), oa.winningOutcome.outcomeReceivedCount == len(oa.acceptorOutcomes) + } } if tOut := acceptorOutcome.tOut; tOut != nil { @@ -114,7 +130,7 @@ func (oa *OutcomeAccumulator) BallotOutcomeReceived(acceptorId common.RMId, outc acceptorOutcome.tOut = tOut allAgreed := tOut.outcomeReceivedCount == len(oa.acceptorOutcomes) - if oa.winningOutcome == nil && oa.fInc == tOut.outcomeReceivedCount { + if oa.winningOutcome == nil && tOut.outcomeReceivedCount == oa.fInc { oa.winningOutcome = tOut return (*msgs.Outcome)(oa.winningOutcome.outcome), allAgreed } @@ -125,7 +141,9 @@ func (oa *OutcomeAccumulator) TxnGloballyCompleteReceived(acceptorId common.RMId server.Log("TGC received from", acceptorId, "; pending:", oa.pendingTGC) acceptorOutcome, found := oa.acceptorOutcomes[acceptorId] if !found { - panic(fmt.Sprintf("TxnGloballyCompleteReceived: Unable to find precreated acceptorIndexWithTxnOutcome for %v", acceptorId)) + // It must have been removed due to a topology change. 
See notes + // in TopologyChange return oa.pendingTGC == 0 } if !acceptorOutcome.tgcReceived { acceptorOutcome.tgcReceived = true From 7da241d88f90ef97b3d292b44295f930478df9b0 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Sun, 13 Nov 2016 12:44:20 +0000 Subject: [PATCH 73/78] A comment. --HG-- branch : dev --- paxos/acceptormanager.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/paxos/acceptormanager.go b/paxos/acceptormanager.go index 7cd6aa4..e4f4bc8 100644 --- a/paxos/acceptormanager.go +++ b/paxos/acceptormanager.go @@ -379,7 +379,18 @@ func (i *instance) TwoATxnVotesReceived(roundNumber paxosNumber, ballot *eng.Bal if roundNumber == i.acceptedNum && i.accepted != nil { // duplicate 2a. Don't issue any response. return - } else if roundNumber >= i.promiseNum || i.promiseNum == 0 { + } else if roundNumber >= i.promiseNum { + // There is a danger here: there could be a race between the + // voter and abort proposers. In theory, the abort proposers + // could do the 1a, 1b and 2a msgs before the acceptor receives + // the direct 2a from the voter. In that case, we must make sure + // that the 2a from the voter does not overwrite the abort 2a + // otherwise we could witness a change in consensus from abort + // to commit. It is for this reason that the voter uses a round + // number of 0, whilst the abort proposers always start their 1a + // from a round number of 1. Thus in the above race, the late + // arriving 2a from the voter will never have a higher round + // number than the 1a/2a from the abort proposer. i.promiseNum = roundNumber i.acceptedNum = roundNumber i.accepted = ballot From e13e1d8c6312816eac33ab8c2fa74b35d66b93e3 Mon Sep 17 00:00:00 2001 From: Matthew Sackman Date: Mon, 14 Nov 2016 12:29:12 +0000 Subject: [PATCH 74/78] Revise the binary backoff engine somewhat, and tidy a lot of the topologytransmogrifier. --HG-- branch : dev --- client/clienttxnsubmitter.go | 11 ++--- consts.go | 1 + network/topologytransmogrifier.go | 72 +++++++++---------------- txnengine/frame.go | 2 +- utils.go | 50 +++++++++++---------- 5 files changed, 50 insertions(+), 86 deletions(-) diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go index be0f2ad..96fd400 100644 --- a/client/clienttxnsubmitter.go +++ b/client/clienttxnsubmitter.go @@ -10,7 +10,6 @@ import ( msgs "goshawkdb.io/server/capnp" "goshawkdb.io/server/paxos" eng "goshawkdb.io/server/txnengine" - "time" ) type ClientTxnCompletionConsumer func(*cmsgs.ClientTxnOutcome, error) error @@ -28,7 +27,7 @@ func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, roots map[common.
From e13e1d8c6312816eac33ab8c2fa74b35d66b93e3 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 14 Nov 2016 12:29:12 +0000
Subject: [PATCH 74/78] Revise the binary backoff engine somewhat, and tidy a
 lot of the topologytransmogrifier.

--HG--
branch : dev
---
 client/clienttxnsubmitter.go      | 11 ++---
 consts.go                         |  1 +
 network/topologytransmogrifier.go | 72 +++++++++----------------------
 txnengine/frame.go                |  2 +-
 utils.go                          | 50 +++++++++++----------
 5 files changed, 50 insertions(+), 86 deletions(-)

diff --git a/client/clienttxnsubmitter.go b/client/clienttxnsubmitter.go
index be0f2ad..96fd400 100644
--- a/client/clienttxnsubmitter.go
+++ b/client/clienttxnsubmitter.go
@@ -10,7 +10,6 @@ import (
 	msgs "goshawkdb.io/server/capnp"
 	"goshawkdb.io/server/paxos"
 	eng "goshawkdb.io/server/txnengine"
-	"time"
 )
 
 type ClientTxnCompletionConsumer func(*cmsgs.ClientTxnOutcome, error) error
@@ -28,7 +27,7 @@ func NewClientTxnSubmitter(rmId common.RMId, bootCount uint32, roots map[common.
 		SimpleTxnSubmitter: sts,
 		versionCache:       NewVersionCache(roots),
 		txnLive:            false,
-		backoff:            server.NewBinaryBackoffEngine(sts.rng, 0, server.SubmissionMaxSubmitDelay),
+		backoff:            server.NewBinaryBackoffEngine(sts.rng, server.SubmissionMinSubmitDelay, server.SubmissionMaxSubmitDelay),
 	}
 }
@@ -52,8 +51,7 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn,
 	clientOutcome.SetId(ctxnCap.Id())
 
 	curTxnId := common.MakeTxnId(ctxnCap.Id())
-	cts.backoff.Shrink(time.Millisecond)
-	start := time.Now()
+	cts.backoff.Shrink(server.SubmissionMinSubmitDelay)
 
 	var cont TxnCompletionConsumer
 	cont = func(txn *eng.TxnReader, outcome *msgs.Outcome, err error) error {
@@ -62,9 +60,6 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn,
 			return continuation(nil, err)
 		}
 		txnId := txn.Id
-		end := time.Now()
-		elapsed := end.Sub(start)
-		start = end
 		switch outcome.Which() {
 		case msgs.OUTCOME_COMMIT:
 			cts.versionCache.UpdateFromCommit(txn, outcome)
@@ -91,7 +86,7 @@ func (cts *ClientTxnSubmitter) SubmitClientTransaction(ctxnCap *cmsgs.ClientTxn,
 			}
 
 			server.Log("Resubmitting", txnId, "; orig resubmit?", abort.Which() == msgs.OUTCOMEABORT_RESUBMIT)
-			cts.backoff.AdvanceBy(elapsed)
+			cts.backoff.Advance()
 			//fmt.Printf("%v ", cts.backoff.Cur)
 
 			curTxnIdNum := binary.BigEndian.Uint64(txnId[:8])
diff --git a/consts.go b/consts.go
index 3e0fc95..40c6bfb 100644
--- a/consts.go
+++ b/consts.go
@@ -8,6 +8,7 @@ const (
 	ServerVersion            = "dev"
 	MDBInitialSize           = 1048576
 	TwoToTheSixtyThree       = 9223372036854775808
+	SubmissionMinSubmitDelay = 2 * time.Millisecond
 	SubmissionMaxSubmitDelay = 2 * time.Second
 	VarRollDelayMin          = 50 * time.Millisecond
 	VarRollDelayMax          = 500 * time.Millisecond
diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go
index 28e8003..718d08a 100644
--- a/network/topologytransmogrifier.go
+++ b/network/topologytransmogrifier.go
@@ -550,11 +550,13 @@ type topologyTask interface {
 
 type targetConfig struct {
 	*TopologyTransmogrifier
-	config *configuration.NextConfiguration
-	sender paxos.ServerConnectionSubscriber
+	config  *configuration.NextConfiguration
+	sender  paxos.ServerConnectionSubscriber
+	backoff *server.BinaryBackoffEngine
 }
 
 func (task *targetConfig) tick() error {
+	task.backoff = nil
 	switch {
 	case task.active == nil:
 		log.Println("Topology: Ensuring local topology.")
@@ -707,6 +709,14 @@ func (task *targetConfig) isInRMs(rmIds common.RMIds) bool {
 	return false
 }
 
+func (task *targetConfig) createOrAdvanceBackoff() {
+	if task.backoff == nil {
+		task.backoff = server.NewBinaryBackoffEngine(task.rng, server.SubmissionMinSubmitDelay, server.SubmissionMaxSubmitDelay)
+	} else {
+		task.backoff.Advance()
+	}
+}
+
 // ensureLocalTopology
 
 type ensureLocalTopology struct {
@@ -856,7 +866,6 @@ func (task *joinCluster) allJoining(allRMIds common.RMIds) error {
 
 type installTargetOld struct {
 	*targetConfig
-	backoff *server.BinaryBackoffEngine
 }
 
 func (task *installTargetOld) tick() error {
@@ -894,18 +903,13 @@ func (task *installTargetOld) tick() error {
 	log.Printf("Topology: Calculated target topology: %v (new rootsRequired: %v, active: %v, passive: %v)",
 		targetTopology.Next(), rootsRequired, active, passive)
 
-	if task.backoff == nil {
-		task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay)
-	}
-
 	if rootsRequired != 0 {
-		start := time.Now()
 		resubmit, roots, err := task.attemptCreateRoots(rootsRequired)
 		if err != nil {
 			return task.fatal(err)
 		}
 		if resubmit {
-			task.backoff.AdvanceBy(time.Now().Sub(start))
+			task.createOrAdvanceBackoff()
 			task.enqueueTick(task, task.backoff)
 			return nil
 		}
@@ -915,13 +919,12 @@
 	targetTopology.SetClusterUUId(task.active.ClusterUUId())
 	log.Println("Set cluster uuid", targetTopology.ClusterUUId())
 
-	start := time.Now()
 	_, resubmit, err := task.rewriteTopology(task.active, targetTopology, active, passive)
 	if err != nil {
 		return task.fatal(err)
 	}
 	if resubmit {
-		task.backoff.AdvanceBy(time.Now().Sub(start))
+		task.createOrAdvanceBackoff()
 		task.enqueueTick(task, task.backoff)
 		return nil
 	}
@@ -1159,7 +1162,6 @@ func calculateMigrationConditions(added, lost, survived []common.RMId, from, to
 
 type installTargetNew struct {
 	*targetConfig
-	backoff *server.BinaryBackoffEngine
 }
 
 func (task *installTargetNew) tick() error {
@@ -1214,18 +1216,13 @@ func (task *installTargetNew) tick() error {
 	topology := task.active.Clone()
 	topology.Next().InstalledOnNew = true
 
-	if task.backoff == nil {
-		task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay)
-	}
-
-	start := time.Now()
 	_, resubmit, err := task.rewriteTopology(task.active, topology, active, passive)
 	if err != nil {
 		return task.fatal(err)
 	}
 	if resubmit {
 		server.Log("Topology: Topology extension requires resubmit.")
-		task.backoff.AdvanceBy(time.Now().Sub(start))
+		task.createOrAdvanceBackoff()
 		task.enqueueTick(task, task.backoff)
 	}
 	return nil
 }
@@ -1235,7 +1232,6 @@ func (task *installTargetNew) tick() error {
 
 type awaitBarrier1 struct {
 	*targetConfig
-	backoff                  *server.BinaryBackoffEngine
 	varBarrierReached        *configuration.Configuration
 	proposerBarrierReached   *configuration.Configuration
 	connectionBarrierReached *configuration.Configuration
@@ -1287,18 +1283,13 @@ func (task *awaitBarrier1) tick() error {
 			next = topology.Next()
 			next.BarrierReached1 = append(next.BarrierReached1, task.connectionManager.RMId)
 
-			if task.backoff == nil {
-				task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay)
-			}
-
-			start := time.Now()
 			_, resubmit, err := task.rewriteTopology(task.active, topology, active, passive)
 			if err != nil {
 				return task.fatal(err)
 			}
 			if resubmit {
 				server.Log("Topology: Barrier1 reached. Requires resubmit.")
-				task.backoff.AdvanceBy(time.Now().Sub(start))
+				task.createOrAdvanceBackoff()
 				task.enqueueTick(task, task.backoff)
 			}
 
 		} else if activeNextConfig != task.installing {
@@ -1358,7 +1349,6 @@ func (task *awaitBarrier1) tick() error {
 
 type awaitBarrier2 struct {
 	*targetConfig
-	backoff           *server.BinaryBackoffEngine
 	varBarrierReached *configuration.Configuration
 	installing        *configuration.Configuration
 }
@@ -1403,18 +1393,13 @@ func (task *awaitBarrier2) tick() error {
 			next = topology.Next()
 			next.BarrierReached2 = append(next.BarrierReached2, task.connectionManager.RMId)
 
-			if task.backoff == nil {
-				task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay)
-			}
-
-			start := time.Now()
 			_, resubmit, err := task.rewriteTopology(task.active, topology, active, passive)
 			if err != nil {
 				return task.fatal(err)
 			}
 			if resubmit {
 				server.Log("Topology: Barrier2 reached. Requires resubmit.")
-				task.backoff.AdvanceBy(time.Now().Sub(start))
+				task.createOrAdvanceBackoff()
 				task.enqueueTick(task, task.backoff)
 			}
 
 		} else if activeNextConfig != task.installing {
@@ -1444,7 +1429,6 @@ func (task *awaitBarrier2) tick() error {
 
 type migrate struct {
 	*targetConfig
-	backoff   *server.BinaryBackoffEngine
 	emigrator *emigrator
 }
@@ -1512,17 +1496,12 @@ func (task *migrate) tick() error {
 		log.Printf("Topology: Recording local immigration progress (%v). Active: %v, Passive: %v",
 			next.Pending, active, passive)
 
-		if task.backoff == nil {
-			task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay)
-		}
-
-		start := time.Now()
 		_, resubmit, err := task.rewriteTopology(task.active, topology, active, passive)
 		if err != nil {
 			return task.fatal(err)
 		}
 		if resubmit {
-			task.backoff.AdvanceBy(time.Now().Sub(start))
+			task.createOrAdvanceBackoff()
 			task.enqueueTick(task, task.backoff)
 			return nil
 		}
@@ -1558,7 +1537,6 @@ func (task *migrate) ensureStopEmigrator() {
 
 type installCompletion struct {
 	*targetConfig
-	backoff *server.BinaryBackoffEngine
 }
 
 func (task *installCompletion) tick() error {
@@ -1604,17 +1582,12 @@ func (task *installCompletion) tick() error {
 	}
 	topology.Roots = newRoots
 
-	if task.backoff == nil {
-		task.backoff = server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay)
-	}
-
-	start := time.Now()
 	_, resubmit, err := task.rewriteTopology(task.active, topology, active, passive)
 	if err != nil {
 		return task.fatal(err)
 	}
 	if resubmit {
-		task.backoff.AdvanceBy(time.Now().Sub(start))
+		task.createOrAdvanceBackoff()
 		task.enqueueTick(task, task.backoff)
 		return nil
 	}
@@ -1720,8 +1693,7 @@ func (task *targetConfig) getTopologyFromLocalDatabase() (*configuration.Topolog
 		return nil, err
 	}
 
-	backoff := server.NewBinaryBackoffEngine(task.rng, 0, server.SubmissionMaxSubmitDelay)
-	start := time.Now()
+	backoff := server.NewBinaryBackoffEngine(task.rng, server.SubmissionMinSubmitDelay, server.SubmissionMaxSubmitDelay)
 
 	for {
 		txn := task.createTopologyTransaction(nil, nil, []common.RMId{task.connectionManager.RMId}, nil)
@@ -1737,9 +1709,7 @@
 		}
 		abort := result.Abort()
 		if abort.Which() == msgs.OUTCOMEABORT_RESUBMIT {
-			end := time.Now()
-			backoff.AdvanceBy(end.Sub(start))
-			start = end
+			backoff.Advance()
 			continue
 		}
 		abortUpdates := abort.Rerun()
diff --git a/txnengine/frame.go b/txnengine/frame.go
index e73900e..b005f0f 100644
--- a/txnengine/frame.go
+++ b/txnengine/frame.go
@@ -51,7 +51,7 @@ func NewFrame(parent *frame, v *Var, txnId *common.TxnId, txnActions *TxnActions
 	} else {
 		f.mask = parent.mask
 		f.scheduleBackoff = parent.scheduleBackoff
-		f.scheduleBackoff.Shrink(0)
+		f.scheduleBackoff.Shrink(server.VarRollDelayMin)
 	}
 	f.init()
 	server.Log(f, "NewFrame")
diff --git a/utils.go b/utils.go
index 218abb1..5d1565e 100644
--- a/utils.go
+++ b/utils.go
@@ -44,37 +44,33 @@ var EmptyStructVal = EmptyStruct{}
 
 func (es EmptyStruct) String() string { return "" }
 
 type BinaryBackoffEngine struct {
-	rng *rand.Rand
-	min time.Duration
-	max time.Duration
-	Cur time.Duration
+	rng    *rand.Rand
+	min    time.Duration
+	max    time.Duration
+	period time.Duration
+	Cur    time.Duration
 }
 
 func NewBinaryBackoffEngine(rng *rand.Rand, min, max time.Duration) *BinaryBackoffEngine {
-	cur := time.Duration(0)
-	if min > 0 {
-		cur = min + time.Duration(rng.Intn(int(min)))
+	if min <= 0 {
+		return nil
 	}
 	return &BinaryBackoffEngine{
-		rng: rng,
-		min: min,
-		max: max,
-		Cur: cur,
+		rng:    rng,
+		min:    min,
+		max:    max,
+		period: min,
+		Cur:    0,
 	}
 }
 
-// returns the old delay, prior to change
 func (bbe *BinaryBackoffEngine) Advance() time.Duration {
-	return bbe.AdvanceBy(bbe.Cur)
-}
-
-// returns the old delay, prior to change
-func (bbe *BinaryBackoffEngine) AdvanceBy(d time.Duration) time.Duration {
 	oldCur := bbe.Cur
-	bbe.Cur += time.Duration(bbe.rng.Intn(int(d)))
-	for bbe.max > bbe.min && bbe.Cur > bbe.max {
-		bbe.Cur = bbe.Cur / 2
+	bbe.period *= 2
+	if bbe.period > bbe.max {
+		bbe.period = bbe.max
 	}
+	bbe.Cur = time.Duration(bbe.rng.Intn(int(bbe.period)))
 	return oldCur
 }
@@ -88,11 +84,13 @@ func (bbe *BinaryBackoffEngine) After(fun func()) {
 	}()
 }
 
-func (bbe *BinaryBackoffEngine) Shrink(roundToMin time.Duration) {
-	bbe.Cur = bbe.Cur / 2
-	if bbe.Cur < bbe.min {
-		bbe.Cur = bbe.min + time.Duration(bbe.rng.Intn(int(bbe.min)))
-	} else if bbe.Cur < bbe.min+roundToMin {
-		bbe.Cur = bbe.min
+func (bbe *BinaryBackoffEngine) Shrink(roundToZero time.Duration) {
+	bbe.period /= 2
+	if bbe.period < bbe.min {
+		bbe.period = bbe.min
+	}
+	bbe.Cur = time.Duration(bbe.rng.Intn(int(bbe.period)))
+	if bbe.Cur <= roundToZero {
+		bbe.Cur = 0
 	}
 }
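
The practical effect of this rework: the current delay is now drawn uniformly from [0, period), where period doubles on each Advance and is clamped to max, instead of accumulating random increments of elapsed wall-clock time. A standalone sketch of that envelope follows; the 2ms/2s figures come from SubmissionMinSubmitDelay and SubmissionMaxSubmitDelay above, and the struct here is a stripped-down illustration, not the real engine:

    package main

    import (
        "fmt"
        "math/rand"
        "time"
    )

    // sketch mirrors the revised Advance/Shrink arithmetic from utils.go.
    type sketch struct {
        rng              *rand.Rand
        min, max, period time.Duration
        cur              time.Duration
    }

    func (s *sketch) advance() {
        s.period *= 2 // binary backoff: double the window...
        if s.period > s.max {
            s.period = s.max // ...up to the configured ceiling
        }
        s.cur = time.Duration(s.rng.Intn(int(s.period)))
    }

    func (s *sketch) shrink(roundToZero time.Duration) {
        s.period /= 2 // success relaxes the window again
        if s.period < s.min {
            s.period = s.min
        }
        s.cur = time.Duration(s.rng.Intn(int(s.period)))
        if s.cur <= roundToZero {
            s.cur = 0 // tiny delays round down to "go immediately"
        }
    }

    func main() {
        s := &sketch{rng: rand.New(rand.NewSource(1)),
            min: 2 * time.Millisecond, max: 2 * time.Second, period: 2 * time.Millisecond}
        for i := 1; i <= 11; i++ {
            s.advance()
            fmt.Printf("resubmit %2d: period=%v delay=%v\n", i, s.period, s.cur)
        }
        s.shrink(2 * time.Millisecond)
        fmt.Println("after shrink:", s.period, s.cur)
    }

This bounds the expected delay by period/2, while Shrink's roundToZero argument lets a lightly contended submitter return to submitting with no delay at all.
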
From e81fd4a236f9f6868e216cff274ec61e4657e692 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Mon, 14 Nov 2016 15:16:35 +0000
Subject: [PATCH 75/78] Refactoring; don't allow multiple enqueued ticks for a
 task; allow topology txns to back off much much further.

--HG--
branch : dev
---
 network/topologytransmogrifier.go | 54 ++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 23 deletions(-)

diff --git a/network/topologytransmogrifier.go b/network/topologytransmogrifier.go
index 718d08a..b8b72c2 100644
--- a/network/topologytransmogrifier.go
+++ b/network/topologytransmogrifier.go
@@ -428,15 +428,19 @@ func (tt *TopologyTransmogrifier) selectGoal(goal *configuration.NextConfigurati
 	}
 }
 
-func (tt *TopologyTransmogrifier) enqueueTick(task topologyTask, backoff *server.BinaryBackoffEngine) {
-	backoff.After(func() {
-		tt.enqueueQuery(topologyTransmogrifierMsgExe(func() error {
-			if tt.task == task {
-				return tt.task.tick()
-			}
-			return nil
-		}))
-	})
+func (tt *TopologyTransmogrifier) enqueueTick(task topologyTask, tc *targetConfig) {
+	if !tc.tickEnqueued {
+		tc.tickEnqueued = true
+		tc.backoff.After(func() {
+			tt.enqueueQuery(topologyTransmogrifierMsgExe(func() error {
+				tc.tickEnqueued = false
+				if tt.task == task {
+					return tt.task.tick()
+				}
+				return nil
+			}))
+		})
+	}
 }
 
 func (tt *TopologyTransmogrifier) migrationReceived(migration topologyTransmogrifierMsgMigration) error {
@@ -550,13 +554,16 @@ type topologyTask interface {
 
 type targetConfig struct {
 	*TopologyTransmogrifier
-	config  *configuration.NextConfiguration
-	sender  paxos.ServerConnectionSubscriber
-	backoff *server.BinaryBackoffEngine
+	config       *configuration.NextConfiguration
+	sender       paxos.ServerConnectionSubscriber
+	backoff      *server.BinaryBackoffEngine
+	tickEnqueued bool
 }
 
 func (task *targetConfig) tick() error {
 	task.backoff = nil
+	task.tickEnqueued = false
+
 	switch {
 	case task.active == nil:
 		log.Println("Topology: Ensuring local topology.")
@@ -711,7 +718,7 @@ func (task *targetConfig) isInRMs(rmIds common.RMIds) bool {
 
 func (task *targetConfig) createOrAdvanceBackoff() {
 	if task.backoff == nil {
-		task.backoff = server.NewBinaryBackoffEngine(task.rng, server.SubmissionMinSubmitDelay, server.SubmissionMaxSubmitDelay)
+		task.backoff = server.NewBinaryBackoffEngine(task.rng, server.SubmissionMinSubmitDelay, time.Duration(len(task.config.Hosts))*server.SubmissionMaxSubmitDelay)
 	} else {
 		task.backoff.Advance()
 	}
@@ -910,22 +917,23 @@ func (task *installTargetOld) tick() error {
 		}
 		if resubmit {
 			task.createOrAdvanceBackoff()
-			task.enqueueTick(task, task.backoff)
+			task.enqueueTick(task, task.targetConfig)
 			return nil
 		}
 		targetTopology.Roots = append(targetTopology.Roots, roots...)
 	}
 
 	targetTopology.SetClusterUUId(task.active.ClusterUUId())
-	log.Println("Set cluster uuid", targetTopology.ClusterUUId())
+	server.Log("Set cluster uuid", targetTopology.ClusterUUId())
 
 	_, resubmit, err := task.rewriteTopology(task.active, targetTopology, active, passive)
 	if err != nil {
 		return task.fatal(err)
 	}
 	if resubmit {
+		server.Log("Topology: Installing to old requires resubmit.")
 		task.createOrAdvanceBackoff()
-		task.enqueueTick(task, task.backoff)
+		task.enqueueTick(task, task.targetConfig)
 		return nil
 	}
 	// Must be badread, which means again we should receive the
@@ -1221,9 +1229,9 @@ func (task *installTargetNew) tick() error {
 		return task.fatal(err)
 	}
 	if resubmit {
-		server.Log("Topology: Topology extension requires resubmit.")
+		server.Log("Topology: Installing to new requires resubmit.")
 		task.createOrAdvanceBackoff()
-		task.enqueueTick(task, task.backoff)
+		task.enqueueTick(task, task.targetConfig)
 	}
 	return nil
 }
@@ -1290,7 +1298,7 @@ func (task *awaitBarrier1) tick() error {
 			if resubmit {
 				server.Log("Topology: Barrier1 reached. Requires resubmit.")
 				task.createOrAdvanceBackoff()
-				task.enqueueTick(task, task.backoff)
+				task.enqueueTick(task, task.targetConfig)
 			}
 
 		} else if activeNextConfig != task.installing {
@@ -1400,7 +1408,7 @@ func (task *awaitBarrier2) tick() error {
 			if resubmit {
 				server.Log("Topology: Barrier2 reached. Requires resubmit.")
 				task.createOrAdvanceBackoff()
-				task.enqueueTick(task, task.backoff)
+				task.enqueueTick(task, task.targetConfig)
 			}
 
 		} else if activeNextConfig != task.installing {
@@ -1502,7 +1510,7 @@ func (task *migrate) tick() error {
 		}
 		if resubmit {
 			task.createOrAdvanceBackoff()
-			task.enqueueTick(task, task.backoff)
+			task.enqueueTick(task, task.targetConfig)
 			return nil
 		}
 		// Must be badread, which means again we should receive the
@@ -1588,7 +1596,7 @@ func (task *installCompletion) tick() error {
 	}
 	if resubmit {
 		task.createOrAdvanceBackoff()
-		task.enqueueTick(task, task.backoff)
+		task.enqueueTick(task, task.targetConfig)
 		return nil
 	}
 	// Must be badread, which means again we should receive the
@@ -1831,7 +1839,7 @@ func (task *targetConfig) attemptCreateRoots(rootCount int) (bool, configuration
 	}
 	ctxn.SetActions(actions)
 	txnReader, result, err := task.localConnection.RunClientTransaction(&ctxn, nil, nil)
-	log.Println("Create root result", result, err)
+	server.Log("Create root result", result, err)
 	if err != nil {
 		return false, nil, err
 	}
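
The tickEnqueued flag is a straightforward at-most-one-outstanding-callback guard: while a delayed tick is in flight, further enqueueTick calls coalesce into it. A standalone sketch of the pattern (the names and the channel-based loop are invented for illustration; the real version threads the flag through targetConfig and the transmogrifier's query queue):

    package main

    import (
        "fmt"
        "time"
    )

    type ticker struct {
        tickEnqueued bool
        queue        chan func() // stand-in for the transmogrifier's query queue
    }

    // enqueueTick schedules at most one delayed tick: requests made while
    // one is already pending are absorbed by it rather than stacking up.
    func (t *ticker) enqueueTick(delay time.Duration, tick func()) {
        if t.tickEnqueued {
            return
        }
        t.tickEnqueued = true
        time.AfterFunc(delay, func() {
            t.queue <- func() {
                t.tickEnqueued = false // cleared on the owning event loop
                tick()
            }
        })
    }

    func main() {
        t := &ticker{queue: make(chan func(), 8)}
        for i := 0; i < 3; i++ { // three requests while one tick is pending
            t.enqueueTick(10*time.Millisecond, func() { fmt.Println("tick") })
        }
        (<-t.queue)() // prints "tick" exactly once
        select {
        case f := <-t.queue:
            f() // not reached: the later requests were coalesced
        case <-time.After(50 * time.Millisecond):
            fmt.Println("no extra ticks")
        }
    }

Note also the other half of this patch: the backoff ceiling for topology txns now scales with cluster size (len(task.config.Hosts) * SubmissionMaxSubmitDelay), which is what lets contended topology transactions back off much further than ordinary submissions.
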
From 4963d12e25922aa015278d0ee04f9f277448a577 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 16 Nov 2016 11:42:24 +0000
Subject: [PATCH 76/78] Set version. Ref T52

--HG--
branch : T52
---
 consts.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/consts.go b/consts.go
index 40c6bfb..4d1472a 100644
--- a/consts.go
+++ b/consts.go
@@ -5,7 +5,7 @@ import (
 )
 
 const (
-	ServerVersion            = "dev"
+	ServerVersion            = "0.3"
 	MDBInitialSize           = 1048576
 	TwoToTheSixtyThree       = 9223372036854775808
 	SubmissionMinSubmitDelay = 2 * time.Millisecond

From 130dd88093215a09f39b3f2bd448409db526172c Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 16 Nov 2016 17:11:18 +0000
Subject: [PATCH 77/78] Eval contributors. Ref T52

--HG--
branch : T52
---
 CONTRIBUTORS | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index 1684c7d..0533e68 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -7,12 +7,9 @@ Ashley Hewson
 By default, code in this repository is:
 Copyright (C) 2015-2016 Matthew Sackman
 
-Some patches have different copyright assignments, and on each release
-of GoshawkDB server, this file will be adjusted to contain those
-details in full.
+The following files are:
 
-Where the copyright of a patch differs from the default, the patch and
-its copyright can be found by running the following command in this
-repository:
+Copyright (C) 2015-2016 Matthew Sackman
+Copyright (C) 2016 LShift Ltd
 
-$ hg log -k copyright -v
+client/clienttxnsubmitter.go
From 59cec25587cefa1c78930b556b4ccc3dfb0434b6 Mon Sep 17 00:00:00 2001
From: Matthew Sackman
Date: Wed, 16 Nov 2016 17:22:25 +0000
Subject: [PATCH 78/78] Correct date in NOTICE file.

--HG--
branch : dev
---
 NOTICE | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/NOTICE b/NOTICE
index 9b66c2f..e7477da 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,2 +1,2 @@
 GoshawkDB Server
-Copyright 2015 Matthew Sackman
+Copyright 2015-2016 Matthew Sackman