Skip to content

Commit

Permalink
Implement Sticky TTL (uber#2264)
Browse files Browse the repository at this point in the history
  • Loading branch information
longquanzheng authored Jul 25, 2019
1 parent 8ec4e02 commit af42a50
Show file tree
Hide file tree
Showing 17 changed files with 169 additions and 31 deletions.
3 changes: 3 additions & 0 deletions common/service/dynamicconfig/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ var keys = map[Key]string{
ArchiveRequestRPS: "history.archiveRequestRPS",
EmitShardDiffLog: "history.emitShardDiffLog",
HistoryThrottledLogRPS: "history.throttledLogRPS",
StickyTTL: "history.stickyTTL",

WorkerPersistenceMaxQPS: "worker.persistenceMaxQPS",
WorkerReplicatorMetaTaskConcurrency: "worker.replicatorMetaTaskConcurrency",
Expand Down Expand Up @@ -480,6 +481,8 @@ const (
EnableEventsV2
// HistoryThrottledLogRPS is the rate limit on number of log messages emitted per second for throttled logger
HistoryThrottledLogRPS
// StickyTTL is to expire a sticky tasklist if no update more than this duration
StickyTTL

// key for worker

Expand Down
15 changes: 15 additions & 0 deletions service/history/MockWorkflowExecutionContext.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,21 @@ func (_m *mockWorkflowExecutionContext) clear() {
_m.Called()
}

func (_m *mockWorkflowExecutionContext) getDomainName() string {
ret := _m.Called()

var r0 string
if rf, ok := ret.Get(0).(func() string); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(string)
}
}

return r0
}

func (_m *mockWorkflowExecutionContext) getDomainID() string {
ret := _m.Called()

Expand Down
1 change: 1 addition & 0 deletions service/history/conflictResolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ func (r *conflictResolverImpl) reset(
// if can see replication task, meaning that domain is
// global domain with > 1 target clusters
cache.ReplicationPolicyMultiCluster,
r.context.getDomainName(),
)

resetMutableStateBuilder.executionInfo.EventStoreVersion = eventStoreVersion
Expand Down
2 changes: 1 addition & 1 deletion service/history/historyBuilder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func (s *historyBuilderSuite) SetupTest() {
}
s.mockEventsCache = &MockEventsCache{}
s.msBuilder = newMutableStateBuilder(s.mockShard, s.mockEventsCache,
s.logger)
s.logger, "")
s.builder = newHistoryBuilder(s.msBuilder, s.logger)
}

Expand Down
34 changes: 19 additions & 15 deletions service/history/historyEngine.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,12 +306,14 @@ func (e *historyEngineImpl) createMutableState(
e.logger,
domainEntry.GetFailoverVersion(),
domainEntry.GetReplicationPolicy(),
domainEntry.GetInfo().Name,
)
} else {
msBuilder = newMutableStateBuilder(
e.shard,
e.shard.GetEventsCache(),
e.logger,
domainEntry.GetInfo().Name,
)
}

Expand Down Expand Up @@ -592,22 +594,24 @@ func (e *historyEngineImpl) getMutableState(
executionInfo := msBuilder.GetExecutionInfo()
execution.RunId = context.getExecution().RunId
retResp = &h.GetMutableStateResponse{
Execution: &execution,
WorkflowType: &workflow.WorkflowType{Name: common.StringPtr(executionInfo.WorkflowTypeName)},
LastFirstEventId: common.Int64Ptr(msBuilder.GetLastFirstEventID()),
NextEventId: common.Int64Ptr(msBuilder.GetNextEventID()),
PreviousStartedEventId: common.Int64Ptr(msBuilder.GetPreviousStartedEventID()),
TaskList: &workflow.TaskList{Name: common.StringPtr(executionInfo.TaskList)},
StickyTaskList: &workflow.TaskList{Name: common.StringPtr(executionInfo.StickyTaskList)},
ClientLibraryVersion: common.StringPtr(executionInfo.ClientLibraryVersion),
ClientFeatureVersion: common.StringPtr(executionInfo.ClientFeatureVersion),
ClientImpl: common.StringPtr(executionInfo.ClientImpl),
IsWorkflowRunning: common.BoolPtr(msBuilder.IsWorkflowExecutionRunning()),
StickyTaskListScheduleToStartTimeout: common.Int32Ptr(executionInfo.StickyScheduleToStartTimeout),
EventStoreVersion: common.Int32Ptr(msBuilder.GetEventStoreVersion()),
BranchToken: msBuilder.GetCurrentBranch(),
Execution: &execution,
WorkflowType: &workflow.WorkflowType{Name: common.StringPtr(executionInfo.WorkflowTypeName)},
LastFirstEventId: common.Int64Ptr(msBuilder.GetLastFirstEventID()),
NextEventId: common.Int64Ptr(msBuilder.GetNextEventID()),
PreviousStartedEventId: common.Int64Ptr(msBuilder.GetPreviousStartedEventID()),
TaskList: &workflow.TaskList{Name: common.StringPtr(executionInfo.TaskList)},
ClientLibraryVersion: common.StringPtr(executionInfo.ClientLibraryVersion),
ClientFeatureVersion: common.StringPtr(executionInfo.ClientFeatureVersion),
ClientImpl: common.StringPtr(executionInfo.ClientImpl),
IsWorkflowRunning: common.BoolPtr(msBuilder.IsWorkflowExecutionRunning()),
EventStoreVersion: common.Int32Ptr(msBuilder.GetEventStoreVersion()),
BranchToken: msBuilder.GetCurrentBranch(),
}

if msBuilder.IsStickyTaskListEnabled() {
retResp.StickyTaskList = &workflow.TaskList{Name: common.StringPtr(executionInfo.StickyTaskList)}
retResp.StickyTaskListScheduleToStartTimeout = common.Int32Ptr(executionInfo.StickyScheduleToStartTimeout)
}

replicationState := msBuilder.GetReplicationState()
if replicationState != nil {
retResp.ReplicationInfo = map[string]*workflow.ReplicationInfo{}
Expand Down
84 changes: 84 additions & 0 deletions service/history/historyEngine2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"encoding/json"
"errors"
"testing"
"time"

"github.com/pborman/uuid"
"github.com/stretchr/testify/mock"
Expand Down Expand Up @@ -195,6 +196,88 @@ func (s *engine2Suite) TearDownTest() {
s.mockTimerProcessor.AssertExpectations(s.T())
}

func (s *engine2Suite) TestRecordDecisionTaskStartedSuccessStickyExpired() {
domainID := validDomainID
we := workflow.WorkflowExecution{
WorkflowId: common.StringPtr("wId"),
RunId: common.StringPtr(validRunID),
}
tl := "testTaskList"
stickyTl := "stickyTaskList"
identity := "testIdentity"

msBuilder := newMutableStateBuilderWithEventV2(s.historyEngine.shard, s.mockEventsCache,
loggerimpl.NewDevelopmentForTest(s.Suite), we.GetRunId())
executionInfo := msBuilder.GetExecutionInfo()
executionInfo.StickyTaskList = stickyTl

addWorkflowExecutionStartedEvent(msBuilder, we, "wType", tl, []byte("input"), 100, 200, identity)
di := addDecisionTaskScheduledEvent(msBuilder)

ms := createMutableState(msBuilder)

gwmsResponse := &p.GetWorkflowExecutionResponse{State: ms}

s.mockExecutionMgr.On("GetWorkflowExecution", mock.Anything).Return(gwmsResponse, nil).Once()
s.mockHistoryV2Mgr.On("AppendHistoryNodes", mock.Anything).Return(&p.AppendHistoryNodesResponse{Size: 0}, nil).Once()
s.mockExecutionMgr.On("UpdateWorkflowExecution", mock.Anything).Return(&p.UpdateWorkflowExecutionResponse{
MutableStateUpdateSessionStats: &p.MutableStateUpdateSessionStats{},
}, nil).Once()
s.mockMetadataMgr.On("GetDomain", mock.Anything).Return(
&p.GetDomainResponse{
Info: &p.DomainInfo{ID: domainID},
Config: &p.DomainConfig{Retention: 1},
ReplicationConfig: &p.DomainReplicationConfig{
ActiveClusterName: cluster.TestCurrentClusterName,
Clusters: []*p.ClusterReplicationConfig{
&p.ClusterReplicationConfig{ClusterName: cluster.TestCurrentClusterName},
},
},
TableVersion: p.DomainTableVersionV1,
},
nil,
)

request := h.RecordDecisionTaskStartedRequest{
DomainUUID: common.StringPtr(domainID),
WorkflowExecution: &we,
ScheduleId: common.Int64Ptr(2),
TaskId: common.Int64Ptr(100),
RequestId: common.StringPtr("reqId"),
PollRequest: &workflow.PollForDecisionTaskRequest{
TaskList: &workflow.TaskList{
Name: common.StringPtr(stickyTl),
},
Identity: common.StringPtr(identity),
},
}

expectedResponse := h.RecordDecisionTaskStartedResponse{}
expectedResponse.WorkflowType = msBuilder.GetWorkflowType()
executionInfo = msBuilder.GetExecutionInfo()
if executionInfo.LastProcessedEvent != common.EmptyEventID {
expectedResponse.PreviousStartedEventId = common.Int64Ptr(executionInfo.LastProcessedEvent)
}
expectedResponse.ScheduledEventId = common.Int64Ptr(di.ScheduleID)
expectedResponse.StartedEventId = common.Int64Ptr(di.ScheduleID + 1)
expectedResponse.StickyExecutionEnabled = common.BoolPtr(false)
expectedResponse.NextEventId = common.Int64Ptr(msBuilder.GetNextEventID() + 1)
expectedResponse.Attempt = common.Int64Ptr(di.Attempt)
expectedResponse.WorkflowExecutionTaskList = common.TaskListPtr(workflow.TaskList{
Name: &executionInfo.TaskList,
Kind: common.TaskListKindPtr(workflow.TaskListKindNormal),
})
expectedResponse.EventStoreVersion = common.Int32Ptr(p.EventStoreVersionV2)
expectedResponse.BranchToken = msBuilder.GetCurrentBranch()

response, err := s.historyEngine.RecordDecisionTaskStarted(context.Background(), &request)
s.Nil(err)
s.NotNil(response)
expectedResponse.StartedTimestamp = response.StartedTimestamp
expectedResponse.ScheduledTimestamp = common.Int64Ptr(0)
s.Equal(&expectedResponse, response)
}

func (s *engine2Suite) TestRecordDecisionTaskStartedSuccessStickyEnabled() {
domainID := validDomainID
we := workflow.WorkflowExecution{
Expand All @@ -208,6 +291,7 @@ func (s *engine2Suite) TestRecordDecisionTaskStartedSuccessStickyEnabled() {
msBuilder := newMutableStateBuilderWithEventV2(s.historyEngine.shard, s.mockEventsCache,
loggerimpl.NewDevelopmentForTest(s.Suite), we.GetRunId())
executionInfo := msBuilder.GetExecutionInfo()
executionInfo.LastUpdatedTimestamp = time.Now()
executionInfo.StickyTaskList = stickyTl

addWorkflowExecutionStartedEvent(msBuilder, we, "wType", tl, []byte("input"), 100, 200, identity)
Expand Down
2 changes: 2 additions & 0 deletions service/history/historyEngine3_eventsv2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ package history
import (
"context"
"testing"
"time"

"github.com/pborman/uuid"
"github.com/stretchr/testify/mock"
Expand Down Expand Up @@ -209,6 +210,7 @@ func (s *engine3Suite) TestRecordDecisionTaskStartedSuccessStickyEnabled() {
msBuilder := newMutableStateBuilderWithEventV2(s.historyEngine.shard, s.mockEventsCache,
loggerimpl.NewDevelopmentForTest(s.Suite), we.GetRunId())
executionInfo := msBuilder.GetExecutionInfo()
executionInfo.LastUpdatedTimestamp = time.Now()
executionInfo.StickyTaskList = stickyTl

addWorkflowExecutionStartedEvent(msBuilder, we, "wType", tl, []byte("input"), 100, 200, identity)
Expand Down
4 changes: 2 additions & 2 deletions service/history/historyEngine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5126,7 +5126,7 @@ func addCompleteWorkflowEvent(builder mutableState, decisionCompletedEventID int
func newMutableStateBuilderWithEventV2(shard ShardContext, eventsCache eventsCache,
logger log.Logger, runID string) *mutableStateBuilder {

msBuilder := newMutableStateBuilder(shard, eventsCache, logger)
msBuilder := newMutableStateBuilder(shard, eventsCache, logger, "")
_ = msBuilder.SetHistoryTree(runID)

return msBuilder
Expand All @@ -5135,7 +5135,7 @@ func newMutableStateBuilderWithEventV2(shard ShardContext, eventsCache eventsCac
func newMutableStateBuilderWithReplicationStateWithEventV2(shard ShardContext, eventsCache eventsCache,
logger log.Logger, version int64, runID string) *mutableStateBuilder {

msBuilder := newMutableStateBuilderWithReplicationState(shard, eventsCache, logger, version, cache.ReplicationPolicyOneCluster)
msBuilder := newMutableStateBuilderWithReplicationState(shard, eventsCache, logger, version, cache.ReplicationPolicyOneCluster, "")
_ = msBuilder.SetHistoryTree(runID)

return msBuilder
Expand Down
7 changes: 4 additions & 3 deletions service/history/historyReplicator.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ var (
type (
conflictResolverProvider func(context workflowExecutionContext, logger log.Logger) conflictResolver
stateBuilderProvider func(msBuilder mutableState, logger log.Logger) stateBuilder
mutableStateProvider func(version int64, logger log.Logger) mutableState
mutableStateProvider func(version int64, logger log.Logger, domainName string) mutableState

historyReplicator struct {
shard ShardContext
Expand Down Expand Up @@ -146,7 +146,7 @@ func newHistoryReplicator(
getNewStateBuilder: func(msBuilder mutableState, logger log.Logger) stateBuilder {
return newStateBuilder(shard, msBuilder, logger)
},
getNewMutableState: func(version int64, logger log.Logger) mutableState {
getNewMutableState: func(version int64, logger log.Logger, domainName string) mutableState {
return newMutableStateBuilderWithReplicationState(
shard,
shard.GetEventsCache(),
Expand All @@ -155,6 +155,7 @@ func newHistoryReplicator(
// if can see replication task, meaning that domain is
// global domain with > 1 target clusters
cache.ReplicationPolicyMultiCluster,
domainName,
)
},
}
Expand Down Expand Up @@ -435,7 +436,7 @@ func (r *historyReplicator) ApplyStartEvent(
logger log.Logger,
) error {

msBuilder := r.getNewMutableState(request.GetVersion(), logger)
msBuilder := r.getNewMutableState(request.GetVersion(), logger, context.getDomainName())
err := r.ApplyReplicationTask(ctx, context, msBuilder, request, logger)
return err
}
Expand Down
18 changes: 15 additions & 3 deletions service/history/mutableStateBuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ type (
config *Config
timeSource clock.TimeSource
logger log.Logger
domainName string
}
)

Expand All @@ -116,6 +117,7 @@ func newMutableStateBuilder(
shard ShardContext,
eventsCache eventsCache,
logger log.Logger,
domainName string,
) *mutableStateBuilder {
s := &mutableStateBuilder{
updateActivityInfos: make(map[*persistence.ActivityInfo]struct{}),
Expand Down Expand Up @@ -153,6 +155,7 @@ func newMutableStateBuilder(
config: shard.GetConfig(),
timeSource: shard.GetTimeSource(),
logger: logger,
domainName: domainName,
}
s.executionInfo = &persistence.WorkflowExecutionInfo{
DecisionVersion: common.EmptyVersion,
Expand All @@ -177,8 +180,9 @@ func newMutableStateBuilderWithReplicationState(
logger log.Logger,
version int64,
replicationPolicy cache.ReplicationPolicy,
domainName string,
) *mutableStateBuilder {
s := newMutableStateBuilder(shard, eventsCache, logger)
s := newMutableStateBuilder(shard, eventsCache, logger, domainName)
s.replicationState = &persistence.ReplicationState{
StartVersion: version,
CurrentVersion: version,
Expand Down Expand Up @@ -586,7 +590,14 @@ func (e *mutableStateBuilder) assignTaskIDToEvents() error {
}

func (e *mutableStateBuilder) IsStickyTaskListEnabled() bool {
return len(e.executionInfo.StickyTaskList) > 0
if e.executionInfo.StickyTaskList == "" {
return false
}
maxDu := e.config.StickyTTL(e.domainName)
if e.timeSource.Now().After(e.executionInfo.LastUpdatedTimestamp.Add(maxDu)) {
return false
}
return true
}

func (e *mutableStateBuilder) CreateNewHistoryEvent(eventType workflow.EventType) *workflow.HistoryEvent {
Expand Down Expand Up @@ -2886,9 +2897,10 @@ func (e *mutableStateBuilder) AddContinueAsNewEvent(
e.logger,
e.GetCurrentVersion(),
e.replicationPolicy,
e.domainName,
)
} else {
newStateBuilder = newMutableStateBuilder(e.shard, e.eventsCache, e.logger)
newStateBuilder = newMutableStateBuilder(e.shard, e.eventsCache, e.logger, e.domainName)
}
domainID := domainEntry.GetInfo().ID
startedEvent, err := newStateBuilder.addWorkflowExecutionStartedEventForContinueAsNew(domainEntry, parentInfo, newExecution, e, attributes, firstRunID)
Expand Down
2 changes: 1 addition & 1 deletion service/history/mutableStateBuilder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func (s *mutableStateSuite) SetupTest() {
}
s.mockEventsCache = &MockEventsCache{}
s.msBuilder = newMutableStateBuilder(s.mockShard, s.mockEventsCache,
s.logger)
s.logger, "")
}

func (s *mutableStateSuite) TearDownTest() {
Expand Down
4 changes: 4 additions & 0 deletions service/history/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ type Config struct {
SearchAttributesNumberOfKeysLimit dynamicconfig.IntPropertyFnWithDomainFilter
SearchAttributesSizeOfValueLimit dynamicconfig.IntPropertyFnWithDomainFilter
SearchAttributesTotalSizeLimit dynamicconfig.IntPropertyFnWithDomainFilter

// StickyTTL is to expire a sticky tasklist if no update more than this duration
StickyTTL dynamicconfig.DurationPropertyFnWithDomainFilter
}

const (
Expand Down Expand Up @@ -254,6 +257,7 @@ func NewConfig(dc *dynamicconfig.Collection, numberOfShards int, enableVisibilit
SearchAttributesNumberOfKeysLimit: dc.GetIntPropertyFilteredByDomain(dynamicconfig.SearchAttributesNumberOfKeysLimit, 100),
SearchAttributesSizeOfValueLimit: dc.GetIntPropertyFilteredByDomain(dynamicconfig.SearchAttributesSizeOfValueLimit, 2*1024),
SearchAttributesTotalSizeLimit: dc.GetIntPropertyFilteredByDomain(dynamicconfig.SearchAttributesTotalSizeLimit, 40*1024),
StickyTTL: dc.GetDurationPropertyFilteredByDomain(dynamicconfig.StickyTTL, time.Hour*24*365),
}

return cfg
Expand Down
1 change: 1 addition & 0 deletions service/history/stateBuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ func (b *stateBuilderImpl) applyEvents(domainID, requestID string, execution sha
b.logger,
newRunStartedEvent.GetVersion(),
domainEntry.GetReplicationPolicy(),
domainEntry.GetInfo().Name,
)
newRunStateBuilder := newStateBuilder(b.shard, newRunMutableStateBuilder, b.logger)

Expand Down
2 changes: 2 additions & 0 deletions service/history/stateBuilder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ func (s *stateBuilderSuite) TestApplyEvents_EventTypeWorkflowExecutionContinuedA
s.logger,
newRunStartedEvent.GetVersion(),
cache.ReplicationPolicyMultiCluster,
domainName,
)
err = expectedNewRunStateBuilder.ReplicateWorkflowExecutionStartedEvent(
cache.NewLocalDomainCacheEntryForTest(&persistence.DomainInfo{ID: domainID}, &persistence.DomainConfig{}, "", nil),
Expand Down Expand Up @@ -911,6 +912,7 @@ func (s *stateBuilderSuite) TestApplyEvents_EventTypeWorkflowExecutionContinuedA
s.logger,
newRunStartedEvent.GetVersion(),
cache.ReplicationPolicyMultiCluster,
domainName,
)
err = expectedNewRunStateBuilder.ReplicateWorkflowExecutionStartedEvent(
cache.NewLocalDomainCacheEntryForTest(&persistence.DomainInfo{ID: domainID}, &persistence.DomainConfig{}, "", nil),
Expand Down
Loading

0 comments on commit af42a50

Please sign in to comment.