Skip to content

Commit

Permalink
Add dynamic configs and metrics to control/monitor per id lengths (ca…
Browse files Browse the repository at this point in the history
…dence-workflow#4173)

Co-authored-by: Yichao Yang <ycyang@uber.com>
  • Loading branch information
andrewjdawson2016 and yycptt authored May 21, 2021
1 parent f1b6e63 commit 44911ff
Show file tree
Hide file tree
Showing 13 changed files with 554 additions and 130 deletions.
7 changes: 7 additions & 0 deletions common/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,13 @@ const (
DefaultTransactionSizeLimit = 14 * 1024 * 1024
)

// Default length thresholds applied to the various ID types.
const (
	// DefaultIDLengthWarnLimit is the default length past which an ID is
	// considered long enough to warn about.
	DefaultIDLengthWarnLimit = 128
	// DefaultIDLengthErrorLimit is the default upper bound on the length of
	// an ID; it is the maximum length allowed.
	DefaultIDLengthErrorLimit = 1000
)

const (
// ArchivalEnabled is the status for enabling archival
ArchivalEnabled = "enabled"
Expand Down
107 changes: 86 additions & 21 deletions common/dynamicconfig/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,24 +224,78 @@ const (
// Default value: 50*1024
// Allowed filters: DomainName
HistoryCountLimitWarn
// MaxIDLengthLimit is the length limit for various IDs, including: Domain, TaskList, WorkflowID, ActivityID, TimerID, WorkflowType, ActivityType, SignalName, MarkerName, ErrorReason/FailureReason/CancelCause, Identity, RequestID
// KeyName: limit.maxIDLength
// DomainNameMaxLength is the length limit for domain name
// KeyName: limit.domainNameLength
// Value type: Int
// Default value: 1000
// Allowed filters: N/A
MaxIDLengthLimit
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
DomainNameMaxLength
// IdentityMaxLength is the length limit for identity
// KeyName: limit.identityLength
// Value type: Int
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
IdentityMaxLength
// WorkflowIDMaxLength is the length limit for workflowID
// KeyName: limit.workflowIDLength
// Value type: Int
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
WorkflowIDMaxLength
// SignalNameMaxLength is the length limit for signal name
// KeyName: limit.signalNameLength
// Value type: Int
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
SignalNameMaxLength
// WorkflowTypeMaxLength is the length limit for workflow type
// KeyName: limit.workflowTypeLength
// Value type: Int
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
WorkflowTypeMaxLength
// RequestIDMaxLength is the length limit for requestID
// KeyName: limit.requestIDLength
// Value type: Int
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
RequestIDMaxLength
// TaskListNameMaxLength is the length limit for task list name
// KeyName: limit.taskListNameLength
// Value type: Int
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
TaskListNameMaxLength
// ActivityIDMaxLength is the length limit for activityID
// KeyName: limit.activityIDLength
// Value type: Int
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
ActivityIDMaxLength
// ActivityTypeMaxLength is the length limit for activity type
// KeyName: limit.activityTypeLength
// Value type: Int
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
ActivityTypeMaxLength
// MarkerNameMaxLength is the length limit for marker name
// KeyName: limit.markerNameLength
// Value type: Int
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
MarkerNameMaxLength
// TimerIDMaxLength is the length limit for timerID
// KeyName: limit.timerIDLength
// Value type: Int
// Default value: common.DefaultIDLengthErrorLimit (1000)
// Allowed filters: DomainName
TimerIDMaxLength
// MaxIDLengthWarnLimit is the warn length limit for various IDs, including: Domain, TaskList, WorkflowID, ActivityID, TimerID, WorkflowType, ActivityType, SignalName, MarkerName, ErrorReason/FailureReason/CancelCause, Identity, RequestID
// KeyName: limit.maxIDWarnLength
// Value type: Int
// Default value: 150
// Default value: common.DefaultIDLengthWarnLimit (128)
// Allowed filters: N/A
MaxIDLengthWarnLimit
// MaxRawTaskListNameLimit is max length of user provided task list name (non-sticky and non-scalable)
// KeyName: limit.maxRawTaskListNameLength
// Value type: Int
// Default value: 1000
// Allowed filters: DomainName
MaxRawTaskListNameLimit
// AdminErrorInjectionRate is the rate for injecting random error in admin client
// KeyName: admin.errorInjectionRate
// Value type: Float64
Expand Down Expand Up @@ -1811,15 +1865,26 @@ var keys = map[Key]string{
EnableGRPCOutbound: "system.enableGRPCOutbound",

// size limit
BlobSizeLimitError: "limit.blobSize.error",
BlobSizeLimitWarn: "limit.blobSize.warn",
HistorySizeLimitError: "limit.historySize.error",
HistorySizeLimitWarn: "limit.historySize.warn",
HistoryCountLimitError: "limit.historyCount.error",
HistoryCountLimitWarn: "limit.historyCount.warn",
MaxIDLengthLimit: "limit.maxIDLength",
MaxIDLengthWarnLimit: "limit.maxIDWarnLength",
MaxRawTaskListNameLimit: "limit.maxRawTaskListNameLength",
BlobSizeLimitError: "limit.blobSize.error",
BlobSizeLimitWarn: "limit.blobSize.warn",
HistorySizeLimitError: "limit.historySize.error",
HistorySizeLimitWarn: "limit.historySize.warn",
HistoryCountLimitError: "limit.historyCount.error",
HistoryCountLimitWarn: "limit.historyCount.warn",

// id length limits
MaxIDLengthWarnLimit: "limit.maxIDWarnLength",
DomainNameMaxLength: "limit.domainNameLength",
IdentityMaxLength: "limit.identityLength",
WorkflowIDMaxLength: "limit.workflowIDLength",
SignalNameMaxLength: "limit.signalNameLength",
WorkflowTypeMaxLength: "limit.workflowTypeLength",
RequestIDMaxLength: "limit.requestIDLength",
TaskListNameMaxLength: "limit.taskListNameLength",
ActivityIDMaxLength: "limit.activityIDLength",
ActivityTypeMaxLength: "limit.activityTypeLength",
MarkerNameMaxLength: "limit.markerNameLength",
TimerIDMaxLength: "limit.timerIDLength",

// admin settings
AdminErrorInjectionRate: "admin.errorInjectionRate",
Expand Down
24 changes: 22 additions & 2 deletions common/metrics/defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1597,7 +1597,17 @@ const (
CadenceErrUnauthorizedCounter
CadenceErrAuthorizeFailedCounter
CadenceErrRemoteSyncMatchFailedCounter
CadenceErrIDLengthExceededWarnLimit
CadenceErrDomainNameExceededWarnLimit
CadenceErrIdentityExceededWarnLimit
CadenceErrWorkflowIDExceededWarnLimit
CadenceErrSignalNameExceededWarnLimit
CadenceErrWorkflowTypeExceededWarnLimit
CadenceErrRequestIDExceededWarnLimit
CadenceErrTaskListNameExceededWarnLimit
CadenceErrActivityIDExceededWarnLimit
CadenceErrActivityTypeExceededWarnLimit
CadenceErrMarkerNameExceededWarnLimit
CadenceErrTimerIDExceededWarnLimit
PersistenceRequests
PersistenceFailures
PersistenceLatency
Expand Down Expand Up @@ -2071,7 +2081,17 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{
CadenceErrUnauthorizedCounter: {metricName: "cadence_errors_unauthorized", metricType: Counter},
CadenceErrAuthorizeFailedCounter: {metricName: "cadence_errors_authorize_failed", metricType: Counter},
CadenceErrRemoteSyncMatchFailedCounter: {metricName: "cadence_errors_remote_syncmatch_failed", metricType: Counter},
CadenceErrIDLengthExceededWarnLimit: {metricName: "cadence_errors_id_length_exceeded_warn_limit", metricType: Counter},
CadenceErrDomainNameExceededWarnLimit: {metricName: "cadence_errors_domain_name_exceeded_warn_limit", metricType: Counter},
CadenceErrIdentityExceededWarnLimit: {metricName: "cadence_errors_identity_exceeded_warn_limit", metricType: Counter},
CadenceErrWorkflowIDExceededWarnLimit: {metricName: "cadence_errors_workflow_id_exceeded_warn_limit", metricType: Counter},
CadenceErrSignalNameExceededWarnLimit: {metricName: "cadence_errors_signal_name_exceeded_warn_limit", metricType: Counter},
CadenceErrWorkflowTypeExceededWarnLimit: {metricName: "cadence_errors_workflow_type_exceeded_warn_limit", metricType: Counter},
CadenceErrRequestIDExceededWarnLimit: {metricName: "cadence_errors_request_id_exceeded_warn_limit", metricType: Counter},
CadenceErrTaskListNameExceededWarnLimit: {metricName: "cadence_errors_task_list_name_exceeded_warn_limit", metricType: Counter},
CadenceErrActivityIDExceededWarnLimit: {metricName: "cadence_errors_activity_id_exceeded_warn_limit", metricType: Counter},
CadenceErrActivityTypeExceededWarnLimit: {metricName: "cadence_errors_activity_type_exceeded_warn_limit", metricType: Counter},
CadenceErrMarkerNameExceededWarnLimit: {metricName: "cadence_errors_marker_name_exceeded_warn_limit", metricType: Counter},
CadenceErrTimerIDExceededWarnLimit: {metricName: "cadence_errors_timer_id_exceeded_warn_limit", metricType: Counter},
PersistenceRequests: {metricName: "persistence_requests", metricType: Counter},
PersistenceFailures: {metricName: "persistence_errors", metricType: Counter},
PersistenceLatency: {metricName: "persistence_latency", metricType: Timer},
Expand Down
15 changes: 15 additions & 0 deletions common/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,21 @@ func CreateReplicationServiceBusyRetryPolicy() backoff.RetryPolicy {
return policy
}

// ValidIDLength reports whether id is acceptable by length, i.e. whether
// len(id) does not exceed errorLimit.
//
// Independently of the result, metricsCounter is incremented on scope whenever
// len(id) is greater than warnLimit, so an id that is rejected for exceeding
// errorLimit is also counted if it exceeds warnLimit.
//
// The length is the value of len(id), which for a Go string is its size in
// bytes rather than its number of characters.
func ValidIDLength(
	id string,
	scope metrics.Scope,
	warnLimit int,
	errorLimit int,
	metricsCounter int,
) bool {
	idLen := len(id)
	// Emit the warning metric first; it does not influence the verdict.
	if idLen > warnLimit {
		scope.IncCounter(metricsCounter)
	}
	return idLen <= errorLimit
}

// IsServiceTransientError checks if the error is a transient error.
func IsServiceTransientError(err error) bool {
switch err.(type) {
Expand Down
24 changes: 18 additions & 6 deletions service/frontend/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,20 @@ type Config struct {
RPS dynamicconfig.IntPropertyFn
MaxDomainRPSPerInstance dynamicconfig.IntPropertyFnWithDomainFilter
GlobalDomainRPS dynamicconfig.IntPropertyFnWithDomainFilter
MaxIDLengthLimit dynamicconfig.IntPropertyFnWithDomainFilter
MaxIDLengthWarnLimit dynamicconfig.IntPropertyFn
MaxRawTaskListNameLimit dynamicconfig.IntPropertyFnWithDomainFilter
EnableClientVersionCheck dynamicconfig.BoolPropertyFn
DisallowQuery dynamicconfig.BoolPropertyFnWithDomainFilter
ShutdownDrainDuration dynamicconfig.DurationPropertyFn

// id length limits
MaxIDLengthWarnLimit dynamicconfig.IntPropertyFn
DomainNameMaxLength dynamicconfig.IntPropertyFnWithDomainFilter
IdentityMaxLength dynamicconfig.IntPropertyFnWithDomainFilter
WorkflowIDMaxLength dynamicconfig.IntPropertyFnWithDomainFilter
SignalNameMaxLength dynamicconfig.IntPropertyFnWithDomainFilter
WorkflowTypeMaxLength dynamicconfig.IntPropertyFnWithDomainFilter
RequestIDMaxLength dynamicconfig.IntPropertyFnWithDomainFilter
TaskListNameMaxLength dynamicconfig.IntPropertyFnWithDomainFilter

// Persistence settings
HistoryMgrNumConns dynamicconfig.IntPropertyFn

Expand Down Expand Up @@ -112,9 +119,14 @@ func NewConfig(dc *dynamicconfig.Collection, numHistoryShards int, enableReadFro
RPS: dc.GetIntProperty(dynamicconfig.FrontendRPS, 1200),
MaxDomainRPSPerInstance: dc.GetIntPropertyFilteredByDomain(dynamicconfig.FrontendMaxDomainRPSPerInstance, 1200),
GlobalDomainRPS: dc.GetIntPropertyFilteredByDomain(dynamicconfig.FrontendGlobalDomainRPS, 0),
MaxIDLengthLimit: dc.GetIntPropertyFilteredByDomain(dynamicconfig.MaxIDLengthLimit, 1000),
MaxIDLengthWarnLimit: dc.GetIntProperty(dynamicconfig.MaxIDLengthWarnLimit, 128),
MaxRawTaskListNameLimit: dc.GetIntPropertyFilteredByDomain(dynamicconfig.MaxRawTaskListNameLimit, 1000),
MaxIDLengthWarnLimit: dc.GetIntProperty(dynamicconfig.MaxIDLengthWarnLimit, common.DefaultIDLengthWarnLimit),
DomainNameMaxLength: dc.GetIntPropertyFilteredByDomain(dynamicconfig.DomainNameMaxLength, common.DefaultIDLengthErrorLimit),
IdentityMaxLength: dc.GetIntPropertyFilteredByDomain(dynamicconfig.IdentityMaxLength, common.DefaultIDLengthErrorLimit),
WorkflowIDMaxLength: dc.GetIntPropertyFilteredByDomain(dynamicconfig.WorkflowIDMaxLength, common.DefaultIDLengthErrorLimit),
SignalNameMaxLength: dc.GetIntPropertyFilteredByDomain(dynamicconfig.SignalNameMaxLength, common.DefaultIDLengthErrorLimit),
WorkflowTypeMaxLength: dc.GetIntPropertyFilteredByDomain(dynamicconfig.WorkflowTypeMaxLength, common.DefaultIDLengthErrorLimit),
RequestIDMaxLength: dc.GetIntPropertyFilteredByDomain(dynamicconfig.RequestIDMaxLength, common.DefaultIDLengthErrorLimit),
TaskListNameMaxLength: dc.GetIntPropertyFilteredByDomain(dynamicconfig.TaskListNameMaxLength, common.DefaultIDLengthErrorLimit),
HistoryMgrNumConns: dc.GetIntProperty(dynamicconfig.FrontendHistoryMgrNumConns, 10),
EnableAdminProtection: dc.GetBoolProperty(dynamicconfig.EnableAdminProtection, false),
AdminOperationToken: dc.GetStringProperty(dynamicconfig.AdminOperationToken, common.DefaultAdminOperationToken),
Expand Down
Loading

0 comments on commit 44911ff

Please sign in to comment.