Skip to content

Commit

Permalink
CDNC-3181 Cleanup the unused watchdog code (uber#5096)
Browse files Browse the repository at this point in the history
* Removed the Watchdog code and its service calls

* Removed watchdog occurrences and dependencies

---------

Co-authored-by: David Porter <david.porter@uber.com>
  • Loading branch information
agautam478 and davidporter-id-au authored Jul 12, 2023
1 parent 29a0c97 commit 04cca1e
Show file tree
Hide file tree
Showing 8 changed files with 1 addition and 433 deletions.
2 changes: 0 additions & 2 deletions common/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,6 @@ const (
const (
// DefaultESAnalyzerPause controls if we want to dynamically pause the analyzer
DefaultESAnalyzerPause = false
// DefaultCorruptWorkflowWatchdogPause controls if we want to dynamically pause the watchdog
DefaultCorruptWorkflowWatchdogPause = false
// DefaultESAnalyzerTimeWindow controls how many days to go back for ElasticSearch Analyzer
DefaultESAnalyzerTimeWindow = time.Hour * 24 * 30
// DefaultESAnalyzerMaxNumDomains controls how many domains to check
Expand Down
23 changes: 1 addition & 22 deletions common/dynamicconfig/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -1707,12 +1707,7 @@ const (
// Default value: false
// Allowed filters: N/A
EnableESAnalyzer
// EnableWatchDog decides whether to enable watchdog system worker
// KeyName: system.EnableWatchDog
// Value type: Bool
// Default value: false
// Allowed filters: N/A
EnableWatchDog

// EnableStickyQuery indicates if sticky query should be enabled per domain
// KeyName: system.enableStickyQuery
// Value type: Bool
Expand Down Expand Up @@ -1805,12 +1800,6 @@ const (
// Default value: false
ESAnalyzerEnableAvgDurationBasedChecks

// CorruptWorkflowWatchdogPause defines if we want to dynamically pause the watchdog workflow
// KeyName: worker.CorruptWorkflowWatchdogPause
// Value type: bool
// Default value: false
CorruptWorkflowWatchdogPause

// Lockdown defines if we want to allow failovers of domains to this cluster
// KeyName: system.Lockdown
// Value type: bool
Expand Down Expand Up @@ -3822,11 +3811,6 @@ var BoolKeys = map[BoolKey]DynamicBool{
Description: "EnableESAnalyzer decides whether to enable system workers for processing ElasticSearch Analyzer",
DefaultValue: false,
},
EnableWatchDog: DynamicBool{
KeyName: "system.EnableWatchDog",
Description: "EnableWatchDog decides whether to enable watchdog system worker",
DefaultValue: false,
},
EnableStickyQuery: DynamicBool{
KeyName: "system.enableStickyQuery",
Description: "EnableStickyQuery is indicates if sticky query should be enabled per domain",
Expand Down Expand Up @@ -3912,11 +3896,6 @@ var BoolKeys = map[BoolKey]DynamicBool{
Description: "ESAnalyzerEnableAvgDurationBasedChecks controls if we want to enable avg duration based task refreshes",
DefaultValue: false,
},
CorruptWorkflowWatchdogPause: DynamicBool{
KeyName: "worker.CorruptWorkflowWatchdogPause",
Description: "CorruptWorkflowWatchdogPause defines if we want to dynamically pause the watchdog workflow",
DefaultValue: false,
},
Lockdown: DynamicBool{
KeyName: "system.Lockdown",
Description: "Lockdown defines if we want to allow failovers of domains to this cluster",
Expand Down
9 changes: 0 additions & 9 deletions common/metrics/defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1255,8 +1255,6 @@ const (
CheckDataCorruptionWorkflowScope
// ESAnalyzerScope is scope used by ElasticSearch Analyzer (esanalyzer) workflow
ESAnalyzerScope
// WatchDogScope is scope used by WatchDog workflow
WatchDogScope

NumWorkerScopes
)
Expand Down Expand Up @@ -1826,7 +1824,6 @@ var ScopeDefs = map[ServiceIdx]map[int]scopeDefinition{
BatcherScope: {operation: "batcher"},
ParentClosePolicyProcessorScope: {operation: "ParentClosePolicyProcessor"},
ESAnalyzerScope: {operation: "ESAnalyzer"},
WatchDogScope: {operation: "WatchDog"},
},
}

Expand Down Expand Up @@ -2417,9 +2414,6 @@ const (
ESAnalyzerNumStuckWorkflowsRefreshed
ESAnalyzerNumStuckWorkflowsFailedToRefresh
ESAnalyzerNumLongRunningWorkflows
WatchDogNumDeletedCorruptWorkflows
WatchDogNumFailedToDeleteCorruptWorkflows
WatchDogNumCorruptWorkflowProcessed

NumWorkerMetrics
)
Expand Down Expand Up @@ -3017,9 +3011,6 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{
ESAnalyzerNumStuckWorkflowsRefreshed: {metricName: "es_analyzer_num_stuck_workflows_refreshed", metricType: Counter},
ESAnalyzerNumStuckWorkflowsFailedToRefresh: {metricName: "es_analyzer_num_stuck_workflows_failed_to_refresh", metricType: Counter},
ESAnalyzerNumLongRunningWorkflows: {metricName: "es_analyzer_num_long_running_workflows", metricType: Counter},
WatchDogNumDeletedCorruptWorkflows: {metricName: "watchdog_num_deleted_corrupt_workflows", metricType: Counter},
WatchDogNumFailedToDeleteCorruptWorkflows: {metricName: "watchdog_num_failed_to_delete_corrupt_workflows", metricType: Counter},
WatchDogNumCorruptWorkflowProcessed: {metricName: "watchdog_num_corrupt_workflows_processed", metricType: Counter},
},
}

Expand Down
7 changes: 0 additions & 7 deletions service/frontend/adminHandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -388,8 +388,6 @@ func (adh *adminHandlerImpl) MaintainCorruptWorkflow(
tag.WorkflowRunID(request.GetExecution().GetRunID()),
)

scope := adh.GetMetricsClient().Scope(metrics.WatchDogScope)
tagged := scope.Tagged(metrics.DomainTag(request.Domain))
resp := &types.AdminMaintainWorkflowResponse{
HistoryDeleted: false,
ExecutionsDeleted: false,
Expand All @@ -416,11 +414,6 @@ func (adh *adminHandlerImpl) MaintainCorruptWorkflow(
logger.Info(fmt.Sprintf("Will delete workflow because (%v) returned corrupted error (%#v)",
functionName, err))
resp, err = adh.DeleteWorkflow(ctx, request)
if err == nil {
tagged.AddCounter(metrics.WatchDogNumDeletedCorruptWorkflows, 1)
} else {
tagged.AddCounter(metrics.WatchDogNumFailedToDeleteCorruptWorkflows, 1)
}
return resp, nil
}
}
Expand Down
27 changes: 0 additions & 27 deletions service/worker/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ import (
"github.com/uber/cadence/service/worker/scanner/tasklist"
"github.com/uber/cadence/service/worker/scanner/timers"
"github.com/uber/cadence/service/worker/shadower"
"github.com/uber/cadence/service/worker/watchdog"
)

type (
Expand All @@ -73,7 +72,6 @@ type (
ScannerCfg *scanner.Config
BatcherCfg *batcher.Config
ESAnalyzerCfg *esanalyzer.Config
WatchdogConfig *watchdog.Config
failoverManagerCfg *failovermanager.Config
ThrottledLogRPS dynamicconfig.IntPropertyFn
PersistenceGlobalMaxQPS dynamicconfig.IntPropertyFn
Expand Down Expand Up @@ -176,14 +174,10 @@ func NewConfig(params *resource.Params) *Config {
ESAnalyzerWorkflowVersionDomains: dc.GetStringProperty(dynamicconfig.ESAnalyzerWorkflowVersionMetricDomains),
ESAnalyzerWorkflowTypeDomains: dc.GetStringProperty(dynamicconfig.ESAnalyzerWorkflowTypeMetricDomains),
},
WatchdogConfig: &watchdog.Config{
CorruptWorkflowWatchdogPause: dc.GetBoolProperty(dynamicconfig.CorruptWorkflowWatchdogPause),
},
EnableBatcher: dc.GetBoolProperty(dynamicconfig.EnableBatcher),
EnableParentClosePolicyWorker: dc.GetBoolProperty(dynamicconfig.EnableParentClosePolicyWorker),
NumParentClosePolicySystemWorkflows: dc.GetIntProperty(dynamicconfig.NumParentClosePolicySystemWorkflows),
EnableESAnalyzer: dc.GetBoolProperty(dynamicconfig.EnableESAnalyzer),
EnableWatchDog: dc.GetBoolProperty(dynamicconfig.EnableWatchDog),
EnableFailoverManager: dc.GetBoolProperty(dynamicconfig.EnableFailoverManager),
EnableWorkflowShadower: dc.GetBoolProperty(dynamicconfig.EnableWorkflowShadower),
ThrottledLogRPS: dc.GetIntProperty(dynamicconfig.WorkerThrottledLogRPS),
Expand Down Expand Up @@ -242,9 +236,6 @@ func (s *Service) Start() {
if s.config.EnableESAnalyzer() {
s.startESAnalyzer()
}
if s.config.EnableWatchDog() {
s.startWatchDog()
}
if s.config.EnableFailoverManager() {
s.startFailoverManager()
}
Expand Down Expand Up @@ -306,24 +297,6 @@ func (s *Service) startESAnalyzer() {
}
}

// startWatchDog builds the watchdog from this service's public and frontend
// clients, client bean, logger, metrics client and scope, resource, domain
// cache and watchdog config, and then starts it. If Start returns an error,
// the error is reported through the service logger's Fatal call.
func (s *Service) startWatchDog() {
	// Use a short local name so the watchdog package identifier is not shadowed.
	wd := watchdog.New(
		s.params.PublicClient,
		s.GetFrontendClient(),
		s.GetClientBean(),
		s.GetLogger(),
		s.GetMetricsClient(),
		s.params.MetricScope,
		s.Resource,
		s.GetDomainCache(),
		s.config.WatchdogConfig,
	)

	err := wd.Start()
	if err == nil {
		return
	}
	s.GetLogger().Fatal("error starting watchdog", tag.Error(err))
}

func (s *Service) startBatcher() {
params := &batcher.BootstrapParams{
Config: *s.config.BatcherCfg,
Expand Down
102 changes: 0 additions & 102 deletions service/worker/watchdog/client.go

This file was deleted.

Loading

0 comments on commit 04cca1e

Please sign in to comment.