Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

resource_control: allow configuration of the maximum retry time for the local bucket #8352

Merged
merged 5 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 57 additions & 15 deletions client/resource_group/controller/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@
defaultTargetPeriod = 5 * time.Second
// defaultMaxWaitDuration is the max duration to wait for the token before throwing error.
defaultMaxWaitDuration = 30 * time.Second
// defaultLTBTokenRPCMaxDelay is the upper bound of backoff delay for local token bucket RPC.
defaultLTBTokenRPCMaxDelay = 1 * time.Second
// defaultWaitRetryTimes is the number of times to retry when waiting for the token.
defaultWaitRetryTimes = 10
defaultWaitRetryTimes = 20
// defaultWaitRetryInterval is the interval to retry when waiting for the token.
defaultWaitRetryInterval = 50 * time.Millisecond
)
Expand All @@ -77,23 +79,35 @@

// Because the resource manager has not been deployed in microservice mode,
// do not enable this function.
defaultDegradedModeWaitDuration = 0
defaultDegradedModeWaitDuration = time.Duration(0)
defaultAvgBatchProportion = 0.7
)

// Config is the configuration of the resource manager controller which includes some option for client needed.
type Config struct {
// TokenRPCParams holds the retry parameters for the local token bucket RPC.
type TokenRPCParams struct {
// WaitRetryInterval is the interval between retries when waiting for the token.
WaitRetryInterval Duration `toml:"wait-retry-interval" json:"wait-retry-interval"`

// WaitRetryTimes is the number of times to retry when waiting for the token.
// Config.Adjust overwrites it with LTBTokenRPCMaxDelay / WaitRetryInterval
// whenever WaitRetryInterval * WaitRetryTimes differs from LTBTokenRPCMaxDelay.
WaitRetryTimes int `toml:"wait-retry-times" json:"wait-retry-times"`
}

// LocalBucketConfig is the configuration for the local bucket.
// It is not exported to the server side.
type LocalBucketConfig struct {
// TokenRPCParams is embedded with an explicit tag, so encoding/json treats it
// as a field named "token-rpc-params" instead of promoting its fields.
TokenRPCParams `toml:"token-rpc-params" json:"token-rpc-params"`
}

// BaseConfig is the configuration of the resource manager controller which includes some option for client needed.
// TODO: unify the configuration for client and server; the server side is in pkg/mcs/resourcemanager/server/config.go.
type BaseConfig struct {
// DegradedModeWaitDuration controls whether the resource control client enables degraded mode when the server is disconnected.
DegradedModeWaitDuration Duration `toml:"degraded-mode-wait-duration" json:"degraded-mode-wait-duration"`

// LTBMaxWaitDuration is the max wait time duration for local token bucket.
LTBMaxWaitDuration Duration `toml:"ltb-max-wait-duration" json:"ltb-max-wait-duration"`

// WaitRetryInterval is the interval to retry when waiting for the token.
WaitRetryInterval Duration `toml:"wait-retry-interval" json:"wait-retry-interval"`

// WaitRetryTimes is the times to retry when waiting for the token.
WaitRetryTimes int `toml:"wait-retry-times" json:"wait-retry-times"`
// LTBTokenRPCMaxDelay is the upper bound of backoff delay for local token bucket RPC.
LTBTokenRPCMaxDelay Duration `toml:"ltb-token-rpc-max-delay" json:"ltb-token-rpc-max-delay"`

// RequestUnit is the configuration determines the coefficients of the RRU and WRU cost.
// This configuration should be modified carefully.
Expand All @@ -103,15 +117,43 @@
EnableControllerTraceLog bool `toml:"enable-controller-trace-log" json:"enable-controller-trace-log,string"`
}

// Config is the configuration of the resource manager controller.
// It combines BaseConfig with the client-only LocalBucketConfig.
type Config struct {
// BaseConfig is embedded without a tag, so its fields are promoted onto Config.
BaseConfig
// LocalBucketConfig carries the local bucket settings (token RPC retry parameters).
LocalBucketConfig
}

// Adjust adjusts the configuration.
func (c *Config) Adjust() {
// Validate the configuration. TODO: move the validation into a separate function.
if c.BaseConfig.LTBMaxWaitDuration.Duration == 0 {
c.BaseConfig.LTBMaxWaitDuration = NewDuration(defaultMaxWaitDuration)

Check warning on line 130 in client/resource_group/controller/config.go

View check run for this annotation

Codecov / codecov/patch

client/resource_group/controller/config.go#L130

Added line #L130 was not covered by tests
}
if c.LocalBucketConfig.WaitRetryInterval.Duration == 0 {
c.LocalBucketConfig.WaitRetryInterval = NewDuration(defaultWaitRetryInterval)

Check warning on line 133 in client/resource_group/controller/config.go

View check run for this annotation

Codecov / codecov/patch

client/resource_group/controller/config.go#L133

Added line #L133 was not covered by tests
}
// Adjust the client settings: derive the retry times from the max RPC delay and the retry interval.
if int(c.BaseConfig.LTBTokenRPCMaxDelay.Duration) != int(c.LocalBucketConfig.WaitRetryInterval.Duration)*c.LocalBucketConfig.WaitRetryTimes {
c.LocalBucketConfig.WaitRetryTimes = int(c.BaseConfig.LTBTokenRPCMaxDelay.Duration / c.LocalBucketConfig.WaitRetryInterval.Duration)
nolouch marked this conversation as resolved.
Show resolved Hide resolved
}
}

// DefaultConfig returns the default resource manager controller configuration.
func DefaultConfig() *Config {
return &Config{
DegradedModeWaitDuration: NewDuration(defaultDegradedModeWaitDuration),
LTBMaxWaitDuration: NewDuration(defaultMaxWaitDuration),
WaitRetryInterval: NewDuration(defaultWaitRetryInterval),
WaitRetryTimes: defaultWaitRetryTimes,
RequestUnit: DefaultRequestUnitConfig(),
EnableControllerTraceLog: false,
BaseConfig: BaseConfig{
DegradedModeWaitDuration: NewDuration(defaultDegradedModeWaitDuration),
RequestUnit: DefaultRequestUnitConfig(),
EnableControllerTraceLog: false,
LTBMaxWaitDuration: NewDuration(defaultMaxWaitDuration),
LTBTokenRPCMaxDelay: NewDuration(defaultLTBTokenRPCMaxDelay),
},
LocalBucketConfig: LocalBucketConfig{
TokenRPCParams: TokenRPCParams{
WaitRetryInterval: NewDuration(defaultWaitRetryInterval),
WaitRetryTimes: defaultWaitRetryTimes,
},
},
}
}

Expand Down
9 changes: 5 additions & 4 deletions client/resource_group/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@
log.Info("load resource controller config", zap.Reflect("config", config), zap.Reflect("ru-config", controller.ruConfig))
controller.calculators = []ResourceCalculator{newKVCalculator(controller.ruConfig), newSQLCalculator(controller.ruConfig)}
controller.safeRuConfig.Store(controller.ruConfig)
enableControllerTraceLog.Store(config.EnableControllerTraceLog)
return controller, nil
}

Expand All @@ -201,12 +202,13 @@
if err != nil {
return nil, err
}
config := DefaultConfig()
defer config.Adjust()
kvs := resp.GetKvs()
if len(kvs) == 0 {
log.Warn("[resource group controller] server does not save config, load config failed")
return DefaultConfig(), nil
return config, nil

Check warning on line 210 in client/resource_group/controller/controller.go

View check run for this annotation

Codecov / codecov/patch

client/resource_group/controller/controller.go#L210

Added line #L210 was not covered by tests
}
config := DefaultConfig()
err = json.Unmarshal(kvs[0].GetValue(), config)
if err != nil {
return nil, err
Expand Down Expand Up @@ -309,7 +311,6 @@
watchRetryTimer.Reset(watchRetryInterval)
}
}

case <-emergencyTokenAcquisitionTicker.C:
c.executeOnAllGroups((*groupCostController).resetEmergencyTokenAcquisition)
/* channels */
Expand Down Expand Up @@ -391,6 +392,7 @@
if err := json.Unmarshal(item.Kv.Value, config); err != nil {
continue
}
config.Adjust()
c.ruConfig = GenerateRUConfig(config)

// Stay compatible with serverless
Expand All @@ -404,7 +406,6 @@
}
log.Info("load resource controller config after config changed", zap.Reflect("config", config), zap.Reflect("ruConfig", c.ruConfig))
}

case gc := <-c.tokenBucketUpdateChan:
go gc.handleTokenBucketUpdateEvent(c.loopCtx)
}
Expand Down
8 changes: 8 additions & 0 deletions pkg/mcs/resourcemanager/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ const (
defaultDegradedModeWaitDuration = time.Second * 0
// defaultMaxWaitDuration is the max duration to wait for the token before throwing error.
defaultMaxWaitDuration = 30 * time.Second
// defaultLTBTokenRPCMaxDelay is the upper bound of backoff delay for local token bucket RPC.
defaultLTBTokenRPCMaxDelay = 1 * time.Second
)

// Config is the configuration for the resource manager.
Expand Down Expand Up @@ -99,6 +101,9 @@ type ControllerConfig struct {
// LTBMaxWaitDuration is the max wait time duration for local token bucket.
LTBMaxWaitDuration typeutil.Duration `toml:"ltb-max-wait-duration" json:"ltb-max-wait-duration"`

// LTBTokenRPCMaxDelay is the upper bound of backoff delay for local token bucket RPC.
LTBTokenRPCMaxDelay typeutil.Duration `toml:"ltb-token-rpc-max-delay" json:"ltb-token-rpc-max-delay"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LTB is not a common abbreviation. Would it be more appropriate to use the full name, for example LocalTokenBucketMaxBackoffDelay? The rest LGTM.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is the same as the previous name, best to maintain consistency.


// RequestUnit is the configuration determines the coefficients of the RRU and WRU cost.
// This configuration should be modified carefully.
RequestUnit RequestUnitConfig `toml:"request-unit" json:"request-unit"`
Expand All @@ -119,6 +124,9 @@ func (rmc *ControllerConfig) Adjust(meta *configutil.ConfigMetaData) {
if !meta.IsDefined("ltb-max-wait-duration") {
configutil.AdjustDuration(&rmc.LTBMaxWaitDuration, defaultMaxWaitDuration)
}
if !meta.IsDefined("ltb-token-rpc-max-delay") {
configutil.AdjustDuration(&rmc.LTBTokenRPCMaxDelay, defaultLTBTokenRPCMaxDelay)
}
failpoint.Inject("enableDegradedMode", func() {
configutil.AdjustDuration(&rmc.DegradedModeWaitDuration, time.Second)
})
Expand Down
6 changes: 4 additions & 2 deletions pkg/mcs/resourcemanager/server/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ func TestControllerConfig(t *testing.T) {
cfgData := `
[controller]
ltb-max-wait-duration = "60s"
ltb-token-rpc-max-delay = "500ms"
degraded-mode-wait-duration = "2s"
[controller.request-unit]
read-base-cost = 1.0
Expand All @@ -42,8 +43,9 @@ read-cpu-ms-cost = 5.0
err = cfg.Adjust(&meta)
re.NoError(err)

re.Equal(time.Second*2, cfg.Controller.DegradedModeWaitDuration.Duration)
re.Equal(time.Second*60, cfg.Controller.LTBMaxWaitDuration.Duration)
re.Equal(2*time.Second, cfg.Controller.DegradedModeWaitDuration.Duration)
re.Equal(60*time.Second, cfg.Controller.LTBMaxWaitDuration.Duration)
re.Equal(500*time.Millisecond, cfg.Controller.LTBTokenRPCMaxDelay.Duration)
re.LessOrEqual(math.Abs(cfg.Controller.RequestUnit.CPUMsCost-5), 1e-7)
re.LessOrEqual(math.Abs(cfg.Controller.RequestUnit.WriteCostPerByte-4), 1e-7)
re.LessOrEqual(math.Abs(cfg.Controller.RequestUnit.WriteBaseCost-3), 1e-7)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1433,12 +1433,14 @@ func (suite *resourceManagerClientTestSuite) TestResourceGroupControllerConfigCh

configURL := "/resource-manager/api/v1/config/controller"
waitDuration := 10 * time.Second
tokenRPCMaxDelay := 2 * time.Second
readBaseCost := 1.5
defaultCfg := controller.DefaultConfig()
expectCfg := server.ControllerConfig{
// The enableDegradedMode failpoint is enabled and sets this to 1s.
DegradedModeWaitDuration: typeutil.NewDuration(time.Second),
LTBMaxWaitDuration: typeutil.Duration(defaultCfg.LTBMaxWaitDuration),
LTBTokenRPCMaxDelay: typeutil.Duration(defaultCfg.LTBTokenRPCMaxDelay),
RequestUnit: server.RequestUnitConfig(defaultCfg.RequestUnit),
EnableControllerTraceLog: defaultCfg.EnableControllerTraceLog,
}
Expand All @@ -1461,6 +1463,13 @@ func (suite *resourceManagerClientTestSuite) TestResourceGroupControllerConfigCh
value: waitDuration,
expected: func(ruConfig *controller.RUConfig) { ruConfig.DegradedModeWaitDuration = waitDuration },
},
{
configJSON: fmt.Sprintf(`{"ltb-token-rpc-max-delay": "%v"}`, tokenRPCMaxDelay),
value: waitDuration,
expected: func(ruConfig *controller.RUConfig) {
ruConfig.WaitRetryTimes = int(tokenRPCMaxDelay / ruConfig.WaitRetryInterval)
},
},
{
configJSON: fmt.Sprintf(`{"ltb-max-wait-duration": "%v"}`, waitDuration),
value: waitDuration,
Expand Down