Skip to content

admission: enable admission control #68535

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/generated/settings/settings-for-tenants.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Setting Type Default Description
admission.kv.enabled boolean false when true, work performed by the KV layer is subject to admission control
admission.sql_kv_response.enabled boolean false when true, work performed by the SQL layer when receiving a KV response is subject to admission control
admission.sql_sql_response.enabled boolean false when true, work performed by the SQL layer when receiving a DistSQL response is subject to admission control
admission.kv.enabled boolean true when true, work performed by the KV layer is subject to admission control
admission.sql_kv_response.enabled boolean true when true, work performed by the SQL layer when receiving a KV response is subject to admission control
admission.sql_sql_response.enabled boolean true when true, work performed by the SQL layer when receiving a DistSQL response is subject to admission control
bulkio.backup.file_size byte size 128 MiB target size for individual data files produced during BACKUP
bulkio.backup.read_timeout duration 5m0s amount of time after which a read attempt is considered timed out, which causes the backup to fail
bulkio.backup.read_with_priority_after duration 1m0s amount of time since the read-as-of time above which a BACKUP should use priority when retrying reads
Expand Down
6 changes: 3 additions & 3 deletions docs/generated/settings/settings.html
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
<table>
<thead><tr><th>Setting</th><th>Type</th><th>Default</th><th>Description</th></tr></thead>
<tbody>
<tr><td><code>admission.kv.enabled</code></td><td>boolean</td><td><code>false</code></td><td>when true, work performed by the KV layer is subject to admission control</td></tr>
<tr><td><code>admission.sql_kv_response.enabled</code></td><td>boolean</td><td><code>false</code></td><td>when true, work performed by the SQL layer when receiving a KV response is subject to admission control</td></tr>
<tr><td><code>admission.sql_sql_response.enabled</code></td><td>boolean</td><td><code>false</code></td><td>when true, work performed by the SQL layer when receiving a DistSQL response is subject to admission control</td></tr>
<tr><td><code>admission.kv.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, work performed by the KV layer is subject to admission control</td></tr>
<tr><td><code>admission.sql_kv_response.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, work performed by the SQL layer when receiving a KV response is subject to admission control</td></tr>
<tr><td><code>admission.sql_sql_response.enabled</code></td><td>boolean</td><td><code>true</code></td><td>when true, work performed by the SQL layer when receiving a DistSQL response is subject to admission control</td></tr>
<tr><td><code>bulkio.backup.file_size</code></td><td>byte size</td><td><code>128 MiB</code></td><td>target size for individual data files produced during BACKUP</td></tr>
<tr><td><code>bulkio.backup.read_timeout</code></td><td>duration</td><td><code>5m0s</code></td><td>amount of time after which a read attempt is considered timed out, which causes the backup to fail</td></tr>
<tr><td><code>bulkio.backup.read_with_priority_after</code></td><td>duration</td><td><code>1m0s</code></td><td>amount of time since the read-as-of time above which a BACKUP should use priority when retrying reads</td></tr>
Expand Down
47 changes: 21 additions & 26 deletions pkg/cmd/roachtest/tests/kv.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ func registerKV(r registry.Registry) {
blockSize int
splits int // 0 implies default, negative implies 0
// If true, load-based splitting will be disabled.
disableLoadSplits bool
encryption bool
sequential bool
admissionControlEnabled bool
concMultiplier int
duration time.Duration
tracing bool // `trace.debug.enable`
tags []string
owner registry.Owner // defaults to KV
disableLoadSplits bool
encryption bool
sequential bool
admissionControlDisabled bool
concMultiplier int
duration time.Duration
tracing bool // `trace.debug.enable`
tags []string
owner registry.Owner // defaults to KV
}
computeNumSplits := func(opts kvOptions) int {
// TODO(ajwerner): set this default to a more sane value or remove it and
Expand Down Expand Up @@ -89,9 +89,7 @@ func registerKV(r registry.Registry) {
t.Fatalf("failed to enable tracing: %v", err)
}
}
if opts.admissionControlEnabled {
EnableAdmissionControl(ctx, t, c)
}
SetAdmissionControl(ctx, t, c, !opts.admissionControlDisabled)

t.Status("running workload")
m := c.NewMonitor(ctx, c.Range(1, nodes))
Expand Down Expand Up @@ -150,13 +148,8 @@ func registerKV(r registry.Registry) {
// CPU overload test, to stress admission control.
{nodes: 1, cpus: 8, readPercent: 50, concMultiplier: 8192, duration: 20 * time.Minute},
// IO write overload test, to stress admission control.
//
// TODO(sumeerbhola): re-enable when admission control is enabled by default. This
// test crashes the cluster every now and then. See:
// https://github.com/cockroachdb/cockroach/issues/70247
//
// {nodes: 1, cpus: 8, readPercent: 0, concMultiplier: 4096, blockSize: 1 << 16, /* 64 KB */
// duration: 20 * time.Minute},
{nodes: 1, cpus: 8, readPercent: 0, concMultiplier: 4096, blockSize: 1 << 16, /* 64 KB */
duration: 20 * time.Minute},
{nodes: 1, cpus: 8, readPercent: 95},
{nodes: 1, cpus: 32, readPercent: 0},
{nodes: 1, cpus: 32, readPercent: 95},
Expand All @@ -167,8 +160,8 @@ func registerKV(r registry.Registry) {
{nodes: 3, cpus: 8, readPercent: 95, splits: -1 /* no splits */},
{nodes: 3, cpus: 32, readPercent: 0},
{nodes: 3, cpus: 32, readPercent: 95},
{nodes: 3, cpus: 32, readPercent: 0, admissionControlEnabled: true},
{nodes: 3, cpus: 32, readPercent: 95, admissionControlEnabled: true},
{nodes: 3, cpus: 32, readPercent: 0, admissionControlDisabled: true},
{nodes: 3, cpus: 32, readPercent: 95, admissionControlDisabled: true},
{nodes: 3, cpus: 32, readPercent: 0, splits: -1 /* no splits */},
{nodes: 3, cpus: 32, readPercent: 95, splits: -1 /* no splits */},

Expand All @@ -182,9 +175,9 @@ func registerKV(r registry.Registry) {
{nodes: 3, cpus: 32, readPercent: 0, blockSize: 1 << 16 /* 64 KB */},
{nodes: 3, cpus: 32, readPercent: 95, blockSize: 1 << 16 /* 64 KB */},
{nodes: 3, cpus: 32, readPercent: 0, blockSize: 1 << 16, /* 64 KB */
admissionControlEnabled: true},
admissionControlDisabled: true},
{nodes: 3, cpus: 32, readPercent: 95, blockSize: 1 << 16, /* 64 KB */
admissionControlEnabled: true},
admissionControlDisabled: true},

// Configs with large batch sizes.
{nodes: 3, cpus: 8, readPercent: 0, batchSize: 16},
Expand Down Expand Up @@ -241,8 +234,8 @@ func registerKV(r registry.Registry) {
if opts.sequential {
nameParts = append(nameParts, "seq")
}
if opts.admissionControlEnabled {
nameParts = append(nameParts, "admission")
if opts.admissionControlDisabled {
nameParts = append(nameParts, "no-admission")
}
if opts.concMultiplier != 0 { // support legacy test name which didn't include this multiplier
nameParts = append(nameParts, fmt.Sprintf("conc=%d", opts.concMultiplier))
Expand Down Expand Up @@ -873,7 +866,9 @@ func registerKVMultiStoreWithOverload(r registry.Registry) {
t.Fatalf("failed to configure zone for %s: %v", name, err)
}
}
EnableAdmissionControl(ctx, t, c)
// Defensive, since admission control is enabled by default. This test can
// fail if admission control is disabled.
SetAdmissionControl(ctx, t, c, true)
if _, err := db.ExecContext(ctx,
"SET CLUSTER SETTING kv.range_split.by_load_enabled = 'false'"); err != nil {
t.Fatalf("failed to disable load based splitting: %v", err)
Expand Down
30 changes: 13 additions & 17 deletions pkg/cmd/roachtest/tests/tpcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -693,9 +693,9 @@ func registerTPCC(r registry.Registry) {
EstimatedMax: gceOrAws(cloud, 2400, 3000),
})
registerTPCCBenchSpec(r, tpccBenchSpec{
Nodes: 3,
CPUs: 16,
AdmissionControlEnabled: true,
Nodes: 3,
CPUs: 16,
AdmissionControlDisabled: true,

LoadWarehouses: gceOrAws(cloud, 3000, 3500),
EstimatedMax: gceOrAws(cloud, 2400, 3000),
Expand Down Expand Up @@ -801,12 +801,12 @@ func (l tpccBenchLoadConfig) numLoadNodes(d tpccBenchDistribution) int {
}

type tpccBenchSpec struct {
Nodes int
CPUs int
Chaos bool
AdmissionControlEnabled bool
Distribution tpccBenchDistribution
LoadConfig tpccBenchLoadConfig
Nodes int
CPUs int
Chaos bool
AdmissionControlDisabled bool
Distribution tpccBenchDistribution
LoadConfig tpccBenchLoadConfig

// The number of warehouses to load into the cluster before beginning
// benchmarking. Should be larger than EstimatedMax and should be a
Expand Down Expand Up @@ -857,8 +857,8 @@ func registerTPCCBenchSpec(r registry.Registry, b tpccBenchSpec) {
if b.Chaos {
nameParts = append(nameParts, "chaos")
}
if b.AdmissionControlEnabled {
nameParts = append(nameParts, "admission")
if b.AdmissionControlDisabled {
nameParts = append(nameParts, "no-admission")
}

opts := []spec.Option{spec.CPU(b.CPUs)}
Expand Down Expand Up @@ -1034,9 +1034,7 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen
c.EncryptDefault(false)
c.EncryptAtRandom(false)
c.Start(ctx, append(b.startOpts(), roachNodes)...)
if b.AdmissionControlEnabled {
EnableAdmissionControl(ctx, t, c)
}
SetAdmissionControl(ctx, t, c, !b.AdmissionControlDisabled)
useHAProxy := b.Chaos
const restartWait = 15 * time.Second
{
Expand Down Expand Up @@ -1125,9 +1123,7 @@ func runTPCCBench(ctx context.Context, t test.Test, c cluster.Cluster, b tpccBen
}

c.Start(ctx, append(b.startOpts(), roachNodes)...)
if b.AdmissionControlEnabled {
EnableAdmissionControl(ctx, t, c)
}
SetAdmissionControl(ctx, t, c, !b.AdmissionControlDisabled)
}

s := search.NewLineSearcher(1, b.LoadWarehouses, b.EstimatedMax, initStepSize, precision)
Expand Down
14 changes: 9 additions & 5 deletions pkg/cmd/roachtest/tests/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,20 @@ func WaitForUpdatedReplicationReport(ctx context.Context, t test.Test, db *gosql
}
}

// EnableAdmissionControl enables the admission control cluster settings on
// the given cluster.
func EnableAdmissionControl(ctx context.Context, t test.Test, c cluster.Cluster) {
// SetAdmissionControl sets the admission control cluster settings on the
// given cluster.
func SetAdmissionControl(ctx context.Context, t test.Test, c cluster.Cluster, enabled bool) {
db := c.Conn(ctx, 1)
defer db.Close()
val := "true"
if !enabled {
val = "false"
}
for _, setting := range []string{"admission.kv.enabled", "admission.sql_kv_response.enabled",
"admission.sql_sql_response.enabled"} {
if _, err := db.ExecContext(
ctx, "SET CLUSTER SETTING "+setting+" = 'true'"); err != nil {
t.Fatalf("failed to enable admission control: %v", err)
ctx, "SET CLUSTER SETTING "+setting+" = '"+val+"'"); err != nil {
t.Fatalf("failed to set admission control to %t: %v", enabled, err)
}
}
}
6 changes: 3 additions & 3 deletions pkg/util/admission/work_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,23 +36,23 @@ import (
var KVAdmissionControlEnabled = settings.RegisterBoolSetting(
"admission.kv.enabled",
"when true, work performed by the KV layer is subject to admission control",
false).WithPublic()
true).WithPublic()

// SQLKVResponseAdmissionControlEnabled controls whether response processing
// in SQL, for KV requests, is enabled.
var SQLKVResponseAdmissionControlEnabled = settings.RegisterBoolSetting(
"admission.sql_kv_response.enabled",
"when true, work performed by the SQL layer when receiving a KV response is subject to "+
"admission control",
false).WithPublic()
true).WithPublic()

// SQLSQLResponseAdmissionControlEnabled controls whether response processing
// in SQL, for DistSQL requests, is enabled.
var SQLSQLResponseAdmissionControlEnabled = settings.RegisterBoolSetting(
"admission.sql_sql_response.enabled",
"when true, work performed by the SQL layer when receiving a DistSQL response is subject "+
"to admission control",
false).WithPublic()
true).WithPublic()

var admissionControlEnabledSettings = [numWorkKinds]*settings.BoolSetting{
KVWork: KVAdmissionControlEnabled,
Expand Down