Skip to content

Commit e3eb6e0

Browse files
craig[bot]couchandMasha Schneiderknz
committed
26605: ui: collect statement stats from all nodes r=couchand a=couchand The initial statement stats API endpoint only exposed the local node's statements. This extends it to query all the other nodes as well, properly aggregates across nodes, and provides the per-node details on the statement details page. <img width="659" alt="screen shot 2018-06-28 at 5 19 00 pm" src="https://user-images.githubusercontent.com/793969/42061214-65fe50bc-7af7-11e8-81f1-839256985e03.png"> 26654: workload: add geo-distributed support to tpccbench r=m-schneider a=m-schneider This patch is adding support for a zones flag which will allow tpccbench to benchmark over geodistributed clusters. Closes #25878 Release note: None 27095: sql: clean up "alter table relocate" r=knz a=knz On the way to address #26840. The RELOCATE action on ALTER TABLE is experimental, i.e. not meant to have a stable API for users, however there is no reason for the code that implements it to mention "testing" or "experimental" other than the grammar and tests. This patch cleans up the code accordingly, removing references of "testing" in the code. In addition the patch cleans up further by ensuring the experimental feature is marked with the "EXPERIMENTAL" keyword instead of "TESTING" throughout the code base and tests. Release note: None 27096: sql: refer to "show ranges" using the "experimental" keyword r=knz a=knz On the way to address #26840. Both the syntax `SHOW TESTING_RANGES` and `SHOW EXPERIMENTAL_RANGES` are recognized, but only the latter is canonical. Ensure it is used throughout. Release note: None 27097: sql: lowercase session var name in test r=knz a=knz Found while working on #26840. Release note: None Co-authored-by: Andrew Couch <andrew@cockroachlabs.com> Co-authored-by: Masha Schneider <masha@cockroachlabs.com> Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
6 parents 63b11a9 + 8606ba2 + 5f8fbd9 + 2f65d98 + 02a96fb + c5d7462 commit e3eb6e0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

78 files changed

+2211
-1379
lines changed

docs/RFCS/20170314_sql_split_syntax.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -146,20 +146,20 @@ The statement returns only after the relocations are complete.
146146
*Interleaved tables*: the command works as expected (the ranges may contain rows
147147
for parent or child tables/indexes).
148148

149-
### 3. `ALTER TABLE/INDEX TESTING_RELOCATE` ###
149+
### 3. `ALTER TABLE/INDEX EXPERIMENTAL_RELOCATE` ###
150150

151-
The `TESTING_RELOCATE` statements can be used to relocate specific ranges to
151+
The `EXPERIMENTAL_RELOCATE` statements can be used to relocate specific ranges to
152152
specific stores. This is very low-level functionality and is intended to be used
153153
sparingly, mainly for setting up tests which benefit from a predetermined data
154154
distribution. The rebalancing queues should be stopped in order to make the
155155
relocations "stick".
156156

157157
```sql
158-
ALTER TABLE <table> TESTING_RELOCATE <select_statement>
159-
ALTER INDEX <table>@<index> TESTING_RELOCATE <select_statement>
158+
ALTER TABLE <table> EXPERIMENTAL_RELOCATE <select_statement>
159+
ALTER INDEX <table>@<index> EXPERIMENTAL_RELOCATE <select_statement>
160160
```
161161

162-
`TESTING_RELOCATE` takes a select statement with the following result schema:
162+
`EXPERIMENTAL_RELOCATE` takes a select statement with the following result schema:
163163
- the first column is the relocation information: an array of integers, where
164164
each integer is a store ID. This indicates the set of replicas for the range;
165165
the first replica in the array will be the new lease owner.
@@ -175,13 +175,13 @@ Examples:
175175
CREATE TABLE t (k1, k2, k3, v INT, PRIMARY KEY (k1, k2, k3))
176176

177177
-- Move the range containing /t/primary/1/2/3 to store 1:
178-
ALTER TABLE t TESTING_RELOCATE VALUES (ARRAY[1], 1, 2, 3)
178+
ALTER TABLE t EXPERIMENTAL_RELOCATE VALUES (ARRAY[1], 1, 2, 3)
179179

180180
-- Move the range containing /t/primary/1/2 to stores 5,6,7 (with 5 as lease owner):
181-
ALTER TABLE t TESTING_RELOCATE VALUES (ARRAY[5,6,7], 1, 2)
181+
ALTER TABLE t EXPERIMENTAL_RELOCATE VALUES (ARRAY[5,6,7], 1, 2)
182182

183183
-- Move even k1s to store 1, odd k1s to store 2:
184-
ALTER TABLE t TESTING_RELOCATE SELECT ARRAY[1+i%2], i FROM GENERATE_SERIES(1, 10) AS g(i)
184+
ALTER TABLE t EXPERIMENTAL_RELOCATE SELECT ARRAY[1+i%2], i FROM GENERATE_SERIES(1, 10) AS g(i)
185185
```
186186

187187
The statement returns only after the relocations are complete.

pkg/cmd/roachtest/copy.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func registerCopy(r *registry) {
6363

6464
rangeCount := func() int {
6565
var count int
66-
const q = "SELECT count(*) FROM [SHOW TESTING_RANGES FROM TABLE bank.bank]"
66+
const q = "SELECT count(*) FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE bank.bank]"
6767
if err := db.QueryRow(q).Scan(&count); err != nil {
6868
t.Fatalf("failed to get range count: %v", err)
6969
}

pkg/cmd/roachtest/large_range.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ range_max_bytes: %d
103103
t.Status("checking for single range")
104104
rangeCount := func() int {
105105
var count int
106-
const q = "SELECT count(*) FROM [SHOW TESTING_RANGES FROM TABLE bank.bank]"
106+
const q = "SELECT count(*) FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE bank.bank]"
107107
if err := db.QueryRow(q).Scan(&count); err != nil {
108108
t.Fatalf("failed to get range count: %v", err)
109109
}

pkg/cmd/roachtest/tpcc.go

Lines changed: 107 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"github.com/cockroachdb/cockroach/pkg/util/color"
3232
"github.com/cockroachdb/cockroach/pkg/util/search"
3333
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
34+
"golang.org/x/sync/errgroup"
3435
)
3536

3637
func registerTPCC(r *registry) {
@@ -90,6 +91,7 @@ type tpccBenchSpec struct {
9091
CPUs int
9192
Chaos bool
9293
Geo bool
94+
Zones []string
9395

9496
// The number of warehouses to load into the cluster before beginning
9597
// benchmarking. Should be larger than EstimatedMax and should be a
@@ -110,7 +112,7 @@ type tpccBenchSpec struct {
110112
func registerTPCCBenchSpec(r *registry, b tpccBenchSpec) {
111113
nameParts := []string{
112114
"tpccbench",
113-
fmt.Sprintf("nodes=%d", b.Nodes),
115+
fmt.Sprintf("Nodes=%d", b.Nodes),
114116
fmt.Sprintf("cpu=%d", b.CPUs),
115117
}
116118
if b.Chaos {
@@ -122,14 +124,17 @@ func registerTPCCBenchSpec(r *registry, b tpccBenchSpec) {
122124
name := strings.Join(nameParts, "/")
123125

124126
opts := []createOption{cpu(b.CPUs)}
127+
nodeCount := nodes(b.Nodes+1, opts...)
128+
125129
if b.Geo {
126130
opts = append(opts, geo())
131+
opts = append(opts, zones(fmt.Sprintf(`"%s"`, strings.Join(b.Zones, `","`))))
132+
nodeCount = nodes(b.Nodes+len(b.Zones), opts...)
127133
}
128-
nodes := nodes(b.Nodes+1, opts...)
129134

130135
r.Add(testSpec{
131136
Name: name,
132-
Nodes: nodes,
137+
Nodes: nodeCount,
133138
Run: func(ctx context.Context, t *test, c *cluster) {
134139
runTPCCBench(ctx, t, c, b)
135140
},
@@ -140,7 +145,11 @@ func registerTPCCBenchSpec(r *registry, b tpccBenchSpec) {
140145
// function is idempotent and first checks whether a compatible dataset exists,
141146
// performing an expensive dataset restore only if it doesn't.
142147
func loadTPCCBench(
143-
ctx context.Context, c *cluster, b tpccBenchSpec, roachNodes, loadNode nodeListOption,
148+
ctx context.Context,
149+
c *cluster,
150+
b tpccBenchSpec,
151+
roachNodes, loadNode nodeListOption,
152+
zones []string,
144153
) error {
145154
db := c.Conn(ctx, 1)
146155
defer db.Close()
@@ -174,7 +183,7 @@ func loadTPCCBench(
174183
if b.StoreDirVersion != "" {
175184
c.l.printf("ingesting existing tpcc store dump\n")
176185

177-
urlBase, err := c.RunWithBuffer(ctx, c.l, loadNode,
186+
urlBase, err := c.RunWithBuffer(ctx, c.l, c.Node(loadNode[0]),
178187
fmt.Sprintf(`./workload fixtures url tpcc --warehouses=%d`, b.LoadWarehouses))
179188
if err != nil {
180189
return err
@@ -193,11 +202,17 @@ func loadTPCCBench(
193202
return err
194203
}
195204

205+
zonesArg := ""
206+
207+
if b.Geo {
208+
zonesArg = fmt.Sprintf(`--partitions=%d --zones="%s" --partition-affinity=0`, len(zones), strings.Join(zones, ","))
209+
}
210+
196211
// Split and scatter the tables. Set duration to 1ms so that the load
197212
// generation doesn't actually run.
198213
cmd = fmt.Sprintf(
199214
"./workload run tpcc --warehouses=%d --split --scatter "+
200-
"--duration=1ms {pgurl:1}", b.LoadWarehouses)
215+
"--duration=3m %s {pgurl:1}", b.LoadWarehouses, zonesArg)
201216
return c.RunE(ctx, loadNode, cmd)
202217
}
203218

@@ -220,25 +235,38 @@ func loadTPCCBench(
220235
// test. The `--wipe` flag will prevent this cluster from being destroyed, so it
221236
// can then be used during future runs.
222237
func runTPCCBench(ctx context.Context, t *test, c *cluster, b tpccBenchSpec) {
223-
if b.Geo {
224-
// TODO(m-schneider): add support for geo-distributed benchmarking.
225-
t.Fatal("geo-distributed benchmarking not supported")
226-
}
227-
228238
roachNodes := c.Range(1, c.nodes-1)
229-
loadNode := c.Node(c.nodes)
239+
loadNodes := c.Node(c.nodes)
240+
zoneCount := 1
241+
nodesPerRegion := c.nodes - 1
230242

231243
// Disable write barrier on mounted SSDs.
232244
if !c.isLocal() {
233245
c.RemountNoBarrier(ctx)
234246
}
235247

236-
c.Put(ctx, cockroach, "./cockroach", c.All())
237-
c.Put(ctx, workload, "./workload", loadNode)
248+
zoneCount = len(b.Zones)
249+
nodesPerRegion = c.nodes / zoneCount
250+
251+
roachNodes = nodeListOption{}
252+
loadNodes = nodeListOption{}
253+
254+
for i := 1; i <= c.nodes; i += nodesPerRegion {
255+
roachNodes = roachNodes.merge(c.Range(i, i+nodesPerRegion-2))
256+
loadNodes = loadNodes.merge(c.Node(i + nodesPerRegion - 1))
257+
}
258+
259+
c.Put(ctx, cockroach, "./cockroach", roachNodes)
260+
c.Put(ctx, workload, "./workload", loadNodes)
238261
c.Start(ctx, roachNodes)
239262

240263
useHAProxy := b.Chaos
241264
if useHAProxy {
265+
if len(loadNodes) > 1 {
266+
t.Fatal("distributed chaos benchmarking not supported")
267+
}
268+
loadNode := c.Node(loadNodes[0])
269+
242270
t.Status("installing haproxy")
243271
c.Install(ctx, loadNode, "haproxy")
244272
c.Run(ctx, loadNode, fmt.Sprintf("./cockroach gen haproxy --insecure --host %s",
@@ -249,7 +277,7 @@ func runTPCCBench(ctx context.Context, t *test, c *cluster, b tpccBenchSpec) {
249277
m := newMonitor(ctx, c, roachNodes)
250278
m.Go(func(ctx context.Context) error {
251279
t.Status("setting up dataset")
252-
err := loadTPCCBench(ctx, c, b, roachNodes, loadNode)
280+
err := loadTPCCBench(ctx, c, b, roachNodes, c.Node(loadNodes[0]), b.Zones)
253281
if err != nil {
254282
return err
255283
}
@@ -268,9 +296,9 @@ func runTPCCBench(ctx context.Context, t *test, c *cluster, b tpccBenchSpec) {
268296
c.Start(ctx, roachNodes)
269297
time.Sleep(10 * time.Second)
270298

271-
// Set up the load geneartion configuration.
299+
// Set up the load generation configuration.
272300
loadDur := 3 * time.Minute
273-
loadDone := make(chan time.Time, 1)
301+
loadDone := make(chan time.Time, zoneCount)
274302
extraFlags := ""
275303

276304
// If we're running chaos in this configuration, modify this config.
@@ -289,42 +317,71 @@ func runTPCCBench(ctx context.Context, t *test, c *cluster, b tpccBenchSpec) {
289317
m.Go(ch.Runner(c, m))
290318
}
291319

292-
t.Status(fmt.Sprintf("running benchmark, warehouses=%d", warehouses))
293-
connStr := fmt.Sprintf("{pgurl:1-%d}", len(roachNodes))
294-
if useHAProxy {
295-
connStr = fmt.Sprintf("{pgurl:%d}", loadNode[0])
296-
}
297-
cmd := fmt.Sprintf("ulimit -n 32768; "+
298-
"./workload run tpcc --warehouses=%d --ramp=30s --duration=%s%s %s",
299-
warehouses, loadDur, extraFlags, connStr)
300-
301-
out, err := c.RunWithBuffer(ctx, c.l, loadNode, cmd)
302-
loadDone <- timeutil.Now()
303-
if err != nil {
304-
return false, errors.Wrapf(err, "error running tpcc load generator:\n\n%s\n", out)
305-
}
306-
307-
// Parse the stats header and stats lines from the output.
308-
str := string(out)
309-
lines := strings.Split(str, "\n")
310-
for i, line := range lines {
311-
if strings.Contains(line, "tpmC") {
312-
lines = lines[i:]
313-
}
314-
if i == len(lines)-1 {
315-
return false, errors.Errorf("tpmC not found in output:\n\n%s\n", out)
316-
}
320+
// If we're running multiple load generators, run them in parallel and then
321+
// aggregate tpmCs.
322+
var eg errgroup.Group
323+
tpmCs := make(chan float64)
324+
for i := 0; i < len(loadNodes); i++ {
325+
// Copy for goroutine
326+
i := i
327+
eg.Go(func() error {
328+
totalWarehouses := warehouses
329+
partitionFlag := ""
330+
connStr := fmt.Sprintf("{pgurl:1-%d}", len(roachNodes))
331+
if useHAProxy {
332+
connStr = fmt.Sprintf("{pgurl:%d}", loadNodes[0])
333+
}
334+
if b.Geo {
335+
partitionFlag = fmt.Sprintf("--split --partitions=%d --partition-affinity=%d --active-warehouses=%d",
336+
zoneCount, i, warehouses/zoneCount)
337+
totalWarehouses = b.LoadWarehouses
338+
connStr = fmt.Sprintf("{pgurl:%s}", c.Range((i*nodesPerRegion)+1, (i*nodesPerRegion+1)+2).String())
339+
}
340+
341+
t.Status(fmt.Sprintf("running benchmark, warehouses=%d", warehouses))
342+
343+
cmd := fmt.Sprintf("ulimit -n 32768; "+
344+
"./workload run tpcc --warehouses=%d --ramp=30s --duration=%s%s %s %s",
345+
totalWarehouses, loadDur, extraFlags, partitionFlag, connStr)
346+
out, err := c.RunWithBuffer(ctx, c.l, c.Node(loadNodes[i]), cmd)
347+
loadDone <- timeutil.Now()
348+
if err != nil {
349+
return err
350+
}
351+
352+
// Parse the stats header and stats lines from the output.
353+
str := string(out)
354+
lines := strings.Split(str, "\n")
355+
for i, line := range lines {
356+
if strings.Contains(line, "tpmC") {
357+
lines = lines[i:]
358+
}
359+
if i == len(lines)-1 {
360+
return errors.Errorf("tpmC not found in output:\n\n%s\n", out)
361+
}
362+
}
363+
headerLine, statsLine := lines[0], lines[1]
364+
c.l.printf("%s\n%s\n", headerLine, statsLine)
365+
366+
// Parse tpmC value from stats line.
367+
fields := strings.Fields(statsLine)
368+
tpmC, err := strconv.ParseFloat(fields[1], 64)
369+
if err != nil {
370+
return err
371+
}
372+
tpmCs <- tpmC
373+
return nil
374+
})
317375
}
318-
headerLine, statsLine := lines[0], lines[1]
319-
c.l.printf("%s\n%s\n", headerLine, statsLine)
320-
321-
// Parse tpmC value from stats line.
322-
fields := strings.Fields(statsLine)
323-
tpmC, err := strconv.ParseFloat(fields[1], 64)
376+
close(tpmCs)
377+
err = eg.Wait()
324378
if err != nil {
325379
return false, err
326380
}
327-
381+
tpmC := float64(0)
382+
for partialTpMc := range tpmCs {
383+
tpmC += partialTpMc
384+
}
328385
// Determine the fraction of the maximum possible tpmC realized.
329386
maxTpmC := 12.8 * float64(warehouses)
330387
tpmCRatio := tpmC / maxTpmC
@@ -403,6 +460,7 @@ func registerTPCCBench(r *registry) {
403460
Nodes: 9,
404461
CPUs: 16,
405462
Geo: true,
463+
Zones: []string{"us-central1-b", "us-west1-b", "europe-west2-b"},
406464

407465
LoadWarehouses: 5000,
408466
EstimatedMax: 2000,

pkg/server/admin.go

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,18 +1516,6 @@ func (s *adminServer) DataDistribution(
15161516
return resp, nil
15171517
}
15181518

1519-
func (s *adminServer) Queries(
1520-
ctx context.Context, req *serverpb.QueriesRequest,
1521-
) (*serverpb.QueriesResponse, error) {
1522-
stmtStats := s.server.pgServer.SQLServer.GetUnscrubbedStmtStats()
1523-
lastReset := s.server.pgServer.SQLServer.GetStmtStatsLastReset()
1524-
1525-
return &serverpb.QueriesResponse{
1526-
Queries: stmtStats,
1527-
LastReset: lastReset,
1528-
}, nil
1529-
}
1530-
15311519
// sqlQuery allows you to incrementally build a SQL query that uses
15321520
// placeholders. Instead of specific placeholders like $1, you instead use the
15331521
// temporary placeholder $.

0 commit comments

Comments
 (0)