Skip to content
Merged
Show file tree
Hide file tree
Changes from 55 commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
01f876c
Basic skeleton app.
pstibrany Aug 21, 2020
75c09dc
Added bigtable index reader.
pstibrany Aug 21, 2020
1529d81
Added files.
pstibrany Aug 21, 2020
64e1a3b
Initial version of scanner, with single table scanning.
pstibrany Aug 21, 2020
a571181
Misc bug fixes and improvements.
pstibrany Aug 21, 2020
f47f145
Beautify.
pstibrany Aug 21, 2020
61fb582
Index tables can now be found via schema file.
pstibrany Aug 21, 2020
857c255
Added scanning of tables, files generated into subdir.
pstibrany Aug 21, 2020
baa586a
Generate files in compressed form.
pstibrany Aug 21, 2020
59c903e
Mark that table has been scanned by creating a local file.
pstibrany Aug 21, 2020
6b445f0
Use snappy compression.
pstibrany Aug 21, 2020
d71ebe2
Explain plan file.
pstibrany Aug 21, 2020
7589ee6
Delete local plan files after upload.
pstibrany Aug 21, 2020
82fabd2
Added Dockerfile.
pstibrany Aug 21, 2020
4f22c1c
Wait until context is finished to avoid restarts.
pstibrany Aug 21, 2020
1b404f1
Add tables limit for testing.
pstibrany Aug 21, 2020
779ce73
Add support for ignored user.
pstibrany Aug 21, 2020
cdb953e
Move files under scanner package.
pstibrany Aug 24, 2020
7f41c7e
Renamed struct.
pstibrany Aug 24, 2020
b3a1493
Extract config shared between commands
pstibrany Aug 24, 2020
a74de28
Added builder component. Supports single plan file for now.
pstibrany Aug 24, 2020
995872a
Fix meta.json, add tenant ID.
pstibrany Aug 24, 2020
2eb7093
Added heartbeating (updating progress file),
pstibrany Aug 24, 2020
072f1f5
Check that previous heartbeat exists.
pstibrany Aug 24, 2020
5b575ba
Cleanup on startup, upload and delete local block.
pstibrany Aug 24, 2020
3ca55a1
Use client directly to fetch chunks.
pstibrany Aug 24, 2020
8c13eb6
Export position of reading of plan file.
pstibrany Aug 24, 2020
f7253b1
Make sure to import http/pprof.
pstibrany Aug 24, 2020
86808a2
Added scheduler that scans for plans.
pstibrany Aug 25, 2020
594ee89
Added scheduler proto file.
pstibrany Aug 26, 2020
736654f
Use server (to get gRPC).
pstibrany Aug 26, 2020
d877eda
Expose current scheduler status via web page.
pstibrany Aug 26, 2020
846b18f
Builder now fetches plan files from scheduler.
pstibrany Aug 26, 2020
9c426dd
Added next plan interval, plan logger, renamed progress to inprogress.
pstibrany Aug 26, 2020
2577ee6
Metrics, inprogress
pstibrany Aug 26, 2020
861e959
Detect multiple plan files for the same day index, and treat them as …
pstibrany Aug 26, 2020
7b5ef10
Use starting file by scheduler. Cleanup dirs after each plan build.
pstibrany Aug 26, 2020
d4067ec
Stop without trying to receive another plan.
pstibrany Aug 26, 2020
9b47115
Don't upload errors when build fails due to builder being stopped.
pstibrany Aug 26, 2020
f47f5c4
Fixed tests.
pstibrany Aug 26, 2020
bad2ee1
Report number of different index entries found.
pstibrany Aug 26, 2020
5337dec
Report number of different index entries found.
pstibrany Aug 26, 2020
d577722
Log basic scan info.
pstibrany Aug 26, 2020
1ac483c
Log uploading of finished status file.
pstibrany Aug 26, 2020
daf5a67
Added test for scanner processor.
pstibrany Aug 26, 2020
ab85b55
Added test for TSDB builder.
pstibrany Aug 27, 2020
370a813
If chunk is not found on the storage, keep building, but log and incr…
pstibrany Aug 27, 2020
0021711
Added verification of generated plans before upload.
pstibrany Aug 27, 2020
6b48acf
Make lint happy.
pstibrany Aug 27, 2020
a6465ee
Move heartbeating setup to processPlanFile to make lint happy.
pstibrany Aug 27, 2020
9df1615
Don't treat failure to delete block, or to stop heartbeat as reasons t…
pstibrany Aug 27, 2020
61ff51d
Log progress when verifying plans. Use errors.
pstibrany Aug 28, 2020
89b2fe0
Pass builder name to scheduler for logging.
pstibrany Aug 28, 2020
4f7521f
Log and track generated block size.
pstibrany Aug 28, 2020
c99a9cb
Search for tables now always goes through all configured periods, ski…
pstibrany Aug 28, 2020
4d28ff8
Revert back unintended change.
pstibrany Aug 31, 2020
ce8472e
Review feedback.
pstibrany Aug 31, 2020
fbe297f
Added test for parsing bigtable rows.
pstibrany Aug 31, 2020
6002ea7
Use errors package for constructing errors.
pstibrany Aug 31, 2020
4c65d39
Use table-based log.
pstibrany Aug 31, 2020
cf7cd23
Log list of ignored users.
pstibrany Aug 31, 2020
615ea79
Use common prefix for all metrics.
pstibrany Aug 31, 2020
f0b3c13
Set user plans only after removing obsolete progress files.
pstibrany Aug 31, 2020
86a2760
Fix formatting of ignored users.
pstibrany Aug 31, 2020
900741d
Renamed local-dir flag to output-dir.
pstibrany Aug 31, 2020
9848eb8
Simplify check.
pstibrany Aug 31, 2020
8eb6580
Fix test.
pstibrany Aug 31, 2020
b6257a9
Add support for using file with allowed users.
pstibrany Aug 31, 2020
ecb16f5
Support allowed users via flag.
pstibrany Aug 31, 2020
54be79a
Fixed compilation error.
pstibrany Sep 2, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ SED ?= $(shell which gsed 2>/dev/null || which sed)

# We don't want find to scan inside a bunch of directories, to accelerate the
# 'make: Entering directory '/go/src/github.com/cortexproject/cortex' phase.
DONT_FIND := -name tools -prune -o -name vendor -prune -o -name .git -prune -o -name .cache -prune -o -name .pkg -prune -o -name packaging -prune -o
DONT_FIND := -name vendor -prune -o -name .git -prune -o -name .cache -prune -o -name .pkg -prune -o -name packaging -prune -o

# Get a list of directories containing Dockerfiles
DOCKERFILES := $(shell find . $(DONT_FIND) -type f -name 'Dockerfile' -print)
Expand Down Expand Up @@ -68,6 +68,7 @@ pkg/ruler/rules/rules.pb.go: pkg/ruler/rules/rules.proto
pkg/ruler/ruler.pb.go: pkg/ruler/rules/rules.proto
pkg/ring/kv/memberlist/kv.pb.go: pkg/ring/kv/memberlist/kv.proto
pkg/chunk/grpc/grpc.pb.go: pkg/chunk/grpc/grpc.proto
tools/blocksconvert/scheduler.pb.go: tools/blocksconvert/scheduler.proto

all: $(UPTODATE_FILES)
test: protos
Expand Down
9 changes: 9 additions & 0 deletions cmd/blocksconvert/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Runtime image for the blocksconvert binary.
FROM alpine:3.12
# CA certificates; presumably needed for TLS connections to remote services (confirm).
RUN apk add --no-cache ca-certificates
# Copies the blocksconvert binary from the build context to the image root.
COPY blocksconvert /
ENTRYPOINT ["/blocksconvert"]

# Build argument recorded in the image's revision label below.
ARG revision
LABEL org.opencontainers.image.title="blocksconvert" \
org.opencontainers.image.source="https://github.com/cortexproject/cortex/tree/master/tools/blocksconvert" \
org.opencontainers.image.revision="${revision}"
102 changes: 102 additions & 0 deletions cmd/blocksconvert/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package main

import (
"context"
"flag"
"fmt"
"os"
"strings"

"github.com/go-kit/kit/log/level"
"github.com/prometheus/client_golang/prometheus"
"github.com/weaveworks/common/server"
"github.com/weaveworks/common/signals"

"github.com/cortexproject/cortex/pkg/cortex"
"github.com/cortexproject/cortex/pkg/util"
"github.com/cortexproject/cortex/pkg/util/services"
"github.com/cortexproject/cortex/tools/blocksconvert"
"github.com/cortexproject/cortex/tools/blocksconvert/builder"
"github.com/cortexproject/cortex/tools/blocksconvert/scanner"
"github.com/cortexproject/cortex/tools/blocksconvert/scheduler"
)

// Config holds the command-line configuration of the blocksconvert binary.
type Config struct {
// Target selects the module to run (scanner, scheduler or builder).
// main lower-cases it before matching.
Target string
// ServerConfig configures the server created in main via server.New.
ServerConfig server.Config

// SharedConfig is passed to the constructor of every target; each of the
// remaining fields configures one specific target.
SharedConfig blocksconvert.SharedConfig
ScannerConfig scanner.Config
BuilderConfig builder.Config
SchedulerConfig scheduler.Config
}

// main parses the command-line flags, creates the server and the service
// selected by -target (scanner, builder or scheduler), starts both through a
// service manager and then blocks until the target service terminates.
//
// The process exits with status 1 when the server cannot be created, the
// target is unknown or fails to initialize, the services fail to start, or
// the target service ends in a failed state.
func main() {
	cfg := Config{}
	flag.StringVar(&cfg.Target, "target", "", "Module to run: Scanner, Scheduler, Builder")
	cfg.SharedConfig.RegisterFlags(flag.CommandLine)
	cfg.ScannerConfig.RegisterFlags(flag.CommandLine)
	cfg.BuilderConfig.RegisterFlags(flag.CommandLine)
	cfg.SchedulerConfig.RegisterFlags(flag.CommandLine)
	cfg.ServerConfig.RegisterFlags(flag.CommandLine)
	flag.Parse()

	util.InitLogger(&cfg.ServerConfig)

	// Signals are handled explicitly further down (signals.NewHandler), so the
	// server's own signal handling is switched off here.
	cortex.DisableSignalHandling(&cfg.ServerConfig)
	serv, err := server.New(cfg.ServerConfig)
	if err != nil {
		level.Error(util.Logger).Log("msg", "Unable to initialize server", "err", err.Error())
		os.Exit(1)
	}

	// Target matching is case-insensitive.
	cfg.Target = strings.ToLower(cfg.Target)

	registry := prometheus.DefaultRegisterer

	var targetService services.Service
	switch cfg.Target {
	case "scanner":
		targetService, err = scanner.NewScanner(cfg.ScannerConfig, cfg.SharedConfig, util.Logger, registry)
	case "builder":
		targetService, err = builder.NewBuilder(cfg.BuilderConfig, cfg.SharedConfig, util.Logger, registry)
	case "scheduler":
		targetService, err = scheduler.NewScheduler(cfg.SchedulerConfig, cfg.SharedConfig, util.Logger, registry, serv.HTTP, serv.GRPC)
	default:
		// Include the offending value so that a missing or misspelled -target
		// is obvious from the log line.
		err = fmt.Errorf("unknown target %q", cfg.Target)
	}

	if err != nil {
		level.Error(util.Logger).Log("msg", "failed to initialize", "err", err)
		os.Exit(1)
	}

	// NOTE(review): the callback presumably lists the services the server
	// service depends on during shutdown; confirm against cortex.NewServerService.
	servService := cortex.NewServerService(serv, func() []services.Service {
		return []services.Service{targetService}
	})
	servManager, err := services.NewManager(servService, targetService)
	if err == nil {
		// Stop every managed service as soon as any one of them fails.
		servManager.AddListener(services.NewManagerListener(nil, nil, func(service services.Service) {
			servManager.StopAsync()
		}))

		err = services.StartManagerAndAwaitHealthy(context.Background(), servManager)
	}
	if err != nil {
		level.Error(util.Logger).Log("msg", "Unable to start", "err", err.Error())
		os.Exit(1)
	}

	// Setup signal handler and ask service manager to stop when signal arrives.
	handler := signals.NewHandler(serv.Log)
	go func() {
		handler.Loop()
		servManager.StopAsync()
	}()

	// We only wait for target service. If any other service fails, listener will stop it (via manager)
	if err := targetService.AwaitTerminated(context.Background()); err != nil {
		level.Error(util.Logger).Log("msg", cfg.Target+" failed", "err", targetService.FailureCase())
		os.Exit(1)
	}
}
6 changes: 3 additions & 3 deletions pkg/chunk/gcp/bigtable_index_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,17 +116,17 @@ func newStorageClientColumnKey(cfg Config, schemaCfg chunk.SchemaConfig, client
// We hash the row key and prepend it back to the key for better distribution.
// We preserve the existing key to make migrations and o11y easier.
if cfg.DistributeKeys {
hashValue = hashPrefix(hashValue) + "-" + hashValue
hashValue = HashPrefix(hashValue) + "-" + hashValue
}

return hashValue, string(rangeValue)
},
}
}

// hashPrefix calculates a 64bit hash of the input string and hex-encodes
// HashPrefix calculates a 64bit hash of the input string and hex-encodes
// the result, taking care to zero pad etc.
func hashPrefix(input string) string {
func HashPrefix(input string) string {
prefix := hashAdd(hashNew(), input)
var encodedUint64 [8]byte
binary.LittleEndian.PutUint64(encodedUint64[:], prefix)
Expand Down
2 changes: 1 addition & 1 deletion pkg/chunk/gcp/fixtures.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ var Fixtures = func() []testutils.Fixture {
for _, columnKeyClient := range []bool{true, false} {
for _, hashPrefix := range []bool{true, false} {
fixtures = append(fixtures, &fixture{
name: fmt.Sprintf("bigtable-columnkey:%v-gcsObjectClient:%v-hashPrefix:%v", columnKeyClient, gcsObjectClient, hashPrefix),
name: fmt.Sprintf("bigtable-columnkey:%v-gcsObjectClient:%v-HashPrefix:%v", columnKeyClient, gcsObjectClient, hashPrefix),
columnKeyClient: columnKeyClient,
gcsObjectClient: gcsObjectClient,
hashPrefix: hashPrefix,
Expand Down
Loading