From f86d12f49887fbeaeff1c39eeb09d754c69b477d Mon Sep 17 00:00:00 2001 From: Christian Haudum Date: Mon, 12 Dec 2022 13:51:05 +0100 Subject: [PATCH 01/62] Update dskit to latest commit Diff: https://github.com/grafana/dskit/compare/b1b307db4f30..3e308a49441b Signed-off-by: Christian Haudum --- go.mod | 2 +- go.sum | 4 +- .../grafana/dskit/grpcutil/health_check.go | 62 ++++++++++++------- .../dskit/kv/memberlist/memberlist_client.go | 4 +- .../grafana/dskit/modules/modules.go | 2 +- .../grafana/dskit/multierror/multierror.go | 3 + .../grafana/dskit/netutil/netutil.go | 2 - .../grafana/dskit/ring/client/pool.go | 30 ++++++--- vendor/github.com/grafana/dskit/ring/http.go | 2 +- .../grafana/dskit/runtimeconfig/manager.go | 2 +- .../grafana/dskit/services/README.md | 4 +- .../grafana/dskit/services/manager.go | 2 +- .../grafana/dskit/services/services.go | 2 +- vendor/modules.txt | 2 +- 14 files changed, 78 insertions(+), 45 deletions(-) diff --git a/go.mod b/go.mod index c8dab99ac68e8..930689e8f2d55 100644 --- a/go.mod +++ b/go.mod @@ -49,7 +49,7 @@ require ( github.com/google/uuid v1.3.0 github.com/gorilla/mux v1.8.0 github.com/gorilla/websocket v1.5.0 - github.com/grafana/dskit v0.0.0-20220928083349-b1b307db4f30 + github.com/grafana/dskit v0.0.0-20221212120341-3e308a49441b github.com/grafana/go-gelf/v2 v2.0.1 github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6 github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 diff --git a/go.sum b/go.sum index e163e46d59578..89c5d9aa0964f 100644 --- a/go.sum +++ b/go.sum @@ -726,8 +726,8 @@ github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gotestyourself/gotestyourself v2.2.0+incompatible/go.mod h1:zZKM6oeNM8k+FRljX1mnzVYeS8wiGgQyvST1/GafPbY= -github.com/grafana/dskit v0.0.0-20220928083349-b1b307db4f30 
h1:Lbeu0ddFATI+cgXh6LzjAk9TdoU7WUZ2hry+5H4xXdM= -github.com/grafana/dskit v0.0.0-20220928083349-b1b307db4f30/go.mod h1:NTfOwhBMmR7TyG4E3RB4F1qhvk+cawoXacyN30yipVY= +github.com/grafana/dskit v0.0.0-20221212120341-3e308a49441b h1:3Di+jzpE0CHlzlYtjDq9xL5xinR4FUQ7GoQ44JkfQLc= +github.com/grafana/dskit v0.0.0-20221212120341-3e308a49441b/go.mod h1:rJRGBDtyQNA3OFh7WecUILvxkgGrdIuA4f9wgZOn3V0= github.com/grafana/go-gelf/v2 v2.0.1 h1:BOChP0h/jLeD+7F9mL7tq10xVkDG15he3T1zHuQaWak= github.com/grafana/go-gelf/v2 v2.0.1/go.mod h1:lexHie0xzYGwCgiRGcvZ723bSNyNI8ZRD4s0CLobh90= github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85 h1:xLuzPoOzdfNb/RF/IENCw+oLVdZB4G21VPhkHBgwSHY= diff --git a/vendor/github.com/grafana/dskit/grpcutil/health_check.go b/vendor/github.com/grafana/dskit/grpcutil/health_check.go index 2b567b36804f4..44b5e15e7657c 100644 --- a/vendor/github.com/grafana/dskit/grpcutil/health_check.go +++ b/vendor/github.com/grafana/dskit/grpcutil/health_check.go @@ -4,29 +4,64 @@ import ( "context" "github.com/gogo/status" + "go.uber.org/atomic" "google.golang.org/grpc/codes" "google.golang.org/grpc/health/grpc_health_v1" "github.com/grafana/dskit/services" ) +// Check is a function that determines if this gRPC application is healthy. +type Check func(ctx context.Context) bool + +// WithManager returns a new Check that tests if the managed services are healthy. +func WithManager(manager *services.Manager) Check { + return func(ctx context.Context) bool { + states := manager.ServicesByState() + + // Given this is a health check endpoint for the whole instance, we should consider + // it healthy after all services have been started (running) and until all + // services are terminated. Some services, like ingesters, are still + // fully functioning while stopping. 
+ if len(states[services.New]) > 0 || len(states[services.Starting]) > 0 || len(states[services.Failed]) > 0 { + return false + } + + return len(states[services.Running]) > 0 || len(states[services.Stopping]) > 0 + } +} + +// WithShutdownRequested returns a new Check that returns false when shutting down. +func WithShutdownRequested(requested *atomic.Bool) Check { + return func(ctx context.Context) bool { + return !requested.Load() + } +} + // HealthCheck fulfills the grpc_health_v1.HealthServer interface by ensuring -// the services being managed by the provided service manager are healthy. +// each of the provided Checks indicates the application is healthy. type HealthCheck struct { - sm *services.Manager + checks []Check } // NewHealthCheck returns a new HealthCheck for the provided service manager. func NewHealthCheck(sm *services.Manager) *HealthCheck { + return NewHealthCheckFrom(WithManager(sm)) +} + +// NewHealthCheckFrom returns a new HealthCheck that uses each of the provided Checks. +func NewHealthCheckFrom(checks ...Check) *HealthCheck { return &HealthCheck{ - sm: sm, + checks: checks, } } // Check implements the grpc healthcheck. 
-func (h *HealthCheck) Check(_ context.Context, _ *grpc_health_v1.HealthCheckRequest) (*grpc_health_v1.HealthCheckResponse, error) { - if !h.isHealthy() { - return &grpc_health_v1.HealthCheckResponse{Status: grpc_health_v1.HealthCheckResponse_NOT_SERVING}, nil +func (h *HealthCheck) Check(ctx context.Context, _ *grpc_health_v1.HealthCheckRequest) (*grpc_health_v1.HealthCheckResponse, error) { + for _, check := range h.checks { + if !check(ctx) { + return &grpc_health_v1.HealthCheckResponse{Status: grpc_health_v1.HealthCheckResponse_NOT_SERVING}, nil + } } return &grpc_health_v1.HealthCheckResponse{Status: grpc_health_v1.HealthCheckResponse_SERVING}, nil @@ -36,18 +71,3 @@ func (h *HealthCheck) Check(_ context.Context, _ *grpc_health_v1.HealthCheckRequ func (h *HealthCheck) Watch(_ *grpc_health_v1.HealthCheckRequest, _ grpc_health_v1.Health_WatchServer) error { return status.Error(codes.Unimplemented, "Watching is not supported") } - -// isHealthy returns whether the instance should be considered healthy. -func (h *HealthCheck) isHealthy() bool { - states := h.sm.ServicesByState() - - // Given this is an health check endpoint for the whole instance, we should consider - // it healthy after all services have been started (running) and until all - // services are terminated. Some services, like ingesters, are still - // fully functioning while stopping. 
- if len(states[services.New]) > 0 || len(states[services.Starting]) > 0 || len(states[services.Failed]) > 0 { - return false - } - - return len(states[services.Running]) > 0 || len(states[services.Stopping]) > 0 -} diff --git a/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go b/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go index ced269af9fa8c..390eca606f546 100644 --- a/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go +++ b/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go @@ -141,8 +141,8 @@ type KVConfig struct { AdvertiseAddr string `yaml:"advertise_addr"` AdvertisePort int `yaml:"advertise_port"` - ClusterLabel string `yaml:"cluster_label" category:"experimental"` - ClusterLabelVerificationDisabled bool `yaml:"cluster_label_verification_disabled" category:"experimental"` + ClusterLabel string `yaml:"cluster_label" category:"advanced"` + ClusterLabelVerificationDisabled bool `yaml:"cluster_label_verification_disabled" category:"advanced"` // List of members to join JoinMembers flagext.StringSlice `yaml:"join_members"` diff --git a/vendor/github.com/grafana/dskit/modules/modules.go b/vendor/github.com/grafana/dskit/modules/modules.go index 60e68ca25ac3e..0a28797489bea 100644 --- a/vendor/github.com/grafana/dskit/modules/modules.go +++ b/vendor/github.com/grafana/dskit/modules/modules.go @@ -18,7 +18,7 @@ type module struct { // initFn for this module (can return nil) initFn func() (services.Service, error) - // is this module user visible (i.e intended to be passed to `InitModuleServices`) + // is this module user visible (i.e. 
intended to be passed to `InitModuleServices`) userVisible bool } diff --git a/vendor/github.com/grafana/dskit/multierror/multierror.go b/vendor/github.com/grafana/dskit/multierror/multierror.go index fbbf0b951b744..290b5bcef7cf4 100644 --- a/vendor/github.com/grafana/dskit/multierror/multierror.go +++ b/vendor/github.com/grafana/dskit/multierror/multierror.go @@ -1,3 +1,6 @@ +// Provenance-includes-location: https://github.com/thanos-io/thanos/blob/2027fb30/pkg/errutil/multierror.go +// Provenance-includes-copyright: The Thanos Authors. + package multierror import ( diff --git a/vendor/github.com/grafana/dskit/netutil/netutil.go b/vendor/github.com/grafana/dskit/netutil/netutil.go index 232317d4b5439..3803c0df71184 100644 --- a/vendor/github.com/grafana/dskit/netutil/netutil.go +++ b/vendor/github.com/grafana/dskit/netutil/netutil.go @@ -2,7 +2,6 @@ package netutil import ( "net" - "strings" "github.com/go-kit/log" "github.com/go-kit/log/level" @@ -53,6 +52,5 @@ func privateNetworkInterfaces(all []net.Interface, fallback []string, logger log if len(privInts) == 0 { return fallback } - level.Debug(logger).Log("msg", "found network interfaces with private IP addresses assigned", "interfaces", strings.Join(privInts, " ")) return privInts } diff --git a/vendor/github.com/grafana/dskit/ring/client/pool.go b/vendor/github.com/grafana/dskit/ring/client/pool.go index 57b462cc4144f..eca27ef68884b 100644 --- a/vendor/github.com/grafana/dskit/ring/client/pool.go +++ b/vendor/github.com/grafana/dskit/ring/client/pool.go @@ -13,6 +13,7 @@ import ( "github.com/weaveworks/common/user" "google.golang.org/grpc/health/grpc_health_v1" + "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/ring/util" "github.com/grafana/dskit/services" ) @@ -35,9 +36,10 @@ type PoolServiceDiscovery func() ([]string, error) // PoolConfig is config for creating a Pool. 
type PoolConfig struct { - CheckInterval time.Duration - HealthCheckEnabled bool - HealthCheckTimeout time.Duration + CheckInterval time.Duration + HealthCheckEnabled bool + HealthCheckTimeout time.Duration + MaxConcurrentHealthChecks int // defaults to 16 } // Pool holds a cache of grpc_health_v1 clients. @@ -58,6 +60,10 @@ type Pool struct { // NewPool creates a new Pool. func NewPool(clientName string, cfg PoolConfig, discovery PoolServiceDiscovery, factory PoolFactory, clientsMetric prometheus.Gauge, logger log.Logger) *Pool { + if cfg.MaxConcurrentHealthChecks == 0 { + cfg.MaxConcurrentHealthChecks = 16 + } + p := &Pool{ cfg: cfg, discovery: discovery, @@ -173,24 +179,30 @@ func (p *Pool) removeStaleClients() { } } -// cleanUnhealthy loops through all servers and deletes any that fails a healthcheck. +// cleanUnhealthy loops through all servers and deletes any that fail a healthcheck. +// The health checks are executed concurrently with p.cfg.MaxConcurrentHealthChecks. func (p *Pool) cleanUnhealthy() { - for _, addr := range p.RegisteredAddresses() { + addresses := p.RegisteredAddresses() + _ = concurrency.ForEachJob(context.Background(), len(addresses), p.cfg.MaxConcurrentHealthChecks, func(ctx context.Context, idx int) error { + addr := addresses[idx] client, ok := p.fromCache(addr) // not ok means someone removed a client between the start of this loop and now if ok { - err := healthCheck(client, p.cfg.HealthCheckTimeout) + err := healthCheck(ctx, client, p.cfg.HealthCheckTimeout) if err != nil { level.Warn(p.logger).Log("msg", fmt.Sprintf("removing %s failing healthcheck", p.clientName), "addr", addr, "reason", err) p.RemoveClientFor(addr) } } - } + // Never return an error, because otherwise the processing would stop and + // remaining health checks would not been executed. 
+ return nil + }) } // healthCheck will check if the client is still healthy, returning an error if it is not -func healthCheck(client PoolClient, timeout time.Duration) error { - ctx, cancel := context.WithTimeout(context.Background(), timeout) +func healthCheck(ctx context.Context, client PoolClient, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() ctx = user.InjectOrgID(ctx, "0") diff --git a/vendor/github.com/grafana/dskit/ring/http.go b/vendor/github.com/grafana/dskit/ring/http.go index 26d28e3e5d05a..6521ca205eccd 100644 --- a/vendor/github.com/grafana/dskit/ring/http.go +++ b/vendor/github.com/grafana/dskit/ring/http.go @@ -18,7 +18,7 @@ var defaultPageContent string var defaultPageTemplate = template.Must(template.New("webpage").Funcs(template.FuncMap{ "mod": func(i, j int) bool { return i%j == 0 }, "humanFloat": func(f float64) string { - return fmt.Sprintf("%.2g", f) + return fmt.Sprintf("%.3g", f) }, "timeOrEmptyString": func(t time.Time) string { if t.IsZero() { diff --git a/vendor/github.com/grafana/dskit/runtimeconfig/manager.go b/vendor/github.com/grafana/dskit/runtimeconfig/manager.go index b21fdc16fbb63..23b50bbff892e 100644 --- a/vendor/github.com/grafana/dskit/runtimeconfig/manager.go +++ b/vendor/github.com/grafana/dskit/runtimeconfig/manager.go @@ -62,7 +62,7 @@ type Manager struct { fileHashes map[string]string } -// New creates an instance of Manager and starts reload config loop based on config +// New creates an instance of Manager. Manager is a services.Service, and must be explicitly started to perform any work. 
func New(cfg Config, registerer prometheus.Registerer, logger log.Logger) (*Manager, error) { if len(cfg.LoadPath) == 0 { return nil, errors.New("LoadPath is empty") diff --git a/vendor/github.com/grafana/dskit/services/README.md b/vendor/github.com/grafana/dskit/services/README.md index 084deefea1249..3d9d56782e6ee 100644 --- a/vendor/github.com/grafana/dskit/services/README.md +++ b/vendor/github.com/grafana/dskit/services/README.md @@ -131,10 +131,10 @@ func (s *exampleService) Send(msg string) bool { } ``` -Now `serv` is a service that can be started, observed for state changes, or stopped. As long as service is in Running state, clients can call its `Send` method: +Now `exampleService` is a service that can be started, observed for state changes, or stopped. As long as service is in Running state, clients can call its `Send` method: ```go -s := newServ() +s := newExampleServ() s.StartAsync(context.Background()) s.AwaitRunning(context.Background()) // now collect() is running diff --git a/vendor/github.com/grafana/dskit/services/manager.go b/vendor/github.com/grafana/dskit/services/manager.go index 4da481ec5eeaf..3ef6aad483444 100644 --- a/vendor/github.com/grafana/dskit/services/manager.go +++ b/vendor/github.com/grafana/dskit/services/manager.go @@ -304,7 +304,7 @@ type funcBasedManagerListener struct { failure func(service Service) } -func (f *funcBasedManagerListener) Healthy() { +func (f funcBasedManagerListener) Healthy() { if f.healthy != nil { f.healthy() } diff --git a/vendor/github.com/grafana/dskit/services/services.go b/vendor/github.com/grafana/dskit/services/services.go index 7bb91ae84b415..856a4d98c6c9e 100644 --- a/vendor/github.com/grafana/dskit/services/services.go +++ b/vendor/github.com/grafana/dskit/services/services.go @@ -64,7 +64,7 @@ type funcBasedListener struct { failedFn func(from State, failure error) } -func (f *funcBasedListener) Starting() { +func (f funcBasedListener) Starting() { if f.startingFn != nil { f.startingFn() } diff 
--git a/vendor/modules.txt b/vendor/modules.txt index ebe9df96b9da2..74044d0a12514 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -681,7 +681,7 @@ github.com/gorilla/mux # github.com/gorilla/websocket v1.5.0 ## explicit; go 1.12 github.com/gorilla/websocket -# github.com/grafana/dskit v0.0.0-20220928083349-b1b307db4f30 +# github.com/grafana/dskit v0.0.0-20221212120341-3e308a49441b ## explicit; go 1.18 github.com/grafana/dskit/backoff github.com/grafana/dskit/concurrency From 7546c4041e128e875a587c0d711f2f419c5a4f97 Mon Sep 17 00:00:00 2001 From: Justin Avery Rexroad Date: Mon, 12 Dec 2022 11:10:26 -0800 Subject: [PATCH 02/62] Fix typo in helm install monolithic docs (#7909) **What this PR does / why we need it**: Add missing letter to end of word. **Special notes for your reviewer**: **Checklist** - [ ] Reviewed the `CONTRIBUTING.md` guide - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/upgrading/_index.md` --- docs/sources/installation/helm/install-monolithic/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sources/installation/helm/install-monolithic/index.md b/docs/sources/installation/helm/install-monolithic/index.md index 6f69118580379..a5a171817bb01 100644 --- a/docs/sources/installation/helm/install-monolithic/index.md +++ b/docs/sources/installation/helm/install-monolithic/index.md @@ -12,7 +12,7 @@ keywords: [] This Helm Chart installation runs the Grafana Loki *single binary* within a Kubernetes cluster. -If the filesyste is set to `filesystem`, this chart configures Loki to run the `all` target in a [monolothic](../../fundamentals/architecture/deployment-modes/#monolithic-mode), designed to work with a filesystem storage. It will also configure meta-monitoring of metrics and logs. 
+If the filesystem is set to `filesystem`, this chart configures Loki to run the `all` target in a [monolothic](../../fundamentals/architecture/deployment-modes/#monolithic-mode), designed to work with a filesystem storage. It will also configure meta-monitoring of metrics and logs. It is not possible to install the single binary with a different storage type. From 04ce1b87213d18f414f56a74e02a33e7b65fc8f1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Dec 2022 12:17:10 -0700 Subject: [PATCH 03/62] Bump azure/setup-helm from 1 to 3 (#7608) Bumps [azure/setup-helm](https://github.com/azure/setup-helm) from 1 to 3. Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/helm-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/helm-ci.yml b/.github/workflows/helm-ci.yml index 3f96a06c79a1c..dfa8ca51ba258 100644 --- a/.github/workflows/helm-ci.yml +++ b/.github/workflows/helm-ci.yml @@ -45,7 +45,7 @@ jobs: fetch-depth: 0 - name: Set up Helm - uses: azure/setup-helm@v1 + uses: azure/setup-helm@v3 with: version: v3.8.2 From 9b90aad7ce20545fa17cf0b1649448159964c36b Mon Sep 17 00:00:00 2001 From: Karsten Jeschkies Date: Mon, 12 Dec 2022 20:31:36 +0100 Subject: [PATCH 04/62] Fix sizing tool. (#7899) **What this PR does / why we need it** The current sizing tool is broken because it would not allow cross origin requests. See LogQL Analyzer (https://github.com/grafana/loki/blob/main/docs/sources/logql/analyzer/script.js#L115) for the proper way to call its API. 
--- docs/sources/installation/sizing/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sources/installation/sizing/index.md b/docs/sources/installation/sizing/index.md index fe88f268d2094..a8ea7d1401790 100644 --- a/docs/sources/installation/sizing/index.md +++ b/docs/sources/installation/sizing/index.md @@ -71,7 +71,7 @@ This tool helps to generate a Helm Charts `values.yaml` file based on specified - - diff --git a/docs/sources/installation/sizing/index.md b/docs/sources/installation/sizing/index.md index a8ea7d1401790..9b16bbf5c0e8e 100644 --- a/docs/sources/installation/sizing/index.md +++ b/docs/sources/installation/sizing/index.md @@ -99,7 +99,7 @@ createApp({ methods: { async fetchNodeTypes() { const url = `${API_URL}/nodes` - this.nodes = await (await fetch(url),{mode: 'cors'}).json() + this.nodes = await (await fetch(url,{mode: 'cors'})).json() } } }).mount('#app') From f5fbfabd8445b5f027d0da896bfa44c66829d705 Mon Sep 17 00:00:00 2001 From: Trevor Whitney Date: Mon, 12 Dec 2022 14:01:22 -0700 Subject: [PATCH 06/62] Add 3rd target (`backend`) to SSD/Scalable mode (#7650) **What this PR does / why we need it**: This adds a 3rd target to SSD/Scalable mode, as well as a config flag to run the legacy `read` mode from the original 2 target configuration in order to give people time to migrate before we remove this option in Loki 3.0 (hopefully). The 3rd target has the two major advantages: 1. Allows the read path to be run as a deployment and thus auto-scaled using our existing auto-scaling logic for queriers 2. Creates consistency with Mimir since they went with a 3 target model for their SSD deployment. 
--- integration/loki_simple_scalable_test.go | 75 +++++++++- pkg/loki/loki.go | 29 +++- pkg/loki/modules.go | 23 +-- pkg/loki/modules_test.go | 181 +++++++++++++++++------ 4 files changed, 250 insertions(+), 58 deletions(-) diff --git a/integration/loki_simple_scalable_test.go b/integration/loki_simple_scalable_test.go index dc6da63477971..47616b163edab 100644 --- a/integration/loki_simple_scalable_test.go +++ b/integration/loki_simple_scalable_test.go @@ -12,7 +12,7 @@ import ( "github.com/grafana/loki/integration/cluster" ) -func TestSimpleScalableIngestQuery(t *testing.T) { +func TestSimpleScalable_Legacy_IngestQuery(t *testing.T) { clu := cluster.New() defer func() { assert.NoError(t, clu.Cleanup()) @@ -74,3 +74,76 @@ func TestSimpleScalableIngestQuery(t *testing.T) { assert.ElementsMatch(t, []string{"fake"}, resp) }) } + +func TestSimpleScalable_IngestQuery(t *testing.T) { + clu := cluster.New() + defer func() { + assert.NoError(t, clu.Cleanup()) + }() + + var ( + tWrite = clu.AddComponent( + "write", + "-target=write", + ) + tBackend = clu.AddComponent( + "backend", + "-target=backend", + "-legacy-read-mode=false", + ) + ) + require.NoError(t, clu.Run()) + + tRead := clu.AddComponent( + "read", + "-target=read", + "-common.compactor-address="+tBackend.HTTPURL(), + "-legacy-read-mode=false", + ) + require.NoError(t, clu.Run()) + + tenantID := randStringRunes() + + now := time.Now() + cliWrite := client.New(tenantID, "", tWrite.HTTPURL()) + cliWrite.Now = now + cliRead := client.New(tenantID, "", tRead.HTTPURL()) + cliRead.Now = now + cliBackend := client.New(tenantID, "", tBackend.HTTPURL()) + cliBackend.Now = now + + t.Run("ingest logs", func(t *testing.T) { + // ingest some log lines + require.NoError(t, cliWrite.PushLogLineWithTimestamp("lineA", now.Add(-45*time.Minute), map[string]string{"job": "fake"})) + require.NoError(t, cliWrite.PushLogLineWithTimestamp("lineB", now.Add(-45*time.Minute), map[string]string{"job": "fake"})) + + require.NoError(t, 
cliWrite.PushLogLine("lineC", map[string]string{"job": "fake"})) + require.NoError(t, cliWrite.PushLogLine("lineD", map[string]string{"job": "fake"})) + }) + + t.Run("query", func(t *testing.T) { + resp, err := cliRead.RunRangeQuery(context.Background(), `{job="fake"}`) + require.NoError(t, err) + assert.Equal(t, "streams", resp.Data.ResultType) + + var lines []string + for _, stream := range resp.Data.Stream { + for _, val := range stream.Values { + lines = append(lines, val[1]) + } + } + assert.ElementsMatch(t, []string{"lineA", "lineB", "lineC", "lineD"}, lines) + }) + + t.Run("label-names", func(t *testing.T) { + resp, err := cliRead.LabelNames(context.Background()) + require.NoError(t, err) + assert.ElementsMatch(t, []string{"job"}, resp) + }) + + t.Run("label-values", func(t *testing.T) { + resp, err := cliRead.LabelValues(context.Background(), "job") + require.NoError(t, err) + assert.ElementsMatch(t, []string{"fake"}, resp) + }) +} diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index 4ac0eedf37f97..a6ea1095f0a4b 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -71,6 +71,8 @@ type Config struct { UseBufferedLogger bool `yaml:"use_buffered_logger"` UseSyncLogger bool `yaml:"use_sync_logger"` + LegacyReadTarget bool `yaml:"legacy_read_target,omitempty"` + Common common.Config `yaml:"common,omitempty"` Server server.Config `yaml:"server,omitempty"` InternalServer internalserver.Config `yaml:"internal_server,omitempty"` @@ -114,6 +116,10 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&c.UseBufferedLogger, "log.use-buffered", true, "Uses a line-buffered logger to improve performance.") f.BoolVar(&c.UseSyncLogger, "log.use-sync", true, "Forces all lines logged to hold a mutex to serialize writes.") + //TODO(trevorwhitney): flip this to false with Loki 3.0 + f.BoolVar(&c.LegacyReadTarget, "legacy-read-mode", true, "Set to false to disable the legacy read mode and use new scalable mode with 3rd backend target. 
"+ + "The default will be flipped to false in the next Loki release.") + c.registerServerFlagsWithChangedDefaultValues(f) c.Common.RegisterFlags(f) c.Distributor.RegisterFlags(f) @@ -229,6 +235,13 @@ func (c *Config) Validate() error { return err } + // Honor the legacy scalable deployment topology + if c.LegacyReadTarget { + if c.isModuleEnabled(Backend) { + return fmt.Errorf("invalid target, cannot run backend target with legacy read mode") + } + } + return nil } @@ -586,6 +599,7 @@ func (t *Loki) setupModuleManager() error { mm.RegisterModule(All, nil) mm.RegisterModule(Read, nil) mm.RegisterModule(Write, nil) + mm.RegisterModule(Backend, nil) // Add dependencies deps := map[string][]string{ @@ -608,28 +622,33 @@ func (t *Loki) setupModuleManager() error { IngesterQuerier: {Ring}, IndexGatewayRing: {RuntimeConfig, Server, MemberlistKV}, All: {QueryScheduler, QueryFrontend, Querier, Ingester, Distributor, Ruler, Compactor}, - Read: {QueryScheduler, QueryFrontend, Querier, Ruler, Compactor, IndexGateway}, + Read: {QueryFrontend, Querier}, Write: {Ingester, Distributor}, + Backend: {QueryScheduler, Ruler, Compactor, IndexGateway}, MemberlistKV: {Server}, } - // Add IngesterQuerier as a dependency for store when target is either querier, ruler, or read. - if t.Cfg.isModuleEnabled(Querier) || t.Cfg.isModuleEnabled(Ruler) || t.Cfg.isModuleEnabled(Read) { + // Add IngesterQuerier as a dependency for store when target is either querier, ruler, read, or backend. 
+ if t.Cfg.isModuleEnabled(Querier) || t.Cfg.isModuleEnabled(Ruler) || t.Cfg.isModuleEnabled(Read) || t.Cfg.isModuleEnabled(Backend) { deps[Store] = append(deps[Store], IngesterQuerier) } // If the query scheduler and querier are running together, make sure the scheduler goes // first to initialize the ring that will also be used by the querier - if (t.Cfg.isModuleEnabled(Querier) && t.Cfg.isModuleEnabled(QueryScheduler)) || t.Cfg.isModuleEnabled(Read) || t.Cfg.isModuleEnabled(All) { + if (t.Cfg.isModuleEnabled(Querier) && t.Cfg.isModuleEnabled(QueryScheduler)) || t.Cfg.isModuleEnabled(All) { deps[Querier] = append(deps[Querier], QueryScheduler) } // If the query scheduler and query frontend are running together, make sure the scheduler goes // first to initialize the ring that will also be used by the query frontend - if (t.Cfg.isModuleEnabled(QueryFrontend) && t.Cfg.isModuleEnabled(QueryScheduler)) || t.Cfg.isModuleEnabled(Read) || t.Cfg.isModuleEnabled(All) { + if (t.Cfg.isModuleEnabled(QueryFrontend) && t.Cfg.isModuleEnabled(QueryScheduler)) || t.Cfg.isModuleEnabled(All) { deps[QueryFrontend] = append(deps[QueryFrontend], QueryScheduler) } + if t.Cfg.LegacyReadTarget { + deps[Read] = append(deps[Read], QueryScheduler, Ruler, Compactor, IndexGateway) + } + if t.Cfg.InternalServer.Enable { for key, ds := range deps { idx := -1 diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index 467ec947ce48e..06fe144cf6bd4 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -99,6 +99,7 @@ const ( All string = "all" Read string = "read" Write string = "write" + Backend string = "backend" UsageReport string = "usage-report" ) @@ -543,7 +544,7 @@ func (t *Loki) initStore() (_ services.Service, err error) { t.Cfg.StorageConfig.TSDBShipperConfig.Mode = indexshipper.ModeWriteOnly t.Cfg.StorageConfig.TSDBShipperConfig.IngesterDBRetainPeriod = shipperQuerierIndexUpdateDelay(t.Cfg.StorageConfig.IndexCacheValidity, t.Cfg.StorageConfig.TSDBShipperConfig.ResyncInterval) - 
case t.Cfg.isModuleEnabled(Querier), t.Cfg.isModuleEnabled(Ruler), t.Cfg.isModuleEnabled(Read), t.isModuleActive(IndexGateway): + case t.Cfg.isModuleEnabled(Querier), t.Cfg.isModuleEnabled(Ruler), t.Cfg.isModuleEnabled(Read), t.Cfg.isModuleEnabled(Backend), t.isModuleActive(IndexGateway): // We do not want query to do any updates to index t.Cfg.StorageConfig.BoltDBShipperConfig.Mode = indexshipper.ModeReadOnly t.Cfg.StorageConfig.TSDBShipperConfig.Mode = indexshipper.ModeReadOnly @@ -592,12 +593,13 @@ func (t *Loki) initStore() (_ services.Service, err error) { // Only queriers should use the AsyncStore, it should never be used in ingesters. asyncStore = true - if t.Cfg.isModuleEnabled(Read) { - // we want to use the actual storage when running the index-gateway, so we remove the Addr from the config + // The legacy Read target includes the index gateway, so disable the index-gateway client in that configuration. + if t.Cfg.LegacyReadTarget && t.Cfg.isModuleEnabled(Read) { t.Cfg.StorageConfig.BoltDBShipperConfig.IndexGatewayClientConfig.Disabled = true t.Cfg.StorageConfig.TSDBShipperConfig.IndexGatewayClientConfig.Disabled = true } - case t.Cfg.isModuleEnabled(IndexGateway): + // Backend target includes the index gateway + case t.Cfg.isModuleEnabled(IndexGateway), t.Cfg.isModuleEnabled(Backend): // we want to use the actual storage when running the index-gateway, so we remove the Addr from the config t.Cfg.StorageConfig.BoltDBShipperConfig.IndexGatewayClientConfig.Disabled = true t.Cfg.StorageConfig.TSDBShipperConfig.IndexGatewayClientConfig.Disabled = true @@ -711,7 +713,8 @@ func (t *Loki) supportIndexDeleteRequest() bool { // compactorAddress returns the configured address of the compactor. // It prefers grpc address over http. 
If the address is grpc then the bool would be true otherwise false func (t *Loki) compactorAddress() (string, bool, error) { - if t.Cfg.isModuleEnabled(All) || t.Cfg.isModuleEnabled(Read) { + legacyReadMode := t.Cfg.LegacyReadTarget && t.Cfg.isModuleEnabled(Read) + if t.Cfg.isModuleEnabled(All) || legacyReadMode || t.Cfg.isModuleEnabled(Backend) { // In single binary or read modes, this module depends on Server return fmt.Sprintf("%s:%d", t.Cfg.Server.GRPCListenAddress, t.Cfg.Server.GRPCListenPort), true, nil } @@ -859,7 +862,8 @@ func (t *Loki) initRulerStorage() (_ services.Service, err error) { // unfortunately there is no way to generate a "default" config and compare default against actual // to determine if it's unconfigured. the following check, however, correctly tests this. // Single binary integration tests will break if this ever drifts - if (t.Cfg.isModuleEnabled(All) || t.Cfg.isModuleEnabled(Read)) && t.Cfg.Ruler.StoreConfig.IsDefaults() { + legacyReadMode := t.Cfg.LegacyReadTarget && t.Cfg.isModuleEnabled(Read) + if (t.Cfg.isModuleEnabled(All) || legacyReadMode || t.Cfg.isModuleEnabled(Backend)) && t.Cfg.Ruler.StoreConfig.IsDefaults() { level.Info(util_log.Logger).Log("msg", "Ruler storage is not configured; ruler will not be started.") return } @@ -1048,9 +1052,10 @@ func (t *Loki) initIndexGateway() (services.Service, error) { } func (t *Loki) initIndexGatewayRing() (_ services.Service, err error) { - // IndexGateway runs by default on read target, and should always assume + // IndexGateway runs by default on legacy read and backend targets, and should always assume // ring mode when run in this way. 
- if t.isModuleActive(Read) { + legacyReadMode := t.Cfg.LegacyReadTarget && t.isModuleActive(Read) + if legacyReadMode || t.isModuleActive(Backend) { t.Cfg.IndexGateway.Mode = indexgateway.RingMode } @@ -1063,7 +1068,7 @@ func (t *Loki) initIndexGatewayRing() (_ services.Service, err error) { t.Cfg.IndexGateway.Ring.ListenPort = t.Cfg.Server.GRPCListenPort managerMode := indexgateway.ClientMode - if t.Cfg.isModuleEnabled(IndexGateway) || t.Cfg.isModuleEnabled(Read) { + if t.Cfg.isModuleEnabled(IndexGateway) || legacyReadMode || t.Cfg.isModuleEnabled(Backend) { managerMode = indexgateway.ServerMode } rm, err := indexgateway.NewRingManager(managerMode, t.Cfg.IndexGateway, util_log.Logger, prometheus.DefaultRegisterer) diff --git a/pkg/loki/modules_test.go b/pkg/loki/modules_test.go index dad7663c343f9..ddc45a4755b51 100644 --- a/pkg/loki/modules_test.go +++ b/pkg/loki/modules_test.go @@ -1,6 +1,7 @@ package loki import ( + "fmt" "path/filepath" "testing" "time" @@ -161,33 +162,34 @@ func TestMultiKVSetup(t *testing.T) { } } -func TestIndexGatewayRingMode_when_TargetIsRead(t *testing.T) { +func TestIndexGatewayRingMode_when_TargetIsLegacyReadOrBackend(t *testing.T) { dir := t.TempDir() - t.Run("IndexGateway always set to ring mode when running as part of read target", func(t *testing.T) { - cfg := minimalWorkingConfig(t, dir, Read) - c, err := New(cfg) - require.NoError(t, err) - - services, err := c.ModuleManager.InitModuleServices(Read) - defer func() { - for _, service := range services { - service.StopAsync() - } - }() - - require.NoError(t, err) - assert.Equal(t, c.Cfg.IndexGateway.Mode, indexgateway.RingMode) - }) + type ringModeTestCase struct { + name string + transformer func(cfg *Config) + target string + } - t.Run("When IndexGateway is running independent of Read target", func(t *testing.T) { - t.Run("IndexGateway respects configured simple mode", func(t *testing.T) { - cfg := minimalWorkingConfig(t, dir, IndexGatewayRing) - cfg.IndexGateway.Mode = 
indexgateway.SimpleMode + for _, tc := range []ringModeTestCase{ + { + name: "leagcy read", + target: Read, + }, + { + name: "backend", + target: Backend, + transformer: func(cfg *Config) { + cfg.LegacyReadTarget = false + }, + }, + } { + t.Run(fmt.Sprintf("IndexGateway always set to ring mode when running as part of %s", tc.name), func(t *testing.T) { + cfg := minimalWorkingConfig(t, dir, tc.target, tc.transformer) c, err := New(cfg) require.NoError(t, err) - services, err := c.ModuleManager.InitModuleServices(IndexGateway) + services, err := c.ModuleManager.InitModuleServices(Read) defer func() { for _, service := range services { service.StopAsync() @@ -195,33 +197,71 @@ func TestIndexGatewayRingMode_when_TargetIsRead(t *testing.T) { }() require.NoError(t, err) - assert.Equal(t, c.Cfg.IndexGateway.Mode, indexgateway.SimpleMode) + assert.Equal(t, c.Cfg.IndexGateway.Mode, indexgateway.RingMode) }) + } - t.Run("IndexGateway respects configured ring mode", func(t *testing.T) { - cfg := minimalWorkingConfig(t, dir, IndexGatewayRing) - cfg.IndexGateway.Mode = indexgateway.RingMode - c, err := New(cfg) - require.NoError(t, err) - - services, err := c.ModuleManager.InitModuleServices(IndexGateway) - defer func() { - for _, service := range services { - service.StopAsync() - } - }() + type indexModeTestCase struct { + name string + target string + transformer func(cfg *Config) + } - require.NoError(t, err) - assert.Equal(t, c.Cfg.IndexGateway.Mode, indexgateway.RingMode) + for _, tc := range []indexModeTestCase{ + { + name: "index gateway", + target: IndexGateway, + }, + { + name: "new read target", + target: Read, + transformer: func(cfg *Config) { + cfg.LegacyReadTarget = false + }, + }, + } { + t.Run(fmt.Sprintf("When target is %s", tc.name), func(t *testing.T) { + t.Run("IndexGateway config respects configured simple mode", func(t *testing.T) { + cfg := minimalWorkingConfig(t, dir, IndexGatewayRing, tc.transformer) + cfg.IndexGateway.Mode = indexgateway.SimpleMode + 
c, err := New(cfg) + require.NoError(t, err) + + services, err := c.ModuleManager.InitModuleServices(IndexGateway) + defer func() { + for _, service := range services { + service.StopAsync() + } + }() + + require.NoError(t, err) + assert.Equal(t, c.Cfg.IndexGateway.Mode, indexgateway.SimpleMode) + }) + + t.Run("IndexGateway config respects configured ring mode", func(t *testing.T) { + cfg := minimalWorkingConfig(t, dir, IndexGatewayRing) + cfg.IndexGateway.Mode = indexgateway.RingMode + c, err := New(cfg) + require.NoError(t, err) + + services, err := c.ModuleManager.InitModuleServices(IndexGateway) + defer func() { + for _, service := range services { + service.StopAsync() + } + }() + + require.NoError(t, err) + assert.Equal(t, c.Cfg.IndexGateway.Mode, indexgateway.RingMode) + }) }) - - }) + } } -func TestIndexGatewayClientConfig_when_TargetIsQuerierOrRead(t *testing.T) { +func TestIndexGatewayClientConfig(t *testing.T) { dir := t.TempDir() - t.Run("IndexGateway client is disabled when running querier target", func(t *testing.T) { + t.Run("IndexGateway client is enabled when running querier target", func(t *testing.T) { cfg := minimalWorkingConfig(t, dir, Querier) cfg.SchemaConfig.Configs[0].IndexType = config.BoltDBShipperType cfg.SchemaConfig.Configs[0].IndexTables.Period = 24 * time.Hour @@ -240,8 +280,56 @@ func TestIndexGatewayClientConfig_when_TargetIsQuerierOrRead(t *testing.T) { assert.False(t, c.Cfg.StorageConfig.TSDBShipperConfig.IndexGatewayClientConfig.Disabled) }) - t.Run("IndexGateway client is endabled when running read target", func(t *testing.T) { - cfg := minimalWorkingConfig(t, dir, Read) + t.Run("IndexGateway client is disabled when running legacy read target", func(t *testing.T) { + cfg := minimalWorkingConfig(t, dir, Read, func(cfg *Config) { + cfg.LegacyReadTarget = true + }) + cfg.SchemaConfig.Configs[0].IndexType = config.BoltDBShipperType + cfg.SchemaConfig.Configs[0].IndexTables.Period = 24 * time.Hour + 
cfg.CompactorConfig.SharedStoreType = config.StorageTypeFileSystem + cfg.CompactorConfig.WorkingDirectory = dir + c, err := New(cfg) + require.NoError(t, err) + + services, err := c.ModuleManager.InitModuleServices(Read) + defer func() { + for _, service := range services { + service.StopAsync() + } + }() + + require.NoError(t, err) + assert.True(t, c.Cfg.StorageConfig.BoltDBShipperConfig.IndexGatewayClientConfig.Disabled) + assert.True(t, c.Cfg.StorageConfig.TSDBShipperConfig.IndexGatewayClientConfig.Disabled) + }) + + t.Run("IndexGateway client is enabled when running new read target", func(t *testing.T) { + cfg := minimalWorkingConfig(t, dir, Read, func(cfg *Config) { + cfg.LegacyReadTarget = false + }) + cfg.SchemaConfig.Configs[0].IndexType = config.BoltDBShipperType + cfg.SchemaConfig.Configs[0].IndexTables.Period = 24 * time.Hour + cfg.CompactorConfig.SharedStoreType = config.StorageTypeFileSystem + cfg.CompactorConfig.WorkingDirectory = dir + c, err := New(cfg) + require.NoError(t, err) + + services, err := c.ModuleManager.InitModuleServices(Read) + defer func() { + for _, service := range services { + service.StopAsync() + } + }() + + require.NoError(t, err) + assert.False(t, c.Cfg.StorageConfig.BoltDBShipperConfig.IndexGatewayClientConfig.Disabled) + assert.False(t, c.Cfg.StorageConfig.TSDBShipperConfig.IndexGatewayClientConfig.Disabled) + }) + + t.Run("IndexGateway client is disabled when running backend target", func(t *testing.T) { + cfg := minimalWorkingConfig(t, dir, Backend, func(cfg *Config) { + cfg.LegacyReadTarget = false + }) cfg.SchemaConfig.Configs[0].IndexType = config.BoltDBShipperType cfg.SchemaConfig.Configs[0].IndexTables.Period = 24 * time.Hour cfg.CompactorConfig.SharedStoreType = config.StorageTypeFileSystem @@ -264,7 +352,7 @@ func TestIndexGatewayClientConfig_when_TargetIsQuerierOrRead(t *testing.T) { const localhost = "localhost" -func minimalWorkingConfig(t *testing.T, dir, target string) Config { +func minimalWorkingConfig(t 
*testing.T, dir, target string, cfgTransformers ...func(*Config)) Config { prepareGlobalMetricsRegistry(t) cfg := Config{} @@ -315,5 +403,12 @@ func minimalWorkingConfig(t *testing.T, dir, target string) Config { cfg.Ruler.Config.StoreConfig.Local.Directory = dir cfg.Common.CompactorAddress = "http://localhost:0" + + for _, transformer := range cfgTransformers { + if transformer != nil { + transformer(&cfg) + } + } + return cfg } From 9d5665e34aba3fb01cd17aa5c16c562af0ea8a06 Mon Sep 17 00:00:00 2001 From: Charlie N Date: Tue, 13 Dec 2022 00:31:28 +0100 Subject: [PATCH 07/62] [Jsonnet] Fix memberlist when using a stateful ruler (#6662) Signed-off-by: Whyeasy **What this PR does / why we need it**: When using stateful rulers and the memberlist as its ring, generating the resources with jsonnet throws in an error regarding the `ruler_deployment`. The `ruler_statefulset` didn't receive the meberlist labels either. **Checklist** - [ ] Documentation added - [ ] Tests updated - [x] Is this an important fix or new feature? Add an entry in the `CHANGELOG.md`. - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/upgrading/_index.md` Signed-off-by: Whyeasy --- CHANGELOG.md | 1 + production/ksonnet/loki/memberlist.libsonnet | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81bda247ef241..37c57665bfd36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -147,6 +147,7 @@ Check the history of the branch FIXME. #### Jsonnet * [6189](https://github.com/grafana/loki/pull/6189) **irizzant**: Add creation of a `ServiceMonitor` object for Prometheus scraping through configuration parameter `create_service_monitor`. Simplify mixin usage by adding (https://github.com/prometheus-operator/kube-prometheus) library. +* [6662](https://github.com/grafana/loki/pull/6662) **Whyeasy**: Fixes memberlist error when using a stateful ruler. 
### Notes diff --git a/production/ksonnet/loki/memberlist.libsonnet b/production/ksonnet/loki/memberlist.libsonnet index 6a1b1f2ff8e41..df483ea06aeb7 100644 --- a/production/ksonnet/loki/memberlist.libsonnet +++ b/production/ksonnet/loki/memberlist.libsonnet @@ -124,7 +124,8 @@ index_gateway_statefulset+: if !$._config.memberlist_ring_enabled then {} else gossipLabel, ingester_statefulset+: if !$._config.memberlist_ring_enabled then {} else gossipLabel, query_scheduler_deployment+: if !$._config.memberlist_ring_enabled then {} else gossipLabel, - ruler_deployment+: if !$._config.memberlist_ring_enabled || !$._config.ruler_enabled then {} else gossipLabel, + ruler_deployment+: if !$._config.memberlist_ring_enabled || !$._config.ruler_enabled || $._config.stateful_rulers then {} else gossipLabel, + ruler_statefulset+: if !$._config.memberlist_ring_enabled || !$._config.ruler_enabled || !$._config.stateful_rulers then {} else gossipLabel, // Headless service (= no assigned IP, DNS returns all targets instead) pointing to gossip network members. gossip_ring_service: From 79e0d96978fcfa218a37b3731d5135599962ff6b Mon Sep 17 00:00:00 2001 From: Tobias Wolf Date: Tue, 13 Dec 2022 18:01:40 +0100 Subject: [PATCH 08/62] Helm: Decouple the Canary from self-monitoring (#7757) **What this PR does / why we need it**: The Canary has no dependency on the Grafana Agent-based self-monitoring. This PR decouples that dependency, allowing the canary to be deployed with the other self-monitoring features. 
**Which issue(s) this PR fixes**: Fixes #7364 --- docs/sources/installation/helm/concepts.md | 2 +- .../helm/monitor-and-alert/index.md | 6 +- docs/sources/installation/helm/reference.md | 218 +++++++++--------- production/helm/loki/CHANGELOG.md | 6 + production/helm/loki/Chart.yaml | 2 +- production/helm/loki/README.md | 2 +- production/helm/loki/ci/ingress-values.yaml | 5 +- .../loki/templates/loki-canary/_helpers.tpl | 2 +- .../loki/templates/loki-canary/daemonset.yaml | 2 +- .../loki/templates/loki-canary/service.yaml | 2 +- .../templates/loki-canary/serviceaccount.yaml | 4 +- .../loki/templates/tests/test-canary.yaml | 2 +- production/helm/loki/templates/validate.yaml | 12 +- production/helm/loki/values.yaml | 56 ++--- 14 files changed, 159 insertions(+), 162 deletions(-) diff --git a/docs/sources/installation/helm/concepts.md b/docs/sources/installation/helm/concepts.md index c4d2adf7e7ad0..9a2191640ddae 100644 --- a/docs/sources/installation/helm/concepts.md +++ b/docs/sources/installation/helm/concepts.md @@ -25,7 +25,7 @@ This chart includes dashboards for monitoring Loki. These require the scrape con ## Canary -This chart installs the [canary](../../../operations/loki-canary) and its alerts by default. This is another tool to verify the Loki deployment is in a healthy state. It can be disabled with `monitoring.selfMonitoring.lokiCanary.enabled=false`. +This chart installs the [canary](../../../operations/loki-canary) and its alerts by default. This is another tool to verify the Loki deployment is in a healthy state. It can be disabled with `monitoring.lokiCanary.enabled=false`. ## Gateway diff --git a/docs/sources/installation/helm/monitor-and-alert/index.md b/docs/sources/installation/helm/monitor-and-alert/index.md index cde667a0e4e3a..c33082133acbc 100644 --- a/docs/sources/installation/helm/monitor-and-alert/index.md +++ b/docs/sources/installation/helm/monitor-and-alert/index.md @@ -119,8 +119,6 @@ Rules and alerts are automatically deployed. ... 
selfMonitoring: enabled: true - lokiCanary: - enabled: false logsInstance: clients: - url: @@ -130,7 +128,9 @@ Rules and alerts are automatically deployed. key: username password: name: primary-credentials-logs - key: password + key: password + lokiCanary: + enabled: false ``` 5. Install the `Loki meta-motoring` connection on Grafana Cloud. diff --git a/docs/sources/installation/helm/reference.md b/docs/sources/installation/helm/reference.md index 9c97481811dd0..c84a51484604d 100644 --- a/docs/sources/installation/helm/reference.md +++ b/docs/sources/installation/helm/reference.md @@ -1730,268 +1730,268 @@ null - monitoring.rules.additionalGroups - list - Additional groups to add to the rules file + monitoring.lokiCanary.annotations + object + Additional annotations for the `loki-canary` Daemonset
-[]
+{}
 
- monitoring.rules.alerting + monitoring.lokiCanary.enabled bool - Include alerting rules +
 true
 
- monitoring.rules.annotations - object - Additional annotations for the rules PrometheusRule resource + monitoring.lokiCanary.extraArgs + list + Additional CLI arguments for the `loki-canary' command
-{}
+[]
 
- monitoring.rules.enabled - bool - If enabled, create PrometheusRule resource with Loki recording rules + monitoring.lokiCanary.extraEnv + list + Environment variables to add to the canary pods
-true
+[]
 
- monitoring.rules.labels - object - Additional labels for the rules PrometheusRule resource + monitoring.lokiCanary.extraEnvFrom + list + Environment variables from secrets or configmaps to add to the canary pods
-{}
+[]
 
- monitoring.rules.namespace - string - Alternative namespace to create recording rules PrometheusRule resource in + monitoring.lokiCanary.image + object + Image to use for loki canary
-null
+{
+  "pullPolicy": "IfNotPresent",
+  "registry": "docker.io",
+  "repository": "grafana/loki-canary",
+  "tag": null
+}
 
- monitoring.selfMonitoring.enabled - bool - + monitoring.lokiCanary.image.pullPolicy + string + Docker image pull policy
-true
+"IfNotPresent"
 
- monitoring.selfMonitoring.grafanaAgent.annotations - object - Grafana Agent annotations + monitoring.lokiCanary.image.registry + string + The Docker registry
-{}
+"docker.io"
 
- monitoring.selfMonitoring.grafanaAgent.enableConfigReadAPI - bool - Enable the config read api on port 8080 of the agent + monitoring.lokiCanary.image.repository + string + Docker image repository
-false
+"grafana/loki-canary"
 
- monitoring.selfMonitoring.grafanaAgent.installOperator - bool - Controls whether to install the Grafana Agent Operator and its CRDs. Note that helm will not install CRDs if this flag is enabled during an upgrade. In that case install the CRDs manually from https://github.com/grafana/agent/tree/main/production/operator/crds + monitoring.lokiCanary.image.tag + string + Overrides the image tag whose default is the chart's appVersion
-true
+null
 
- monitoring.selfMonitoring.grafanaAgent.labels + monitoring.lokiCanary.nodeSelector object - Additional Grafana Agent labels + Node selector for canary pods
 {}
 
- monitoring.selfMonitoring.grafanaAgent.namespace - string - Alternative namespace for Grafana Agent resources -
-null
-
- - - - monitoring.selfMonitoring.logsInstance.annotations + monitoring.lokiCanary.resources object - LogsInstance annotations + Resource requests and limits for the canary
 {}
 
- monitoring.selfMonitoring.logsInstance.clients - string - Additional clients for remote write + monitoring.lokiCanary.tolerations + list + Tolerations for canary pods
-null
+[]
 
- monitoring.selfMonitoring.logsInstance.labels - object - Additional LogsInstance labels + monitoring.rules.additionalGroups + list + Additional groups to add to the rules file
-{}
+[]
 
- monitoring.selfMonitoring.logsInstance.namespace - string - Alternative namespace for LogsInstance resources + monitoring.rules.alerting + bool + Include alerting rules
-null
+true
 
- monitoring.selfMonitoring.lokiCanary.annotations + monitoring.rules.annotations object - Additional annotations for the `loki-canary` Daemonset + Additional annotations for the rules PrometheusRule resource
 {}
 
- monitoring.selfMonitoring.lokiCanary.enabled + monitoring.rules.enabled bool - + If enabled, create PrometheusRule resource with Loki recording rules
 true
 
- monitoring.selfMonitoring.lokiCanary.extraArgs - list - Additional CLI arguments for the `loki-canary' command + monitoring.rules.labels + object + Additional labels for the rules PrometheusRule resource
-[]
+{}
 
- monitoring.selfMonitoring.lokiCanary.extraEnv - list - Environment variables to add to the canary pods + monitoring.rules.namespace + string + Alternative namespace to create recording rules PrometheusRule resource in
-[]
+null
 
- monitoring.selfMonitoring.lokiCanary.extraEnvFrom - list - Environment variables from secrets or configmaps to add to the canary pods + monitoring.selfMonitoring.enabled + bool +
-[]
+true
 
- monitoring.selfMonitoring.lokiCanary.image + monitoring.selfMonitoring.grafanaAgent.annotations object - Image to use for loki canary + Grafana Agent annotations
-{
-  "pullPolicy": "IfNotPresent",
-  "registry": "docker.io",
-  "repository": "grafana/loki-canary",
-  "tag": null
-}
+{}
 
- monitoring.selfMonitoring.lokiCanary.image.pullPolicy - string - Docker image pull policy + monitoring.selfMonitoring.grafanaAgent.enableConfigReadAPI + bool + Enable the config read api on port 8080 of the agent
-"IfNotPresent"
+false
 
- monitoring.selfMonitoring.lokiCanary.image.registry - string - The Docker registry + monitoring.selfMonitoring.grafanaAgent.installOperator + bool + Controls whether to install the Grafana Agent Operator and its CRDs. Note that helm will not install CRDs if this flag is enabled during an upgrade. In that case install the CRDs manually from https://github.com/grafana/agent/tree/main/production/operator/crds
-"docker.io"
+true
 
- monitoring.selfMonitoring.lokiCanary.image.repository - string - Docker image repository + monitoring.selfMonitoring.grafanaAgent.labels + object + Additional Grafana Agent labels
-"grafana/loki-canary"
+{}
 
- monitoring.selfMonitoring.lokiCanary.image.tag + monitoring.selfMonitoring.grafanaAgent.namespace string - Overrides the image tag whose default is the chart's appVersion + Alternative namespace for Grafana Agent resources
 null
 
- monitoring.selfMonitoring.lokiCanary.nodeSelector + monitoring.selfMonitoring.logsInstance.annotations object - Node selector for canary pods + LogsInstance annotations
 {}
 
- monitoring.selfMonitoring.lokiCanary.resources + monitoring.selfMonitoring.logsInstance.clients + string + Additional clients for remote write +
+null
+
+ + + + monitoring.selfMonitoring.logsInstance.labels object - Resource requests and limits for the canary + Additional LogsInstance labels
 {}
 
- monitoring.selfMonitoring.lokiCanary.tolerations - list - Tolerations for canary pods + monitoring.selfMonitoring.logsInstance.namespace + string + Alternative namespace for LogsInstance resources
-[]
+null
 
diff --git a/production/helm/loki/CHANGELOG.md b/production/helm/loki/CHANGELOG.md index 650f2825b469a..4288f806388b6 100644 --- a/production/helm/loki/CHANGELOG.md +++ b/production/helm/loki/CHANGELOG.md @@ -11,6 +11,12 @@ Entries should be ordered as follows: Entries should include a reference to the pull request that introduced the change. +## 3.7.0 + +**BREAKING**: Configuration values for Loki Canary moved from `monitoring.selfMonitoring.lokiCanary` to `monitoring.lokiCanary` + +- [ENHANCEMENT] Decouple the Loki Canary from the self-monitoring setup, which adds an unnecessary dependency on the Grafana Agent Operator. + ## 3.6.1 - [BUGFIX] Fix regression that produced empty PrometheusRule alerts resource diff --git a/production/helm/loki/Chart.yaml b/production/helm/loki/Chart.yaml index 4e54ac4587946..a96aa37680d05 100644 --- a/production/helm/loki/Chart.yaml +++ b/production/helm/loki/Chart.yaml @@ -4,7 +4,7 @@ name: loki description: Helm chart for Grafana Loki in simple, scalable mode type: application appVersion: 2.7.0 -version: 3.6.1 +version: 3.7.0 home: https://grafana.github.io/helm-charts sources: - https://github.com/grafana/loki diff --git a/production/helm/loki/README.md b/production/helm/loki/README.md index 7ed7d29c08c7c..e2117d047ea13 100644 --- a/production/helm/loki/README.md +++ b/production/helm/loki/README.md @@ -1,6 +1,6 @@ # loki -![Version: 3.6.1](https://img.shields.io/badge/Version-3.6.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.7.0](https://img.shields.io/badge/AppVersion-2.7.0-informational?style=flat-square) +![Version: 3.7.0](https://img.shields.io/badge/Version-3.7.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.7.0](https://img.shields.io/badge/AppVersion-2.7.0-informational?style=flat-square) Helm chart for Grafana Loki in 
simple, scalable mode diff --git a/production/helm/loki/ci/ingress-values.yaml b/production/helm/loki/ci/ingress-values.yaml index b78242f0c1add..23233b487cb9e 100644 --- a/production/helm/loki/ci/ingress-values.yaml +++ b/production/helm/loki/ci/ingress-values.yaml @@ -16,8 +16,7 @@ read: write: replicas: 1 monitoring: - selfMonitoring: - lokiCanary: - enabled: false + lokiCanary: + enabled: false test: enabled: false diff --git a/production/helm/loki/templates/loki-canary/_helpers.tpl b/production/helm/loki/templates/loki-canary/_helpers.tpl index 6ef5064c3cf8a..28ce60d10cc50 100644 --- a/production/helm/loki/templates/loki-canary/_helpers.tpl +++ b/production/helm/loki/templates/loki-canary/_helpers.tpl @@ -25,7 +25,7 @@ app.kubernetes.io/component: canary Docker image name for loki-canary */}} {{- define "loki-canary.image" -}} -{{- $dict := dict "service" .Values.monitoring.selfMonitoring.lokiCanary.image "global" .Values.global.image "defaultVersion" .Chart.AppVersion -}} +{{- $dict := dict "service" .Values.monitoring.lokiCanary.image "global" .Values.global.image "defaultVersion" .Chart.AppVersion -}} {{- include "loki.baseImage" $dict -}} {{- end -}} diff --git a/production/helm/loki/templates/loki-canary/daemonset.yaml b/production/helm/loki/templates/loki-canary/daemonset.yaml index 8540d813c222e..0c7b5a34b42d7 100644 --- a/production/helm/loki/templates/loki-canary/daemonset.yaml +++ b/production/helm/loki/templates/loki-canary/daemonset.yaml @@ -1,4 +1,4 @@ -{{- with .Values.monitoring.selfMonitoring.lokiCanary -}} +{{- with .Values.monitoring.lokiCanary -}} {{- if .enabled -}} --- apiVersion: apps/v1 diff --git a/production/helm/loki/templates/loki-canary/service.yaml b/production/helm/loki/templates/loki-canary/service.yaml index ef12be38e9a4b..5bc2538927ba8 100644 --- a/production/helm/loki/templates/loki-canary/service.yaml +++ b/production/helm/loki/templates/loki-canary/service.yaml @@ -1,4 +1,4 @@ -{{- if 
.Values.monitoring.selfMonitoring.lokiCanary.enabled -}} +{{- if .Values.monitoring.lokiCanary.enabled -}} --- apiVersion: v1 kind: Service diff --git a/production/helm/loki/templates/loki-canary/serviceaccount.yaml b/production/helm/loki/templates/loki-canary/serviceaccount.yaml index 4c19925161275..5c2973bedf106 100644 --- a/production/helm/loki/templates/loki-canary/serviceaccount.yaml +++ b/production/helm/loki/templates/loki-canary/serviceaccount.yaml @@ -1,4 +1,4 @@ -{{- if .Values.monitoring.selfMonitoring.lokiCanary.enabled -}} +{{- if .Values.monitoring.lokiCanary.enabled -}} --- apiVersion: v1 kind: ServiceAccount @@ -7,7 +7,7 @@ metadata: labels: {{- include "loki-canary.labels" . | nindent 4 }} annotations: - {{- with .Values.monitoring.selfMonitoring.lokiCanary.annotations }} + {{- with .Values.monitoring.lokiCanary.annotations }} {{- toYaml . | nindent 4 }} {{- end }} automountServiceAccountToken: {{ .Values.serviceAccount.automountServiceAccountToken }} diff --git a/production/helm/loki/templates/tests/test-canary.yaml b/production/helm/loki/templates/tests/test-canary.yaml index ace291cb0e69e..246fb4dfdc4c3 100644 --- a/production/helm/loki/templates/tests/test-canary.yaml +++ b/production/helm/loki/templates/tests/test-canary.yaml @@ -1,5 +1,5 @@ {{- with .Values.test }} -{{- if and .enabled $.Values.monitoring.selfMonitoring.enabled $.Values.monitoring.selfMonitoring.lokiCanary.enabled }} +{{- if and .enabled $.Values.monitoring.selfMonitoring.enabled $.Values.monitoring.lokiCanary.enabled }} --- apiVersion: v1 kind: Pod diff --git a/production/helm/loki/templates/validate.yaml b/production/helm/loki/templates/validate.yaml index cd1e85b34edeb..2ea355d0c2563 100644 --- a/production/helm/loki/templates/validate.yaml +++ b/production/helm/loki/templates/validate.yaml @@ -2,22 +2,14 @@ {{- fail "Top level 'config' is not allowed. Most common configuration sections are exposed under the `loki` section. 
If you need to override the whole config, provide the configuration as a string that can contain template expressions under `loki.config`. Alternatively, you can provide the configuration as an external secret." }} {{- end }} -{{ with .Values.monitoring.selfMonitoring}} - -{{- if and (not .enabled) .lokiCanary.enabled }} -{{- fail "Loki Canary requires self monitoring to also be enabled"}} -{{- end }} - -{{- if and (not .enabled) $.Values.test.enabled }} +{{- if and (not .Values.monitoring.selfMonitoring.enabled) .Values.test.enabled }} {{- fail "Helm test requires self monitoring to be enabled"}} {{- end }} -{{- if and (not .lokiCanary.enabled) $.Values.test.enabled }} +{{- if and (not .Values.monitoring.lokiCanary.enabled) .Values.test.enabled }} {{- fail "Helm test requires the Loki Canary to be enabled"}} {{- end }} -{{- end}} - {{- if and .Values.test.enabled (not .Values.test.prometheusAddress) }} {{- fail "Helm test requires a prometheusAddress for an instance scraping the Loki canary's metrics"}} {{- end }} diff --git a/production/helm/loki/values.yaml b/production/helm/loki/values.yaml index 631905342bf2d..caac7c937c544 100644 --- a/production/helm/loki/values.yaml +++ b/production/helm/loki/values.yaml @@ -697,34 +697,34 @@ monitoring: # -- Additional clients for remote write clients: null - # The Loki canary pushes logs to and queries from this loki installation to test - # that it's working correctly - lokiCanary: - enabled: true - # -- Additional annotations for the `loki-canary` Daemonset - annotations: {} - # -- Additional CLI arguments for the `loki-canary' command - extraArgs: [] - # -- Environment variables to add to the canary pods - extraEnv: [] - # -- Environment variables from secrets or configmaps to add to the canary pods - extraEnvFrom: [] - # -- Resource requests and limits for the canary - resources: {} - # -- Node selector for canary pods - nodeSelector: {} - # -- Tolerations for canary pods - tolerations: [] - # -- Image to use for loki 
canary - image: - # -- The Docker registry - registry: docker.io - # -- Docker image repository - repository: grafana/loki-canary - # -- Overrides the image tag whose default is the chart's appVersion - tag: null - # -- Docker image pull policy - pullPolicy: IfNotPresent + # The Loki canary pushes logs to and queries from this loki installation to test + # that it's working correctly + lokiCanary: + enabled: true + # -- Additional annotations for the `loki-canary` Daemonset + annotations: {} + # -- Additional CLI arguments for the `loki-canary' command + extraArgs: [] + # -- Environment variables to add to the canary pods + extraEnv: [] + # -- Environment variables from secrets or configmaps to add to the canary pods + extraEnvFrom: [] + # -- Resource requests and limits for the canary + resources: {} + # -- Node selector for canary pods + nodeSelector: {} + # -- Tolerations for canary pods + tolerations: [] + # -- Image to use for loki canary + image: + # -- The Docker registry + registry: docker.io + # -- Docker image repository + repository: grafana/loki-canary + # -- Overrides the image tag whose default is the chart's appVersion + tag: null + # -- Docker image pull policy + pullPolicy: IfNotPresent # Configuration for the write write: From 4768b6d997dfdf611aac290589c1c88c5b50fcd8 Mon Sep 17 00:00:00 2001 From: Kaviraj Kanagaraj Date: Tue, 13 Dec 2022 22:16:22 +0100 Subject: [PATCH 09/62] doc(api): Default value for `delete_ring_tokens` on `/ingester/shutdown` endpoint (#7921) Signed-off-by: Kaviraj **What this PR does / why we need it**: PR documents the default value for `delete_ring_tokens` params on `/ingester/shutdown` endpoint. 
**Which issue(s) this PR fixes**: Fixes #NA **Special notes for your reviewer**: **Checklist** - [x] Documentation added Signed-off-by: Kaviraj --- docs/sources/api/_index.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/sources/api/_index.md b/docs/sources/api/_index.md index 696ae7a4af9ab..c20e426fb6aff 100644 --- a/docs/sources/api/_index.md +++ b/docs/sources/api/_index.md @@ -220,7 +220,7 @@ gave this response: } ``` -If your cluster has +If your cluster has [Grafana Loki Multi-Tenancy](../operations/multi-tenancy/) enabled, set the `X-Scope-OrgID` header to identify the tenant you want to query. Here is the same example query for the single tenant called `Tenant1`: @@ -637,7 +637,7 @@ It accepts three URL query parameters `flush`, `delete_ring_tokens`, and `termin * `flush=`: Flag to control whether to flush any in-memory chunks the ingester holds. Defaults to `true`. * `delete_ring_tokens=`: - Flag to control whether to delete the file that contains the ingester ring tokens of the instance if the `-ingester.token-file-path` is specified. + Flag to control whether to delete the file that contains the ingester ring tokens of the instance if the `-ingester.token-file-path` is specified. Defaults to `false. * `terminate=`: Flag to control whether to terminate the Loki process after service shutdown. Defaults to `true`. @@ -1385,4 +1385,3 @@ This is helpful for scaling down WAL-enabled ingesters where we want to ensure o but instead flushed to our chunk backend. In microservices mode, the `/ingester/flush_shutdown` endpoint is exposed by the ingester. - From f93b91bfb57e65f0a783d97fc3875283d7997409 Mon Sep 17 00:00:00 2001 From: Susana Ferreira Date: Wed, 14 Dec 2022 11:47:54 +0100 Subject: [PATCH 10/62] Add configuration documentation generation tool (#7916) **What this PR does / why we need it**: Add a tool to generate configuration flags documentation based on the flags properties defined on registration on the code. 
This tool is based on the [Mimir doc generation tool](https://github.com/grafana/mimir/tree/main/tools/doc-generator) and adapted according to Loki configuration specifications. Prior to this PR, the configuration flags documentation was dispersed across two sources: * [_index.md](https://github.com/grafana/loki/blob/5550cd65ecd2299b219d26501221df0b191d8a78/docs/sources/configuration/_index.md) * configuration flags registration in the code This meant that there was no single source of truth. In this PR, the previous `_index.md` file is replaced with the new file generated by the tool. The next step includes adding a CI step that validates if the _index.md file was generated according to the flags settings. This will be done in a follow-up PR. **NOTE:** this is not a documentation update PR. Apart from some minor typo fixes, the documentation changes on the code, were copied from the `_index.md` file. **Which issue(s) this PR fixes**: Fixes https://github.com/grafana/loki-private/issues/83 **Special notes for your reviewer**: Files: * [docs/sources/configuration/index.template](https://github.com/grafana/loki/blob/5550cd65ecd2299b219d26501221df0b191d8a78/docs/sources/configuration/index.template): template used to generate the final configuration file * [/docs/sources/configuration/_index.md](https://github.com/grafana/loki/blob/c32e5d0acb3cdacc9e50bb71a83a9ba42721e0e2/docs/sources/configuration/_index.md): file generated by tool * `loki/pkg` directory files updated with up-to-date documentation from `_index.md` file * [tools/doc-generator](https://github.com/grafana/loki/tree/5550cd65ecd2299b219d26501221df0b191d8a78/tools/doc-generator) directory with documentation generation tool. 
**Checklist** - [ ] Reviewed the `CONTRIBUTING.md` guide - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/upgrading/_index.md` --- docs/sources/configuration/_index.md | 5038 ++++++++++------- docs/sources/configuration/index.template | 98 + go.mod | 1 + go.sum | 1 + pkg/ingester/client/client.go | 8 +- pkg/ingester/ingester.go | 28 +- pkg/ingester/wal.go | 4 +- pkg/logql/engine.go | 4 +- pkg/loki/common/common.go | 22 +- pkg/loki/config_test.go | 6 +- pkg/loki/loki.go | 70 +- pkg/lokifrontend/config.go | 2 +- pkg/lokifrontend/frontend/v1/frontend.go | 2 +- pkg/querier/querier.go | 12 +- .../queryrange/queryrangebase/roundtrip.go | 2 +- pkg/ruler/base/ruler.go | 22 +- pkg/ruler/base/ruler_ring.go | 8 +- pkg/ruler/base/storage.go | 14 +- pkg/ruler/config.go | 12 +- pkg/ruler/config/alertmanager.go | 2 +- pkg/ruler/storage/cleaner/cleaner.go | 1 + pkg/ruler/storage/cleaner/config.go | 5 +- pkg/ruler/storage/instance/instance.go | 12 +- pkg/scheduler/scheduler.go | 6 +- .../chunk/client/aws/s3_storage_client.go | 2 +- .../client/baidubce/bos_storage_client.go | 8 +- pkg/storage/chunk/client/hedging/hedging.go | 4 +- pkg/storage/config/schema_config.go | 9 +- pkg/storage/factory.go | 14 +- .../indexshipper/compactor/compactor.go | 16 +- .../stores/shipper/indexgateway/config.go | 6 +- pkg/util/ring_config.go | 2 +- pkg/util/validation/limits.go | 2 +- pkg/validation/limits.go | 84 +- tools/doc-generator/main.go | 185 + tools/doc-generator/parse/parser.go | 645 +++ tools/doc-generator/parse/root_blocks.go | 224 + tools/doc-generator/parse/util.go | 62 + tools/doc-generator/parse/util_test.go | 52 + tools/doc-generator/writer.go | 245 + .../mitchellh/go-wordwrap/LICENSE.md | 21 + .../mitchellh/go-wordwrap/README.md | 39 + .../mitchellh/go-wordwrap/wordwrap.go | 73 + vendor/modules.txt | 3 + 44 files changed, 4784 insertions(+), 2292 deletions(-) 
create mode 100644 docs/sources/configuration/index.template create mode 100644 tools/doc-generator/main.go create mode 100644 tools/doc-generator/parse/parser.go create mode 100644 tools/doc-generator/parse/root_blocks.go create mode 100644 tools/doc-generator/parse/util.go create mode 100644 tools/doc-generator/parse/util_test.go create mode 100644 tools/doc-generator/writer.go create mode 100644 vendor/github.com/mitchellh/go-wordwrap/LICENSE.md create mode 100644 vendor/github.com/mitchellh/go-wordwrap/README.md create mode 100644 vendor/github.com/mitchellh/go-wordwrap/wordwrap.go diff --git a/docs/sources/configuration/_index.md b/docs/sources/configuration/_index.md index 43e052a13416d..99af10a382407 100644 --- a/docs/sources/configuration/_index.md +++ b/docs/sources/configuration/_index.md @@ -1,8 +1,13 @@ --- -title: Configuration +description: Describes parameters used to configure Grafana Loki. +menuTitle: Configuration parameters +title: Grafana Loki configuration parameters weight: 500 --- -# Configuring Grafana Loki + +# Grafana Loki configuration parameters + + Grafana Loki is configured in a YAML file (usually referred to as `loki.yaml` ) which contains information on the Loki server and its individual components, @@ -13,7 +18,7 @@ Configuration examples can be found in the [Configuration Examples](examples/) d ## Printing Loki Config At Runtime If you pass Loki the flag `-print-config-stderr` or `-log-config-reverse-order`, (or `-print-config-stderr=true`) -Loki will dump the entire config object it has created from the built in defaults combined first with +Loki will dump the entire config object it has created from the built-in defaults combined first with overrides from config file, and second by overrides from flags. The result is the value for every config object in the Loki config struct, which is very large... @@ -42,7 +47,7 @@ To specify which configuration file to load, pass the `-config.file` flag at the command line. 
The value can be a list of comma separated paths, then the first file that exists will be used. If no `-config.file` argument is specified, Loki will look up the `config.yaml` in the -current working directory and the `config/` sub-directory and try to use that. +current working directory and the `config/` subdirectory and try to use that. The file is written in [YAML format](https://en.wikipedia.org/wiki/YAML), defined by the scheme below. @@ -91,27 +96,29 @@ Pass the `-config.expand-env` flag at the command line to enable this way of set ### Supported contents and default values of `loki.yaml` ```yaml -# A comma-separated list of components to run. -# The default value "all" runs Loki in single binary mode. -# The value "read" is an alias to run only read-path related components such as -# the querier and query-frontend, but all in the same process. -# The value "write" is an alias to run only write-path related components such as -# the distributor and compactor, but all in the same process. -# Supported values: all, compactor, distributor, ingester, querier, query-scheduler, -# ingester-querier, query-frontend, index-gateway, ruler, table-manager, read, write. -# A full list of available targets can be printed when running Loki with the -# `-list-targets` command line flag. +# A comma-separated list of components to run. The default value 'all' runs Loki +# in single binary mode. The value 'read' is an alias to run only read-path +# related components such as the querier and query-frontend, but all in the same +# process. The value 'write' is an alias to run only write-path related +# components such as the distributor and compactor, but all in the same process. +# Supported values: all, compactor, distributor, ingester, querier, +# query-scheduler, ingester-querier, query-frontend, index-gateway, ruler, +# table-manager, read, write. A full list of available targets can be printed +# when running Loki with the '-list-targets' command line flag. 
+# CLI flag: -target [target: | default = "all"] # Enables authentication through the X-Scope-OrgID header, which must be present -# if true. If false, the OrgID will always be set to "fake". +# if true. If false, the OrgID will always be set to 'fake'. +# CLI flag: -auth.enabled [auth_enabled: | default = true] -# The amount of virtual memory in bytes to reserve as ballast in order to optimize -# garbage collection. Larger ballasts result in fewer garbage collection passes, reducing -# CPU overhead at the cost of heap size. The ballast will not consume physical memory, -# because it is never read from. It will, however, distort metrics, because it is -# counted as live memory. +# The amount of virtual memory in bytes to reserve as ballast in order to +# optimize garbage collection. Larger ballasts result in fewer garbage +# collection passes, reducing CPU overhead at the cost of heap size. The ballast +# will not consume physical memory, because it is never read from. It will, +# however, distort metrics, because it is counted as live memory. +# CLI flag: -config.ballast-bytes [ballast_bytes: | default = 0] # Configures the server of the launched module(s). @@ -120,12 +127,12 @@ Pass the `-config.expand-env` flag at the command line to enable this way of set # Configures the distributor. [distributor: ] -# Configures the querier. Only appropriate when running all modules or -# just the querier. +# Configures the querier. Only appropriate when running all modules or just the +# querier. [querier: ] -# The query_scheduler block configures the Loki query scheduler. -# When configured it separates the tenant query queues from the query-frontend +# The query_scheduler block configures the Loki query scheduler. When configured +# it separates the tenant query queues from the query-frontend. [query_scheduler: ] # The frontend block configures the Loki query-frontend. 
@@ -138,122 +145,144 @@ Pass the `-config.expand-env` flag at the command line to enable this way of set # The ruler block configures the Loki ruler. [ruler: ] -# The ingester_client block configures how the distributor will connect -# to ingesters. Only appropriate when running all components, the distributor, -# or the querier. +# The ingester_client block configures how the distributor will connect to +# ingesters. Only appropriate when running all components, the distributor, or +# the querier. [ingester_client: ] -# The ingester block configures the ingester and how the ingester will register itself -# to a key value store. +# The ingester block configures the ingester and how the ingester will register +# itself to a key value store. [ingester: ] -# Configures the index gateway server. +# The index_gateway block configures the Loki index gateway server, responsible +# for serving index queries without the need to constantly interact with the +# object store. [index_gateway: ] -# Configures where Loki will store data. +# The storage_config block configures one of many possible stores for both the +# index and chunks. Which configuration to be picked should be defined in +# schema_config block. [storage_config: ] -# Configures how Loki will store data in the specific store. +# The chunk_store_config block configures how chunks will be cached and how long +# to wait before saving them to the backing store. [chunk_store_config: ] # Configures the chunk index schema and where it is stored. [schema_config: ] -# The compactor block configures the compactor component, which compacts index shards -# for performance. +# The compactor block configures the compactor component, which compacts index +# shards for performance. [compactor: ] -# Configures limits per-tenant or globally. +# The limits_config block configures global and per-tenant limits in Loki. 
[limits_config: ] -# The frontend_worker configures the worker - running within the Loki -# querier - picking up and executing queries enqueued by the query-frontend. +# The frontend_worker configures the worker - running within the Loki querier - +# picking up and executing queries enqueued by the query-frontend. [frontend_worker: ] # The table_manager block configures the table manager for retention. [table_manager: ] -# Configuration for "runtime config" module, responsible for reloading runtime +# Configuration for 'runtime config' module, responsible for reloading runtime # configuration file. [runtime_config: ] # Configuration for tracing. [tracing: ] -# Common configuration to be shared between multiple modules. -# If a more specific configuration is given in other sections, -# the related configuration within this section will be ignored. -[common: ] - -# Configuration for usage report +# Configuration for usage report. [analytics: ] + +# Common configuration to be shared between multiple modules. If a more specific +# configuration is given in other sections, the related configuration within +# this section will be ignored. +[common: ] ``` -## server +### server -The `server` block -configures the HTTP and gRPC server communication of the launched service(s). +Configures the `server` of the launched module(s). ```yaml -# HTTP server listen host +# HTTP server listen network, default tcp +# CLI flag: -server.http-listen-network +[http_listen_network: | default = "tcp"] + +# HTTP server listen address. # CLI flag: -server.http-listen-address -[http_listen_address: ] +[http_listen_address: | default = ""] -# HTTP server listen port +# HTTP server listen port. 
# CLI flag: -server.http-listen-port -[http_listen_port: | default = 80] +[http_listen_port: | default = 3100] + +# Maximum number of simultaneous http connections, <=0 to disable +# CLI flag: -server.http-conn-limit +[http_listen_conn_limit: | default = 0] + +# gRPC server listen network +# CLI flag: -server.grpc-listen-network +[grpc_listen_network: | default = "tcp"] + +# gRPC server listen address. +# CLI flag: -server.grpc-listen-address +[grpc_listen_address: | default = ""] + +# gRPC server listen port. +# CLI flag: -server.grpc-listen-port +[grpc_listen_port: | default = 9095] + +# Maximum number of simultaneous grpc connections, <=0 to disable +# CLI flag: -server.grpc-conn-limit +[grpc_listen_conn_limit: | default = 0] + +# Comma-separated list of cipher suites to use. If blank, the default Go cipher +# suites is used. +# CLI flag: -server.tls-cipher-suites +[tls_cipher_suites: | default = ""] + +# Minimum TLS version to use. Allowed values: VersionTLS10, VersionTLS11, +# VersionTLS12, VersionTLS13. If blank, the Go TLS minimum version is used. +# CLI flag: -server.tls-min-version +[tls_min_version: | default = ""] -# TLS configuration for serving over HTTPS http_tls_config: # HTTP server cert path. # CLI flag: -server.http-tls-cert-path [cert_file: | default = ""] + # HTTP server key path. # CLI flag: -server.http-tls-key-path [key_file: | default = ""] + # HTTP TLS Client Auth type. # CLI flag: -server.http-tls-client-auth [client_auth_type: | default = ""] + # HTTP TLS Client CA path. # CLI flag: -server.http-tls-ca-path [client_ca_file: | default = ""] - # HTTP TLS Cipher Suites. - # CLI flag: -server.http-tls-cipher-suites - [tls_cipher_suites: | default = ""] - # HTTP TLS Min Version. 
- # CLI flag: -server.http-tls-min-version - [tls_min_version: | default = ""] - -# gRPC server listen host -# CLI flag: -server.grpc-listen-address -[grpc_listen_address: ] -# gRPC server listen port -# CLI flag: -server.grpc-listen-port -[grpc_listen_port: | default = 9095] - -# TLS configuration for serving over gRPC grpc_tls_config: - # gRPC server cert path. + # GRPC TLS server cert path. # CLI flag: -server.grpc-tls-cert-path [cert_file: | default = ""] - # gRPC server key path. + + # GRPC TLS server key path. # CLI flag: -server.grpc-tls-key-path [key_file: | default = ""] - # gRPC TLS Client Auth type. + + # GRPC TLS Client Auth type. # CLI flag: -server.grpc-tls-client-auth [client_auth_type: | default = ""] - # gRPC TLS Client CA path. + + # GRPC TLS Client CA path. # CLI flag: -server.grpc-tls-ca-path [client_ca_file: | default = ""] - # GRPC TLS Cipher Suites. - # CLI flag: -server.grpc-tls-cipher-suites - [tls_cipher_suites: | default = ""] - # GRPC TLS Min Version. - # CLI flag: -server.grpc-tls-min-version - [tls_min_version: | default = ""] -# Register instrumentation handlers (/metrics, etc.) +# Register the intrumentation handlers (/metrics etc). # CLI flag: -server.register-instrumentation [register_instrumentation: | default = true] @@ -271,13 +300,13 @@ grpc_tls_config: # Idle timeout for HTTP server # CLI flag: -server.http-idle-timeout -[http_server_idle_timeout: | default = 120s] +[http_server_idle_timeout: | default = 2m] -# Max gRPC message size that can be received +# Limit on the size of a gRPC message this server can receive (bytes). # CLI flag: -server.grpc-max-recv-msg-size-bytes [grpc_server_max_recv_msg_size: | default = 4194304] -# Max gRPC message size that can be sent +# Limit on the size of a gRPC message this server can send (bytes). 
# CLI flag: -server.grpc-max-send-msg-size-bytes [grpc_server_max_send_msg_size: | default = 4194304] @@ -285,70 +314,152 @@ grpc_tls_config: # CLI flag: -server.grpc-max-concurrent-streams [grpc_server_max_concurrent_streams: | default = 100] -# Log only messages with the given severity or above. Supported values [debug, +# The duration after which an idle connection should be closed. Default: +# infinity +# CLI flag: -server.grpc.keepalive.max-connection-idle +[grpc_server_max_connection_idle: | default = 2562047h47m16.854775807s] + +# The duration for the maximum amount of time a connection may exist before it +# will be closed. Default: infinity +# CLI flag: -server.grpc.keepalive.max-connection-age +[grpc_server_max_connection_age: | default = 2562047h47m16.854775807s] + +# An additive period after max-connection-age after which the connection will be +# forcibly closed. Default: infinity +# CLI flag: -server.grpc.keepalive.max-connection-age-grace +[grpc_server_max_connection_age_grace: | default = 2562047h47m16.854775807s] + +# Duration after which a keepalive probe is sent in case of no activity over the +# connection., Default: 2h +# CLI flag: -server.grpc.keepalive.time +[grpc_server_keepalive_time: | default = 2h] + +# After having pinged for keepalive check, the duration after which an idle +# connection should be closed, Default: 20s +# CLI flag: -server.grpc.keepalive.timeout +[grpc_server_keepalive_timeout: | default = 20s] + +# Minimum amount of time a client should wait before sending a keepalive ping. +# If client sends keepalive ping more often, server will send GOAWAY and close +# the connection. +# CLI flag: -server.grpc.keepalive.min-time-between-pings +[grpc_server_min_time_between_pings: | default = 10s] + +# If true, server allows keepalive pings even when there are no active +# streams(RPCs). If false, and client sends ping when there are no active +# streams, server will send GOAWAY and close the connection. 
+# CLI flag: -server.grpc.keepalive.ping-without-stream-allowed +[grpc_server_ping_without_stream_allowed: | default = true] + +# Output log messages in the given format. Valid formats: [logfmt, json] +# CLI flag: -log.format +[log_format: | default = "logfmt"] + +# Only log messages with the given severity or above. Valid levels: [debug, # info, warn, error] # CLI flag: -log.level [log_level: | default = "info"] -# Base path to serve all API routes from (e.g., /v1/). +# Optionally log the source IPs. +# CLI flag: -server.log-source-ips-enabled +[log_source_ips_enabled: | default = false] + +# Header field storing the source IPs. Only used if +# server.log-source-ips-enabled is true. If not set the default Forwarded, +# X-Real-IP and X-Forwarded-For headers are used +# CLI flag: -server.log-source-ips-header +[log_source_ips_header: | default = ""] + +# Regex for matching the source IPs. Only used if server.log-source-ips-enabled +# is true. If not set the default Forwarded, X-Real-IP and X-Forwarded-For +# headers are used +# CLI flag: -server.log-source-ips-regex +[log_source_ips_regex: | default = ""] + +# Optionally log requests at info level instead of debug level. +# CLI flag: -server.log-request-at-info-level-enabled +[log_request_at_info_level_enabled: | default = false] + +# Base path to serve all API routes from (e.g. /v1/) # CLI flag: -server.path-prefix [http_path_prefix: | default = ""] ``` -## distributor +### distributor -The `distributor` block configures the distributor component. +Configures the `distributor`. ```yaml -# Configures the distributors ring, used when the "global" ingestion rate -# strategy is enabled. ring: kvstore: - # The backend storage to use for the ring. Supported values are - # consul, etcd, inmemory, memberlist + # Backend storage to use for the ring. Supported values are: consul, etcd, + # inmemory, memberlist, multi. 
# CLI flag: -distributor.ring.store - store: + [store: | default = "consul"] # The prefix for the keys in the store. Should end with a /. # CLI flag: -distributor.ring.prefix [prefix: | default = "collectors/"] - # Configuration for a Consul client. Only applies if store is "consul" - # The CLI flags prefix for this block config is: distributor.ring - [consul: ] + # Configuration for a Consul client. Only applies if store is consul. + # The CLI flags prefix for this block configuration is: distributor.ring + [consul: ] - # Configuration for an ETCD v3 client. Only applies if store is "etcd" - # The CLI flags prefix for this block config is: distributor.ring - [etcd: ] + # Configuration for an ETCD v3 client. Only applies if store is etcd. + # The CLI flags prefix for this block configuration is: distributor.ring + [etcd: ] - # The heartbeat timeout after which ingesters are skipped for - # reading and writing. - # CLI flag: -distributor.ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] + multi: + # Primary backend storage used by multi-client. + # CLI flag: -distributor.ring.multi.primary + [primary: | default = ""] + + # Secondary backend storage used by multi-client. + # CLI flag: -distributor.ring.multi.secondary + [secondary: | default = ""] + + # Mirror writes to secondary store. 
+ # CLI flag: -distributor.ring.multi.mirror-enabled + [mirror_enabled: | default = false] - rate_store: - # The max number of concurrent requests to make to ingester stream apis - # CLI flag: -distributor.rate-store.max-request-parallelism - [max_request_parallelism: | default = 200] - # The interval on which distributors will update current stream rates - # from ingesters - # CLI flag: -distributor.rate-store.stream-rate-update-interval - [stream_rate_update_interval: | default = 1s] - # Timeout for communication between distributors and ingesters when updating - # rates - # CLI flag: -distributor.rate-store.ingester-request-timeout - [ingester_request_timeout: | default = 1s] + # Timeout for storing value to secondary store. + # CLI flag: -distributor.ring.multi.mirror-timeout + [mirror_timeout: | default = 2s] + + # Period at which to heartbeat to the ring. 0 = disabled. + # CLI flag: -distributor.ring.heartbeat-period + [heartbeat_period: | default = 5s] + + # The heartbeat timeout after which distributors are considered unhealthy + # within the ring. 0 = never (timeout disabled). + # CLI flag: -distributor.ring.heartbeat-timeout + [heartbeat_timeout: | default = 1m] + + # Name of network interface to read address from. 
+ # CLI flag: -distributor.ring.instance-interface-names + [instance_interface_names: | default = []] + +rate_store: + # The max number of concurrent requests to make to ingester stream apis + # CLI flag: -distributor.rate-store.max-request-parallelism + [max_request_parallelism: | default = 200] + + # The interval on which distributors will update current stream rates from + # ingesters + # CLI flag: -distributor.rate-store.stream-rate-update-interval + [stream_rate_update_interval: | default = 1s] + + # Timeout for communication between distributors and any given ingester when + # updating rates + # CLI flag: -distributor.rate-store.ingester-request-timeout + [ingester_request_timeout: | default = 500ms] ``` -## querier +### querier -The `querier` block configures the Loki Querier. +Configures the `querier`. Only appropriate when running all modules or just the querier. ```yaml -# Timeout when querying ingesters or storage during the execution of a query request. -# CLI flag: -querier.query-timeout -[query_timeout: | default = 1m] - # Maximum duration for which the live tailing requests should be served. # CLI flag: -querier.tail-max-duration [tail_max_duration: | default = 1h] @@ -357,46 +468,43 @@ The `querier` block configures the Loki Querier. # CLI flag: -querier.extra-query-delay [extra_query_delay: | default = 0s] -# Maximum lookback beyond which queries are not sent to ingester. -# 0 means all queries are sent to ingester. +# Maximum lookback beyond which queries are not sent to ingester. 0 means all +# queries are sent to ingester. # CLI flag: -querier.query-ingesters-within [query_ingesters_within: | default = 3h] +engine: + # Deprecated: Use querier.query-timeout instead. Timeout for query execution. + # CLI flag: -querier.engine.timeout + [timeout: | default = 5m] + + # The maximum amount of time to look back for log lines. Used only for instant + # log queries. 
+ # CLI flag: -querier.engine.max-lookback-period + [max_look_back_period: | default = 30s] + # The maximum number of concurrent queries allowed. # CLI flag: -querier.max-concurrent [max_concurrent: | default = 10] -# Only query the store, and not attempt any ingesters. -# This is useful for running a standalone querier pool operating only against -# stored data. +# Only query the store, and not attempt any ingesters. This is useful for +# running a standalone querier pool operating only against stored data. # CLI flag: -querier.query-store-only [query_store_only: | default = false] -# When true, queriers only query the ingesters, and not stored data. -# This is useful when the object store is unavailable. +# When true, queriers only query the ingesters, and not stored data. This is +# useful when the object store is unavailable. # CLI flag: -querier.query-ingester-only [query_ingester_only: | default = false] # When true, allow queries to span multiple tenants. # CLI flag: -querier.multi-tenant-queries-enabled [multi_tenant_queries_enabled: | default = false] - -# Configuration options for the LogQL engine. -engine: - # Timeout for query execution. - # Deprecated: use querier.query-timeout instead. - # CLI flag: -querier.engine.timeout - [timeout: | default = 3m] - - # The maximum amount of time to look back for log lines. Only - # applicable for instant log queries. - # CLI flag: -querier.engine.max-lookback-period - [max_look_back_period: | default = 30s] ``` -## query_scheduler +### query_scheduler -The `query_scheduler` block configures the Loki query scheduler. +The `query_scheduler` block configures the Loki query scheduler. When configured it separates the tenant query queues from the query-frontend. ```yaml # Maximum number of outstanding requests per tenant per query-scheduler. @@ -406,123 +514,202 @@ The `query_scheduler` block configures the Loki query scheduler. 
[max_outstanding_requests_per_tenant: | default = 100] # If a querier disconnects without sending notification about graceful shutdown, -# the query-scheduler will keep the querier in the tenant's shard until the forget delay has passed. -# This feature is useful to reduce the blast radius when shuffle-sharding is enabled. +# the query-scheduler will keep the querier in the tenant's shard until the +# forget delay has passed. This feature is useful to reduce the blast radius +# when shuffle-sharding is enabled. # CLI flag: -query-scheduler.querier-forget-delay -[querier_forget_delay: | default = 0] +[querier_forget_delay: | default = 0s] # This configures the gRPC client used to report errors back to the # query-frontend. -[grpc_client_config: ] - -# Set to true to have the query schedulers create and place themselves in a ring. -# If no frontend_address or scheduler_address are present -# anywhere else in the configuration, Loki will toggle this value to true. +# The CLI flags prefix for this block configuration is: +# query-scheduler.grpc-client-config +[grpc_client_config: ] + +# Set to true to have the query schedulers create and place themselves in a +# ring. If no frontend_address or scheduler_address are present anywhere else in +# the configuration, Loki will toggle this value to true. +# CLI flag: -query-scheduler.use-scheduler-ring [use_scheduler_ring: | default = false] -# The hash ring configuration. This option is required only if use_scheduler_ring is true -# The CLI flags prefix for this block config is scheduler.ring -[scheduler_ring: ] -``` +# The hash ring configuration. This option is required only if +# use_scheduler_ring is true. +scheduler_ring: + kvstore: + # Backend storage to use for the ring. Supported values are: consul, etcd, + # inmemory, memberlist, multi. + # CLI flag: -query-scheduler.ring.store + [store: | default = "consul"] -## frontend + # The prefix for the keys in the store. Should end with a /. 
+ # CLI flag: -query-scheduler.ring.prefix + [prefix: | default = "collectors/"] -The `frontend` block configures the Loki query-frontend. + # Configuration for a Consul client. Only applies if store is consul. + # The CLI flags prefix for this block configuration is: query-scheduler.ring + [consul: ] -```yaml -# Maximum number of outstanding requests per tenant per frontend; requests -# beyond this error with HTTP 429. -# CLI flag: -querier.max-outstanding-requests-per-tenant -[max_outstanding_per_tenant: | default = 2048] + # Configuration for an ETCD v3 client. Only applies if store is etcd. + # The CLI flags prefix for this block configuration is: query-scheduler.ring + [etcd: ] -# In the event a tenant is repeatedly sending queries that lead the querier to crash -# or be killed due to an out-of-memory error, the crashed querier will be disconnected -# from the query frontend and a new querier will be immediately assigned to the tenant’s shard. -# This invalidates the assumption that shuffle sharding can be used to reduce the -# impact on tenants. This option mitigates the impact by configuring a delay between when -# a querier disconnects because of a crash and when the crashed querier is actually removed -# from the tenant's shard. -# CLI flag: -query-frontend.querier-forget-delay -[querier_forget_delay: | default = 0s] + multi: + # Primary backend storage used by multi-client. + # CLI flag: -query-scheduler.ring.multi.primary + [primary: | default = ""] -# Compress HTTP responses. -# CLI flag: -querier.compress-http-responses -[compress_responses: | default = false] + # Secondary backend storage used by multi-client. + # CLI flag: -query-scheduler.ring.multi.secondary + [secondary: | default = ""] -# URL of downstream Loki. -# CLI flag: -frontend.downstream-url -[downstream_url: | default = ""] + # Mirror writes to secondary store. 
+ # CLI flag: -query-scheduler.ring.multi.mirror-enabled + [mirror_enabled: | default = false] -# Log queries that are slower than the specified duration. Set to 0 to disable. -# Set to < 0 to enable on all queries. -# CLI flag: -frontend.log-queries-longer-than -[log_queries_longer_than: | default = 0s] + # Timeout for storing value to secondary store. + # CLI flag: -query-scheduler.ring.multi.mirror-timeout + [mirror_timeout: | default = 2s] -# URL of querier for tail proxy. -# CLI flag: -frontend.tail-proxy-url -[tail_proxy_url: | default = ""] + # Period at which to heartbeat to the ring. 0 = disabled. + # CLI flag: -query-scheduler.ring.heartbeat-period + [heartbeat_period: | default = 15s] -tail_tls_config: - # Path to the client certificate file, which will be used for authenticating - # with the server. Also requires the key path to be configured. - # CLI flag: -frontend.tail-tls-config.tls-cert-path - [tls_cert_path: | default = ""] + # The heartbeat timeout after which compactors are considered unhealthy within + # the ring. 0 = never (timeout disabled). + # CLI flag: -query-scheduler.ring.heartbeat-timeout + [heartbeat_timeout: | default = 1m] - # Path to the key file for the client certificate. Also requires the client - # certificate to be configured. - # CLI flag: -frontend.tail-tls-config.tls-key-path - [tls_key_path: | default = ""] + # File path where tokens are stored. If empty, tokens are not stored at + # shutdown and restored at startup. + # CLI flag: -query-scheduler.ring.tokens-file-path + [tokens_file_path: | default = ""] - # Path to the CA certificates file to validate server certificate against. If - # not set, the host's root CA certificates are used. - # CLI flag: -frontend.tail-tls-config.tls-ca-path - [tls_ca_path: | default = ""] + # True to enable zone-awareness and replicate blocks across different + # availability zones. 
+ # CLI flag: -query-scheduler.ring.zone-awareness-enabled + [zone_awareness_enabled: | default = false] - # Skip validating server certificate. - # CLI flag: -frontend.tail-tls-config.tls-insecure-skip-verify - [tls_insecure_skip_verify: | default = false] + # Instance ID to register in the ring. + # CLI flag: -query-scheduler.ring.instance-id + [instance_id: | default = ""] - # Override the default cipher suite list (separated by commas). - # CLI flag: -frontend.tail-tls-config.tls_cipher_suites - [tls_cipher_suites: | default = ""] + # Name of network interface to read address from. + # CLI flag: -query-scheduler.ring.instance-interface-names + [instance_interface_names: | default = []] - # Override the default minimum TLS version. - # CLI flag: -frontend.tail-tls-config.tls_min_version - [tls_min_version: | default = ""] + # Port to advertise in the ring (defaults to server.grpc-listen-port). + # CLI flag: -query-scheduler.ring.instance-port + [instance_port: | default = 0] + + # IP address to advertise in the ring. + # CLI flag: -query-scheduler.ring.instance-addr + [instance_addr: | default = ""] + # The availability zone where this instance is running. Required if + # zone-awareness is enabled. + # CLI flag: -query-scheduler.ring.instance-availability-zone + [instance_availability_zone: | default = ""] +``` + +### frontend + +The `frontend` block configures the Loki query-frontend. + +```yaml +# Log queries that are slower than the specified duration. Set to 0 to disable. +# Set to < 0 to enable on all queries. +# CLI flag: -frontend.log-queries-longer-than +[log_queries_longer_than: | default = 0s] + +# Max body size for downstream prometheus. +# CLI flag: -frontend.max-body-size +[max_body_size: | default = 10485760] + +# True to enable query statistics tracking. When enabled, a message with some +# statistics is logged for every query. 
+# CLI flag: -frontend.query-stats-enabled +[query_stats_enabled: | default = false] + +# Maximum number of outstanding requests per tenant per frontend; requests +# beyond this error with HTTP 429. +# CLI flag: -querier.max-outstanding-requests-per-tenant +[max_outstanding_per_tenant: | default = 2048] + +# In the event a tenant is repeatedly sending queries that lead the querier to +# crash or be killed due to an out-of-memory error, the crashed querier will be +# disconnected from the query frontend and a new querier will be immediately +# assigned to the tenant’s shard. This invalidates the assumption that shuffle +# sharding can be used to reduce the impact on tenants. This option mitigates +# the impact by configuring a delay between when a querier disconnects because +# of a crash and when the crashed querier is actually removed from the tenant's +# shard. +# CLI flag: -query-frontend.querier-forget-delay +[querier_forget_delay: | default = 0s] # DNS hostname used for finding query-schedulers. # CLI flag: -frontend.scheduler-address [scheduler_address: | default = ""] # How often to resolve the scheduler-address, in order to look for new -# query-scheduler instances. -# Also used to determine how often to poll the scheduler-ring for addresses if configured. +# query-scheduler instances. Also used to determine how often to poll the +# scheduler-ring for addresses if the scheduler-ring is configured. # CLI flag: -frontend.scheduler-dns-lookup-period [scheduler_dns_lookup_period: | default = 10s] # Number of concurrent workers forwarding queries to single query-scheduler. # CLI flag: -frontend.scheduler-worker-concurrency [scheduler_worker_concurrency: | default = 5] + +# The grpc_client block configures the gRPC client used to communicate between +# two Loki components. +# The CLI flags prefix for this block configuration is: +# frontend.grpc-client-config +[grpc_client_config: ] + +# Name of network interface to read address from. 
This address is sent to +# query-scheduler and querier, which uses it to send the query response back to +# query-frontend. +# CLI flag: -frontend.instance-interface-names +[instance_interface_names: | default = [en0]] + +# Compress HTTP responses. +# CLI flag: -querier.compress-http-responses +[compress_responses: | default = false] + +# URL of downstream Loki. +# CLI flag: -frontend.downstream-url +[downstream_url: | default = ""] + +# URL of querier for tail proxy. +# CLI flag: -frontend.tail-proxy-url +[tail_proxy_url: | default = ""] + +# The TLS configuration. +[tail_tls_config: ] ``` -## query_range +### query_range -The `query_range` block configures query splitting and caching in the Loki query-frontend. +The `query_range` block configures the query splitting and caching in the Loki query-frontend. ```yaml -# Deprecated: Split queries by day and execute in parallel. -# Use -querier.split-queries-by-interval instead. -# CLI flag: -querier.split-queries-by-day -[split_queries_by_day: | default = false] +# Deprecated: Use -querier.split-queries-by-interval instead. CLI flag: +# -querier.split-queries-by-day. Split queries by day and execute in parallel. +[split_queries_by_interval: ] # Mutate incoming queries to align their start and end with their step. # CLI flag: -querier.align-querier-with-step [align_queries_with_step: | default = false] results_cache: - # The CLI flags prefix for this block config is: frontend - cache: + # The cache block configures the cache backend. + # The CLI flags prefix for this block configuration is: frontend + [cache: ] + + # Use compression in results cache. Supported values are: 'snappy' and '' + # (disable compression). + # CLI flag: -frontend.compression + [compression: | default = ""] # Cache query results. # CLI flag: -querier.cache-results @@ -537,48 +724,28 @@ results_cache: # query ASTs. This feature is supported only by the chunks storage engine. 
# CLI flag: -querier.parallelise-shardable-queries [parallelise_shardable_queries: | default = true] + +# List of headers forwarded by the query Frontend to downstream querier. +# CLI flag: -frontend.forward-headers-list +[forward_headers_list: | default = []] ``` -## ruler +### ruler The `ruler` block configures the Loki ruler. ```yaml # URL of alerts return path. # CLI flag: -ruler.external.url -[external_url: | default = ] - -# Labels to add to all alerts -external_labels: - [: ...] - -ruler_client: - # Path to the client certificate file, which will be used for authenticating - # with the server. Also requires the key path to be configured. - # CLI flag: -ruler.client.tls-cert-path - [tls_cert_path: | default = ""] - - # Path to the key file for the client certificate. Also requires the client - # certificate to be configured. - # CLI flag: -ruler.client.tls-key-path - [tls_key_path: | default = ""] - - # Path to the CA certificates file to validate server certificate against. If - # not set, the host's root CA certificates are used. - # CLI flag: -ruler.client.tls-ca-path - [tls_ca_path: | default = ""] - - # Skip validating server certificate. - # CLI flag: -ruler.client.tls-insecure-skip-verify - [tls_insecure_skip_verify: | default = false] +[external_url: ] - # Override the default cipher suite list (separated by commas). - # CLI flag: -ruler.client.tls_cipher_suites - [tls_cipher_suites: | default = ""] +# Labels to add to all alerts. +[external_labels: ] - # Override the default minimum TLS version. - # CLI flag: -ruler.client.tls_min_version - [tls_min_version: | default = ""] +# The grpc_client block configures the gRPC client used to communicate between +# two Loki components. +# The CLI flags prefix for this block configuration is: ruler.client +[ruler_client: ] # How frequently to evaluate rules. 
# CLI flag: -ruler.evaluation-interval @@ -588,96 +755,48 @@ ruler_client: # CLI flag: -ruler.poll-interval [poll_interval: | default = 1m] +# Deprecated: Use -ruler-storage. CLI flags and their respective YAML config +# options instead. storage: - # Method to use for backend rule storage (azure, gcs, s3, swift, local, bos). + # Method to use for backend rule storage (configdb, azure, gcs, s3, swift, + # local, bos) # CLI flag: -ruler.storage.type - [type: ] + [type: | default = ""] # Configures backend rule storage for Azure. + # The CLI flags prefix for this block configuration is: ruler.storage [azure: ] # Configures backend rule storage for GCS. + # The CLI flags prefix for this block configuration is: ruler.storage [gcs: ] # Configures backend rule storage for S3. + # The CLI flags prefix for this block configuration is: ruler [s3: ] + # Configures backend rule storage for Baidu Object Storage (BOS). + # The CLI flags prefix for this block configuration is: ruler.storage + [bos: ] + # Configures backend rule storage for Swift. + # The CLI flags prefix for this block configuration is: ruler.storage [swift: ] # Configures backend rule storage for a local file system directory. [local: ] - # Configures backend rule storage for Baidu Object Storage (BOS). - [bos: ] - - # The `hedging` block configures how to hedge storage requests. - [hedging: ] - -# Remote-write configuration to send rule samples to a Prometheus remote-write endpoint. -remote_write: - # Enable remote-write functionality. - # CLI flag: -ruler.remote-write.enabled - [enabled: | default = false] - # Minimum period to wait between refreshing remote-write reconfigurations. - # This should be greater than or equivalent to -limits.per-user-override-period. - [config_refresh_period: | default = 10s] - - # Deprecated: Use `clients` instead - # Configure remote write client. - [client: ] - - # Configure remote write clients. - # A map with remote client id as key. 
- clients: - [: ] - -wal: - # The directory in which to write tenant WAL files. Each tenant will have its own - # directory one level below this directory. - [dir: | default = "ruler-wal"] - # Frequency with which to run the WAL truncation process. - [truncate_frequency: | default = 60m] - # Minimum and maximum time series should exist in the WAL for. - [min_age: | default = 5m] - [max_age: | default = 4h] - -wal_cleaner: - # The minimum age of a WAL to consider for cleaning. - [min_age: | default = 12h] - # How often to run the WAL cleaner. - [period: | default = 0s (disabled)] - # File path to store temporary rule files. # CLI flag: -ruler.rule-path -[rule_path: | default = "/rules"] +[rule_path: | default = "/rules"] -# Comma-separated list of Alertmanager URLs to send notifications to. -# Each Alertmanager URL is treated as a separate group in the configuration. -# Multiple Alertmanagers in HA per group can be supported by using DNS -# resolution via -ruler.alertmanager-discovery. +# Comma-separated list of Alertmanager URLs to send notifications to. Each +# Alertmanager URL is treated as a separate group in the configuration. Multiple +# Alertmanagers in HA per group can be supported by using DNS resolution via +# '-ruler.alertmanager-discovery'. # CLI flag: -ruler.alertmanager-url [alertmanager_url: | default = ""] - -alertmanager_client: - # Sets the `Authorization` header on every remote write request with the - # configured username and password. - # password and password_file are mutually exclusive. - [basic_auth_username: ] - [basic_auth_password: ] - - # Optional `Authorization` header configuration. - authorization: - # Sets the authentication type. - [type: | default: Bearer] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. 
- [credentials_file: ] - # Use DNS SRV records to discover Alertmanager hosts. # CLI flag: -ruler.alertmanager-discovery [enable_alertmanager_discovery: | default = false] @@ -686,13 +805,12 @@ alertmanager_client: # CLI flag: -ruler.alertmanager-refresh-interval [alertmanager_refresh_interval: | default = 1m] -# If enabled, then requests to Alertmanager use the v2 API. +# If enabled requests to Alertmanager will utilize the V2 API. # CLI flag: -ruler.alertmanager-use-v2 [enable_alertmanager_v2: | default = false] -# List of alert relabel configs -alert_relabel_configs: - [- ...] +# List of alert relabel configs. +[alert_relabel_configs: ] # Capacity of the queue for notifications to be sent to the Alertmanager. # CLI flag: -ruler.notification-queue-capacity @@ -702,6 +820,91 @@ alert_relabel_configs: # CLI flag: -ruler.notification-timeout [notification_timeout: | default = 10s] +alertmanager_client: + # Path to the client certificate file, which will be used for authenticating + # with the server. Also requires the key path to be configured. + # CLI flag: -ruler.alertmanager-client.tls-cert-path + [tls_cert_path: | default = ""] + + # Path to the key file for the client certificate. Also requires the client + # certificate to be configured. + # CLI flag: -ruler.alertmanager-client.tls-key-path + [tls_key_path: | default = ""] + + # Path to the CA certificates file to validate server certificate against. If + # not set, the host's root CA certificates are used. + # CLI flag: -ruler.alertmanager-client.tls-ca-path + [tls_ca_path: | default = ""] + + # Override the expected name on the server certificate. + # CLI flag: -ruler.alertmanager-client.tls-server-name + [tls_server_name: | default = ""] + + # Skip validating server certificate. + # CLI flag: -ruler.alertmanager-client.tls-insecure-skip-verify + [tls_insecure_skip_verify: | default = false] + + # Override the default cipher suite list (separated by commas). 
Allowed + # values: + # + # Secure Ciphers: + # - TLS_RSA_WITH_AES_128_CBC_SHA + # - TLS_RSA_WITH_AES_256_CBC_SHA + # - TLS_RSA_WITH_AES_128_GCM_SHA256 + # - TLS_RSA_WITH_AES_256_GCM_SHA384 + # - TLS_AES_128_GCM_SHA256 + # - TLS_AES_256_GCM_SHA384 + # - TLS_CHACHA20_POLY1305_SHA256 + # - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA + # - TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA + # - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA + # - TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA + # - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 + # - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 + # - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 + # - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 + # - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 + # - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 + # + # Insecure Ciphers: + # - TLS_RSA_WITH_RC4_128_SHA + # - TLS_RSA_WITH_3DES_EDE_CBC_SHA + # - TLS_RSA_WITH_AES_128_CBC_SHA256 + # - TLS_ECDHE_ECDSA_WITH_RC4_128_SHA + # - TLS_ECDHE_RSA_WITH_RC4_128_SHA + # - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA + # - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 + # - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 + # CLI flag: -ruler.alertmanager-client.tls-cipher-suites + [tls_cipher_suites: | default = ""] + + # Override the default minimum TLS version. Allowed values: VersionTLS10, + # VersionTLS11, VersionTLS12, VersionTLS13 + # CLI flag: -ruler.alertmanager-client.tls-min-version + [tls_min_version: | default = ""] + + # HTTP Basic authentication username. It overrides the username set in the URL + # (if any). + # CLI flag: -ruler.alertmanager-client.basic-auth-username + [basic_auth_username: | default = ""] + + # HTTP Basic authentication password. It overrides the password set in the URL + # (if any). + # CLI flag: -ruler.alertmanager-client.basic-auth-password + [basic_auth_password: | default = ""] + + # HTTP Header authorization type (default: Bearer). + # CLI flag: -ruler.alertmanager-client.type + [type: | default = "Bearer"] + + # HTTP Header authorization credentials. 
+ # CLI flag: -ruler.alertmanager-client.credentials + [credentials: | default = ""] + + # HTTP Header authorization credentials file. + # CLI flag: -ruler.alertmanager-client.credentials-file + [credentials_file: | default = ""] + # Max time to tolerate outage for restoring "for" state of alert. # CLI flag: -ruler.for-outage-tolerance [for_outage_tolerance: | default = 1h] @@ -719,395 +922,164 @@ alert_relabel_configs: # CLI flag: -ruler.enable-sharding [enable_sharding: | default = false] +# The sharding strategy to use. Supported values are: default, shuffle-sharding. +# CLI flag: -ruler.sharding-strategy +[sharding_strategy: | default = "default"] + # Time to spend searching for a pending ruler when shutting down. # CLI flag: -ruler.search-pending-for [search_pending_for: | default = 5m] -# Ring used by Loki ruler. -# The CLI flags prefix for this block config is ruler.ring +# Ring used by Loki ruler. The CLI flags prefix for this block configuration is +# 'ruler.ring'. ring: kvstore: # Backend storage to use for the ring. Supported values are: consul, etcd, # inmemory, memberlist, multi. - # CLI flag: -.store - [store: | default = "memberlist"] + # CLI flag: -ruler.ring.store + [store: | default = "consul"] # The prefix for the keys in the store. Should end with a /. - # CLI flag: -.prefix - [prefix: | default = "collectors/"] + # CLI flag: -ruler.ring.prefix + [prefix: | default = "rulers/"] - # The consul_config configures the consul client. - [consul: ] + # Configuration for a Consul client. Only applies if store is consul. + # The CLI flags prefix for this block configuration is: ruler.ring + [consul: ] - # The etcd_config configures the etcd client. - [etcd: ] + # Configuration for an ETCD v3 client. Only applies if store is etcd. + # The CLI flags prefix for this block configuration is: ruler.ring + [etcd: ] multi: # Primary backend storage used by multi-client. 
- # CLI flag: -.multi.primary + # CLI flag: -ruler.ring.multi.primary [primary: | default = ""] # Secondary backend storage used by multi-client. - # CLI flag: -.multi.secondary + # CLI flag: -ruler.ring.multi.secondary [secondary: | default = ""] # Mirror writes to secondary store. - # CLI flag: -.multi.mirror-enabled + # CLI flag: -ruler.ring.multi.mirror-enabled [mirror_enabled: | default = false] # Timeout for storing value to secondary store. - # CLI flag: -.multi.mirror-timeout + # CLI flag: -ruler.ring.multi.mirror-timeout [mirror_timeout: | default = 2s] # Interval between heartbeats sent to the ring. 0 = disabled. - # CLI flag: -.heartbeat-period - [heartbeat_period: | default = 15s] + # CLI flag: -ruler.ring.heartbeat-period + [heartbeat_period: | default = 5s] - # The heartbeat timeout after which ruler ring members are considered unhealthy - # within the ring. 0 = never (timeout disabled). - # CLI flag: -.heartbeat-timeout + # The heartbeat timeout after which ruler ring members are considered + # unhealthy within the ring. 0 = never (timeout disabled). + # CLI flag: -ruler.ring.heartbeat-timeout [heartbeat_timeout: | default = 1m] # Name of network interface to read addresses from. - # CLI flag: -.instance-interface-names - [instance_interface_names: | default = []] + # CLI flag: -ruler.ring.instance-interface-names + [instance_interface_names: | default = []] # The number of tokens the lifecycler will generate and put into the ring if # it joined without transferring tokens from another lifecycler. - # CLI flag: -.num-tokens + # CLI flag: -ruler.ring.num-tokens [num_tokens: | default = 128] -``` - -## bos_storage_config -The `bos_storage_config` block configures Baidu Object Storage (BOS) as general storage for data generated by Loki. +# Period with which to attempt to flush rule groups. +# CLI flag: -ruler.flush-period +[flush_period: | default = 1m] -```yaml -# Name of BOS bucket. 
-# CLI flag: .baidubce.bucket-name -[ bucket_name: | default = "" ] -# BOS endpoint to connect to. -# CLI flag: .baidubce.endpoint -[ endpoint: | default = "bj.bcebos.com" ] -# Baidu Cloud Engine (BCE) Access Key ID -# CLI flag: .baidubce.access-key-id -[ access_key_id: | default = "" ] -# BCE Secret Access Key -# CLI flag: .baidubce.secret-access-key -[ secret_access_key: | default = "" ] -``` +# Enable the ruler api. +# CLI flag: -ruler.enable-api +[enable_api: | default = true] -## azure_storage_config +# Comma separated list of tenants whose rules this ruler can evaluate. If +# specified, only these tenants will be handled by ruler, otherwise this ruler +# can process rules from all tenants. Subject to sharding. +# CLI flag: -ruler.enabled-tenants +[enabled_tenants: | default = ""] -The `azure_storage_config` configures Azure as a general storage for different data generated by Loki. +# Comma separated list of tenants whose rules this ruler cannot evaluate. If +# specified, a ruler that would normally pick the specified tenant(s) for +# processing will ignore them instead. Subject to sharding. +# CLI flag: -ruler.disabled-tenants +[disabled_tenants: | default = ""] -```yaml -# Azure Cloud environment. Supported values are: AzureGlobal, -# AzureChinaCloud, AzureGermanCloud, AzureUSGovernment. -# CLI flag: -.azure.environment -[environment: | default = "AzureGlobal"] +# Report the wall time for ruler queries to complete as a per user metric and as +# an info level log message. +# CLI flag: -ruler.query-stats-enabled +[query_stats_enabled: | default = false] -# Azure storage account name. -# CLI flag: -.azure.account-name -[account_name: | default = ""] +# Disable the rule_group label on exported metrics. +# CLI flag: -ruler.disable-rule-group-label +[disable_rule_group_label: | default = false] -# Azure storage account key. -# CLI flag: -.azure.account-key -[account_key: | default = ""] - -# Name of the storage account blob container used to store chunks. 
-# This container must be created before running Loki. -# CLI flag: -.azure.container-name -[container_name: | default = "loki"] - -# Azure storage endpoint suffix without schema. The storage account name will -# be prefixed to this value to create the FQDN. -# CLI flag: -.azure.endpoint-suffix -[endpoint_suffix: | default = ""] - -# Use Managed Identity to authenticate to the Azure storage account. -# CLI flag: -.azure.use-managed-identity -[use_managed_identity: | default = false] - -# User assigned identity ID to authenticate to the Azure storage account. -# CLI flag: -.azure.user-assigned-id -[user_assigned_id: | default = ""] - -# Chunk delimiter to build the blobID -# CLI flag: -.azure.chunk-delimiter -[chunk_delimiter: | default = "-"] - -# Preallocated buffer size for downloads. -# CLI flag: -.azure.download-buffer-size -[download_buffer_size: | default = 512000] - -# Preallocated buffer size for uploads. -# CLI flag: -.azure.upload-buffer-size -[upload_buffer_size: | default = 256000] - -# Number of buffers used to used to upload a chunk. -# CLI flag: -.azure.download-buffer-count -[upload_buffer_count: | default = 1] - -# Timeout for requests made against azure blob storage. -# CLI flag: -.azure.request-timeout -[request_timeout: | default = 30s] - -# Number of retries for a request which times out. -# CLI flag: -.azure.max-retries -[max_retries: | default = 5] - -# Minimum time to wait before retrying a request. -# CLI flag: -.azure.min-retry-delay -[min_retry_delay: | default = 10ms] - -# Maximum time to wait before retrying a request. -# CLI flag: -.azure.max-retry-delay -[max_retry_delay: | default = 500ms] -``` - -## gcs_storage_config - -The `gcs_storage_config` configures GCS as a general storage for different data generated by Loki. - -```yaml -# Name of GCS bucket to put chunks in. -# CLI flag: -.gcs.bucketname -[bucket_name: | default = ""] - -# Service account key content in JSON format. 
-# CLI flag: -.gcs.service-account -[service_account: | default = ""] - -# The size of the buffer that GCS client for each PUT request. 0 to disable -# buffering. -# CLI flag: -.gcs.chunk-buffer-size -[chunk_buffer_size: | default = 0] - -# The duration after which the requests to GCS should be timed out. -# CLI flag: -.gcs.request-timeout -[request_timeout: | default = 0s] - -# Enable HTTP/2 when connecting to GCS. This configuration only applies to GET operations. -# CLI flag: -.gcs.enable-http2 -[enable_http2: | default = true] -``` - -## s3_storage_config - -The `s3_storage_config` configures S3 as a general storage for different data generated by Loki. - -```yaml -# S3 endpoint URL with escaped Key and Secret encoded. If only region is -# specified as a host, proper endpoint will be deduced. Use -# inmemory:/// to use a mock in-memory implementation. -# CLI flag: -.s3.url -[s3: | default = ] - -# Set this to `true` to force the request to use path-style addressing. -# CLI flag: -.s3.force-path-style -[s3forcepathstyle: | default = false] - -# Comma separated list of bucket names to evenly distribute chunks over. -# Overrides any buckets specified in s3.url flag -# CLI flag: -.s3.buckets -[bucketnames: | default = ""] - -# S3 Endpoint to connect to. -# CLI flag: -.s3.endpoint -[endpoint: | default = ""] - -# AWS region to use. -# CLI flag: -.s3.region -[region: | default = ""] - -# AWS Access Key ID -# CLI flag: -.s3.access-key-id -[access_key_id: | default = ""] - -# AWS Secret Access Key -# CLI flag: -.s3.secret-access-key -[secret_access_key: | default = ""] - -# Disable https on S3 connection. -# CLI flag: -.s3.insecure -[insecure: | default = false] - -# Enable AES256 AWS server-side encryption -# CLI flag: -.s3.sse-encryption -[sse_encryption: | default = false] - -http_config: - # The maximum amount of time an idle connection will be held open. 
- # CLI flag: -.s3.http.idle-conn-timeout - [idle_conn_timeout: | default = 1m30s] - - # If non-zero, specifies the amount of time to wait for a server's - # response headers after fully writing the request. - # CLI flag: -.s3.http.response-header-timeout - [response_header_timeout: | default = 0s] - - # Set to true to skip verifying the certificate chain and hostname. - # CLI flag: -.s3.http.insecure-skip-verify - [insecure_skip_verify: | default = false] - - # Path to the trusted CA file that signed the SSL certificate of the S3 - # endpoint. - # CLI flag: -.s3.http.ca-file - [ca_file: | default = ""] -``` - -## swift_storage_config - -The `swift_storage_config` configures Swift as a general storage for different data generated by Loki. - -```yaml -# Openstack authentication URL. -# CLI flag: -.swift.auth-url -[auth_url: | default = ""] - -# Openstack username for the api. -# CLI flag: -.swift.username -[username: | default = ""] - -# Openstack user's domain name. -# CLI flag: -.swift.user-domain-name -[user_domain_name: | default = ""] - -# Openstack user's domain ID. -# CLI flag: -.swift.user-domain-id -[user_domain_id: | default = ""] - -# Openstack user ID for the API. -# CLI flag: -.swift.user-id -[user_id: | default = ""] - -# Openstack API key. -# CLI flag: -.swift.password -[password: | default = ""] - -# Openstack user's domain ID. -# CLI flag: -.swift.domain-id -[domain_id: | default = ""] - -# Openstack user's domain name. -# CLI flag: -.swift.domain-name -[domain_name: | default = ""] - -# Openstack project ID (v2,v3 auth only). -# CLI flag: -.swift.project-id -[project_id: | default = ""] - -# Openstack project name (v2,v3 auth only). -# CLI flag: -.swift.project-name -[project_name: | default = ""] - -# ID of the project's domain (v3 auth only), only needed if it differs the -# from user domain. 
-# CLI flag: -.swift.project-domain-id -[project_domain_id: | default = ""] - -# Name of the project's domain (v3 auth only), only needed if it differs -# from the user domain. -# CLI flag: -.swift.project-domain-name -[project_domain_name: | default = ""] - -# Openstack Region to use eg LON, ORD - default is use first region (v2,v3 -# auth only) -# CLI flag: -.swift.region-name -[region_name: | default = ""] - -# Name of the Swift container to put chunks in. -# CLI flag: -.swift.container-name -[container_name: | default = ""] -``` - -## hedging - -The `hedging` block configures how to hedge storage requests. - -The hedging implementation sends a second storage request once a first request has -been outstanding for more than a configured expected latency for this class of requests. -Calculate your latency to be the 99th percentile of object storage response times. - -```yaml -# An optional duration that sets the quantity of time after a first storage request -# is sent and before a second request is sent, when no response is received for the first -# storage request. The recommended duration is the measured 99th percentile of object -# storage response times, to reduce long tail latency. This option is most impactful -# when used with queriers, and has minimal to no impact on other components. -# The default value of 0 disables the hedging of storage requests. -# Example: "at: 500ms" -[at: | default = 0] - -# An optional maximum quantity of hedged requests to be issued for a given request. -[up_to: | default = 2] - -# Caps the rate of hedged requests by optionally defining the maximum quantity of -# hedged requests issued per second. -[max_per_second: | default = 5] -``` - -## local_storage_config - -The `local_storage_config` configures a (local) file system as a general storage for different data generated by Loki. +wal: + # The directory in which to write tenant WAL files. Each tenant will have its + # own directory one level below this directory. 
+ # CLI flag: -ruler.wal.dir + [dir: | default = "ruler-wal"] -```yaml -# Filesystem directory to be used as storage. -# CLI flag: -.local.directory -[directory: | default = ""] -``` + # Frequency with which to run the WAL truncation process. + # CLI flag: -ruler.wal.truncate-frequency + [truncate_frequency: | default = 1h] -## frontend_worker + # Minimum age that samples must exist in the WAL before being truncated. + # CLI flag: -ruler.wal.min-age + [min_age: | default = 5m] -The `frontend_worker` configures the worker - running within the Loki querier - picking up and executing queries enqueued by the query-frontend. + # Maximum age that samples must exist in the WAL before being truncated. + # CLI flag: -ruler.wal.max-age + [max_age: | default = 4h] -```yaml -# Address of query frontend service, in host:port format. -# CLI flag: -querier.frontend-address -[frontend_address: | default = ""] +wal_cleaner: + # The minimum age of a WAL to consider for cleaning. + # CLI flag: -ruler.wal-cleaner.min-age + [min_age: | default = 12h] -# Number of simultaneous queries to process. -# CLI flag: -querier.worker-parallelism -[parallelism: | default = 10] + # How often to run the WAL cleaner. 0 = disabled. + # CLI flag: -ruler.wal-cleaer.period + [period: | default = 0s] -# Force worker concurrency to match the -querier.max-concurrent option. Overrides querier.worker-parallelism. -# CLI flag: -querier.worker-match-max-concurrent -[match_max_concurrent: | default = true] +# Remote-write configuration to send rule samples to a Prometheus remote-write +# endpoint. +remote_write: + # Deprecated: Use 'clients' instead. Configure remote write client. + [client: ] -# How often to query the frontend_address DNS to resolve frontend addresses. -# Also used to determine how often to poll the scheduler-ring for addresses if configured. -# CLI flag: -querier.dns-lookup-period -[dns_lookup_duration: | default = 3s] + # Configure remote write clients. A map with remote client id as key. 
+ [clients: ] -# The CLI flags prefix for this block config is: querier.frontend-client -[grpc_client_config: ] + # Enable remote-write functionality. + # CLI flag: -ruler.remote-write.enabled + [enabled: | default = false] -# DNS hostname used for finding query-schedulers. -# CLI flag: -querier.scheduler-address -[scheduler_address: | default = ""] + # Minimum period to wait between refreshing remote-write reconfigurations. + # This should be greater than or equivalent to + # -limits.per-user-override-period. + # CLI flag: -ruler.remote-write.config-refresh-period + [config_refresh_period: | default = 10s] ``` -## ingester_client +### ingester_client -The `ingester_client` block configures how connections to ingesters -operate. +The `ingester_client` block configures how the distributor will connect to ingesters. Only appropriate when running all components, the distributor, or the querier. ```yaml -# Configures how connections are pooled +# Configures how connections are pooled. pool_config: - # Whether or not to do health checks. - # CLI flag: -distributor.health-check-ingesters - [health_check_ingesters: | default = false] - - # How frequently to clean up clients for servers that have gone away after - # a health check. + # How frequently to clean up clients for ingesters that have gone away. # CLI flag: -distributor.client-cleanup-period [client_cleanup_period: | default = 15s] - # How quickly a dead client will be removed after it has been detected - # to disappear. Set this to a value to allow time for a secondary - # health check to recover the missing client. + # Run a health check on each ingester client during periodic cleanup. + # CLI flag: -distributor.health-check-ingesters + [health_check_ingesters: | default = true] + + # How quickly a dead client will be removed after it has been detected to + # disappear. Set this to a value to allow time for a secondary health check to + # recover the missing client. 
# CLI flag: -ingester.client.healthcheck-timeout [remote_timeout: | default = 1s] @@ -1115,23 +1087,23 @@ pool_config: # CLI flag: -ingester.client.timeout [remote_timeout: | default = 5s] -# Configures how the gRPC connection to ingesters work as a client -# The CLI flags prefix for this block config is: ingester.client -[grpc_client_config: ] +# Configures how the gRPC connection to ingesters work as a client. +# The CLI flags prefix for this block configuration is: ingester.client +[grpc_client_config: ] ``` -## ingester +### ingester -The `ingester` block configures the Loki Ingesters. +The `ingester` block configures the ingester and how the ingester will register itself to a key value store. ```yaml -# Configures how the lifecycle of the ingester will operate -# and where it will register for discovery. +# Configures how the lifecycle of the ingester will operate and where it will +# register for discovery. lifecycler: ring: kvstore: # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist + # inmemory, memberlist, multi. # CLI flag: -ring.store [store: | default = "consul"] @@ -1139,57 +1111,124 @@ lifecycler: # CLI flag: -ring.prefix [prefix: | default = "collectors/"] - # The consul_config configures the consul client. - # CLI flag: - [consul: ] + # Configuration for a Consul client. Only applies if store is consul. + [consul: ] + + # Configuration for an ETCD v3 client. Only applies if store is etcd. + [etcd: ] + + multi: + # Primary backend storage used by multi-client. + # CLI flag: -multi.primary + [primary: | default = ""] - # The etcd_config configures the etcd client. - # CLI flag: - [etcd: ] + # Secondary backend storage used by multi-client. + # CLI flag: -multi.secondary + [secondary: | default = ""] + + # Mirror writes to secondary store. + # CLI flag: -multi.mirror-enabled + [mirror_enabled: | default = false] + + # Timeout for storing value to secondary store. 
+ # CLI flag: -multi.mirror-timeout + [mirror_timeout: | default = 2s] # The heartbeat timeout after which ingesters are skipped for reads/writes. + # 0 = never (timeout disabled). # CLI flag: -ring.heartbeat-timeout [heartbeat_timeout: | default = 1m] # The number of ingesters to write to and read from. - # CLI flag: -ingester.replication-factor + # CLI flag: -distributor.replication-factor [replication_factor: | default = 3] - # The number of tokens the lifecycler will generate and put into the ring if - # it joined without transferring tokens from another lifecycler. + # True to enable the zone-awareness and replicate ingested samples across + # different availability zones. + # CLI flag: -distributor.zone-awareness-enabled + [zone_awareness_enabled: | default = false] + + # Comma-separated list of zones to exclude from the ring. Instances in + # excluded zones will be filtered out from the ring. + # CLI flag: -distributor.excluded-zones + [excluded_zones: | default = ""] + + # Number of tokens for each ingester. # CLI flag: -ingester.num-tokens [num_tokens: | default = 128] - # Period at which to heartbeat to the underlying ring. + # Period at which to heartbeat to consul. 0 = disabled. # CLI flag: -ingester.heartbeat-period [heartbeat_period: | default = 5s] - # How long to wait to claim tokens and chunks from another member when - # that member is leaving. Will join automatically after the duration expires. - # CLI flag: -ingester.join-after - [join_after: | default = 0s] + # Heartbeat timeout after which instance is assumed to be unhealthy. 0 = + # disabled. + # CLI flag: -ingester.heartbeat-timeout + [heartbeat_timeout: | default = 1m] - # Observe tokens after generating to resolve collisions. Useful when using a gossip ring. + # Observe tokens after generating to resolve collisions. Useful when using + # gossiping ring. # CLI flag: -ingester.observe-period [observe_period: | default = 0s] - # Minimum duration to wait before becoming ready. 
This is to work around race - # conditions with ingesters exiting and updating the ring. + # Period to wait for a claim from another member; will join automatically + # after this. + # CLI flag: -ingester.join-after + [join_after: | default = 0s] + + # Minimum duration to wait after the internal readiness checks have passed but + # before succeeding the readiness endpoint. This is used to slowdown + # deployment controllers (eg. Kubernetes) after an instance is ready and + # before they proceed with a rolling update, to give the rest of the cluster + # instances enough time to receive ring updates. # CLI flag: -ingester.min-ready-duration [min_ready_duration: | default = 15s] - # Name of network interfaces to read addresses from. + # Name of network interface to read address from. # CLI flag: -ingester.lifecycler.interface - interface_names: + [interface_names: | default = []] - - [ ... | default = []] - - # Duration to sleep before exiting to ensure metrics are scraped. + # Duration to sleep for before exiting, to ensure metrics are scraped. # CLI flag: -ingester.final-sleep - [final_sleep: | default = 30s] - -# Number of times to try and transfer chunks when leaving before -# falling back to flushing to the store. Zero = no transfers are done. + [final_sleep: | default = 0s] + + # File path where tokens are stored. If empty, tokens are not stored at + # shutdown and restored at startup. + # CLI flag: -ingester.tokens-file-path + [tokens_file_path: | default = ""] + + # The availability zone where this instance is running. + # CLI flag: -ingester.availability-zone + [availability_zone: | default = ""] + + # Unregister from the ring upon clean shutdown. It can be useful to disable + # for rolling restarts with consistent naming in conjunction with + # -distributor.extend-writes=false. 
+ # CLI flag: -ingester.unregister-on-shutdown + [unregister_on_shutdown: | default = true] + + # When enabled the readiness probe succeeds only after all instances are + # ACTIVE and healthy in the ring, otherwise only the instance itself is + # checked. This option should be disabled if in your cluster multiple + # instances can be rolled out simultaneously, otherwise rolling updates may be + # slowed down. + # CLI flag: -ingester.readiness-check-ring-health + [readiness_check_ring_health: | default = true] + + # IP address to advertise in the ring. + # CLI flag: -ingester.lifecycler.addr + [address: | default = ""] + + # port to advertise in consul (defaults to server.grpc-listen-port). + # CLI flag: -ingester.lifecycler.port + [port: | default = 0] + + # ID to register in the ring. + # CLI flag: -ingester.lifecycler.ID + [id: | default = ""] + +# Number of times to try and transfer chunks before falling back to flushing. If +# set to 0 or negative value, transfers are disabled. # CLI flag: -ingester.max-transfer-retries [max_transfer_retries: | default = 0] @@ -1197,11 +1236,11 @@ lifecycler: # CLI flag: -ingester.concurrent-flushes [concurrent_flushes: | default = 32] -# How often should the ingester see if there are any blocks to flush +# How often should the ingester see if there are any blocks to flush. # CLI flag: -ingester.flush-check-period [flush_check_period: | default = 30s] -# The timeout before a flush is cancelled +# The timeout before a flush is cancelled. # CLI flag: -ingester.flush-op-timeout [flush_op_timeout: | default = 10m] @@ -1209,268 +1248,294 @@ lifecycler: # CLI flag: -ingester.chunks-retain-period [chunk_retain_period: | default = 0s] -# How long chunks should sit in-memory with no updates before -# being flushed if they don't hit the max block size. This means -# that half-empty chunks will still be flushed after a certain -# period as long as they receive no further activity. 
+# How long chunks should sit in-memory with no updates before being flushed if +# they don't hit the max block size. This means that half-empty chunks will +# still be flushed after a certain period as long as they receive no further +# activity. # CLI flag: -ingester.chunks-idle-period [chunk_idle_period: | default = 30m] -# The targeted _uncompressed_ size in bytes of a chunk block -# When this threshold is exceeded the head block will be cut and compressed inside the chunk. +# The targeted _uncompressed_ size in bytes of a chunk block When this threshold +# is exceeded the head block will be cut and compressed inside the chunk. # CLI flag: -ingester.chunks-block-size [chunk_block_size: | default = 262144] -# A target _compressed_ size in bytes for chunks. -# This is a desired size not an exact size, chunks may be slightly bigger -# or significantly smaller if they get flushed for other reasons (e.g. chunk_idle_period) -# A value of 0 creates chunks with a fixed 10 blocks, -# A non zero value will create chunks with a variable number of blocks to meet the target size. +# A target _compressed_ size in bytes for chunks. This is a desired size not an +# exact size, chunks may be slightly bigger or significantly smaller if they get +# flushed for other reasons (e.g. chunk_idle_period). A value of 0 creates +# chunks with a fixed 10 blocks, a non zero value will create chunks with a +# variable number of blocks to meet the target size. # CLI flag: -ingester.chunk-target-size [chunk_target_size: | default = 1572864] -# The compression algorithm to use for chunks. (supported: gzip, lz4, snappy) -# You should choose your algorithm depending on your need: -# - `gzip` highest compression ratio but also slowest decompression speed. (144 kB per chunk) -# - `lz4` fastest compression speed (188 kB per chunk) -# - `snappy` fast and popular compression algorithm (272 kB per chunk) +# The algorithm to use for compressing chunk. 
(none, gzip, lz4-64k, snappy, +# lz4-256k, lz4-1M, lz4, flate, zstd) # CLI flag: -ingester.chunk-encoding -[chunk_encoding: | default = gzip] +[chunk_encoding: | default = "gzip"] + +# The maximum duration of a timeseries chunk in memory. If a timeseries runs for +# longer than this, the current chunk will be flushed to the store and a new +# chunk created. +# CLI flag: -ingester.max-chunk-age +[max_chunk_age: | default = 2h] + +# Forget about ingesters having heartbeat timestamps older than +# `ring.kvstore.heartbeat_timeout`. This is equivalent to clicking on the +# `/ring` `forget` button in the UI: the ingester is removed from the ring. This +# is a useful setting when you are sure that an unhealthy node won't return. An +# example is when not using stateful sets or the equivalent. Use +# `memberlist.rejoin_interval` > 0 to handle network partition cases when using +# a memberlist. +# CLI flag: -ingester.autoforget-unhealthy +[autoforget_unhealthy: | default = false] # Parameters used to synchronize ingesters to cut chunks at the same moment. -# Sync period is used to roll over incoming entry to a new chunk. If chunk's utilization -# isn't high enough (eg. less than 50% when sync_min_utilization is set to 0.5), then -# this chunk rollover doesn't happen. +# Sync period is used to roll over incoming entry to a new chunk. If chunk's +# utilization isn't high enough (eg. less than 50% when sync_min_utilization is +# set to 0.5), then this chunk rollover doesn't happen. # CLI flag: -ingester.sync-period -[sync_period: | default = 0] +[sync_period: | default = 0s] +# Minimum utilization of chunk when doing synchronization. # CLI flag: -ingester.sync-min-utilization -[sync_min_utilization: | Default = 0] +[sync_min_utilization: | default = 0] -# The maximum number of errors a stream will report to the user -# when a push fails. 0 to make unlimited. +# The maximum number of errors a stream will report to the user when a push +# fails. 0 to make unlimited. 
# CLI flag: -ingester.max-ignored-stream-errors [max_returned_stream_errors: | default = 10] -# The maximum duration of a timeseries chunk in memory. If a timeseries runs for longer than this, -# the current chunk will be flushed to the store and a new chunk created. -# CLI flag: -ingester.max-chunk-age -[max_chunk_age: | default = 2h] - -# How far in the past an ingester is allowed to query the store for data. -# This is only useful for running multiple Loki binaries with a shared ring -# with a `filesystem` store, which is NOT shared between the binaries. -# When using any "shared" object store like S3 or GCS, this value must always be left as 0. -# It is an error to configure this to a non-zero value when using any object store other -# than `filesystem`. -# Use a value of -1 to allow the ingester to query the store infinitely far back in time. +# How far back should an ingester be allowed to query the store for data, for +# use only with boltdb-shipper/tsdb index and filesystem object store. -1 for +# infinite. # CLI flag: -ingester.query-store-max-look-back-period -[query_store_max_look_back_period: | default = 0] - -# Forget about ingesters having heartbeat timestamps older than `ring.kvstore.heartbeat_timeout`. -# This is equivalent to clicking on the `/ring` `forget` button in the UI: -# the ingester is removed from the ring. -# This is a useful setting when you are sure that an unhealthy node won't return. -# An example is when not using stateful sets or the equivalent. -# Use `memberlist.rejoin_interval` > 0 to handle network partition cases when using a memberlist. -# CLI flag: -ingester.autoforget-unhealthy -[autoforget_unhealthy: | default = false] +[query_store_max_look_back_period: | default = 0s] # The ingester WAL (Write Ahead Log) records incoming logs and stores them on # the local file systems in order to guarantee persistence of acknowledged data # in the event of a process crash. wal: - # Enables writing to WAL. 
+ # Enable writing of ingested data into WAL. # CLI flag: -ingester.wal-enabled [enabled: | default = true] # Directory where the WAL data should be stored and/or recovered from. # CLI flag: -ingester.wal-dir - [dir: | default = "wal"] - - # When WAL is enabled, should chunks be flushed to long-term storage on shutdown. - # CLI flag: -ingester.flush-on-shutdown - [flush_on_shutdown: | default = false] + [dir: | default = "wal"] # Interval at which checkpoints should be created. - # CLI flag: ingester.checkpoint-duration + # CLI flag: -ingester.checkpoint-duration [checkpoint_duration: | default = 5m] - # Maximum memory size the WAL may use during replay. After hitting this, - # it will flush data to storage before continuing. - # A unit suffix (KB, MB, GB) may be applied. - [replay_memory_ceiling: | default = 4GB] + # When WAL is enabled, should chunks be flushed to long-term storage on + # shutdown. + # CLI flag: -ingester.flush-on-shutdown + [flush_on_shutdown: | default = false] -# Shard factor used in the ingesters for the in process reverse index. -# This MUST be evenly divisible by ALL schema shard factors or Loki will not start. + # Maximum memory size the WAL may use during replay. After hitting this, it + # will flush data to storage before continuing. A unit suffix (KB, MB, GB) may + # be applied. + # CLI flag: -ingester.wal-replay-memory-ceiling + [replay_memory_ceiling: | default = 4GB] + +# Shard factor used in the ingesters for the in process reverse index. This MUST +# be evenly divisible by ALL schema shard factors or Loki will not start. +# CLI flag: -ingester.index-shards [index_shards: | default = 32] + +# Maximum number of dropped streams to keep in memory during tailing. +# CLI flag: -ingester.tailer.max-dropped-streams +[max_dropped_streams: | default = 10] ``` -## consul_config +### index_gateway -The `consul_config` configures the consul client. 
The supported CLI flags used to reference this config block are: +The `index_gateway` block configures the Loki index gateway server, responsible for serving index queries without the need to constantly interact with the object store. ```yaml - # The hostname and port of Consul. -# CLI flag: -.consul.hostname -[host: | default = "localhost:8500"] +# Defines in which mode the index gateway server will operate (default to +# 'simple'). It supports two modes: +# - 'simple': an index gateway server instance is responsible for handling, +# storing and returning requests for all indices for all tenants. +# - 'ring': an index gateway server instance is responsible for a subset of +# tenants instead of all tenants. +# CLI flag: -index-gateway.mode +[mode: | default = "simple"] + +# Defines the ring to be used by the index gateway servers and clients in case +# the servers are configured to run in 'ring' mode. In case this isn't +# configured, this block supports inheriting configuration from the common ring +# section. +ring: + kvstore: + # Backend storage to use for the ring. Supported values are: consul, etcd, + # inmemory, memberlist, multi. + # CLI flag: -index-gateway.ring.store + [store: | default = "consul"] -# The ACL Token used to interact with Consul. -# CLI flag: -.consul.acl-token -[acl_token: ] + # The prefix for the keys in the store. Should end with a /. + # CLI flag: -index-gateway.ring.prefix + [prefix: | default = "collectors/"] -# The HTTP timeout when communicating with Consul -# CLI flag: -.consul.client-timeout -[http_client_timeout: | default = 20s] + # Configuration for a Consul client. Only applies if store is consul. + # The CLI flags prefix for this block configuration is: index-gateway.ring + [consul: ] -# Whether or not consistent reads to Consul are enabled. -# CLI flag: -.consul.consistent-reads -[consistent_reads: | default = true] -``` + # Configuration for an ETCD v3 client. Only applies if store is etcd. 
+ # The CLI flags prefix for this block configuration is: index-gateway.ring + [etcd: ] -## etcd_config + multi: + # Primary backend storage used by multi-client. + # CLI flag: -index-gateway.ring.multi.primary + [primary: | default = ""] -The `etcd_config` configures the etcd client. The supported CLI flags used to reference this config block are: + # Secondary backend storage used by multi-client. + # CLI flag: -index-gateway.ring.multi.secondary + [secondary: | default = ""] -```yaml -# The etcd endpoints to connect to. -# CLI flag: -.etcd.endpoints -[endpoints: | default = []] + # Mirror writes to secondary store. + # CLI flag: -index-gateway.ring.multi.mirror-enabled + [mirror_enabled: | default = false] -# The dial timeout for the etcd connection. -# CLI flag: -.etcd.dial-timeout -[dial_timeout: | default = 10s] + # Timeout for storing value to secondary store. + # CLI flag: -index-gateway.ring.multi.mirror-timeout + [mirror_timeout: | default = 2s] -# The maximum number of retries to do for failed ops. -# CLI flag: -.etcd.max-retries -[max_retries: | default = 10] -``` + # Period at which to heartbeat to the ring. 0 = disabled. + # CLI flag: -index-gateway.ring.heartbeat-period + [heartbeat_period: | default = 15s] + + # The heartbeat timeout after which compactors are considered unhealthy within + # the ring. 0 = never (timeout disabled). + # CLI flag: -index-gateway.ring.heartbeat-timeout + [heartbeat_timeout: | default = 1m] -## memberlist_config + # File path where tokens are stored. If empty, tokens are not stored at + # shutdown and restored at startup. + # CLI flag: -index-gateway.ring.tokens-file-path + [tokens_file_path: | default = ""] -The `memberlist_config` block configures the gossip ring to discover and connect -between distributors, ingesters and queriers. The configuration is unique for all -three components to ensure a single shared ring. + # True to enable zone-awareness and replicate blocks across different + # availability zones. 
+ # CLI flag: -index-gateway.ring.zone-awareness-enabled + [zone_awareness_enabled: | default = false] -When a `memberlist_config` with least 1 `join_members` is defined, a `kvstore` of type `memberlist` is -automatically configured for the `distributor`, `ingester`, and `ruler` rings unless otherwise specified in -those components specific configuration sections. + # Instance ID to register in the ring. + # CLI flag: -index-gateway.ring.instance-id + [instance_id: | default = ""] -```yaml -# Name of the node in memberlist cluster. Defaults to hostname. -# CLI flag: -memberlist.nodename -[node_name: | default = ""] - -# Add random suffix to the node name. -# CLI flag: -memberlist.randomize-node-name -[randomize_node_name: | default = true] - -# The timeout for establishing a connection with a remote node, and for -# read/write operations. Uses memberlist LAN defaults if 0. -# CLI flag: -memberlist.stream-timeout -[stream_timeout: | default = 0s] - -# Multiplication factor used when sending out messages (factor * log(N+1)). -# CLI flag: -memberlist.retransmit-factor -[retransmit_factor: | default = 0] - -# How often to use pull/push sync. Uses memberlist LAN defaults if 0. -# CLI flag: -memberlist.pullpush-interval -[pull_push_interval: | default = 0s] - -# How often to gossip. Uses memberlist LAN defaults if 0. -# CLI flag: -memberlist.gossip-interval -[gossip_interval: | default = 0s] - -# How many nodes to gossip to. Uses memberlist LAN defaults if 0. -# CLI flag: -memberlist.gossip-nodes -[gossip_nodes: | default = 0] - -# How long to keep gossiping to dead nodes, to give them chance to refute their -# death. Uses memberlist LAN defaults if 0. -# CLI flag: -memberlist.gossip-to-dead-nodes-time -[gossip_to_dead_nodes_time: | default = 0s] - -# How soon can dead node's name be reclaimed with new address. Defaults to 0, -# which is disabled. -# CLI flag: -memberlist.dead-node-reclaim-time -[dead_node_reclaim_time: | default = 0s] - -# Other cluster members to join. 
Can be specified multiple times. It can be an -# IP, hostname or an entry specified in the DNS Service Discovery format (see -# https://grafana.com/docs/mimir/latest/operators-guide/configuring/about-dns-service-discovery/ -# for more details). -# CLI flag: -memberlist.join -[join_members: | default = ] - -# Min backoff duration to join other cluster members. -# CLI flag: -memberlist.min-join-backoff -[min_join_backoff: | default = 1s] - -# Max backoff duration to join other cluster members. -# CLI flag: -memberlist.max-join-backoff -[max_join_backoff: | default = 1m] - -# Max number of retries to join other cluster members. -# CLI flag: -memberlist.max-join-retries -[max_join_retries: | default = 10] - -# If this node fails to join memberlist cluster, abort. -# CLI flag: -memberlist.abort-if-join-fails -[abort_if_cluster_join_fails: | default = true] - -# If not 0, how often to rejoin the cluster. Occasional rejoin can help to fix -# the cluster split issue, and is harmless otherwise. For example when using -# only few components as a seed nodes (via -memberlist.join), then it's -# recommended to use rejoin. If -memberlist.join points to dynamic service that -# resolves to all gossiping nodes (eg. Kubernetes headless service), then rejoin -# is not needed. -# CLI flag: -memberlist.rejoin-interval -[rejoin_interval: | default = 0s] - -# How long to keep LEFT ingesters in the ring. -# CLI flag: -memberlist.left-ingesters-timeout -[left_ingesters_timeout: | default = 5m] - -# Timeout for leaving memberlist cluster. -# CLI flag: -memberlist.leave-timeout -[leave_timeout: | default = 5s] - -# IP address to listen on for gossip messages. Multiple addresses may be -# specified. Defaults to 0.0.0.0 -# CLI flag: -memberlist.bind-addr -[bind_addr: | default = ] - -# Port to listen on for gossip messages. -# CLI flag: -memberlist.bind-port -[bind_port: | default = 7946] - -# Timeout used when connecting to other nodes to send packet. 
-# CLI flag: -memberlist.packet-dial-timeout -[packet_dial_timeout: | default = 5s] - -# Timeout for writing 'packet' data. -# CLI flag: -memberlist.packet-write-timeout -[packet_write_timeout: | default = 5s] + # Name of network interface to read address from. + # CLI flag: -index-gateway.ring.instance-interface-names + [instance_interface_names: | default = []] + + # Port to advertise in the ring (defaults to server.grpc-listen-port). + # CLI flag: -index-gateway.ring.instance-port + [instance_port: | default = 0] + + # IP address to advertise in the ring. + # CLI flag: -index-gateway.ring.instance-addr + [instance_addr: | default = ""] + + # The availability zone where this instance is running. Required if + # zone-awareness is enabled. + # CLI flag: -index-gateway.ring.instance-availability-zone + [instance_availability_zone: | default = ""] + + # How many index gateway instances are assigned to each tenant. + # CLI flag: -replication-factor + [replication_factor: | default = 3] ``` -## storage_config +### storage_config -The `storage_config` block configures one of many possible stores for both the -index and chunks. Which configuration to be picked should be defined in schema_config -block. +The `storage_config` block configures one of many possible stores for both the index and chunks. Which configuration to be picked should be defined in schema_config block. ```yaml # Configures storing chunks in AWS. Required options only required when aws is # present. aws: - # S3 or S3-compatible endpoint URL with escaped Key and Secret encoded. - # If only region is specified as a host, the proper endpoint will be deduced. - # Use inmemory:/// to use a mock in-memory implementation. + dynamodb: + # DynamoDB endpoint URL with escaped Key and Secret encoded. If only region + # is specified as a host, proper endpoint will be deduced. Use + # inmemory:/// to use a mock in-memory implementation. 
+ # CLI flag: -dynamodb.url + [dynamodb_url: ] + + # DynamoDB table management requests per second limit. + # CLI flag: -dynamodb.api-limit + [api_limit: | default = 2] + + # DynamoDB rate cap to back off when throttled. + # CLI flag: -dynamodb.throttle-limit + [throttle_limit: | default = 10] + + metrics: + # Use metrics-based autoscaling, via this query URL + # CLI flag: -metrics.url + [url: | default = ""] + + # Queue length above which we will scale up capacity + # CLI flag: -metrics.target-queue-length + [target_queue_length: | default = 100000] + + # Scale up capacity by this multiple + # CLI flag: -metrics.scale-up-factor + [scale_up_factor: | default = 1.3] + + # Ignore throttling below this level (rate per second) + # CLI flag: -metrics.ignore-throttle-below + [ignore_throttle_below: | default = 1] + + # query to fetch ingester queue length + # CLI flag: -metrics.queue-length-query + [queue_length_query: | default = "sum(avg_over_time(cortex_ingester_flush_queue_length{job=\"cortex/ingester\"}[2m]))"] + + # query to fetch throttle rates per table + # CLI flag: -metrics.write-throttle-query + [write_throttle_query: | default = "sum(rate(cortex_dynamo_throttled_total{operation=\"DynamoDB.BatchWriteItem\"}[1m])) by (table) > 0"] + + # query to fetch write capacity usage per table + # CLI flag: -metrics.usage-query + [write_usage_query: | default = "sum(rate(cortex_dynamo_consumed_capacity_total{operation=\"DynamoDB.BatchWriteItem\"}[15m])) by (table) > 0"] + + # query to fetch read capacity usage per table + # CLI flag: -metrics.read-usage-query + [read_usage_query: | default = "sum(rate(cortex_dynamo_consumed_capacity_total{operation=\"DynamoDB.QueryPages\"}[1h])) by (table) > 0"] + + # query to fetch read errors per table + # CLI flag: -metrics.read-error-query + [read_error_query: | default = "sum(increase(cortex_dynamo_failures_total{operation=\"DynamoDB.QueryPages\",error=\"ProvisionedThroughputExceededException\"}[1m])) by (table) > 0"] + + # Number of 
chunks to group together to parallelise fetches (zero to + # disable) + # CLI flag: -dynamodb.chunk-gang-size + [chunk_gang_size: | default = 10] + + # Max number of chunk-get operations to start in parallel + # CLI flag: -dynamodb.chunk.get-max-parallelism + [chunk_get_max_parallelism: | default = 32] + + backoff_config: + # Minimum backoff time + # CLI flag: -dynamodb.min-backoff + [min_period: | default = 100ms] + + # Maximum backoff time + # CLI flag: -dynamodb.max-backoff + [max_period: | default = 50s] + + # Maximum number of times to retry an operation + # CLI flag: -dynamodb.max-retries + [max_retries: | default = 20] + + # S3 endpoint URL with escaped Key and Secret encoded. If only region is + # specified as a host, proper endpoint will be deduced. Use + # inmemory:/// to use a mock in-memory implementation. # CLI flag: -s3.url - [s3: ] + [s3: ] - # Set to true to force the request to use path-style addressing + # Set this to `true` to force the request to use path-style addressing. # CLI flag: -s3.force-path-style [s3forcepathstyle: | default = false] @@ -1487,19 +1552,20 @@ aws: # CLI flag: -s3.region [region: | default = ""] - # AWS Access Key ID. + # AWS Access Key ID # CLI flag: -s3.access-key-id [access_key_id: | default = ""] - # AWS Secret Access Key. + # AWS Secret Access Key # CLI flag: -s3.secret-access-key [secret_access_key: | default = ""] - # Disable https on S3 connection. + # Disable https on s3 connection. # CLI flag: -s3.insecure [insecure: | default = false] - # Enable AES256 AWS Server Side Encryption. + # Enable AWS Server Side Encryption [Deprecated: Use .sse instead. if + # s3.sse-encryption is enabled, it assumes .sse.type SSE-S3] # CLI flag: -s3.sse-encryption [sse_encryption: | default = false] @@ -1522,253 +1588,279 @@ aws: # CLI flag: -s3.http.ca-file [ca_file: | default = ""] - # Configures back off when s3 get Object. - backoff_config: - # Minimum duration to back off. 
- # CLI flag: -s3.backoff-min-period - [min_period: | default = 100ms] - - # The duration to back off. - # CLI flag: -s3.backoff-max-period - [max_period: | default = 3s] - - # Number of times to back off and retry before failing. - # CLI flag: -s3.backoff-retries - [max_retries: | default = 5] - - # Configure the DynamoDB connection - dynamodb: - # URL for DynamoDB with escaped Key and Secret encoded. If only region is specified as a - # host, the proper endpoint will be deduced. Use inmemory:/// to - # use a mock in-memory implementation. - # CLI flag: -dynamodb.url - dynamodb_url: + # The signature version to use for authenticating against S3. Supported values + # are: v4, v2. + # CLI flag: -s3.signature-version + [signature_version: | default = "v4"] - # DynamoDB table management requests per-second limit. - # CLI flag: -dynamodb.api-limit - [api_limit: | default = 2.0] + sse: + # Enable AWS Server Side Encryption. Supported values: SSE-KMS, SSE-S3. + # CLI flag: -s3.sse.type + [type: | default = ""] - # DynamoDB rate cap to back off when throttled. - # CLI flag: -dynamodb.throttle-limit - [throttle_limit: | default = 10.0] + # KMS Key ID used to encrypt objects in S3 + # CLI flag: -s3.sse.kms-key-id + [kms_key_id: | default = ""] - # Metrics-based autoscaling configuration. - metrics: - # Use metrics-based autoscaling via this Prometheus query URL. - # CLI flag: -metrics.url - [url: ] + # KMS Encryption Context used for object encryption. It expects JSON + # formatted string. + # CLI flag: -s3.sse.kms-encryption-context + [kms_encryption_context: | default = ""] - # Queue length above which we will scale up capacity. - # CLI flag: -metrics.target-queue-length - [target_queue_length: | default = 100000] + # Configures back off when S3 get Object. 
+ backoff_config: + # Minimum backoff time when s3 get Object + # CLI flag: -s3.min-backoff + [min_period: | default = 100ms] - # Scale up capacity by this multiple - # CLI flag: -metrics.scale-up-factor - [scale_up_factor: | default = 1.3] + # Maximum backoff time when s3 get Object + # CLI flag: -s3.max-backoff + [max_period: | default = 3s] - # Ignore throttling below this level (rate per second) - # CLI flag: -metrics.ignore-throttle-below - [ignore_throttle_below: | default = 1] + # Maximum number of times to retry when s3 get Object + # CLI flag: -s3.max-retries + [max_retries: | default = 5] - # Number of chunks to group together to parallelise fetches (0 to disable) - # CLI flag: -dynamodb.chunk-gang-size - [chunk_gang_size: | default = 10] +# The azure_storage_config block configures the connection to Azure object +# storage backend. +[azure: ] - # Max number of chunk get operations to start in parallel. - # CLI flag: -dynamodb.chunk.get-max-parallelism - [chunk_get_max_parallelism: | default = 32] +# The bos_storage_config block configures the connection to Baidu Object Storage +# (BOS) object storage backend. +[bos: ] -# Configures storing indexes in Bigtable. Required fields only required -# when bigtable is defined in config. +# Configures storing indexes in Bigtable. Required fields only required when +# bigtable is defined in config. bigtable: - # BigTable project ID - # CLI flag: -bigtable.project - project: + # Bigtable project ID. + # CLI flag: -bigtable.project + [project: | default = ""] - # BigTable instance ID + # Bigtable instance ID. Please refer to + # https://cloud.google.com/docs/authentication/production for more information + # about how to configure authentication. # CLI flag: -bigtable.instance - instance: + [instance: | default = ""] - # Configures the gRPC client used to connect to Bigtable. 
- # The CLI flags prefix for this block config is: bigtable - [grpc_client_config: ] + # The grpc_client block configures the gRPC client used to communicate between + # two Loki components. + # The CLI flags prefix for this block configuration is: bigtable + [grpc_client_config: ] -# Configures storing chunks in GCS. Required fields only required -# when gcs is defined in config. -gcs: - # Name of GCS bucket to put chunks in. - # CLI flag: -gcs.bucketname - bucket_name: + # If enabled, once a tables info is fetched, it is cached. + # CLI flag: -bigtable.table-cache.enabled + [table_cache_enabled: | default = true] - # The size of the buffer that the GCS client uses for each PUT request. 0 - # to disable buffering. - # CLI flag: -gcs.chunk-buffer-size - [chunk_buffer_size: | default = 0] + # Duration to cache tables before checking again. + # CLI flag: -bigtable.table-cache.expiration + [table_cache_expiration: | default = 30m] - # The duration after which the requests to GCS should be timed out. - # CLI flag: -gcs.request-timeout - [request_timeout: | default = 0s] +# Configures storing chunks in GCS. Required fields only required when gcs is +# defined in config. +[gcs: ] -# Configures storing chunks and/or the index in Cassandra +# Configures storing chunks and/or the index in Cassandra. cassandra: - # Comma-separated hostnames or IPs of Cassandra instances + # Comma-separated hostnames or IPs of Cassandra instances. # CLI flag: -cassandra.addresses - addresses: + [addresses: | default = ""] - # Port that cassandra is running on + # Port that Cassandra is running on # CLI flag: -cassandra.port [port: | default = 9042] - # Keyspace to use in Cassandra + # Keyspace to use in Cassandra. # CLI flag: -cassandra.keyspace - keyspace: + [keyspace: | default = ""] - # Consistency level for Cassandra + # Consistency level for Cassandra. # CLI flag: -cassandra.consistency [consistency: | default = "QUORUM"] # Replication factor to use in Cassandra. 
# CLI flag: -cassandra.replication-factor - [replication_factor: | default = 1] + [replication_factor: | default = 3] - # Instruct the Cassandra driver to not attempt to get host - # info from the system.peers table. + # Instruct the cassandra driver to not attempt to get host info from the + # system.peers table. # CLI flag: -cassandra.disable-initial-host-lookup [disable_initial_host_lookup: | default = false] - # Use SSL when connecting to Cassandra instances. + # Use SSL when connecting to cassandra instances. # CLI flag: -cassandra.ssl [SSL: | default = false] - # Require SSL certificate validation when SSL is enabled. + # Require SSL certificate validation. # CLI flag: -cassandra.host-verification [host_verification: | default = true] - # Path to CA certificate file to verify the peer when SSL is enabled. + # Policy for selecting Cassandra host. Supported values are: round-robin, + # token-aware. + # CLI flag: -cassandra.host-selection-policy + [host_selection_policy: | default = "round-robin"] + + # Path to certificate file to verify the peer. # CLI flag: -cassandra.ca-path - [CA_path: ] + [CA_path: | default = ""] - # Path to client certificate file when SSL is enabled. + # Path to certificate file used by TLS. # CLI flag: -cassandra.tls-cert-path - [tls_cert_path: ] + [tls_cert_path: | default = ""] - # Path to key certificate file when SSL is enabled. + # Path to private key file used by TLS. # CLI flag: -cassandra.tls-key-path - [tls_key_path: ] + [tls_key_path: | default = ""] - # Enable password authentication when connecting to Cassandra. + # Enable password authentication when connecting to cassandra. # CLI flag: -cassandra.auth [auth: | default = false] - # Username for password authentication when auth is true. + # Username to use when connecting to cassandra. # CLI flag: -cassandra.username - [username: ] + [username: | default = ""] - # Password for password authentication when auth is true. + # Password to use when connecting to cassandra. 
# CLI flag: -cassandra.password - [password: ] - - # Timeout when connecting to Cassandra. - # CLI flag: -cassandra.timeout - [timeout: | default = 600ms] - - # Initial connection timeout during initial dial to server. - # CLI flag: -cassandra.connect-timeout - [connect_timeout: | default = 600ms] - -swift: - # Openstack authentication URL. - # CLI flag: -ruler.storage.swift.auth-url - [auth_url: | default = ""] + [password: | default = ""] - # Openstack username for the api. - # CLI flag: -ruler.storage.swift.username - [username: | default = ""] + # File containing password to use when connecting to cassandra. + # CLI flag: -cassandra.password-file + [password_file: | default = ""] - # Openstack user's domain name. - # CLI flag: -ruler.storage.swift.user-domain-name - [user_domain_name: | default = ""] + # If set, when authenticating with cassandra a custom authenticator will be + # expected during the handshake. This flag can be set multiple times. + # CLI flag: -cassandra.custom-authenticator + [custom_authenticators: | default = []] - # Openstack user's domain id. - # CLI flag: -ruler.storage.swift.user-domain-id - [user_domain_id: | default = ""] + # Timeout when connecting to cassandra. + # CLI flag: -cassandra.timeout + [timeout: | default = 2s] - # Openstack userid for the api. - # CLI flag: -ruler.storage.swift.user-id - [user_id: | default = ""] + # Initial connection timeout, used during initial dial to server. + # CLI flag: -cassandra.connect-timeout + [connect_timeout: | default = 5s] + + # Interval to retry connecting to cassandra nodes marked as DOWN. + # CLI flag: -cassandra.reconnent-interval + [reconnect_interval: | default = 1s] + + # Number of retries to perform on a request. Set to 0 to disable retries. + # CLI flag: -cassandra.max-retries + [max_retries: | default = 0] + + # Maximum time to wait before retrying a failed request. 
+ # CLI flag: -cassandra.retry-max-backoff + [retry_max_backoff: | default = 10s] + + # Minimum time to wait before retrying a failed request. + # CLI flag: -cassandra.retry-min-backoff + [retry_min_backoff: | default = 100ms] + + # Limit number of concurrent queries to Cassandra. Set to 0 to disable the + # limit. + # CLI flag: -cassandra.query-concurrency + [query_concurrency: | default = 0] + + # Number of TCP connections per host. + # CLI flag: -cassandra.num-connections + [num_connections: | default = 2] + + # Convict hosts of being down on failure. + # CLI flag: -cassandra.convict-hosts-on-failure + [convict_hosts_on_failure: | default = true] + + # Table options used to create index or chunk tables. This value is used as + # plain text in the table `WITH` like this, "CREATE TABLE + # (...) WITH ". For details, + # see https://cortexmetrics.io/docs/production/cassandra. By default it will + # use the default table options of your Cassandra cluster. + # CLI flag: -cassandra.table-options + [table_options: | default = ""] + +# Configures storing index in BoltDB. Required fields only required when boltdb +# is present in the configuration. +boltdb: + # Location of BoltDB index files. + # CLI flag: -boltdb.dir + [directory: | default = ""] - # Openstack api key. - # CLI flag: -ruler.storage.swift.password - [password: | default = ""] +# Configures storing the chunks on the local file system. Required fields only +# required when filesystem is present in the configuration. +filesystem: + # Directory to store chunks in. + # CLI flag: -local.chunk-directory + [directory: | default = ""] - # Openstack user's domain id. - # CLI flag: -ruler.storage.swift.domain-id - [domain_id: | default = ""] +# The swift_storage_config block configures the connection to OpenStack Object +# Storage (Swift) object storage backend. +[swift: ] - # Openstack user's domain name. 
- # CLI flag: -ruler.storage.swift.domain-name - [domain_name: | default = ""] +grpc_store: + # Hostname or IP of the gRPC store instance. + # CLI flag: -grpc-store.server-address + [server_address: | default = ""] - # Openstack project id (v2,v3 auth only). - # CLI flag: -ruler.storage.swift.project-id - [project_id: | default = ""] +hedging: + # If set to a non-zero value a second request will be issued at the provided + # duration. Default is 0 (disabled) + # CLI flag: -store.hedge-requests-at + [at: | default = 0s] - # Openstack project name (v2,v3 auth only). - # CLI flag: -ruler.storage.swift.project-name - [project_name: | default = ""] + # The maximum of hedge requests allowed. + # CLI flag: -store.hedge-requests-up-to + [up_to: | default = 2] - # Id of the project's domain (v3 auth only), only needed if it differs the - # from user domain. - # CLI flag: -ruler.storage.swift.project-domain-id - [project_domain_id: | default = ""] + # The maximum of hedge requests allowed per seconds. + # CLI flag: -store.hedge-max-per-second + [max_per_second: | default = 5] - # Name of the project's domain (v3 auth only), only needed if it differs - # from the user domain. - # CLI flag: -ruler.storage.swift.project-domain-name - [project_domain_name: | default = ""] +# Cache validity for active index entries. Should be no higher than +# -ingester.max-chunk-idle. +# CLI flag: -store.index-cache-validity +[index_cache_validity: | default = 5m] - # Openstack Region to use eg LON, ORD - default is use first region (v2,v3 - # auth only) - # CLI flag: -ruler.storage.swift.region-name - [region_name: | default = ""] +# The cache block configures the cache backend. +# The CLI flags prefix for this block configuration is: store.index-cache-read +[index_queries_cache_config: ] - # Name of the Swift container to put chunks in. 
- # CLI flag: -ruler.storage.swift.container-name - [container_name: | default = "cortex"] +# Disable broad index queries which results in reduced cache usage and faster +# query performance at the expense of somewhat higher QPS on the index store. +# CLI flag: -store.disable-broad-index-queries +[disable_broad_index_queries: | default = false] -# Configures storing index in BoltDB. Required fields only -# required when boltdb is present in the configuration. -boltdb: - # Location of BoltDB index files. - # CLI flag: -boltdb.dir - directory: +# Maximum number of parallel chunk reads. +# CLI flag: -store.max-parallel-get-chunk +[max_parallel_get_chunk: | default = 150] -# Configures storing the chunks on the local file system. Required -# fields only required when filesystem is present in the configuration. -filesystem: - # Directory to store chunks in. - # CLI flag: -local.chunk-directory - directory: +# The maximum number of chunks to fetch per batch. +# CLI flag: -store.max-chunk-batch-size +[max_chunk_batch_size: | default = 50] -# Configures storing index in an Object Store(GCS/S3/Azure/Swift/Filesystem) in the form of -# boltdb files. -# Required fields only required when boltdb-shipper is defined in config. +# Configures storing index in an Object Store (GCS/S3/Azure/Swift/Filesystem) in +# the form of boltdb files. Required fields only required when boltdb-shipper is +# defined in config. boltdb_shipper: - # Directory where ingesters would write boltdb files which would then be + # Directory where ingesters would write index files which would then be # uploaded by shipper to configured storage # CLI flag: -boltdb.shipper.active-index-directory [active_index_directory: | default = ""] - # Shared store for keeping boltdb files. Supported types: gcs, s3, azure, + # Shared store for keeping index files. 
Supported types: gcs, s3, azure, # filesystem # CLI flag: -boltdb.shipper.shared-store [shared_store: | default = ""] - # Cache location for restoring boltDB files for queries + # Prefix to add to Object Keys in Shared store. Path separator(if any) should + # always be a '/'. Prefix should never start with a separator but should + # always end with it + # CLI flag: -boltdb.shipper.shared-store.key-prefix + [shared_store_key_prefix: | default = "index/"] + + # Cache location for restoring index files from storage for queries # CLI flag: -boltdb.shipper.cache-location [cache_location: | default = ""] - # TTL for boltDB files restored in cache for queries + # TTL for index files restored in cache for queries # CLI flag: -boltdb.shipper.cache-ttl [cache_ttl: | default = 24h] @@ -1776,414 +1868,306 @@ boltdb_shipper: # CLI flag: -boltdb.shipper.resync-interval [resync_interval: | default = 5m] - # Number of days of index to be kept downloaded for queries. Works only with - # tables created with 24h period. + # Number of days of common index to be kept downloaded for queries. For per + # tenant index query readiness, use limits overrides config. # CLI flag: -boltdb.shipper.query-ready-num-days [query_ready_num_days: | default = 0] index_gateway_client: - # "Hostname or IP of the Index Gateway gRPC server. + # The grpc_client block configures the gRPC client used to communicate + # between two Loki components. + # The CLI flags prefix for this block configuration is: + # boltdb.shipper.index-gateway-client.grpc + [grpc_client_config: ] + + # Hostname or IP of the Index Gateway gRPC server running in simple mode. # CLI flag: -boltdb.shipper.index-gateway-client.server-address [server_address: | default = ""] - # Configures the gRPC client used to connect to the Index Gateway gRPC server. - # The CLI flags prefix for this block config is: boltdb.shipper.index-gateway-client - [grpc_client_config: ] - - # Configures if gateway requests should be logged or not. 
+ # Whether requests sent to the gateway should be logged or not. # CLI flag: -boltdb.shipper.index-gateway-client.log-gateway-requests - [log_gateway_requests: | default = false] + [log_gateway_requests: | default = false] -# Cache validity for active index entries. Should be no higher than -# the chunk_idle_period in the ingester settings. -# CLI flag: -store.index-cache-validity -[index_cache_validity: | default = 5m] + # Use boltdb-shipper index store as backup for indexing chunks. When enabled, + # boltdb-shipper needs to be configured under storage_config + # CLI flag: -boltdb.shipper.use-boltdb-shipper-as-backup + [use_boltdb_shipper_as_backup: | default = false] -# Disable broad index queries, which results in reduced cache usage and faster query -# performance at the expense of a somewhat higher QPS on the index store. -# CLI flag: -store.disable-broad-index-queries -[disable_broad_index_queries: | default = false] + [ingestername: | default = ""] -# The maximum number of chunks to fetch per batch. -# CLI flag: -store.max-chunk-batch-size -[max_chunk_batch_size: | default = 50] + [mode: | default = ""] + + [ingesterdbretainperiod: ] + + # Build per tenant index files + # CLI flag: -boltdb.shipper.build-per-tenant-index + [build_per_tenant_index: | default = false] + +tsdb_shipper: + # Directory where ingesters would write index files which would then be + # uploaded by shipper to configured storage + # CLI flag: -tsdb.shipper.active-index-directory + [active_index_directory: | default = ""] + + # Shared store for keeping index files. Supported types: gcs, s3, azure, + # filesystem + # CLI flag: -tsdb.shipper.shared-store + [shared_store: | default = ""] + + # Prefix to add to Object Keys in Shared store. Path separator(if any) should + # always be a '/'. 
Prefix should never start with a separator but should + # always end with it + # CLI flag: -tsdb.shipper.shared-store.key-prefix + [shared_store_key_prefix: | default = "index/"] + + # Cache location for restoring index files from storage for queries + # CLI flag: -tsdb.shipper.cache-location + [cache_location: | default = ""] + + # TTL for index files restored in cache for queries + # CLI flag: -tsdb.shipper.cache-ttl + [cache_ttl: | default = 24h] + + # Resync downloaded files with the storage + # CLI flag: -tsdb.shipper.resync-interval + [resync_interval: | default = 5m] + + # Number of days of common index to be kept downloaded for queries. For per + # tenant index query readiness, use limits overrides config. + # CLI flag: -tsdb.shipper.query-ready-num-days + [query_ready_num_days: | default = 0] + + index_gateway_client: + # The grpc_client block configures the gRPC client used to communicate + # between two Loki components. + # The CLI flags prefix for this block configuration is: + # tsdb.shipper.index-gateway-client.grpc + [grpc_client_config: ] + + # Hostname or IP of the Index Gateway gRPC server running in simple mode. + # CLI flag: -tsdb.shipper.index-gateway-client.server-address + [server_address: | default = ""] + + # Whether requests sent to the gateway should be logged or not. + # CLI flag: -tsdb.shipper.index-gateway-client.log-gateway-requests + [log_gateway_requests: | default = false] -# Config for how the cache for index queries should be built. -# The CLI flags prefix for this block config is: store.index-cache-read -index_queries_cache_config: + # Use boltdb-shipper index store as backup for indexing chunks. 
When enabled, + # boltdb-shipper needs to be configured under storage_config + # CLI flag: -tsdb.shipper.use-boltdb-shipper-as-backup + [use_boltdb_shipper_as_backup: | default = false] + + [ingestername: | default = ""] + + [mode: | default = ""] + + [ingesterdbretainperiod: ] ``` -## chunk_store_config +### chunk_store_config -The `chunk_store_config` block configures how chunks will be cached and how long -to wait before saving them to the backing store. +The `chunk_store_config` block configures how chunks will be cached and how long to wait before saving them to the backing store. ```yaml -# The cache configuration for storing chunks -# The CLI flags prefix for this block config is: store.chunks-cache +# The cache block configures the cache backend. +# The CLI flags prefix for this block configuration is: store.chunks-cache [chunk_cache_config: ] -# The cache configuration for deduplicating writes -# The CLI flags prefix for this block config is: store.index-cache-write +# The cache block configures the cache backend. +# The CLI flags prefix for this block configuration is: store.index-cache-write [write_dedupe_cache_config: ] -# Cache index entries older than this period. Default is disabled. +# Cache index entries older than this period. 0 to disable. # CLI flag: -store.cache-lookups-older-than -[cache_lookups_older_than: ] +[cache_lookups_older_than: | default = 0s] -# Limit how long back data can be queried. Default is disabled. -# This should always be set to a value less than or equal to -# what is set in `table_manager.retention_period` . +# This flag is deprecated. Use -querier.max-query-lookback instead. # CLI flag: -store.max-look-back-period -[max_look_back_period: ] +[max_look_back_period: | default = 0s] ``` -## cache_config +### schema_config + +Configures the chunk index schema and where it is stored. -The `cache_config` block configures how Loki will cache requests, chunks, and -the index to a backing cache store. 
+```yaml +[configs: ] +``` -The memcached configuration variable addresses is experimental. +### compactor + +The `compactor` block configures the compactor component, which compacts index shards for performance. ```yaml -# NOTE: `fifocache` is deprecated. Use `embedded_cache` instead. -# Enable in-memory cache (auto-enabled for the chunks & query results cache if no other cache is configured). -# CLI flag: -.cache.enable-fifocache -[enable_fifocache: ] +# Directory where files can be downloaded for compaction. +# CLI flag: -boltdb.shipper.compactor.working-directory +[working_directory: | default = ""] -# The default validity of entries for caches unless overridden. -# NOTE In Loki versions older than 1.4.0 this was "defaul_validity". -# CLI flag: -.default-validity -[default_validity: ] +# The shared store used for storing boltdb files. Supported types: gcs, s3, +# azure, swift, filesystem, bos. +# CLI flag: -boltdb.shipper.compactor.shared-store +[shared_store: | default = ""] -# Configures embedded cache settings. -embedded_cache: - # Whether embedded cache is enabled. - # CLI flag: -.embedded-cache.enabled - [enabled: | default = false] +# Prefix to add to object keys in shared store. Path separator(if any) should +# always be a '/'. Prefix should never start with a separator but should always +# end with it. +# CLI flag: -boltdb.shipper.compactor.shared-store.key-prefix +[shared_store_key_prefix: | default = "index/"] - # Maximum memory size of the cache in MB. - # CLI flag: -.embedded-cache.max-size-mb - [max_size_mb: | default = 100] +# Interval at which to re-run the compaction operation. +# CLI flag: -boltdb.shipper.compactor.compaction-interval +[compaction_interval: | default = 10m] - # The time to live for items in the cache before they get purged. - # CLI flag: -.embedded-cache.ttl - [ttl: | default = 1hr] +# Interval at which to apply/enforce retention. 0 means run at same interval as +# compaction. 
If non-zero, it should always be a multiple of compaction +# interval. +# CLI flag: -boltdb.shipper.compactor.apply-retention-interval +[apply_retention_interval: | default = 0s] -# Configures the background cache when memcached is used. -background: - # How many goroutines to use to write back to memcached. - # CLI flag: -.background.write-back-concurrency - [writeback_goroutines: | default = 10] +# (Experimental) Activate custom (per-stream,per-tenant) retention. +# CLI flag: -boltdb.shipper.compactor.retention-enabled +[retention_enabled: | default = false] - # How many chunks to buffer for background write back to memcached. - # CLI flag: -.background.write-back-buffer - [writeback_buffer: = 10000] +# Delay after which chunks will be fully deleted during retention. +# CLI flag: -boltdb.shipper.compactor.retention-delete-delay +[retention_delete_delay: | default = 2h] -# Configures memcached settings. -memcached: - # Configures how long keys stay in memcached. - # CLI flag: -.memcached.expiration - expiration: +# The total amount of worker to use to delete chunks. +# CLI flag: -boltdb.shipper.compactor.retention-delete-worker-count +[retention_delete_worker_count: | default = 150] - # Configures how many keys to fetch in each batch request. - # CLI flag: -.memcached.batchsize - batch_size: | default = 1024 +# The maximum amount of time to spend running retention and deletion on any +# given table in the index. +# CLI flag: -boltdb.shipper.compactor.retention-table-timeout +[retention_table_timeout: | default = 0s] - # Maximum active requests to memcached. - # CLI flag: -.memcached.parallelism - [parallelism: | default = 100] +# The max number of delete requests to run per compaction cycle. +# CLI flag: -boltdb.shipper.compactor.delete-batch-size +[delete_batch_size: | default = 70] -# Configures how to connect to one or more memcached servers. -memcached_client: - # The hostname to use for memcached services when caching chunks. 
If - # empty, no memcached will be used. A SRV lookup will be used. - # CLI flag: -.memcached.hostname - [host: ] +# Allow cancellation of delete request until duration after they are created. +# Data would be deleted only after delete requests have been older than this +# duration. Ideally this should be set to at least 24h. +# CLI flag: -boltdb.shipper.compactor.delete-request-cancel-period +[delete_request_cancel_period: | default = 24h] - # SRV service used to discover memcached servers. - # CLI flag: -.memcached.service - [service: | default = "memcached"] +# Constrain the size of any single delete request. When a delete request > +# delete_max_interval is input, the request is sharded into smaller requests of +# no more than delete_max_interval +# CLI flag: -boltdb.shipper.compactor.delete-max-interval +[delete_max_interval: | default = 0s] - # (Experimental) Comma-separated addresses list in DNS Service Discovery format: - # https://grafana.com/docs/mimir/latest/operators-guide/configuring/about-dns-service-discovery/ - # CLI flag: -.memcached.addresses - [addresses: | default = ""] +# Maximum number of tables to compact in parallel. While increasing this value, +# please make sure compactor has enough disk space allocated to be able to store +# and compact as many tables. +# CLI flag: -boltdb.shipper.compactor.max-compaction-parallelism +[max_compaction_parallelism: | default = 1] - # Maximum time to wait before giving up on memcached requests. - # CLI flag: -.memcached.timeout - [timeout: | default = 100ms] +# Number of upload/remove operations to execute in parallel when finalizing a +# compaction. NOTE: This setting is per compaction operation, which can be +# executed in parallel. The upper bound on the number of concurrent uploads is +# upload_parallelism * max_compaction_parallelism. +# CLI flag: -boltdb.shipper.compactor.upload-parallelism +[upload_parallelism: | default = 10] - # The maximum number of idle connections in the memcached client pool. 
- # CLI flag: -.memcached.max-idle-conns - [max_idle_conns: | default = 16] +# The hash ring configuration used by compactors to elect a single instance for +# running compactions. The CLI flags prefix for this block config is: +# boltdb.shipper.compactor.ring +compactor_ring: + kvstore: + # Backend storage to use for the ring. Supported values are: consul, etcd, + # inmemory, memberlist, multi. + # CLI flag: -boltdb.shipper.compactor.ring.store + [store: | default = "consul"] - # The period with which to poll the DNS for memcached servers. - # CLI flag: -.memcached.update-interval - [update_interval: | default = 1m] + # The prefix for the keys in the store. Should end with a /. + # CLI flag: -boltdb.shipper.compactor.ring.prefix + [prefix: | default = "collectors/"] - # Whether or not to use a consistent hash to discover multiple memcached servers. - # CLI flag: -.memcached.consistent-hash - [consistent_hash: | default = true] + # Configuration for a Consul client. Only applies if store is consul. + # The CLI flags prefix for this block configuration is: + # boltdb.shipper.compactor.ring + [consul: ] - # Trip the circuit breaker after this number of consecutive dial failures. - # A value of 0 disables the circuit breaker. - # CLI flag: -.memcached.circuit-breaker-consecutive-failures - [circuit_breaker_consecutive_failures: | default = 10] + # Configuration for an ETCD v3 client. Only applies if store is etcd. + # The CLI flags prefix for this block configuration is: + # boltdb.shipper.compactor.ring + [etcd: ] - # Duration the circuit breaker remains open after tripping. - # If set to 0, the duration is 60 seconds. - # CLI flag: -.memcached.circuit-breaker-timeout - [circuit_breaker_timeout: | default = 10s] + multi: + # Primary backend storage used by multi-client. + # CLI flag: -boltdb.shipper.compactor.ring.multi.primary + [primary: | default = ""] - # Reset the circuit breaker counts after this duration. - # A value of 0 never resets the circuit breaker. 
- # CLI flag: -.memcached.circuit-breaker-interval - [circuit_breaker_interval: | default = 10s] + # Secondary backend storage used by multi-client. + # CLI flag: -boltdb.shipper.compactor.ring.multi.secondary + [secondary: | default = ""] - # The maximum size of an item stored in memcached. - # Bigger items are not stored. If set to 0, no maximum size is enforced. - # CLI flag: -.memcached.max-item-size - [max_item_size: | default = 0] + # Mirror writes to secondary store. + # CLI flag: -boltdb.shipper.compactor.ring.multi.mirror-enabled + [mirror_enabled: | default = false] -redis: - # Redis Server or Cluster configuration endpoint to use for caching. A comma-separated list of endpoints - # for Redis Cluster or Redis Sentinel. If empty, no redis will be used. - # CLI flag: -.redis.endpoint - [endpoint: ] + # Timeout for storing value to secondary store. + # CLI flag: -boltdb.shipper.compactor.ring.multi.mirror-timeout + [mirror_timeout: | default = 2s] - # Redis Sentinel master name. An empty string for Redis Server or Redis Cluster. - # CLI flag: -.redis.master-name - [master_name: ] + # Period at which to heartbeat to the ring. 0 = disabled. + # CLI flag: -boltdb.shipper.compactor.ring.heartbeat-period + [heartbeat_period: | default = 15s] - # Maximum time to wait before giving up on redis requests. - # CLI flag: -.redis.timeout - [timeout: | default = 500ms] + # The heartbeat timeout after which compactors are considered unhealthy within + # the ring. 0 = never (timeout disabled). + # CLI flag: -boltdb.shipper.compactor.ring.heartbeat-timeout + [heartbeat_timeout: | default = 1m] - # How long keys stay in the redis. - # CLI flag: -.redis.expiration - [expiration: | default = 0s] + # File path where tokens are stored. If empty, tokens are not stored at + # shutdown and restored at startup. + # CLI flag: -boltdb.shipper.compactor.ring.tokens-file-path + [tokens_file_path: | default = ""] - # Database index. 
- # CLI flag: -.redis.db - [db: | default = 0] + # True to enable zone-awareness and replicate blocks across different + # availability zones. + # CLI flag: -boltdb.shipper.compactor.ring.zone-awareness-enabled + [zone_awareness_enabled: | default = false] - # Maximum number of connections in the pool. - # CLI flag: -.redis.pool-size - [pool_size: | default = 0] + # Instance ID to register in the ring. + # CLI flag: -boltdb.shipper.compactor.ring.instance-id + [instance_id: | default = ""] - # Username to use when connecting to redis. - # CLI flag: -.redis.username - [username: ] + # Name of network interface to read address from. + # CLI flag: -boltdb.shipper.compactor.ring.instance-interface-names + [instance_interface_names: | default = []] - # Password to use when connecting to redis. - # CLI flag: -.redis.password - [password: ] + # Port to advertise in the ring (defaults to server.grpc-listen-port). + # CLI flag: -boltdb.shipper.compactor.ring.instance-port + [instance_port: | default = 0] - # Enables connecting to redis with TLS. - # CLI flag: -.redis.tls-enabled - [tls_enabled: | default = false] + # IP address to advertise in the ring. + # CLI flag: -boltdb.shipper.compactor.ring.instance-addr + [instance_addr: | default = ""] - # Skip validating server certificate. - # CLI flag: -.redis.tls-insecure-skip-verify - [tls_insecure_skip_verify: | default = false] - - # Close connections after remaining idle for this duration. - # If the value is zero, then idle connections are not closed. - # CLI flag: -.redis.idle-timeout - [idle_timeout: | default = 0s] - - # Close connections older than this duration. If the value is zero, then - # the pool does not close connections based on age. - # CLI flag: -.redis.max-connection-age - [max_connection_age: | default = 0s] - -fifocache: - # Maximum memory size of the cache in bytes. A unit suffix (KB, MB, GB) may be - # applied. 
- # CLI flag: -.fifocache.max-size-bytes - [max_size_bytes: | default = "1GB"] - - # Maximum number of entries in the cache. - # CLI flag: -.fifocache.max-size-items - [max_size_items: | default = 0] - - # Deprecated: The expiry duration for the cache. Use `-.fifocache.ttl`. - # The default value of 0 disables expiration. - # CLI flag: -.fifocache.duration - [validity: ] - - # The time for items to live in the cache before those items are purged. - # The value of 0 disables auto-expiration. - # CLI flag: -.fifocache.ttl - [ttl: | default = 1h] -``` - -## schema_config - -The `schema_config` block configures schemas from given dates. - -```yaml -# The configuration for chunk index schemas. -configs: -- [] -``` - -### period_config - -The `period_config` block configures what index schemas should be used -for from specific time periods. - -```yaml -# The date of the first day that index buckets should be created. Use -# a date in the past if this is your only period_config, otherwise -# use a date when you want the schema to switch over. -# In YYYY-MM-DD format, for example: 2018-04-15. -[from: ] - -# store and object_store below affect which key is -# used. - -# Which store to use for the index. Either aws, aws-dynamo, gcp, bigtable, bigtable-hashed, -# cassandra, boltdb or boltdb-shipper. -store: - -# Which store to use for the chunks. Either aws, azure, gcp, -# bigtable, gcs, cassandra, swift or filesystem. If omitted, defaults to the same -# value as store. -[object_store: ] - -# The schema version to use, current recommended schema is v11. -schema: - -# Configures how the index is updated and stored. -index: - # Table prefix for all period tables. - prefix: - # Table period. - [period: | default = 168h] - # A map to be added to all managed tables. - tags: - [: ...] - -# Configured how the chunks are updated and stored. -chunks: - # Table prefix for all period tables. - prefix: - # Table period. 
- [period: | default = 168h] - # A map to be added to all managed tables. - tags: - [: ...] - -# How many shards will be created. Only used if schema is v10 or greater. -[row_shards: | default = 16] -``` - -## compactor - -The `compactor` block configures the compactor component. This component periodically -compacts index shards to more performant forms. - -```yaml -# Directory where files can be downloaded for compaction. -# CLI flag: -boltdb.shipper.compactor.working-directory -[working_directory: ] - -# The shared store used for storing boltdb files. -# Supported types: gcs, s3, azure, swift, filesystem, bos. -# CLI flag: -boltdb.shipper.compactor.shared-store -[shared_store: ] - -# Prefix to add to object keys in shared store. -# Path separator(if any) should always be a '/'. -# Prefix should never start with a separator but should always end with it. -# CLI flag: -boltdb.shipper.compactor.shared-store.key-prefix -[shared_store_key_prefix: | default = "index/"] - -# Interval at which to re-run the compaction operation (or retention if enabled). -# CLI flag: -boltdb.shipper.compactor.compaction-interval -[compaction_interval: | default = 10m] - -# Number of upload/remove operations to execute in parallel when finalizing a compaction. -# CLI flag: -boltdb.shipper.compactor.upload-parallelism -# -# NOTE: This setting is per compaction operation, which can be -# executed in parallel. The upper bound on the number of concurrent -# uploads is upload_parallelism * max_compaction_parallelism -[upload_parallelism: | default = 10] - -# (Experimental) Activate custom (per-stream,per-tenant) retention. -# CLI flag: -boltdb.shipper.compactor.retention-enabled -[retention_enabled: | default = false] - -# Delay after which chunks will be fully deleted during retention. -# CLI flag: -boltdb.shipper.compactor.retention-delete-delay -[retention_delete_delay: | default = 2h] - -# The total amount of worker to use to delete chunks. 
-# CLI flag: -boltdb.shipper.compactor.retention-delete-worker-count -[retention_delete_worker_count: | default = 150] - -# Allow cancellation of delete request until duration after they are created. -# Data would be deleted only after delete requests have been older than this duration. -# Ideally this should be set to at least 24h. -# CLI flag: -boltdb.shipper.compactor.delete-request-cancel-period -[delete_request_cancel_period: | default = 24h] - -# Constrain the size of a delete request. When a delete request that spans > delete_max_interval -# is input, the request is sharded into smaller requests of no more than delete_max_interval. -# -# 0 means no delete_max_interval. -# CLI flag: -boltdb.shipper.compactor.delete-max-interval -[delete_max_interval: | default = 0] - -# The max number of delete requests to run per compaction cycle. -# CLI flag: -boltdb.shipper.compactor.delete-batch-size -[delete_batch_size: | default = 70] - -# The maximum amount of time to spend running retention and deletion -# on any given table in the index. 0 is no timeout -# -# NOTE: This timeout prioritizes runtime over completeness of retention/deletion. -# It may take several compaction runs to fully perform retention and process -# all outstanding delete requests -# CLI flag: -boltdb.shipper.compactor.retention-table-timeout -[retention_table_timeout: | default = 0] - -# Maximum number of tables to compact in parallel. -# While increasing this value, please make sure compactor has enough disk space -# allocated to be able to store and compact as many tables. -# CLI flag: -boltdb.shipper.compactor.max-compaction-parallelism -[max_compaction_parallelism: | default = 1] + # The availability zone where this instance is running. Required if + # zone-awareness is enabled. + # CLI flag: -boltdb.shipper.compactor.ring.instance-availability-zone + [instance_availability_zone: | default = ""] -# Deprecated: Deletion mode. -# Use deletion_mode per tenant configuration instead. 
-# CLI flag: -boltdb.shipper.compactor.deletion-mode -[deletion_mode: | default = "disabled"] +# Number of tables that compactor will try to compact. Newer tables are chosen +# when this is less than the number of tables available. +# CLI flag: -boltdb.shipper.compactor.tables-to-compact +[tables_to_compact: | default = 0] -# The hash ring configuration used by compactors to elect a single instance for running compactions -# The CLI flags prefix for this block config is: boltdb.shipper.compactor.ring -[compactor_ring: ] - -# Number of tables that compactor will try to compact. Newer tables -# are chosen when this is less than the number of tables available -# CLI flag: -boltdb.shipper.compact.tables-to-compact -[tables_to_compact: | default: 0] - -# Do not compact N latest tables. Together with +# Do not compact N latest tables. Together with # -boltdb.shipper.compactor.run-once and -# -boltdb.shipper.compactor.tables-to-compact, this is useful when -# clearing compactor backlogs. -# CLI flag: -boltdb.shipper.compact.skip-latest-n-tables -[skip_latest_n_tables: | default: 0] - -# The hash ring configuration used by compactors to elect a single instance for running compactions -# The CLI flags prefix for this block config is: boltdb.shipper.compactor.ring -[compactor_ring: ] +# -boltdb.shipper.compactor.tables-to-compact, this is useful when clearing +# compactor backlogs. +# CLI flag: -boltdb.shipper.compactor.skip-latest-n-tables +[skip_latest_n_tables: | default = 0] + +# Deprecated: Use deletion_mode per tenant configuration instead. +[deletion_mode: | default = ""] ``` -## limits_config +### limits_config The `limits_config` block configures global and per-tenant limits in Loki. @@ -2191,15 +2175,14 @@ The `limits_config` block configures global and per-tenant limits in Loki. # Whether the ingestion rate limit should be applied individually to each # distributor instance (local), or evenly shared across the cluster (global). 
# The ingestion rate strategy cannot be overridden on a per-tenant basis. -# # - local: enforces the limit on a per distributor basis. The actual effective -# rate limit will be N times higher, where N is the number of distributor -# replicas. +# rate limit will be N times higher, where N is the number of distributor +# replicas. # - global: enforces the limit globally, configuring a per-distributor local -# rate limiter as "ingestion_rate / N", where N is the number of distributor -# replicas (it's automatically adjusted if the number of replicas change). -# The global strategy requires the distributors to form their own ring, which -# is used to keep track of the current number of healthy distributor replicas. +# rate limiter as 'ingestion_rate / N', where N is the number of distributor +# replicas (it's automatically adjusted if the number of replicas change). The +# global strategy requires the distributors to form their own ring, which is +# used to keep track of the current number of healthy distributor replicas. # CLI flag: -distributor.ingestion-rate-limit-strategy [ingestion_rate_strategy: | default = "global"] @@ -2207,18 +2190,19 @@ The `limits_config` block configures global and per-tenant limits in Loki. # CLI flag: -distributor.ingestion-rate-limit-mb [ingestion_rate_mb: | default = 4] -# Per-user allowed ingestion burst size (in sample size). Units in MB. -# The burst size refers to the per-distributor local rate limiter even in the -# case of the "global" strategy, and should be set at least to the maximum logs -# size expected in a single push request. +# Per-user allowed ingestion burst size (in sample size). Units in MB. The burst +# size refers to the per-distributor local rate limiter even in the case of the +# 'global' strategy, and should be set at least to the maximum logs size +# expected in a single push request. 
# CLI flag: -distributor.ingestion-burst-size-mb -[ingestion_burst_size_mb: | default = 6] +[ingestion_burst_size_mb: | default = 6] -# Maximum length of a label name. +# Maximum length accepted for label names. # CLI flag: -validation.max-length-label-name [max_label_name_length: | default = 1024] -# Maximum length of a label value. +# Maximum length accepted for label value. This setting also applies to the +# metric name. # CLI flag: -validation.max-length-label-value [max_label_value_length: | default = 2048] @@ -2232,10 +2216,10 @@ The `limits_config` block configures global and per-tenant limits in Loki. # Maximum accepted sample age before rejecting. # CLI flag: -validation.reject-old-samples.max-age -[reject_old_samples_max_age: | default = 168h] +[reject_old_samples_max_age: | default = 1w] -# Duration for a table to be created/deleted before/after it's -# needed. Samples won't be accepted before this time. +# Duration which table will be created/deleted before/after it's needed; we +# won't accept sample from before this time. # CLI flag: -validation.create-grace-period [creation_grace_period: | default = 10m] @@ -2243,33 +2227,30 @@ The `limits_config` block configures global and per-tenant limits in Loki. # CLI flag: -validation.enforce-metric-name [enforce_metric_name: | default = true] -# Maximum line size on ingestion path. Example: 256kb. -# There is no limit when unset or set to 0. +# Maximum line size on ingestion path. Example: 256kb. There is no limit when +# unset or set to 0. # CLI flag: -distributor.max-line-size -[max_line_size: | default = 0 ] +[max_line_size: | default = 0B] -# Truncate log lines when they exceed max_line_size. +# Whether to truncate lines that exceed max_line_size. # CLI flag: -distributor.max-line-size-truncate -[max_line_size_truncate: | default = false ] - -# Alter the log line timestamp during ingestion when the timestamp is the same as the -# previous entry for the same stream. 
When enabled, if a log line in a push request has -# the same timestamp as the previous line for the same stream, one nanosecond is added -# to the log line. This will preserve the received order of log lines with the exact -# same timestamp when they are queried, by slightly altering their stored timestamp. -# NOTE: This is imperfect, because Loki accepts out of order writes, and another push -# request for the same stream could contain duplicate timestamps to existing -# entries and they will not be incremented. +[max_line_size_truncate: | default = false] + +# Alter the log line timestamp during ingestion when the timestamp is the same +# as the previous entry for the same stream. When enabled, if a log line in a +# push request has the same timestamp as the previous line for the same stream, +# one nanosecond is added to the log line. This will preserve the received order +# of log lines with the exact same timestamp when they are queried, by slightly +# altering their stored timestamp. NOTE: This is imperfect, because Loki accepts +# out of order writes, and another push request for the same stream could +# contain duplicate timestamps to existing entries and they will not be +# incremented. # CLI flag: -validation.increment-duplicate-timestamps -[increment_duplicate_timestamp: | default = false ] +[increment_duplicate_timestamp: | default = false] -# Maximum number of log entries that will be returned for a query. -# CLI flag: -validation.max-entries-limit -[max_entries_limit_per_query: | default = 5000 ] - -# Maximum number of active streams per user, per ingester. 0 to make it unlimited. +# Maximum number of active streams per user, per ingester. 0 to disable. # CLI flag: -ingester.max-streams-per-user -[max_streams_per_user: | default 0] +[max_streams_per_user: | default = 0] # Maximum number of active streams per user, across the cluster. 0 to disable. 
# When the global limit is enabled, each ingester is configured with a dynamic @@ -2282,22 +2263,46 @@ The `limits_config` block configures global and per-tenant limits in Loki. # CLI flag: -ingester.unordered-writes [unordered_writes: | default = true] -# Maximum number of chunks that can be fetched by a single query. +# Maximum byte rate per second per stream, also expressible in human readable +# forms (1MB, 256KB, etc). +# CLI flag: -ingester.per-stream-rate-limit +[per_stream_rate_limit: | default = 3MB] + +# Maximum burst bytes per stream, also expressible in human readable forms (1MB, +# 256KB, etc). This is how far above the rate limit a stream can 'burst' before +# the stream is limited. +# CLI flag: -ingester.per-stream-rate-limit-burst +[per_stream_rate_limit_burst: | default = 15MB] + +# Maximum number of chunks that can be fetched in a single query. # CLI flag: -store.query-chunk-limit [max_chunks_per_query: | default = 2000000] +# Limit the maximum of unique series that is returned by a metric query. When +# the limit is reached an error is returned. +# CLI flag: -querier.max-query-series +[max_query_series: | default = 500] + +# Limit how far back in time series data and metadata can be queried, up until +# lookback duration ago. This limit is enforced in the query frontend, the +# querier and the ruler. If the requested time range is outside the allowed +# range, the request will not fail, but will be modified to only query data +# within the allowed time range. The default value of 0 does not set a limit. +# CLI flag: -querier.max-query-lookback +[max_query_lookback: | default = 0s] + # The limit to length of chunk store queries. 0 to disable. # CLI flag: -store.max-query-length -[max_query_length: | default = 721h] +[max_query_length: | default = 30d1h] # Maximum number of queries that will be scheduled in parallel by the frontend. 
# CLI flag: -querier.max-query-parallelism [max_query_parallelism: | default = 32] -# Limit the maximum of unique series that is returned by a metric query. -# When the limit is reached an error is returned. -# CLI flag: -querier.max-query-series -[max_query_series: | default = 500] +# Maximum number of queries will be scheduled in parallel by the frontend for +# TSDB schemas. +# CLI flag: -querier.tsdb-max-query-parallelism +[tsdb_max_query_parallelism: | default = 512] # Cardinality limit for index queries. # CLI flag: -store.cardinality-limit @@ -2311,51 +2316,12 @@ The `limits_config` block configures global and per-tenant limits in Loki. # CLI flag: -querier.max-concurrent-tail-requests [max_concurrent_tail_requests: | default = 10] -# Duration to delay the evaluation of rules to ensure. -# CLI flag: -ruler.evaluation-delay-duration -[ruler_evaluation_delay_duration: | default = 0s] - -# Maximum number of rules per rule group per-tenant. 0 to disable. -# CLI flag: -ruler.max-rules-per-rule-group -[ruler_max_rules_per_rule_group: | default = 0] - -# Maximum number of rule groups per-tenant. 0 to disable. -# CLI flag: -ruler.max-rule-groups-per-tenant -[ruler_max_rule_groups_per_tenant: | default = 0] - -# Ruler alertmanager configuration per tenant. -[ruler_alertmanager_config: ] - -# Retention to apply for the store, if the retention is enable on the compactor side. -# CLI flag: -store.retention -[retention_period: | default = 744h] - -# Per-stream retention to apply, if the retention is enable on the compactor side. -# Example: -# retention_stream: -# - selector: '{namespace="dev"}' -# priority: 1 -# period: 24h -# - selector: '{container="nginx"}' -# priority: 1 -# period: 744h -# Selector is a Prometheus labels matchers that will apply the `period` retention only if -# the stream is matching. In case multiple stream are matching, the highest -# priority will be picked. If no rule is matched the `retention_period` is used. 
-[retention_stream: | default = none] - -# Feature renamed to 'runtime configuration', flag deprecated in favor of -runtime-config.file -# (runtime_config.file in YAML). -# CLI flag: -limits.per-user-override-config -[per_tenant_override_config: ] - -# Feature renamed to 'runtime configuration'; flag deprecated in favor of -# -runtime-config.reload-period (runtime_config.period in YAML). -# CLI flag: -limits.per-user-override-period -[per_tenant_override_period: | default = 10s] +# Maximum number of log entries that will be returned for a query. +# CLI flag: -validation.max-entries-limit +[max_entries_limit_per_query: | default = 5000] -# Most recent allowed cacheable result per-tenant, to prevent caching very recent results that -# might still be in flux. +# Most recent allowed cacheable result per-tenant, to prevent caching very +# recent results that might still be in flux. # CLI flag: -frontend.max-cache-freshness [max_cache_freshness_per_query: | default = 1m] @@ -2369,775 +2335,1631 @@ The `limits_config` block configures global and per-tenant limits in Loki. # CLI flag: -frontend.max-queriers-per-tenant [max_queriers_per_tenant: | default = 0] -# Maximum byte rate per second per stream, -# also expressible in human readable forms (1MB, 256KB, etc). -# CLI flag: -ingester.per-stream-rate-limit -[per_stream_rate_limit: | default = "3MB"] +# Number of days of index to be kept always downloaded for queries. Applies only +# to per user index in boltdb-shipper index store. 0 to disable. +# CLI flag: -store.query-ready-index-num-days +[query_ready_index_num_days: | default = 0] -# Maximum burst bytes per stream, -# also expressible in human readable forms (1MB, 256KB, etc). -# This is how far above the rate limit a stream can "burst" before the stream is limited. -# CLI flag: -ingester.per-stream-rate-limit-burst -[per_stream_rate_limit_burst: | default = "15MB"] +# Timeout when querying backends (ingesters or storage) during the execution of +# a query request. 
If a specific per-tenant timeout is used, this timeout is +# ignored. +# CLI flag: -querier.query-timeout +[query_timeout: | default = 1m] -# Configures the distributor to shard streams that are too big -shard_streams: - # Whether to enable stream sharding - # - # CLI flag: -shard-streams.enabled - [enabled: | default = false] +# Split queries by a time interval and execute in parallel. The value 0 disables +# splitting by time. This also determines how cache keys are chosen when result +# caching is enabled. +# CLI flag: -querier.split-queries-by-interval +[split_queries_by_interval: | default = 30m] - # Enable logging when sharding streams because logging on the read path may - # impact performance. When disabled, stream sharding will emit no logs - # regardless of log level - # - # CLI flag: -shard-streams.logging-enabled - [logging_enabled: | default = false] - - # Threshold that determines how much the stream should be sharded. - # The formula used is n = ceil(stream size + ingested rate / desired rate), where n is the number of shards. - # For instance, if a stream ingestion is at 10MB, desired rate is 3MB (default), and a stream of size 1MB is - # received, the given stream will be split into n = ceil((1 + 10)/3) = 4 shards. - # - # CLI flag: -shard-streams.desired-rate - [desired_rate: | default = 3MB] - -# Limit how far back in time series data and metadata can be queried, -# up until lookback duration ago. -# This limit is enforced in the query frontend, the querier and the ruler. -# If the requested time range is outside the allowed range, the request will not fail, -# but will be modified to only query data within the allowed time range. -# The default value of 0 does not set a limit. -# CLI flag: -querier.max-query-lookback -[max_query_lookback: | default = 0] +# Limit queries that can be sharded. Queries within the time range of now and +# now minus this sharding lookback are not sharded. 
The default value of 0s +# disables the lookback, causing sharding of all queries at all times. +# CLI flag: -frontend.min-sharding-lookback +[min_sharding_lookback: | default = 0s] + +# Duration to delay the evaluation of rules to ensure the underlying metrics +# have been pushed to Cortex. +# CLI flag: -ruler.evaluation-delay-duration +[ruler_evaluation_delay_duration: | default = 0s] + +# Maximum number of rules per rule group per-tenant. 0 to disable. +# CLI flag: -ruler.max-rules-per-rule-group +[ruler_max_rules_per_rule_group: | default = 0] + +# Maximum number of rule groups per-tenant. 0 to disable. +# CLI flag: -ruler.max-rule-groups-per-tenant +[ruler_max_rule_groups_per_tenant: | default = 0] # Disable recording rules remote-write. -[ruler_remote_write_disabled: | default = false] +[ruler_remote_write_disabled: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# The URL of the endpoint to send samples to. -[ruler_remote_write_url: ] +# Deprecated: Use 'ruler_remote_write_config' instead. The URL of the endpoint +# to send samples to. +[ruler_remote_write_url: | default = ""] -# Deprecated: Use `ruler_remote_write_config` instead. -# Timeout for requests to the remote write endpoint. +# Deprecated: Use 'ruler_remote_write_config' instead. Timeout for requests to +# the remote write endpoint. [ruler_remote_write_timeout: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# Custom HTTP headers to be sent along with each remote write request. -# Be aware that headers that are set by Loki itself can't be overwritten. +# Deprecated: Use 'ruler_remote_write_config' instead. Custom HTTP headers to be +# sent along with each remote write request. Be aware that headers that are set +# by Loki itself can't be overwritten. [ruler_remote_write_headers: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# List of remote write relabel configurations. -[ruler_remote_write_relabel_configs: ] +# Deprecated: Use 'ruler_remote_write_config' instead. 
List of remote write +# relabel configurations. +[ruler_remote_write_relabel_configs: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# Number of samples to buffer per shard before we block reading of more -# samples from the WAL. It is recommended to have enough capacity in each -# shard to buffer several requests to keep throughput up while processing -# occasional slow remote requests. +# Deprecated: Use 'ruler_remote_write_config' instead. Number of samples to +# buffer per shard before we block reading of more samples from the WAL. It is +# recommended to have enough capacity in each shard to buffer several requests +# to keep throughput up while processing occasional slow remote requests. [ruler_remote_write_queue_capacity: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# Minimum number of shards, i.e. amount of concurrency. +# Deprecated: Use 'ruler_remote_write_config' instead. Minimum number of shards, +# i.e. amount of concurrency. [ruler_remote_write_queue_min_shards: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# Maximum number of shards, i.e. amount of concurrency. +# Deprecated: Use 'ruler_remote_write_config' instead. Maximum number of shards, +# i.e. amount of concurrency. [ruler_remote_write_queue_max_shards: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# Maximum number of samples per send. +# Deprecated: Use 'ruler_remote_write_config' instead. Maximum number of samples +# per send. [ruler_remote_write_queue_max_samples_per_send: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# Maximum time a sample will wait in buffer. +# Deprecated: Use 'ruler_remote_write_config' instead. Maximum time a sample +# will wait in buffer. [ruler_remote_write_queue_batch_send_deadline: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# Initial retry delay. Gets doubled for every retry. +# Deprecated: Use 'ruler_remote_write_config' instead. Initial retry delay. Gets +# doubled for every retry. 
[ruler_remote_write_queue_min_backoff: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# Maximum retry delay. +# Deprecated: Use 'ruler_remote_write_config' instead. Maximum retry delay. [ruler_remote_write_queue_max_backoff: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# Retry upon receiving a 429 status code from the remote-write storage. -# This is experimental and might change in the future. +# Deprecated: Use 'ruler_remote_write_config' instead. Retry upon receiving a +# 429 status code from the remote-write storage. This is experimental and might +# change in the future. [ruler_remote_write_queue_retry_on_ratelimit: ] -# Deprecated: Use `ruler_remote_write_config` instead. -# Configures AWS's Signature Verification 4 signing process to -# sign every remote write request. -[ruler_remote_write_sigv4_config: ] - -# Configures global and per-tenant limits for remote write clients. -# A map with remote client id as key. -ruler_remote_write_config: - [: ] - -# Limit queries that can be sharded. -# Queries within the time range of now and now minus this sharding lookback -# are not sharded. The default value of 0s disables the lookback, causing -# sharding of all queries at all times. -# CLI flag: -frontend.min-sharding-lookback -[min_sharding_lookback: | default = 0s] - -# Split queries by a time interval and execute in parallel. The value 0 disables splitting by time. -# This also determines how cache keys are chosen when result caching is enabled -# CLI flag: -querier.split-queries-by-interval -[split_queries_by_interval: | default = 30m] - -# Deprecated: Use deletion_mode per tenant configuration instead. -# CLI flag: -compactor.allow_deletes -[allow_deletes: | default = false] - -# Deletion mode. -# Can be one of "disabled", "filter-only", or "filter-and-delete". -# When set to "filter-only" or "filter-and-delete", and if -# retention_enabled is true, then the log entry deletion API endpoints are available. 
-# CLI flag: -boltdb.shipper.compactor.deletion-mode -[deletion_mode: | default = "filter-and-delete"] -``` - -## sigv4_config - -The `sigv4_config` block configures AWS's Signature Verification 4 signing process to -sign every remote write request. - -```yaml -# The AWS region. If blank, the region from the default credentials chain -# is used. -[region: ] - -# The AWS API keys. If blank, the environment variables `AWS_ACCESS_KEY_ID` -# and `AWS_SECRET_ACCESS_KEY` are used. -[access_key: ] -[secret_key: ] - -# Named AWS profile used to authenticate. -[profile: ] - -# AWS Role ARN, an alternative to using AWS API keys. -[role_arn: ] -``` +# Deprecated: Use 'ruler_remote_write_config' instead. Configures AWS's +# Signature Verification 4 signing process to sign every remote write request. +ruler_remote_write_sigv4_config: + [region: | default = ""] -## alertmanager_config + [access_key: | default = ""] -The `alertmanager_config` block configures the alertmanager for the ruler alerts. + [secret_key: | default = ""] -```yaml -# Comma-separated list of Alertmanager URLs to send notifications to. -# Each Alertmanager URL is treated as a separate group in the configuration. -# Multiple Alertmanagers in HA per group can be supported by using DNS -# resolution via -ruler.alertmanager-discovery. -[alertmanager_url: | default = ""] + [profile: | default = ""] + [role_arn: | default = ""] -alertmanager_client: - # Sets the `Authorization` header on every remote write request with the - # configured username and password. - # password and password_file are mutually exclusive. - [basic_auth_username: ] - [basic_auth_password: ] - - # Optional `Authorization` header configuration. - authorization: - # Sets the authentication type. - [type: | default: Bearer] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. 
- [credentials_file: ] +# Configures global and per-tenant limits for remote write clients. A map with +# remote client id as key. +[ruler_remote_write_config: ] -# Use DNS SRV records to discover Alertmanager hosts. -[enable_alertmanager_discovery: | default = false] +# Deletion mode. Can be one of 'disabled', 'filter-only', or +# 'filter-and-delete'. When set to 'filter-only' or 'filter-and-delete', and if +# retention_enabled is true, then the log entry deletion API endpoints are +# available. +# CLI flag: -compactor.deletion-mode +[deletion_mode: | default = "filter-and-delete"] -# How long to wait between refreshing DNS resolutions of Alertmanager hosts. -[alertmanager_refresh_interval: | default = 1m] +# Retention to apply for the store, if the retention is enabled on the compactor +# side. +# CLI flag: -store.retention +[retention_period: | default = 31d] -# If enabled, then requests to Alertmanager use the v2 API. -[enable_alertmanager_v2: | default = false] +# Per-stream retention to apply, if the retention is enable on the compactor +# side. +# Example: +# retention_stream: +# - selector: '{namespace="dev"}' +# priority: 1 +# period: 24h +# - selector: '{container="nginx"}' +# priority: 1 +# period: 744h +# Selector is a Prometheus labels matchers that will apply the 'period' +# retention only if the stream is matching. In case multiple stream are +# matching, the highest priority will be picked. If no rule is matched the +# 'retention_period' is used. +[retention_stream: ] + +# Feature renamed to 'runtime configuration', flag deprecated in favor of +# -runtime-config.file (runtime_config.file in YAML). +# CLI flag: -limits.per-user-override-config +[per_tenant_override_config: | default = ""] -# List of alert relabel configs -alert_relabel_configs: - [- ...] +# Feature renamed to 'runtime configuration'; flag deprecated in favor of +# -runtime-config.reload-period (runtime_config.period in YAML). 
+# CLI flag: -limits.per-user-override-period +[per_tenant_override_period: | default = 10s] -# Capacity of the queue for notifications to be sent to the Alertmanager. -[notification_queue_capacity: | default = 10000] +# Deprecated: Use deletion_mode per tenant configuration instead. +[allow_deletes: ] -# HTTP timeout duration when sending notifications to the Alertmanager. -[notification_timeout: | default = 10s] -``` +shard_streams: + [enabled: ] -## remote_write_client_config + [logging_enabled: ] -The `remote_write_client_config` block configures the client for the remote write function in the ruler. + [desired_rate: ] -```yaml -# The URL of the endpoint to send samples to. -url: - -# Timeout for requests to the remote write endpoint. -[remote_timeout: | default = 30s] - -# Custom HTTP headers to be sent along with each remote write request. -# Be aware that headers that are set by Loki itself can't be overwritten. -headers: - [: ...] - -# List of remote write relabel configurations. -write_relabel_configs: - [- ...] - -# Name of the remote write config, which if specified must be unique among remote -# write configs. -# The name will be used in metrics and logging in place of a generated value -# to help users distinguish between remote write configs. -[name: ] - -# Sets the `Authorization` header on every remote write request with the -# configured username and password. -# password and password_file are mutually exclusive. -basic_auth: - [username: ] - [password: ] - [password_file: ] - -# Optional `Authorization` header configuration. -authorization: - # Sets the authentication type. - [type: | default: Bearer] - # Sets the credentials. It is mutually exclusive with - # `credentials_file`. - [credentials: ] - # Sets the credentials to the credentials read from the configured file. - # It is mutually exclusive with `credentials`. - [credentials_file: ] - -# Optionally configures AWS's Signature Verification 4 signing process to -# sign requests. 
Cannot be set at the same time as basic_auth, authorization, or oauth2. -# To use the default credentials from the AWS SDK, use `sigv4: {}`. -[sigv4: ] - -# Configures the remote write request's TLS settings. -tls_config: - # CA certificate to validate API server certificate with. - [ca_file: ] - # Certificate and key files for client cert authentication to the server. - [cert_file: ] - [key_file: ] - # ServerName extension to indicate the name of the server. - # https://tools.ietf.org/html/rfc4366#section-3.1 - [server_name: ] - # Disable validation of the server certificate. - [insecure_skip_verify: ] - -# Optional proxy URL. -[proxy_url: ] - -# Configure whether HTTP requests follow HTTP 3xx redirects. -[follow_redirects: | default = true] - -# Configures the queue used to write to remote storage. -queue_config: - # Number of samples to buffer per shard before we block reading of more - # samples from the WAL. It is recommended to have enough capacity in each - # shard to buffer several requests to keep throughput up while processing - # occasional slow remote requests. - [capacity: | default = 2500] - # Maximum number of shards, i.e. amount of concurrency. - [max_shards: | default = 200] - # Minimum number of shards, i.e. amount of concurrency. - [min_shards: | default = 1] - # Maximum number of samples per send. - [max_samples_per_send: | default = 500] - # Maximum time a sample will wait in buffer. - [batch_send_deadline: | default = 5s] - # Initial retry delay. Gets doubled for every retry. - [min_backoff: | default = 30ms] - # Maximum retry delay. - [max_backoff: | default = 100ms] - # Retry upon receiving a 429 status code from the remote-write storage. - # This is experimental and might change in the future. - [retry_on_http_429: | default = false] +[blocked_queries: ] ``` -### grpc_client_config +### frontend_worker -The `grpc_client_config` block configures a client connection to a gRPC service. 
+The `frontend_worker` configures the worker - running within the Loki querier - picking up and executing queries enqueued by the query-frontend. ```yaml -# The maximum size in bytes the client can receive. -# CLI flag: -.grpc-max-recv-msg-size -[max_recv_msg_size: | default = 104857600] - -# The maximum size in bytes the client can send. -# CLI flag: -.grpc-max-send-msg-size -[max_send_msg_size: | default = 16777216] - -# Use compression when sending messages. Supported values are: 'gzip', 'snappy', -# and '' (disable compression). -# CLI flag: -.grpc-compression -[grpc_compression: | default = ''] - -# Rate limit for gRPC client. 0 is disabled. -# CLI flag: -.grpc-client-rate-limit -[rate_limit: | default = 0] - -# Rate limit burst for gRPC client. -# CLI flag: -.grpc-client-rate-limit-burst -[rate_limit_burst: | default = 0] - -# Enable backoff and retry when a rate limit is hit. -# CLI flag: -.backoff-on-ratelimits -[backoff_on_ratelimits: | default = false] +# Address of query frontend service, in host:port format. If +# -querier.scheduler-address is set as well, querier will use scheduler instead. +# Only one of -querier.frontend-address or -querier.scheduler-address can be +# set. If neither is set, queries are only received via HTTP endpoint. +# CLI flag: -querier.frontend-address +[frontend_address: | default = ""] -# Configures backoff when enabled. -backoff_config: - # Minimum delay when backing off. - # CLI flag: -.backoff-min-period - [min_period: | default = 100ms] +# Hostname (and port) of scheduler that querier will periodically resolve, +# connect to and receive queries from. Only one of -querier.frontend-address or +# -querier.scheduler-address can be set. If neither is set, queries are only +# received via HTTP endpoint. +# CLI flag: -querier.scheduler-address +[scheduler_address: | default = ""] - # The maximum delay when backing off. 
- # CLI flag: -.backoff-max-period - [max_period: | default = 10s] +# How often to query DNS for query-frontend or query-scheduler address. Also +# used to determine how often to poll the scheduler-ring for addresses if the +# scheduler-ring is configured. +# CLI flag: -querier.dns-lookup-period +[dns_lookup_duration: | default = 3s] - # Number of times to backoff and retry before failing. - # CLI flag: -.backoff-retries - [max_retries: | default = 10] -``` +# Number of simultaneous queries to process per query-frontend or +# query-scheduler. +# CLI flag: -querier.worker-parallelism +[parallelism: | default = 10] -## index_gateway +# Force worker concurrency to match the -querier.max-concurrent option. +# Overrides querier.worker-parallelism. +# CLI flag: -querier.worker-match-max-concurrent +[match_max_concurrent: | default = true] -The `index_gateway` block configures the Loki index gateway server, responsible for serving index queries -without the need to constantly interact with the object store. +# Querier ID, sent to frontend service to identify requests from the same +# querier. Defaults to hostname. +# CLI flag: -querier.id +[id: | default = ""] -```yaml -# Defines in which mode the index gateway server will operate (default to 'simple'). -# It supports two modes: -# 'simple': an index gateway server instance is responsible for handling, -# storing and returning requests for all indices for all tenants. -# 'ring': an index gateway server instance is responsible for a subset of tenants instead -# of all tenants. -[mode: | default = simple] - -# Defines the ring to be used by the index gateway servers and clients in case the servers -# are configured to run in 'ring' mode. In case this isn't configured, this block supports -# inheriting configuration from the common ring section. -[ring: ] +# The grpc_client block configures the gRPC client used to communicate between +# two Loki components. 
+# The CLI flags prefix for this block configuration is: querier.frontend-client +[grpc_client_config: ] ``` -## table_manager +### table_manager -The `table_manager` block configures the Loki table-manager. +The `table_manager` block configures the table manager for retention. ```yaml -# Master 'off-switch' for table capacity updates, e.g. when troubleshooting. +# If true, disable all changes to DB capacity # CLI flag: -table-manager.throughput-updates-disabled [throughput_updates_disabled: | default = false] -# Master 'on-switch' for table retention deletions. +# If true, enables retention deletes of DB tables # CLI flag: -table-manager.retention-deletes-enabled [retention_deletes_enabled: | default = false] -# How far back tables will be kept before they are deleted. 0s disables -# deletion. The retention period must be a multiple of the index / chunks -# table "period" (see period_config). +# Tables older than this retention period are deleted. Must be either 0 +# (disabled) or a multiple of 24h. When enabled, be aware this setting is +# destructive to data! # CLI flag: -table-manager.retention-period [retention_period: | default = 0s] -# Period with which the table manager will poll for tables. +# How frequently to poll backend to learn our capacity. # CLI flag: -table-manager.poll-interval [poll_interval: | default = 2m] -# Duration a table will be created before it is needed. +# Periodic tables grace period (duration which table will be created/deleted +# before/after it's needed). # CLI flag: -table-manager.periodic-table.grace-period [creation_grace_period: | default = 10m] -# Configures management of the index tables for DynamoDB. -# The CLI flags prefix for this block config is: table-manager.index-table -index_tables_provisioning: - -# Configures management of the chunk tables for DynamoDB. 
-# The CLI flags prefix for this block config is: table-manager.chunk-table -chunk_tables_provisioning: +index_tables_provisioning: + # Enables on demand throughput provisioning for the storage provider (if + # supported). Applies only to tables which are not autoscaled. Supported by + # DynamoDB + # CLI flag: -table-manager.index-table.enable-ondemand-throughput-mode + [enable_ondemand_throughput_mode: | default = false] + + # Table default write throughput. Supported by DynamoDB + # CLI flag: -table-manager.index-table.write-throughput + [provisioned_write_throughput: | default = 1000] + + # Table default read throughput. Supported by DynamoDB + # CLI flag: -table-manager.index-table.read-throughput + [provisioned_read_throughput: | default = 300] + + write_scale: + # Should we enable autoscale for the table. + # CLI flag: -table-manager.index-table.write-throughput.scale.enabled + [enabled: | default = false] + + # AWS AutoScaling role ARN + # CLI flag: -table-manager.index-table.write-throughput.scale.role-arn + [role_arn: | default = ""] + + # DynamoDB minimum provision capacity. + # CLI flag: -table-manager.index-table.write-throughput.scale.min-capacity + [min_capacity: | default = 3000] + + # DynamoDB maximum provision capacity. + # CLI flag: -table-manager.index-table.write-throughput.scale.max-capacity + [max_capacity: | default = 6000] + + # DynamoDB minimum seconds between each autoscale up. + # CLI flag: -table-manager.index-table.write-throughput.scale.out-cooldown + [out_cooldown: | default = 1800] + + # DynamoDB minimum seconds between each autoscale down. + # CLI flag: -table-manager.index-table.write-throughput.scale.in-cooldown + [in_cooldown: | default = 1800] + + # DynamoDB target ratio of consumed capacity to provisioned capacity. + # CLI flag: -table-manager.index-table.write-throughput.scale.target-value + [target: | default = 80] + + read_scale: + # Should we enable autoscale for the table. 
+ # CLI flag: -table-manager.index-table.read-throughput.scale.enabled + [enabled: | default = false] + + # AWS AutoScaling role ARN + # CLI flag: -table-manager.index-table.read-throughput.scale.role-arn + [role_arn: | default = ""] + + # DynamoDB minimum provision capacity. + # CLI flag: -table-manager.index-table.read-throughput.scale.min-capacity + [min_capacity: | default = 3000] + + # DynamoDB maximum provision capacity. + # CLI flag: -table-manager.index-table.read-throughput.scale.max-capacity + [max_capacity: | default = 6000] + + # DynamoDB minimum seconds between each autoscale up. + # CLI flag: -table-manager.index-table.read-throughput.scale.out-cooldown + [out_cooldown: | default = 1800] + + # DynamoDB minimum seconds between each autoscale down. + # CLI flag: -table-manager.index-table.read-throughput.scale.in-cooldown + [in_cooldown: | default = 1800] + + # DynamoDB target ratio of consumed capacity to provisioned capacity. + # CLI flag: -table-manager.index-table.read-throughput.scale.target-value + [target: | default = 80] + + # Enables on demand throughput provisioning for the storage provider (if + # supported). Applies only to tables which are not autoscaled. Supported by + # DynamoDB + # CLI flag: -table-manager.index-table.inactive-enable-ondemand-throughput-mode + [enable_inactive_throughput_on_demand_mode: | default = false] + + # Table write throughput for inactive tables. Supported by DynamoDB + # CLI flag: -table-manager.index-table.inactive-write-throughput + [inactive_write_throughput: | default = 1] + + # Table read throughput for inactive tables. Supported by DynamoDB + # CLI flag: -table-manager.index-table.inactive-read-throughput + [inactive_read_throughput: | default = 300] + + inactive_write_scale: + # Should we enable autoscale for the table. 
+ # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.enabled + [enabled: | default = false] + + # AWS AutoScaling role ARN + # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.role-arn + [role_arn: | default = ""] + + # DynamoDB minimum provision capacity. + # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.min-capacity + [min_capacity: | default = 3000] + + # DynamoDB maximum provision capacity. + # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.max-capacity + [max_capacity: | default = 6000] + + # DynamoDB minimum seconds between each autoscale up. + # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.out-cooldown + [out_cooldown: | default = 1800] + + # DynamoDB minimum seconds between each autoscale down. + # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.in-cooldown + [in_cooldown: | default = 1800] + + # DynamoDB target ratio of consumed capacity to provisioned capacity. + # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.target-value + [target: | default = 80] + + inactive_read_scale: + # Should we enable autoscale for the table. + # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.enabled + [enabled: | default = false] + + # AWS AutoScaling role ARN + # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.role-arn + [role_arn: | default = ""] + + # DynamoDB minimum provision capacity. + # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.min-capacity + [min_capacity: | default = 3000] + + # DynamoDB maximum provision capacity. + # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.max-capacity + [max_capacity: | default = 6000] + + # DynamoDB minimum seconds between each autoscale up. 
+ # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.out-cooldown + [out_cooldown: | default = 1800] + + # DynamoDB minimum seconds between each autoscale down. + # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.in-cooldown + [in_cooldown: | default = 1800] + + # DynamoDB target ratio of consumed capacity to provisioned capacity. + # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.target-value + [target: | default = 80] + + # Number of last inactive tables to enable write autoscale. + # CLI flag: -table-manager.index-table.inactive-write-throughput.scale-last-n + [inactive_write_scale_lastn: | default = 4] + + # Number of last inactive tables to enable read autoscale. + # CLI flag: -table-manager.index-table.inactive-read-throughput.scale-last-n + [inactive_read_scale_lastn: | default = 4] + +chunk_tables_provisioning: + # Enables on demand throughput provisioning for the storage provider (if + # supported). Applies only to tables which are not autoscaled. Supported by + # DynamoDB + # CLI flag: -table-manager.chunk-table.enable-ondemand-throughput-mode + [enable_ondemand_throughput_mode: | default = false] + + # Table default write throughput. Supported by DynamoDB + # CLI flag: -table-manager.chunk-table.write-throughput + [provisioned_write_throughput: | default = 1000] + + # Table default read throughput. Supported by DynamoDB + # CLI flag: -table-manager.chunk-table.read-throughput + [provisioned_read_throughput: | default = 300] + + write_scale: + # Should we enable autoscale for the table. + # CLI flag: -table-manager.chunk-table.write-throughput.scale.enabled + [enabled: | default = false] + + # AWS AutoScaling role ARN + # CLI flag: -table-manager.chunk-table.write-throughput.scale.role-arn + [role_arn: | default = ""] + + # DynamoDB minimum provision capacity. 
+ # CLI flag: -table-manager.chunk-table.write-throughput.scale.min-capacity + [min_capacity: | default = 3000] + + # DynamoDB maximum provision capacity. + # CLI flag: -table-manager.chunk-table.write-throughput.scale.max-capacity + [max_capacity: | default = 6000] + + # DynamoDB minimum seconds between each autoscale up. + # CLI flag: -table-manager.chunk-table.write-throughput.scale.out-cooldown + [out_cooldown: | default = 1800] + + # DynamoDB minimum seconds between each autoscale down. + # CLI flag: -table-manager.chunk-table.write-throughput.scale.in-cooldown + [in_cooldown: | default = 1800] + + # DynamoDB target ratio of consumed capacity to provisioned capacity. + # CLI flag: -table-manager.chunk-table.write-throughput.scale.target-value + [target: | default = 80] + + read_scale: + # Should we enable autoscale for the table. + # CLI flag: -table-manager.chunk-table.read-throughput.scale.enabled + [enabled: | default = false] + + # AWS AutoScaling role ARN + # CLI flag: -table-manager.chunk-table.read-throughput.scale.role-arn + [role_arn: | default = ""] + + # DynamoDB minimum provision capacity. + # CLI flag: -table-manager.chunk-table.read-throughput.scale.min-capacity + [min_capacity: | default = 3000] + + # DynamoDB maximum provision capacity. + # CLI flag: -table-manager.chunk-table.read-throughput.scale.max-capacity + [max_capacity: | default = 6000] + + # DynamoDB minimum seconds between each autoscale up. + # CLI flag: -table-manager.chunk-table.read-throughput.scale.out-cooldown + [out_cooldown: | default = 1800] + + # DynamoDB minimum seconds between each autoscale down. + # CLI flag: -table-manager.chunk-table.read-throughput.scale.in-cooldown + [in_cooldown: | default = 1800] + + # DynamoDB target ratio of consumed capacity to provisioned capacity. + # CLI flag: -table-manager.chunk-table.read-throughput.scale.target-value + [target: | default = 80] + + # Enables on demand throughput provisioning for the storage provider (if + # supported). 
Applies only to tables which are not autoscaled. Supported by + # DynamoDB + # CLI flag: -table-manager.chunk-table.inactive-enable-ondemand-throughput-mode + [enable_inactive_throughput_on_demand_mode: | default = false] + + # Table write throughput for inactive tables. Supported by DynamoDB + # CLI flag: -table-manager.chunk-table.inactive-write-throughput + [inactive_write_throughput: | default = 1] + + # Table read throughput for inactive tables. Supported by DynamoDB + # CLI flag: -table-manager.chunk-table.inactive-read-throughput + [inactive_read_throughput: | default = 300] + + inactive_write_scale: + # Should we enable autoscale for the table. + # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.enabled + [enabled: | default = false] + + # AWS AutoScaling role ARN + # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.role-arn + [role_arn: | default = ""] + + # DynamoDB minimum provision capacity. + # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.min-capacity + [min_capacity: | default = 3000] + + # DynamoDB maximum provision capacity. + # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.max-capacity + [max_capacity: | default = 6000] + + # DynamoDB minimum seconds between each autoscale up. + # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.out-cooldown + [out_cooldown: | default = 1800] + + # DynamoDB minimum seconds between each autoscale down. + # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.in-cooldown + [in_cooldown: | default = 1800] + + # DynamoDB target ratio of consumed capacity to provisioned capacity. + # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.target-value + [target: | default = 80] + + inactive_read_scale: + # Should we enable autoscale for the table. 
+ # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.enabled + [enabled: | default = false] + + # AWS AutoScaling role ARN + # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.role-arn + [role_arn: | default = ""] + + # DynamoDB minimum provision capacity. + # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.min-capacity + [min_capacity: | default = 3000] + + # DynamoDB maximum provision capacity. + # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.max-capacity + [max_capacity: | default = 6000] + + # DynamoDB minimum seconds between each autoscale up. + # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.out-cooldown + [out_cooldown: | default = 1800] + + # DynamoDB minimum seconds between each autoscale down. + # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.in-cooldown + [in_cooldown: | default = 1800] + + # DynamoDB target ratio of consumed capacity to provisioned capacity. + # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.target-value + [target: | default = 80] + + # Number of last inactive tables to enable write autoscale. + # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale-last-n + [inactive_write_scale_lastn: | default = 4] + + # Number of last inactive tables to enable read autoscale. + # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale-last-n + [inactive_read_scale_lastn: | default = 4] ``` -### provision_config +### runtime_config -The `provision_config` block configures provisioning capacity for DynamoDB. +Configuration for 'runtime config' module, responsible for reloading runtime configuration file. ```yaml -# Enables on-demand throughput provisioning for the storage -# provider, if supported. Applies only to tables which are not autoscaled. 
-# CLI flag: -.enable-ondemand-throughput-mode -[enable_ondemand_throughput_mode: | default = false] - -# DynamoDB table default write throughput. -# CLI flag: -.write-throughput -[provisioned_write_throughput: | default = 3000] - -# DynamoDB table default read throughput. -# CLI flag: -.read-throughput -[provisioned_read_throughput: | default = 300] - -# Enables on-demand throughput provisioning for the storage provide, -# if supported. Applies only to tables which are not autoscaled. -# CLI flag: -.inactive-enable-ondemand-throughput-mode -[enable_inactive_throughput_on_demand_mode: | default = false] - -# DynamoDB table write throughput for inactive tables. -# CLI flag: -.inactive-write-throughput -[inactive_write_throughput: | default = 1] - -# DynamoDB table read throughput for inactive tables. -# CLI flag: -.inactive-read-throughput -[inactive_read_throughput: | default = 300] - -# Active table write autoscale config. -# The CLI flags prefix for this block config is: -.write-throughput -[write_scale: ] - -# Inactive table write autoscale config. -# The CLI flags prefix for this block config is: -.inactive-write-throughput -[inactive_write_scale: ] - -# Number of last inactive tables to enable write autoscale. -# CLI flag: -.enable-ondemand-throughput-mode -[inactive_write_scale_lastn: ] - -# Active table read autoscale config. -# The CLI flags prefix for this block config is: -.read-throughput -[read_scale: ] - -# Inactive table read autoscale config. -# The CLI flags prefix for this block config is: -.inactive-read-throughput -[inactive_read_scale: ] - -# Number of last inactive tables to enable read autoscale. -# CLI flag: -.enable-ondemand-throughput-mode -[inactive_read_scale_lastn: ] +# How often to check runtime config files. +# CLI flag: -runtime-config.reload-period +[period: | default = 10s] + +# Comma separated list of yaml files with the configuration that can be updated +# at runtime. Runtime config files will be merged from left to right. 
+# CLI flag: -runtime-config.file +[file: | default = ""] ``` -#### auto_scaling_config +### tracing -The `auto_scaling_config` block configures autoscaling for DynamoDB. +Configuration for `tracing`. ```yaml -# Whether or not autoscaling should be enabled. -# CLI flag: -.scale.enabled -[enabled: : default = false] - -# AWS AutoScaling role ARN. -# CLI flag: -.scale.role-arn -[role_arn: ] - -# DynamoDB minimum provision capacity. -# CLI flag: -.scale.min-capacity -[min_capacity: | default = 3000] - -# DynamoDB maximum provision capacity. -# CLI flag: -.scale.max-capacity -[max_capacity: | default = 6000] - -# DynamoDB minimum seconds between each autoscale up. -# CLI flag: -.scale.out-cooldown -[out_cooldown: | default = 1800] - -# DynamoDB minimum seconds between each autoscale down. -# CLI flag: -.scale.in-cooldown -[in_cooldown: | default = 1800] - -# DynamoDB target ratio of consumed capacity to provisioned capacity. -# CLI flag: -.scale.target-value -[target: | default = 80] +# Set to false to disable tracing. +# CLI flag: -tracing.enabled +[enabled: | default = true] ``` -## tracing +### analytics -The `tracing` block configures tracing for Jaeger. Currently limited to disable auto-configuration per [environment variables](https://www.jaegertracing.io/docs/1.16/client-features/) only. +Configuration for usage report. ```yaml -# Whether or not tracing should be enabled. -# CLI flag: -tracing.enabled -[enabled: : default = true] +# Enable anonymous usage reporting. +# CLI flag: -reporting.enabled +[reporting_enabled: | default = true] ``` -## common +### common -The `common` block sets common definitions to be shared by different components. -This way, one doesn't have to replicate configuration in multiple places. +Common configuration to be shared between multiple modules. If a more specific configuration is given in other sections, the related configuration within this section will be ignored. 
```yaml -# A common storage configuration to be used by the different Loki components. -[storage: ] - -# When defined, the given prefix will be present in front of the endpoint paths. -[path_prefix: ] - -# How many times incoming data should be replicated to the ingester component. -[replication_factor: | default = 3] - -# When true, the ingester, compactor, and query_scheduler ring tokens will be saved -# to files in the path_prefix directory. Loki will error if you set this to true -# and path_prefix is empty. -[persist_tokens: : default = false] - -# A common list of net interfaces used internally to look for addresses. -# If a more specific "instance_interface_names" is set, this is ignored. -# If "instance_interface_names" under the common ring section is configured, -# this common "instance_interface_names" is only applied to the frontend, but not for -# ring related components (ex: distributor, ruler, etc). -[instance_interface_names: | default = []] - -# A common address used by Loki components to advertise their address. -# If a more specific "instance_addr" is set, this is ignored. -# If "instance_addr" under the common ring section is configured, this common "instance_addr" -# is only applied to the frontend, but not for ring related components (ex: distributor, ruler, etc). -[instance_addr: ] - -# A common ring configuration to be used by all Loki rings. -# If a common ring is given, its values are used to define any undefined ring values. -# For instance, you can expect the `heartbeat_period` defined in the common section -# to be used by the distributor's ring, but only if the distributor's ring itself -# doesn't have a `heartbeat_period` set. -[ring: ] - -# Address and port number where the compactor API is served. -# CLI flag: -common.compactor-address -[compactor_address: | default = ""] +[path_prefix: | default = ""] -# Address and port number where the compactor grpc requests are being served. 
-# CLI flag: -common.compactor-grpc-address -[compactor_grpc_address: | default = ""] +storage: + # The s3_storage_config block configures the connection to Amazon S3 object + # storage backend. + # The CLI flags prefix for this block configuration is: common + [s3: ] -## analytics + # The gcs_storage_config block configures the connection to Google Cloud + # Storage object storage backend. + # The CLI flags prefix for this block configuration is: common.storage + [gcs: ] -The `analytics` block configures the reporting of Loki analytics to grafana.com. + # The azure_storage_config block configures the connection to Azure object + # storage backend. + # The CLI flags prefix for this block configuration is: common.storage + [azure: ] -```yaml -# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration -# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/ -# -# Statistics help us better understand how Loki is used, and they show us performance -# levels for most users. This helps us prioritize features and documentation. -# For more information on what's sent, look at -# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go -# Refer to the buildReport method to see what goes into a report. -# -# When true, enables usage reporting. -# CLI flag: -reporting.enabled -[reporting_enabled: : default = true] -``` + # The bos_storage_config block configures the connection to Baidu Object + # Storage (BOS) object storage backend. + # The CLI flags prefix for this block configuration is: common.storage + [bos: ] -### storage + # The swift_storage_config block configures the connection to OpenStack Object + # Storage (Swift) object storage backend. + # The CLI flags prefix for this block configuration is: common.storage + [swift: ] -The common `storage` block defines a common storage to be reused by different -components as a way to facilitate storage configuration. 
-If any specific configuration for an object storage client have been provided elsewhere in the configuration file, the specific configuration will supersede the common storage configuration. + filesystem: + # Directory to store chunks in. + # CLI flag: -common.storage.filesystem.chunk-directory + [chunks_directory: | default = ""] -```yaml -# Configures Azure as the common storage. -[azure: ] + # Directory to store rules in. + # CLI flag: -common.storage.filesystem.rules-directory + [rules_directory: | default = ""] -# Configures GCS as the common storage. -[gcs: ] + hedging: + # If set to a non-zero value a second request will be issued at the provided + # duration. Default is 0 (disabled) + # CLI flag: -common.storage.hedge-requests-at + [at: | default = 0s] -# Configures S3 as the common storage. -[s3: ] + # The maximum of hedge requests allowed. + # CLI flag: -common.storage.hedge-requests-up-to + [up_to: | default = 2] -# Configures Swift as the common storage. -[swift: ] + # The maximum of hedge requests allowed per seconds. + # CLI flag: -common.storage.hedge-max-per-second + [max_per_second: | default = 5] -# Configures a (local) file system as the common storage. -[filesystem: ] +[persist_tokens: ] -# Configures Baidu Object Storage (BOS) as the common storage. -[bos: ] +[replication_factor: ] -# The `hedging_config` configures how to hedge requests for the storage. -[hedging: ] +ring: + kvstore: + # Backend storage to use for the ring. Supported values are: consul, etcd, + # inmemory, memberlist, multi. + # CLI flag: -common.storage.ring.store + [store: | default = "consul"] -``` + # The prefix for the keys in the store. Should end with a /. + # CLI flag: -common.storage.ring.prefix + [prefix: | default = "collectors/"] + + # Configuration for a Consul client. Only applies if store is consul. + # The CLI flags prefix for this block configuration is: common.storage.ring + [consul: ] + + # Configuration for an ETCD v3 client. Only applies if store is etcd. 
+ # The CLI flags prefix for this block configuration is: common.storage.ring + [etcd: ] + + multi: + # Primary backend storage used by multi-client. + # CLI flag: -common.storage.ring.multi.primary + [primary: | default = ""] + + # Secondary backend storage used by multi-client. + # CLI flag: -common.storage.ring.multi.secondary + [secondary: | default = ""] + + # Mirror writes to secondary store. + # CLI flag: -common.storage.ring.multi.mirror-enabled + [mirror_enabled: | default = false] + + # Timeout for storing value to secondary store. + # CLI flag: -common.storage.ring.multi.mirror-timeout + [mirror_timeout: | default = 2s] + + # Period at which to heartbeat to the ring. 0 = disabled. + # CLI flag: -common.storage.ring.heartbeat-period + [heartbeat_period: | default = 15s] + + # The heartbeat timeout after which compactors are considered unhealthy within + # the ring. 0 = never (timeout disabled). + # CLI flag: -common.storage.ring.heartbeat-timeout + [heartbeat_timeout: | default = 1m] + + # File path where tokens are stored. If empty, tokens are not stored at + # shutdown and restored at startup. + # CLI flag: -common.storage.ring.tokens-file-path + [tokens_file_path: | default = ""] + + # True to enable zone-awareness and replicate blocks across different + # availability zones. + # CLI flag: -common.storage.ring.zone-awareness-enabled + [zone_awareness_enabled: | default = false] + + # Instance ID to register in the ring. + # CLI flag: -common.storage.ring.instance-id + [instance_id: | default = ""] + + # Name of network interface to read address from. + # CLI flag: -common.storage.ring.instance-interface-names + [instance_interface_names: | default = []] + + # Port to advertise in the ring (defaults to server.grpc-listen-port). + # CLI flag: -common.storage.ring.instance-port + [instance_port: | default = 0] + + # IP address to advertise in the ring. 
+ # CLI flag: -common.storage.ring.instance-addr
+ [instance_addr: | default = ""]
 
-### filesystem
 
+ # The availability zone where this instance is running. Required if
+ # zone-awareness is enabled.
+ # CLI flag: -common.storage.ring.instance-availability-zone
+ [instance_availability_zone: | default = ""]
 
-The common `filesystem` block configures a local file system as a general
-storage for various types of data generated by Loki.
+[instance_interface_names: ]
+
+[instance_addr: | default = ""]
+
+# the http address of the compactor in the form http://host:port
+# CLI flag: -common.compactor-address
+[compactor_address: | default = ""]
+
+# the grpc address of the compactor in the form host:port
+# CLI flag: -common.compactor-grpc-address
+[compactor_grpc_address: | default = ""]
+```
+
+### consul
+
+Configuration for a Consul client. Only applies if store is `consul`. The supported CLI flags `<prefix>` used to reference this configuration block are:
+
+- `boltdb.shipper.compactor.ring`
+- `common.storage.ring`
+- `distributor.ring`
+- `index-gateway.ring`
+- `query-scheduler.ring`
+- `ruler.ring`
+
+&nbsp;
+
 ```yaml
-# File system directory to be used for chunks storage.
-[chunks_directory: | default = ""]
+# Hostname and port of Consul.
+# CLI flag: -.consul.hostname
+[host: | default = "localhost:8500"]
+
+# ACL Token used to interact with Consul.
+# CLI flag: -.consul.acl-token
+[acl_token: | default = ""]
 
-# File system directory to be used for rules storage.
-[rules_directory: | default = ""]
+# HTTP timeout when talking to Consul
+# CLI flag: -.consul.client-timeout
+[http_client_timeout: | default = 20s]
+
+# Enable consistent reads to Consul.
+# CLI flag: -.consul.consistent-reads
+[consistent_reads: | default = false]
+
+# Rate limit when watching key or prefix in Consul, in requests per second. 0
+# disables the rate limit.
+# CLI flag: -.consul.watch-rate-limit
+[watch_rate_limit: | default = 1]
+
+# Burst size used in rate limit. 
Values less than 1 are treated as 1.
+# CLI flag: -.consul.watch-burst-size
+[watch_burst_size: | default = 1]
+
+# Maximum duration to wait before retrying a Compare And Swap (CAS) operation.
+# CLI flag: -.consul.cas-retry-delay
+[cas_retry_delay: | default = 1s]
 ```
 
-### ring
+### etcd
 
-The common `ring` block defines a ring configuration used by a Loki component.
+Configuration for an ETCD v3 client. Only applies if store is `etcd`. The supported CLI flags `<prefix>` used to reference this configuration block are:
+
+- `boltdb.shipper.compactor.ring`
+- `common.storage.ring`
+- `distributor.ring`
+- `index-gateway.ring`
+- `query-scheduler.ring`
+- `ruler.ring`
+
+&nbsp;
 
 ```yaml
-# The key-value store used to share the hash ring across multiple instances.
-kvstore:
-  # Backend storage to use for the ring. Supported values are: consul, etcd,
-  # inmemory, memberlist, multi.
-  # CLI flag: -.store
-  [store: | default = "memberlist"]
-
-  # The prefix for the keys in the store. Should end with a /.
-  # CLI flag: -.prefix
-  [prefix: | default = "collectors/"]
-
-  # The consul_config configures the consul client.
-  [consul: ]
-
-  # The etcd_config configures the etcd client.
-  [etcd: ]
-
-  multi:
-    # Primary backend storage used by multi-client.
-    # CLI flag: -.multi.primary
-    [primary: | default = ""]
-
-    # Secondary backend storage used by multi-client.
-    # CLI flag: -.multi.secondary
-    [secondary: | default = ""]
-
-    # Mirror writes to secondary store.
-    # CLI flag: -.multi.mirror-enabled
-    [mirror_enabled: | default = false]
-
-    # Timeout for storing value to secondary store.
-    # CLI flag: -.multi.mirror-timeout
-    [mirror_timeout: | default = 2s]
-
-# Interval between heartbeats sent to the ring. 0 = disabled.
-# CLI flag: -.heartbeat-period
-[heartbeat_period: | default = 15s]
-
-# The heartbeat timeout after which store gateways are considered unhealthy
-# within the ring. 0 = never (timeout disabled). 
This option needs be set both -# on the store-gateway and querier when running in microservices mode. -# CLI flag: -.heartbeat-timeout -[heartbeat_timeout: | default = 1m] - -# File path where tokens are stored. If empty, tokens are neither stored at -# shutdown nor restored at startup. -# CLI flag: -.tokens-file-path -[tokens_file_path: | default = ""] - -# True to enable zone-awareness and replicate blocks across different -# availability zones. -# CLI flag: -.zone-awareness-enabled -[zone_awareness_enabled: | default = false] - -# Name of network interface to read addresses from. -# CLI flag: -.instance-interface-names -[instance_interface_names: | default = []] - -# IP address to advertise in the ring. -# CLI flag: -.instance-addr -[instance_addr: | default = first from instance_interface_names] - -# Port to advertise in the ring -# CLI flag: -.instance-port -[instance_port: | default = server.grpc-listen-port] - -# Instance ID to register in the ring. -# CLI flag: -.instance-id -[instance_id: | default = os.Hostname()] - -# The availability zone where this instance is running. Required if -# zone-awareness is enabled. -# CLI flag: -.instance-availability-zone -[instance_availability_zone: | default = ""] +# The etcd endpoints to connect to. +# CLI flag: -.etcd.endpoints +[endpoints: | default = []] + +# The dial timeout for the etcd connection. +# CLI flag: -.etcd.dial-timeout +[dial_timeout: | default = 10s] + +# The maximum number of retries to do for failed ops. +# CLI flag: -.etcd.max-retries +[max_retries: | default = 10] + +# Enable TLS. +# CLI flag: -.etcd.tls-enabled +[tls_enabled: | default = false] + +# Path to the client certificate file, which will be used for authenticating +# with the server. Also requires the key path to be configured. +# CLI flag: -.etcd.tls-cert-path +[tls_cert_path: | default = ""] + +# Path to the key file for the client certificate. Also requires the client +# certificate to be configured. 
+# CLI flag: -.etcd.tls-key-path +[tls_key_path: | default = ""] + +# Path to the CA certificates file to validate server certificate against. If +# not set, the host's root CA certificates are used. +# CLI flag: -.etcd.tls-ca-path +[tls_ca_path: | default = ""] + +# Override the expected name on the server certificate. +# CLI flag: -.etcd.tls-server-name +[tls_server_name: | default = ""] + +# Skip validating server certificate. +# CLI flag: -.etcd.tls-insecure-skip-verify +[tls_insecure_skip_verify: | default = false] + +# Override the default cipher suite list (separated by commas). Allowed values: +# +# Secure Ciphers: +# - TLS_RSA_WITH_AES_128_CBC_SHA +# - TLS_RSA_WITH_AES_256_CBC_SHA +# - TLS_RSA_WITH_AES_128_GCM_SHA256 +# - TLS_RSA_WITH_AES_256_GCM_SHA384 +# - TLS_AES_128_GCM_SHA256 +# - TLS_AES_256_GCM_SHA384 +# - TLS_CHACHA20_POLY1305_SHA256 +# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA +# - TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA +# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA +# - TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA +# - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 +# - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 +# - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 +# - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 +# - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 +# - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 +# +# Insecure Ciphers: +# - TLS_RSA_WITH_RC4_128_SHA +# - TLS_RSA_WITH_3DES_EDE_CBC_SHA +# - TLS_RSA_WITH_AES_128_CBC_SHA256 +# - TLS_ECDHE_ECDSA_WITH_RC4_128_SHA +# - TLS_ECDHE_RSA_WITH_RC4_128_SHA +# - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA +# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 +# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 +# CLI flag: -.etcd.tls-cipher-suites +[tls_cipher_suites: | default = ""] + +# Override the default minimum TLS version. Allowed values: VersionTLS10, +# VersionTLS11, VersionTLS12, VersionTLS13 +# CLI flag: -.etcd.tls-min-version +[tls_min_version: | default = ""] + +# Etcd username. +# CLI flag: -.etcd.username +[username: | default = ""] + +# Etcd password. 
+# CLI flag: -.etcd.password +[password: | default = ""] ``` -## Runtime Configuration file +### grpc_client -Loki has a concept of "runtime config" file, which is simply a file that is reloaded while Loki is running. It is used by some Loki components to allow operator to change some aspects of Loki configuration without restarting it. File is specified by using `-runtime-config.file=` flag and reload period (which defaults to 10 seconds) can be changed by `-runtime-config.reload-period=` flag. Previously this mechanism was only used by limits overrides, and flags were called `-limits.per-user-override-config=` and `-limits.per-user-override-period=10s` respectively. These are still used, if `-runtime-config.file=` is not specified. +The `grpc_client` block configures the gRPC client used to communicate between two Loki components. The supported CLI flags `` used to reference this configuration block are: -At the moment, two components use runtime configuration: limits and multi KV store. +- `bigtable` +- `boltdb.shipper.index-gateway-client.grpc` +- `frontend.grpc-client-config` +- `ingester.client` +- `querier.frontend-client` +- `query-scheduler.grpc-client-config` +- `ruler.client` +- `tsdb.shipper.index-gateway-client.grpc` -Options for runtime configuration reload can also be configured via YAML: +  ```yaml -# Configuration file to periodically check and reload. -[file: : default = empty] +# gRPC client max receive message size (bytes). +# CLI flag: -.grpc-max-recv-msg-size +[max_recv_msg_size: | default = 104857600] + +# gRPC client max send message size (bytes). +# CLI flag: -.grpc-max-send-msg-size +[max_send_msg_size: | default = 104857600] + +# Use compression when sending messages. Supported values are: 'gzip', 'snappy' +# and '' (disable compression) +# CLI flag: -.grpc-compression +[grpc_compression: | default = ""] + +# Rate limit for gRPC client; 0 means disabled. 
+# CLI flag: -.grpc-client-rate-limit +[rate_limit: | default = 0] + +# Rate limit burst for gRPC client. +# CLI flag: -.grpc-client-rate-limit-burst +[rate_limit_burst: | default = 0] -# How often to check the file. -[period: : default 10s] +# Enable backoff and retry when we hit ratelimits. +# CLI flag: -.backoff-on-ratelimits +[backoff_on_ratelimits: | default = false] + +backoff_config: + # Minimum delay when backing off. + # CLI flag: -.backoff-min-period + [min_period: | default = 100ms] + + # Maximum delay when backing off. + # CLI flag: -.backoff-max-period + [max_period: | default = 10s] + + # Number of times to backoff and retry before failing. + # CLI flag: -.backoff-retries + [max_retries: | default = 10] + +# Enable TLS in the GRPC client. This flag needs to be enabled when any other +# TLS flag is set. If set to false, insecure connection to gRPC server will be +# used. +# CLI flag: -.tls-enabled +[tls_enabled: | default = false] + +# Path to the client certificate file, which will be used for authenticating +# with the server. Also requires the key path to be configured. +# CLI flag: -.tls-cert-path +[tls_cert_path: | default = ""] + +# Path to the key file for the client certificate. Also requires the client +# certificate to be configured. +# CLI flag: -.tls-key-path +[tls_key_path: | default = ""] + +# Path to the CA certificates file to validate server certificate against. If +# not set, the host's root CA certificates are used. +# CLI flag: -.tls-ca-path +[tls_ca_path: | default = ""] + +# Override the expected name on the server certificate. +# CLI flag: -.tls-server-name +[tls_server_name: | default = ""] + +# Skip validating server certificate. +# CLI flag: -.tls-insecure-skip-verify +[tls_insecure_skip_verify: | default = false] + +# Override the default cipher suite list (separated by commas). 
Allowed values: +# +# Secure Ciphers: +# - TLS_RSA_WITH_AES_128_CBC_SHA +# - TLS_RSA_WITH_AES_256_CBC_SHA +# - TLS_RSA_WITH_AES_128_GCM_SHA256 +# - TLS_RSA_WITH_AES_256_GCM_SHA384 +# - TLS_AES_128_GCM_SHA256 +# - TLS_AES_256_GCM_SHA384 +# - TLS_CHACHA20_POLY1305_SHA256 +# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA +# - TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA +# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA +# - TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA +# - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 +# - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 +# - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 +# - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 +# - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 +# - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 +# +# Insecure Ciphers: +# - TLS_RSA_WITH_RC4_128_SHA +# - TLS_RSA_WITH_3DES_EDE_CBC_SHA +# - TLS_RSA_WITH_AES_128_CBC_SHA256 +# - TLS_ECDHE_ECDSA_WITH_RC4_128_SHA +# - TLS_ECDHE_RSA_WITH_RC4_128_SHA +# - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA +# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 +# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 +# CLI flag: -.tls-cipher-suites +[tls_cipher_suites: | default = ""] + +# Override the default minimum TLS version. Allowed values: VersionTLS10, +# VersionTLS11, VersionTLS12, VersionTLS13 +# CLI flag: -.tls-min-version +[tls_min_version: | default = ""] ``` -Example runtime configuration file: +### tls_config + +The TLS configuration. ```yaml -overrides: - tenant1: - ingestion_rate_mb: 10 - max_streams_per_user: 100000 - max_chunks_per_query: 100000 - tenant2: - max_streams_per_user: 1000000 - max_chunks_per_query: 1000000 - -multi_kv_config: - mirror-enabled: false - primary: consul +# Path to the client certificate file, which will be used for authenticating +# with the server. Also requires the key path to be configured. +# CLI flag: -frontend.tail-tls-config.tls-cert-path +[tls_cert_path: | default = ""] + +# Path to the key file for the client certificate. Also requires the client +# certificate to be configured. 
+# CLI flag: -frontend.tail-tls-config.tls-key-path +[tls_key_path: | default = ""] + +# Path to the CA certificates file to validate server certificate against. If +# not set, the host's root CA certificates are used. +# CLI flag: -frontend.tail-tls-config.tls-ca-path +[tls_ca_path: | default = ""] + +# Override the expected name on the server certificate. +# CLI flag: -frontend.tail-tls-config.tls-server-name +[tls_server_name: | default = ""] + +# Skip validating server certificate. +# CLI flag: -frontend.tail-tls-config.tls-insecure-skip-verify +[tls_insecure_skip_verify: | default = false] + +# Override the default cipher suite list (separated by commas). Allowed values: +# +# Secure Ciphers: +# - TLS_RSA_WITH_AES_128_CBC_SHA +# - TLS_RSA_WITH_AES_256_CBC_SHA +# - TLS_RSA_WITH_AES_128_GCM_SHA256 +# - TLS_RSA_WITH_AES_256_GCM_SHA384 +# - TLS_AES_128_GCM_SHA256 +# - TLS_AES_256_GCM_SHA384 +# - TLS_CHACHA20_POLY1305_SHA256 +# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA +# - TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA +# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA +# - TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA +# - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 +# - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 +# - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 +# - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 +# - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 +# - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 +# +# Insecure Ciphers: +# - TLS_RSA_WITH_RC4_128_SHA +# - TLS_RSA_WITH_3DES_EDE_CBC_SHA +# - TLS_RSA_WITH_AES_128_CBC_SHA256 +# - TLS_ECDHE_ECDSA_WITH_RC4_128_SHA +# - TLS_ECDHE_RSA_WITH_RC4_128_SHA +# - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA +# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 +# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 +# CLI flag: -frontend.tail-tls-config.tls-cipher-suites +[tls_cipher_suites: | default = ""] + +# Override the default minimum TLS version. 
Allowed values: VersionTLS10, +# VersionTLS11, VersionTLS12, VersionTLS13 +# CLI flag: -frontend.tail-tls-config.tls-min-version +[tls_min_version: | default = ""] ``` -## Accept out-of-order writes +### cache_config + +The cache block configures the cache backend. The supported CLI flags `` used to reference this configuration block are: + +- `frontend` +- `store.chunks-cache` +- `store.index-cache-read` +- `store.index-cache-write` + +  + +```yaml +# Cache config for index entry writing.(deprecated: use embedded-cache instead) +# Enable in-memory cache (auto-enabled for the chunks & query results cache if +# no other cache is configured). +# CLI flag: -.cache.enable-fifocache +[enable_fifocache: | default = false] + +# Cache config for index entry writing.The default validity of entries for +# caches unless overridden. +# CLI flag: -.default-validity +[default_validity: | default = 1h] + +background: + # Cache config for index entry writing.At what concurrency to write back to + # cache. + # CLI flag: -.background.write-back-concurrency + [writeback_goroutines: | default = 10] -Since the beginning of Loki, log entries had to be written to Loki in order -by time. -This limitation has been lifted. -Out-of-order writes are enabled globally by default, but can be disabled/enabled -on a cluster or per-tenant basis. + # Cache config for index entry writing.How many key batches to buffer for + # background write-back. + # CLI flag: -.background.write-back-buffer + [writeback_buffer: | default = 10000] -- To disable out-of-order writes for all tenants, -place in the `limits_config` section: +memcached: + # Cache config for index entry writing.How long keys stay in the memcache. + # CLI flag: -.memcached.expiration + [expiration: | default = 0s] - ``` - limits_config: - unordered_writes: false - ``` + # Cache config for index entry writing.How many keys to fetch in each batch. 
+ # CLI flag: -.memcached.batchsize + [batch_size: | default = 1024] -- To disable out-of-order writes for specific tenants, -configure a runtime configuration file: + # Cache config for index entry writing.Maximum active requests to memcache. + # CLI flag: -.memcached.parallelism + [parallelism: | default = 100] - ``` - runtime_config: overrides.yaml - ``` +memcached_client: + # Cache config for index entry writing.Hostname for memcached service to use. + # If empty and if addresses is unset, no memcached will be used. + # CLI flag: -.memcached.hostname + [host: | default = ""] - In the `overrides.yaml` file, add `unordered_writes` for each tenant - permitted to have out-of-order writes: + # Cache config for index entry writing.SRV service used to discover memcache + # servers. + # CLI flag: -.memcached.service + [service: | default = "memcached"] - ``` - overrides: - "tenantA": - unordered_writes: false - ``` + # Cache config for index entry writing.EXPERIMENTAL: Comma separated addresses + # list in DNS Service Discovery format: + # https://cortexmetrics.io/docs/configuration/arguments/#dns-service-discovery + # CLI flag: -.memcached.addresses + [addresses: | default = ""] -How far into the past accepted out-of-order log entries may be -is configurable with `max_chunk_age`. -`max_chunk_age` defaults to 2 hour. -Loki calculates the earliest time that out-of-order entries may have -and be accepted with + # Cache config for index entry writing.Maximum time to wait before giving up + # on memcached requests. + # CLI flag: -.memcached.timeout + [timeout: | default = 100ms] + + # Cache config for index entry writing.Maximum number of idle connections in + # pool. + # CLI flag: -.memcached.max-idle-conns + [max_idle_conns: | default = 16] + + # Cache config for index entry writing.The maximum size of an item stored in + # memcached. Bigger items are not stored. If set to 0, no maximum size is + # enforced. 
+ # CLI flag: -.memcached.max-item-size + [max_item_size: | default = 0] + + # Cache config for index entry writing.Period with which to poll DNS for + # memcache servers. + # CLI flag: -.memcached.update-interval + [update_interval: | default = 1m] + # Cache config for index entry writing.Use consistent hashing to distribute to + # memcache servers. + # CLI flag: -.memcached.consistent-hash + [consistent_hash: | default = true] + + # Cache config for index entry writing.Trip circuit-breaker after this number + # of consecutive dial failures (if zero then circuit-breaker is disabled). + # CLI flag: -.memcached.circuit-breaker-consecutive-failures + [circuit_breaker_consecutive_failures: | default = 10] + + # Cache config for index entry writing.Duration circuit-breaker remains open + # after tripping (if zero then 60 seconds is used). + # CLI flag: -.memcached.circuit-breaker-timeout + [circuit_breaker_timeout: | default = 10s] + + # Cache config for index entry writing.Reset circuit-breaker counts after this + # long (if zero then never reset). + # CLI flag: -.memcached.circuit-breaker-interval + [circuit_breaker_interval: | default = 10s] + +redis: + # Cache config for index entry writing.Redis Server or Cluster configuration + # endpoint to use for caching. A comma-separated list of endpoints for Redis + # Cluster or Redis Sentinel. If empty, no redis will be used. + # CLI flag: -.redis.endpoint + [endpoint: | default = ""] + + # Cache config for index entry writing.Redis Sentinel master name. An empty + # string for Redis Server or Redis Cluster. + # CLI flag: -.redis.master-name + [master_name: | default = ""] + + # Cache config for index entry writing.Maximum time to wait before giving up + # on redis requests. + # CLI flag: -.redis.timeout + [timeout: | default = 500ms] + + # Cache config for index entry writing.How long keys stay in the redis. 
+ # CLI flag: -.redis.expiration + [expiration: | default = 0s] + + # Cache config for index entry writing.Database index. + # CLI flag: -.redis.db + [db: | default = 0] + + # Cache config for index entry writing.Maximum number of connections in the + # pool. + # CLI flag: -.redis.pool-size + [pool_size: | default = 0] + + # Cache config for index entry writing.Username to use when connecting to + # redis. + # CLI flag: -.redis.username + [username: | default = ""] + + # Cache config for index entry writing.Password to use when connecting to + # redis. + # CLI flag: -.redis.password + [password: | default = ""] + + # Cache config for index entry writing.Enable connecting to redis with TLS. + # CLI flag: -.redis.tls-enabled + [tls_enabled: | default = false] + + # Cache config for index entry writing.Skip validating server certificate. + # CLI flag: -.redis.tls-insecure-skip-verify + [tls_insecure_skip_verify: | default = false] + + # Cache config for index entry writing.Close connections after remaining idle + # for this duration. If the value is zero, then idle connections are not + # closed. + # CLI flag: -.redis.idle-timeout + [idle_timeout: | default = 0s] + + # Cache config for index entry writing.Close connections older than this + # duration. If the value is zero, then the pool does not close connections + # based on age. + # CLI flag: -.redis.max-connection-age + [max_connection_age: | default = 0s] + +embedded_cache: + # Cache config for index entry writing.Whether embedded cache is enabled. + # CLI flag: -.embedded-cache.enabled + [enabled: | default = false] + + # Cache config for index entry writing.Maximum memory size of the cache in MB. + # CLI flag: -.embedded-cache.max-size-mb + [max_size_mb: | default = 100] + + # Cache config for index entry writing.The time to live for items in the cache + # before they get purged. 
+ # CLI flag: -.embedded-cache.ttl + [ttl: | default = 1h] + +fifocache: + # Cache config for index entry writing.Maximum memory size of the cache in + # bytes. A unit suffix (KB, MB, GB) may be applied. + # CLI flag: -.fifocache.max-size-bytes + [max_size_bytes: | default = "1GB"] + + # Cache config for index entry writing.deprecated: Maximum number of entries + # in the cache. + # CLI flag: -.fifocache.max-size-items + [max_size_items: | default = 0] + + # Cache config for index entry writing.The time to live for items in the cache + # before they get purged. + # CLI flag: -.fifocache.ttl + [ttl: | default = 1h] + + # Deprecated (use ttl instead): Cache config for index entry writing.The + # expiry duration for the cache. + # CLI flag: -.fifocache.duration + [validity: | default = 0s] + + # Deprecated (use max-size-items or max-size-bytes instead): Cache config for + # index entry writing.The number of entries to cache. + # CLI flag: -.fifocache.size + [size: | default = 0] + + [purgeinterval: ] + +# The maximum number of concurrent asynchronous writeback cache can occur. +# CLI flag: -.max-async-cache-write-back-concurrency +[async_cache_write_back_concurrency: | default = 16] + +# The maximum number of enqueued asynchronous writeback cache allowed. +# CLI flag: -.max-async-cache-write-back-buffer-size +[async_cache_write_back_buffer_size: | default = 500] ``` -time_of_most_recent_line - (max_chunk_age/2) + +### azure_storage_config + +The `azure_storage_config` block configures the connection to Azure object storage backend. The supported CLI flags `` used to reference this configuration block are: + +- `common.storage` +- `ruler.storage` + +  + +```yaml +# Azure Cloud environment. Supported values are: AzureGlobal, AzureChinaCloud, +# AzureGermanCloud, AzureUSGovernment. +# CLI flag: -.azure.environment +[environment: | default = "AzureGlobal"] + +# Azure storage account name. 
+# CLI flag: -.azure.account-name
+[account_name: | default = ""]
+
+# Azure storage account key.
+# CLI flag: -.azure.account-key
+[account_key: | default = ""]
+
+# Name of the storage account blob container used to store chunks. This
+# container must be created before running cortex.
+# CLI flag: -.azure.container-name
+[container_name: | default = "loki"]
+
+# Azure storage endpoint suffix without schema. The storage account name will be
+# prefixed to this value to create the FQDN.
+# CLI flag: -.azure.endpoint-suffix
+[endpoint_suffix: | default = ""]
+
+# Use Managed Identity to authenticate to the Azure storage account.
+# CLI flag: -.azure.use-managed-identity
+[use_managed_identity: | default = false]
+
+# User assigned identity ID to authenticate to the Azure storage account.
+# CLI flag: -.azure.user-assigned-id
+[user_assigned_id: | default = ""]
+
+# Use Service Principal to authenticate through Azure OAuth.
+# CLI flag: -.azure.use-service-principal
+[use_service_principal: | default = false]
+
+# Azure Service Principal ID(GUID).
+# CLI flag: -.azure.client-id
+[client_id: | default = ""]
+
+# Azure Service Principal secret key.
+# CLI flag: -.azure.client-secret
+[client_secret: | default = ""]
+
+# Azure Tenant ID is used to authenticate through Azure OAuth.
+# CLI flag: -.azure.tenant-id
+[tenant_id: | default = ""]
+
+# Chunk delimiter for blob ID to be used
+# CLI flag: -.azure.chunk-delimiter
+[chunk_delimiter: | default = "-"]
+
+# Preallocated buffer size for downloads.
+# CLI flag: -.azure.download-buffer-size
+[download_buffer_size: | default = 512000]
+
+# Preallocated buffer size for uploads.
+# CLI flag: -.azure.upload-buffer-size
+[upload_buffer_size: | default = 256000]
+
+# Number of buffers used to upload a chunk.
+# CLI flag: -.azure.download-buffer-count
+[upload_buffer_count: | default = 1]
+
+# Timeout for requests made against azure blob storage. 
+# CLI flag: -.azure.request-timeout +[request_timeout: | default = 30s] + +# Number of retries for a request which times out. +# CLI flag: -.azure.max-retries +[max_retries: | default = 5] + +# Minimum time to wait before retrying a request. +# CLI flag: -.azure.min-retry-delay +[min_retry_delay: | default = 10ms] + +# Maximum time to wait before retrying a request. +# CLI flag: -.azure.max-retry-delay +[max_retry_delay: | default = 500ms] +``` + +### gcs_storage_config + +The `gcs_storage_config` block configures the connection to Google Cloud Storage object storage backend. The supported CLI flags `` used to reference this configuration block are: + +- `common.storage` +- `ruler.storage` + +  + +```yaml +# Name of GCS bucket. Please refer to +# https://cloud.google.com/docs/authentication/production for more information +# about how to configure authentication. +# CLI flag: -.gcs.bucketname +[bucket_name: | default = ""] + +# Service account key content in JSON format, refer to +# https://cloud.google.com/iam/docs/creating-managing-service-account-keys for +# creation. +# CLI flag: -.gcs.service-account +[service_account: | default = ""] + +# The size of the buffer that GCS client for each PUT request. 0 to disable +# buffering. +# CLI flag: -.gcs.chunk-buffer-size +[chunk_buffer_size: | default = 0] + +# The duration after which the requests to GCS should be timed out. +# CLI flag: -.gcs.request-timeout +[request_timeout: | default = 0s] + +# Enable OpenCensus (OC) instrumentation for all requests. +# CLI flag: -.gcs.enable-opencensus +[enable_opencensus: | default = true] + +# Enable HTTP2 connections. +# CLI flag: -.gcs.enable-http2 +[enable_http2: | default = true] ``` -Log entries with timestamps that are after this earliest time are accepted. -Log entries further back in time return an out-of-order error. +### s3_storage_config + +The `s3_storage_config` block configures the connection to Amazon S3 object storage backend. 
The supported CLI flags `` used to reference this configuration block are: + +- `common` +- `ruler` + +  + +```yaml +# S3 endpoint URL with escaped Key and Secret encoded. If only region is +# specified as a host, proper endpoint will be deduced. Use +# inmemory:/// to use a mock in-memory implementation. +# CLI flag: -.storage.s3.url +[s3: ] + +# Set this to `true` to force the request to use path-style addressing. +# CLI flag: -.storage.s3.force-path-style +[s3forcepathstyle: | default = false] -For example, if `max_chunk_age` is 2 hours -and the stream `{foo="bar"}` has one entry at `8:00`, -Loki will accept data for that stream as far back in time as `7:00`. -If another log line is written at `10:00`, -Loki will accept data for that stream as far back in time as `9:00`. +# Comma separated list of bucket names to evenly distribute chunks over. +# Overrides any buckets specified in s3.url flag +# CLI flag: -.storage.s3.buckets +[bucketnames: | default = ""] + +# S3 Endpoint to connect to. +# CLI flag: -.storage.s3.endpoint +[endpoint: | default = ""] + +# AWS region to use. +# CLI flag: -.storage.s3.region +[region: | default = ""] + +# AWS Access Key ID +# CLI flag: -.storage.s3.access-key-id +[access_key_id: | default = ""] + +# AWS Secret Access Key +# CLI flag: -.storage.s3.secret-access-key +[secret_access_key: | default = ""] + +# Disable https on s3 connection. +# CLI flag: -.storage.s3.insecure +[insecure: | default = false] + +# Enable AWS Server Side Encryption [Deprecated: Use .sse instead. if +# s3.sse-encryption is enabled, it assumes .sse.type SSE-S3] +# CLI flag: -.storage.s3.sse-encryption +[sse_encryption: | default = false] + +http_config: + # The maximum amount of time an idle connection will be held open. + # CLI flag: -.storage.s3.http.idle-conn-timeout + [idle_conn_timeout: | default = 1m30s] + + # If non-zero, specifies the amount of time to wait for a server's response + # headers after fully writing the request. 
+ # CLI flag: -.storage.s3.http.response-header-timeout + [response_header_timeout: | default = 0s] + + # Set to true to skip verifying the certificate chain and hostname. + # CLI flag: -.storage.s3.http.insecure-skip-verify + [insecure_skip_verify: | default = false] + + # Path to the trusted CA file that signed the SSL certificate of the S3 + # endpoint. + # CLI flag: -.storage.s3.http.ca-file + [ca_file: | default = ""] + +# The signature version to use for authenticating against S3. Supported values +# are: v4, v2. +# CLI flag: -.storage.s3.signature-version +[signature_version: | default = "v4"] + +sse: + # Enable AWS Server Side Encryption. Supported values: SSE-KMS, SSE-S3. + # CLI flag: -.storage.s3.sse.type + [type: | default = ""] + + # KMS Key ID used to encrypt objects in S3 + # CLI flag: -.storage.s3.sse.kms-key-id + [kms_key_id: | default = ""] + + # KMS Encryption Context used for object encryption. It expects JSON formatted + # string. + # CLI flag: -.storage.s3.sse.kms-encryption-context + [kms_encryption_context: | default = ""] + +# Configures back off when S3 get Object. +backoff_config: + # Minimum backoff time when s3 get Object + # CLI flag: -.storage.s3.min-backoff + [min_period: | default = 100ms] + + # Maximum backoff time when s3 get Object + # CLI flag: -.storage.s3.max-backoff + [max_period: | default = 3s] + + # Maximum number of times to retry when s3 get Object + # CLI flag: -.storage.s3.max-retries + [max_retries: | default = 5] +``` + +### bos_storage_config + +The `bos_storage_config` block configures the connection to Baidu Object Storage (BOS) object storage backend. The supported CLI flags `` used to reference this configuration block are: + +- `common.storage` +- `ruler.storage` + +  + +```yaml +# Name of BOS bucket. +# CLI flag: -.bos.bucket-name +[bucket_name: | default = ""] + +# BOS endpoint to connect to. +# CLI flag: -.bos.endpoint +[endpoint: | default = "bj.bcebos.com"] + +# Baidu Cloud Engine (BCE) Access Key ID. 
+# CLI flag: -.bos.access-key-id
+[access_key_id: | default = ""]
+
+# Baidu Cloud Engine (BCE) Secret Access Key.
+# CLI flag: -.bos.secret-access-key
+[secret_access_key: | default = ""]
+```
+
+### swift_storage_config
+
+The `swift_storage_config` block configures the connection to OpenStack Object Storage (Swift) object storage backend. The supported CLI flags `` used to reference this configuration block are:
+
+- `common.storage`
+- `ruler.storage`
+
+&nbsp;
+
+```yaml
+# OpenStack Swift authentication API version. 0 to autodetect.
+# CLI flag: -.swift.auth-version
+[auth_version: | default = 0]
+
+# OpenStack Swift authentication URL
+# CLI flag: -.swift.auth-url
+[auth_url: | default = ""]
+
+# OpenStack Swift username.
+# CLI flag: -.swift.username
+[username: | default = ""]
+
+# OpenStack Swift user's domain name.
+# CLI flag: -.swift.user-domain-name
+[user_domain_name: | default = ""]
+
+# OpenStack Swift user's domain ID.
+# CLI flag: -.swift.user-domain-id
+[user_domain_id: | default = ""]
+
+# OpenStack Swift user ID.
+# CLI flag: -.swift.user-id
+[user_id: | default = ""]
+
+# OpenStack Swift API key.
+# CLI flag: -.swift.password
+[password: | default = ""]
+
+# OpenStack Swift user's domain ID.
+# CLI flag: -.swift.domain-id
+[domain_id: | default = ""]
+
+# OpenStack Swift user's domain name.
+# CLI flag: -.swift.domain-name
+[domain_name: | default = ""]
+
+# OpenStack Swift project ID (v2,v3 auth only).
+# CLI flag: -.swift.project-id
+[project_id: | default = ""]
+
+# OpenStack Swift project name (v2,v3 auth only).
+# CLI flag: -.swift.project-name
+[project_name: | default = ""]
+
+# ID of the OpenStack Swift project's domain (v3 auth only), only needed if it
+# differs from the user domain.
+# CLI flag: -.swift.project-domain-id
+[project_domain_id: | default = ""]
+
+# Name of the OpenStack Swift project's domain (v3 auth only), only needed if it
+# differs from the user domain. 
+# CLI flag: -.swift.project-domain-name +[project_domain_name: | default = ""] + +# OpenStack Swift Region to use (v2,v3 auth only). +# CLI flag: -.swift.region-name +[region_name: | default = ""] + +# Name of the OpenStack Swift container to put chunks in. +# CLI flag: -.swift.container-name +[container_name: | default = ""] + +# Max retries on requests error. +# CLI flag: -.swift.max-retries +[max_retries: | default = 3] + +# Time after which a connection attempt is aborted. +# CLI flag: -.swift.connect-timeout +[connect_timeout: | default = 10s] + +# Time after which an idle request is aborted. The timeout watchdog is reset +# each time some data is received, so the timeout triggers after X time no data +# is received on a request. +# CLI flag: -.swift.request-timeout +[request_timeout: | default = 5s] +``` + +### local_storage_config + +The `local_storage_config` block configures the usage of local file system as object storage backend. + +```yaml +# Directory to scan for rules +# CLI flag: -ruler.storage.local.directory +[directory: | default = ""] +``` \ No newline at end of file diff --git a/docs/sources/configuration/index.template b/docs/sources/configuration/index.template new file mode 100644 index 0000000000000..f1ded57cabffd --- /dev/null +++ b/docs/sources/configuration/index.template @@ -0,0 +1,98 @@ +--- +description: Describes parameters used to configure Grafana Loki. +menuTitle: Configuration parameters +title: Grafana Loki configuration parameters +weight: 500 +--- + +# Grafana Loki configuration parameters + +{{ .GeneratedFileWarning }} + +Grafana Loki is configured in a YAML file (usually referred to as `loki.yaml` ) +which contains information on the Loki server and its individual components, +depending on which mode Loki is launched in. + +Configuration examples can be found in the [Configuration Examples](examples/) document. 
+ +## Printing Loki Config At Runtime + +If you pass Loki the flag `-print-config-stderr` or `-log-config-reverse-order`, (or `-print-config-stderr=true`) +Loki will dump the entire config object it has created from the built-in defaults combined first with +overrides from config file, and second by overrides from flags. + +The result is the value for every config object in the Loki config struct, which is very large... + +Many values will not be relevant to your install such as storage configs which you are not using and which you did not define, +this is expected as every option has a default value if it is being used or not. + +This config is what Loki will use to run, it can be invaluable for debugging issues related to configuration and +is especially useful in making sure your config files and flags are being read and loaded properly. + +`-print-config-stderr` is nice when running Loki directly e.g. `./loki ` as you can get a quick output of the entire Loki config. + +`-log-config-reverse-order` is the flag we run Loki with in all our environments, the config entries are reversed so +that the order of configs reads correctly top to bottom when viewed in Grafana's Explore. + +## Reload At Runtime + +Promtail can reload its configuration at runtime. If the new configuration +is not well-formed, the changes will not be applied. +A configuration reload is triggered by sending a `SIGHUP` to the Promtail process or +sending a HTTP POST request to the `/reload` endpoint (when the `--server.enable-runtime-reload` flag is enabled). + +## Configuration File Reference + +To specify which configuration file to load, pass the `-config.file` flag at the +command line. The value can be a list of comma separated paths, then the first +file that exists will be used. +If no `-config.file` argument is specified, Loki will look up the `config.yaml` in the +current working directory and the `config/` subdirectory and try to use that. 
+ +The file is written in [YAML +format](https://en.wikipedia.org/wiki/YAML), defined by the scheme below. +Brackets indicate that a parameter is optional. For non-list parameters the +value is set to the specified default. + +### Use environment variables in the configuration + +> **Note:** This feature is only available in Loki 2.1+. + +You can use environment variable references in the configuration file to set values that need to be configurable during deployment. +To do this, pass `-config.expand-env=true` and use: + +``` +${VAR} +``` + +Where VAR is the name of the environment variable. + +Each variable reference is replaced at startup by the value of the environment variable. +The replacement is case-sensitive and occurs before the YAML file is parsed. +References to undefined variables are replaced by empty strings unless you specify a default value or custom error text. + +To specify a default value, use: + +``` +${VAR:-default_value} +``` + +Where default_value is the value to use if the environment variable is undefined. + +Pass the `-config.expand-env` flag at the command line to enable this way of setting configs. + +### Generic placeholders + +- `` : a boolean that can take the values `true` or `false` +- `` : any integer matching the regular expression `[1-9]+[0-9]*` +- `` : a duration matching the regular expression `[0-9]+(ns|us|µs|ms|[smh])` +- `` : a string matching the regular expression `[a-zA-Z_][a-zA-Z0-9_]*` +- `` : a string of unicode characters +- `` : a valid path relative to current working directory or an absolute path. 
+- `` : a valid string consisting of a hostname or IP followed by an optional port number +- `` : a string +- `` : a string that represents a secret, such as a password + +### Supported contents and default values of `loki.yaml` + +{{ .ConfigFile }} \ No newline at end of file diff --git a/go.mod b/go.mod index 930689e8f2d55..cd016a7a3978b 100644 --- a/go.mod +++ b/go.mod @@ -67,6 +67,7 @@ require ( github.com/klauspost/pgzip v1.2.5 github.com/mattn/go-ieproxy v0.0.1 github.com/minio/minio-go/v7 v7.0.32-0.20220706200439-ef3e45ed9cdb + github.com/mitchellh/go-wordwrap v1.0.0 github.com/mitchellh/mapstructure v1.5.0 github.com/modern-go/reflect2 v1.0.2 github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f diff --git a/go.sum b/go.sum index 89c5d9aa0964f..686956b050cd7 100644 --- a/go.sum +++ b/go.sum @@ -1024,6 +1024,7 @@ github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrk github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= +github.com/mitchellh/go-wordwrap v1.0.0 h1:6GlHJ/LTGMrIJbwgdqdl2eEH8o+Exx/0m8ir9Gns0u4= github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= github.com/mitchellh/hashstructure v0.0.0-20170609045927-2bca23e0e452/go.mod h1:QjSHrPWS+BGUVBYkbTZWEnOh3G1DutKwClXU/ABz6AQ= diff --git a/pkg/ingester/client/client.go b/pkg/ingester/client/client.go index bd6a1f78ebab2..0ce4ea9ae6031 100644 --- a/pkg/ingester/client/client.go +++ b/pkg/ingester/client/client.go @@ -41,9 +41,9 @@ type ClosableHealthAndIngesterClient struct { // Config for an ingester client. 
type Config struct { - PoolConfig clientpool.PoolConfig `yaml:"pool_config,omitempty"` + PoolConfig clientpool.PoolConfig `yaml:"pool_config,omitempty" doc:"description=Configures how connections are pooled."` RemoteTimeout time.Duration `yaml:"remote_timeout,omitempty"` - GRPCClientConfig grpcclient.Config `yaml:"grpc_client_config"` + GRPCClientConfig grpcclient.Config `yaml:"grpc_client_config" doc:"description=Configures how the gRPC connection to ingesters work as a client."` GRPCUnaryClientInterceptors []grpc.UnaryClientInterceptor `yaml:"-"` GRCPStreamClientInterceptors []grpc.StreamClientInterceptor `yaml:"-"` @@ -58,8 +58,8 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.GRPCClientConfig.RegisterFlagsWithPrefix("ingester.client", f) cfg.PoolConfig.RegisterFlags(f) - f.DurationVar(&cfg.PoolConfig.RemoteTimeout, "ingester.client.healthcheck-timeout", 1*time.Second, "Timeout for healthcheck rpcs.") - f.DurationVar(&cfg.RemoteTimeout, "ingester.client.timeout", 5*time.Second, "Timeout for ingester client RPCs.") + f.DurationVar(&cfg.PoolConfig.RemoteTimeout, "ingester.client.healthcheck-timeout", 1*time.Second, "How quickly a dead client will be removed after it has been detected to disappear. Set this to a value to allow time for a secondary health check to recover the missing client.") + f.DurationVar(&cfg.RemoteTimeout, "ingester.client.timeout", 5*time.Second, "The remote request timeout on the client side.") } // New returns a new ingester client. diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 4bdb46937ef31..5c262f3e28095 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -67,7 +67,7 @@ var ( // Config for an ingester. 
type Config struct { - LifecyclerConfig ring.LifecyclerConfig `yaml:"lifecycler,omitempty"` + LifecyclerConfig ring.LifecyclerConfig `yaml:"lifecycler,omitempty" doc:"description=Configures how the lifecycle of the ingester will operate and where it will register for discovery."` // Config for transferring chunks. MaxTransferRetries int `yaml:"max_transfer_retries,omitempty"` @@ -96,7 +96,7 @@ type Config struct { QueryStore bool `yaml:"-"` QueryStoreMaxLookBackPeriod time.Duration `yaml:"query_store_max_look_back_period"` - WAL WALConfig `yaml:"wal,omitempty"` + WAL WALConfig `yaml:"wal,omitempty" doc:"description=The ingester WAL (Write Ahead Log) records incoming logs and stores them on the local file systems in order to guarantee persistence of acknowledged data in the event of a process crash."` ChunkFilterer chunk.RequestChunkFilterer `yaml:"-"` // Optional wrapper that can be used to modify the behaviour of the ingester @@ -113,22 +113,22 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.WAL.RegisterFlags(f) f.IntVar(&cfg.MaxTransferRetries, "ingester.max-transfer-retries", 0, "Number of times to try and transfer chunks before falling back to flushing. 
If set to 0 or negative value, transfers are disabled.")
-	f.IntVar(&cfg.ConcurrentFlushes, "ingester.concurrent-flushes", 32, "")
-	f.DurationVar(&cfg.FlushCheckPeriod, "ingester.flush-check-period", 30*time.Second, "")
-	f.DurationVar(&cfg.FlushOpTimeout, "ingester.flush-op-timeout", 10*time.Minute, "")
-	f.DurationVar(&cfg.RetainPeriod, "ingester.chunks-retain-period", 0, "")
-	f.DurationVar(&cfg.MaxChunkIdle, "ingester.chunks-idle-period", 30*time.Minute, "")
-	f.IntVar(&cfg.BlockSize, "ingester.chunks-block-size", 256*1024, "")
-	f.IntVar(&cfg.TargetChunkSize, "ingester.chunk-target-size", 1572864, "") // 1.5 MB
+	f.IntVar(&cfg.ConcurrentFlushes, "ingester.concurrent-flushes", 32, "How many flushes can happen concurrently from each stream.")
+	f.DurationVar(&cfg.FlushCheckPeriod, "ingester.flush-check-period", 30*time.Second, "How often should the ingester see if there are any blocks to flush.")
+	f.DurationVar(&cfg.FlushOpTimeout, "ingester.flush-op-timeout", 10*time.Minute, "The timeout before a flush is cancelled.")
+	f.DurationVar(&cfg.RetainPeriod, "ingester.chunks-retain-period", 0, "How long chunks should be retained in-memory after they've been flushed.")
+	f.DurationVar(&cfg.MaxChunkIdle, "ingester.chunks-idle-period", 30*time.Minute, "How long chunks should sit in-memory with no updates before being flushed if they don't hit the max block size. This means that half-empty chunks will still be flushed after a certain period as long as they receive no further activity.")
+	f.IntVar(&cfg.BlockSize, "ingester.chunks-block-size", 256*1024, "The targeted _uncompressed_ size in bytes of a chunk block. When this threshold is exceeded the head block will be cut and compressed inside the chunk.")
+	f.IntVar(&cfg.TargetChunkSize, "ingester.chunk-target-size", 1572864, "A target _compressed_ size in bytes for chunks. This is a desired size not an exact size, chunks may be slightly bigger or significantly smaller if they get flushed for other reasons (e.g.
chunk_idle_period). A value of 0 creates chunks with a fixed 10 blocks, a non zero value will create chunks with a variable number of blocks to meet the target size.") // 1.5 MB f.StringVar(&cfg.ChunkEncoding, "ingester.chunk-encoding", chunkenc.EncGZIP.String(), fmt.Sprintf("The algorithm to use for compressing chunk. (%s)", chunkenc.SupportedEncoding())) - f.DurationVar(&cfg.SyncPeriod, "ingester.sync-period", 0, "How often to cut chunks to synchronize ingesters.") + f.DurationVar(&cfg.SyncPeriod, "ingester.sync-period", 0, "Parameters used to synchronize ingesters to cut chunks at the same moment. Sync period is used to roll over incoming entry to a new chunk. If chunk's utilization isn't high enough (eg. less than 50% when sync_min_utilization is set to 0.5), then this chunk rollover doesn't happen.") f.Float64Var(&cfg.SyncMinUtilization, "ingester.sync-min-utilization", 0, "Minimum utilization of chunk when doing synchronization.") - f.IntVar(&cfg.MaxReturnedErrors, "ingester.max-ignored-stream-errors", 10, "Maximum number of ignored stream errors to return. 0 to return all errors.") - f.DurationVar(&cfg.MaxChunkAge, "ingester.max-chunk-age", 2*time.Hour, "Maximum chunk age before flushing.") + f.IntVar(&cfg.MaxReturnedErrors, "ingester.max-ignored-stream-errors", 10, "The maximum number of errors a stream will report to the user when a push fails. 0 to make unlimited.") + f.DurationVar(&cfg.MaxChunkAge, "ingester.max-chunk-age", 2*time.Hour, "The maximum duration of a timeseries chunk in memory. If a timeseries runs for longer than this, the current chunk will be flushed to the store and a new chunk created.") f.DurationVar(&cfg.QueryStoreMaxLookBackPeriod, "ingester.query-store-max-look-back-period", 0, "How far back should an ingester be allowed to query the store for data, for use only with boltdb-shipper/tsdb index and filesystem object store. 
-1 for infinite.") - f.BoolVar(&cfg.AutoForgetUnhealthy, "ingester.autoforget-unhealthy", false, "Enable to remove unhealthy ingesters from the ring after `ring.kvstore.heartbeat_timeout`") + f.BoolVar(&cfg.AutoForgetUnhealthy, "ingester.autoforget-unhealthy", false, "Forget about ingesters having heartbeat timestamps older than `ring.kvstore.heartbeat_timeout`. This is equivalent to clicking on the `/ring` `forget` button in the UI: the ingester is removed from the ring. This is a useful setting when you are sure that an unhealthy node won't return. An example is when not using stateful sets or the equivalent. Use `memberlist.rejoin_interval` > 0 to handle network partition cases when using a memberlist.") f.IntVar(&cfg.IndexShards, "ingester.index-shards", index.DefaultIndexShards, "Shard factor used in the ingesters for the in process reverse index. This MUST be evenly divisible by ALL schema shard factors or Loki will not start.") - f.IntVar(&cfg.MaxDroppedStreams, "ingester.tailer.max-dropped-streams", 10, "Maximum number of dropped streams to keep in memory during tailing") + f.IntVar(&cfg.MaxDroppedStreams, "ingester.tailer.max-dropped-streams", 10, "Maximum number of dropped streams to keep in memory during tailing.") } func (cfg *Config) Validate() error { diff --git a/pkg/ingester/wal.go b/pkg/ingester/wal.go index 7fa9abe4b2ae8..0db8066bbd202 100644 --- a/pkg/ingester/wal.go +++ b/pkg/ingester/wal.go @@ -40,14 +40,14 @@ func (cfg *WALConfig) Validate() error { // RegisterFlags adds the flags required to config this to the given FlagSet func (cfg *WALConfig) RegisterFlags(f *flag.FlagSet) { - f.StringVar(&cfg.Dir, "ingester.wal-dir", "wal", "Directory to store the WAL and/or recover from WAL.") + f.StringVar(&cfg.Dir, "ingester.wal-dir", "wal", "Directory where the WAL data should be stored and/or recovered from.") f.BoolVar(&cfg.Enabled, "ingester.wal-enabled", true, "Enable writing of ingested data into WAL.") f.DurationVar(&cfg.CheckpointDuration, 
"ingester.checkpoint-duration", 5*time.Minute, "Interval at which checkpoints should be created.") f.BoolVar(&cfg.FlushOnShutdown, "ingester.flush-on-shutdown", false, "When WAL is enabled, should chunks be flushed to long-term storage on shutdown.") // Need to set default here cfg.ReplayMemoryCeiling = flagext.ByteSize(defaultCeiling) - f.Var(&cfg.ReplayMemoryCeiling, "ingester.wal-replay-memory-ceiling", "How much memory the WAL may use during replay before it needs to flush chunks to storage, i.e. 10GB. We suggest setting this to a high percentage (~75%) of available memory.") + f.Var(&cfg.ReplayMemoryCeiling, "ingester.wal-replay-memory-ceiling", "Maximum memory size the WAL may use during replay. After hitting this, it will flush data to storage before continuing. A unit suffix (KB, MB, GB) may be applied.") } // WAL interface allows us to have a no-op WAL when the WAL is disabled. diff --git a/pkg/logql/engine.go b/pkg/logql/engine.go index e8e34a14dd24e..c2c0115f4cb6d 100644 --- a/pkg/logql/engine.go +++ b/pkg/logql/engine.go @@ -113,7 +113,7 @@ type Querier interface { type EngineOpts struct { // TODO: remove this after next release. // Timeout for queries execution - Timeout time.Duration `yaml:"timeout"` + Timeout time.Duration `yaml:"timeout" doc:"deprecated"` // MaxLookBackPeriod is the maximum amount of time to look back for log lines. // only used for instant log queries. @@ -122,7 +122,7 @@ type EngineOpts struct { func (opts *EngineOpts) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { // TODO: remove this configuration after next release. - f.DurationVar(&opts.Timeout, prefix+".engine.timeout", DefaultEngineTimeout, "Timeout for query execution. Instead, rely only on querier.query-timeout. (deprecated)") + f.DurationVar(&opts.Timeout, prefix+".engine.timeout", DefaultEngineTimeout, "Use querier.query-timeout instead. 
Timeout for query execution.") f.DurationVar(&opts.MaxLookBackPeriod, prefix+".engine.max-lookback-period", 30*time.Second, "The maximum amount of time to look back for log lines. Used only for instant log queries.") } diff --git a/pkg/loki/common/common.go b/pkg/loki/common/common.go index 85173444f3ad9..4a9adcc0a2717 100644 --- a/pkg/loki/common/common.go +++ b/pkg/loki/common/common.go @@ -51,8 +51,10 @@ type Config struct { func (c *Config) RegisterFlags(f *flag.FlagSet) { throwaway := flag.NewFlagSet("throwaway", flag.PanicOnError) throwaway.IntVar(&c.ReplicationFactor, "common.replication-factor", 3, "How many ingesters incoming data should be replicated to.") - c.Storage.RegisterFlagsWithPrefix("common.storage", throwaway) - c.Ring.RegisterFlagsWithPrefix("", "collectors/", throwaway) + c.Storage.RegisterFlagsWithPrefix("common.storage.", f) + c.Storage.RegisterFlagsWithPrefix("common.storage.", throwaway) + c.Ring.RegisterFlagsWithPrefix("common.storage.", "collectors/", f) + c.Ring.RegisterFlagsWithPrefix("common.storage.", "collectors/", throwaway) // instance related flags. 
c.InstanceInterfaceNames = netutil.PrivateNetworkInterfacesWithFallback([]string{"eth0", "en0"}, util_log.Logger) @@ -74,12 +76,12 @@ type Storage struct { } func (s *Storage) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - s.S3.RegisterFlagsWithPrefix(prefix+".s3", f) - s.GCS.RegisterFlagsWithPrefix(prefix+".gcs", f) - s.Azure.RegisterFlagsWithPrefix(prefix+".azure", f) - s.Swift.RegisterFlagsWithPrefix(prefix+".swift", f) - s.BOS.RegisterFlagsWithPrefix(prefix+".bos", f) - s.FSConfig.RegisterFlagsWithPrefix(prefix+".filesystem", f) + s.S3.RegisterFlagsWithPrefix(prefix, f) + s.GCS.RegisterFlagsWithPrefix(prefix, f) + s.Azure.RegisterFlagsWithPrefix(prefix, f) + s.Swift.RegisterFlagsWithPrefix(prefix, f) + s.BOS.RegisterFlagsWithPrefix(prefix, f) + s.FSConfig.RegisterFlagsWithPrefix(prefix, f) s.Hedging.RegisterFlagsWithPrefix(prefix, f) } @@ -89,6 +91,6 @@ type FilesystemConfig struct { } func (cfg *FilesystemConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - f.StringVar(&cfg.ChunksDirectory, prefix+".chunk-directory", "", "Directory to store chunks in.") - f.StringVar(&cfg.RulesDirectory, prefix+".rules-directory", "", "Directory to store rules in.") + f.StringVar(&cfg.ChunksDirectory, prefix+"filesystem.chunk-directory", "", "Directory to store chunks in.") + f.StringVar(&cfg.RulesDirectory, prefix+"filesystem.rules-directory", "", "Directory to store rules in.") } diff --git a/pkg/loki/config_test.go b/pkg/loki/config_test.go index 9622cab69c5fa..73fc2cbb46aba 100644 --- a/pkg/loki/config_test.go +++ b/pkg/loki/config_test.go @@ -5,11 +5,11 @@ import ( "testing" "time" - "github.com/grafana/loki/pkg/ingester" - "github.com/grafana/loki/pkg/storage/config" - "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + + "github.com/grafana/loki/pkg/ingester" + "github.com/grafana/loki/pkg/storage/config" ) func TestCrossComponentValidation(t *testing.T) { diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index 
a6ea1095f0a4b..5db123fe92f14 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -28,7 +28,7 @@ import ( "github.com/grafana/loki/pkg/distributor" "github.com/grafana/loki/pkg/ingester" - "github.com/grafana/loki/pkg/ingester/client" + ingester_client "github.com/grafana/loki/pkg/ingester/client" "github.com/grafana/loki/pkg/logql" "github.com/grafana/loki/pkg/loki/common" "github.com/grafana/loki/pkg/lokifrontend" @@ -63,41 +63,43 @@ import ( type Config struct { Target flagext.StringSliceCSV `yaml:"target,omitempty"` AuthEnabled bool `yaml:"auth_enabled,omitempty"` - HTTPPrefix string `yaml:"http_prefix"` + HTTPPrefix string `yaml:"http_prefix" doc:"hidden"` BallastBytes int `yaml:"ballast_bytes"` // TODO(dannyk): Remove these config options before next release; they don't need to be configurable. // These are only here to allow us to test the new functionality. - UseBufferedLogger bool `yaml:"use_buffered_logger"` - UseSyncLogger bool `yaml:"use_sync_logger"` + UseBufferedLogger bool `yaml:"use_buffered_logger" doc:"hidden"` + UseSyncLogger bool `yaml:"use_sync_logger" doc:"hidden"` - LegacyReadTarget bool `yaml:"legacy_read_target,omitempty"` - - Common common.Config `yaml:"common,omitempty"` Server server.Config `yaml:"server,omitempty"` - InternalServer internalserver.Config `yaml:"internal_server,omitempty"` + InternalServer internalserver.Config `yaml:"internal_server,omitempty" doc:"hidden"` Distributor distributor.Config `yaml:"distributor,omitempty"` Querier querier.Config `yaml:"querier,omitempty"` - CompactorHTTPClient compactor_client.HTTPConfig `yaml:"compactor_client,omitempty"` - CompactorGRPCClient compactor_client.GRPCConfig `yaml:"compactor_grpc_client,omitempty"` - IngesterClient client.Config `yaml:"ingester_client,omitempty"` + QueryScheduler scheduler.Config `yaml:"query_scheduler"` + Frontend lokifrontend.Config `yaml:"frontend,omitempty"` + QueryRange queryrange.Config `yaml:"query_range,omitempty"` + Ruler ruler.Config 
`yaml:"ruler,omitempty"` + IngesterClient ingester_client.Config `yaml:"ingester_client,omitempty"` Ingester ingester.Config `yaml:"ingester,omitempty"` - StorageConfig storage.Config `yaml:"storage_config,omitempty"` IndexGateway indexgateway.Config `yaml:"index_gateway"` + StorageConfig storage.Config `yaml:"storage_config,omitempty"` ChunkStoreConfig config.ChunkStoreConfig `yaml:"chunk_store_config,omitempty"` SchemaConfig config.SchemaConfig `yaml:"schema_config,omitempty"` + CompactorConfig compactor.Config `yaml:"compactor,omitempty"` + CompactorHTTPClient compactor_client.HTTPConfig `yaml:"compactor_client,omitempty" doc:"hidden"` + CompactorGRPCClient compactor_client.GRPCConfig `yaml:"compactor_grpc_client,omitempty" doc:"hidden"` LimitsConfig validation.Limits `yaml:"limits_config,omitempty"` - TableManager index.TableManagerConfig `yaml:"table_manager,omitempty"` Worker worker.Config `yaml:"frontend_worker,omitempty"` - Frontend lokifrontend.Config `yaml:"frontend,omitempty"` - Ruler ruler.Config `yaml:"ruler,omitempty"` - QueryRange queryrange.Config `yaml:"query_range,omitempty"` - RuntimeConfig runtimeconfig.Config `yaml:"runtime_config,omitempty"` - MemberlistKV memberlist.KVConfig `yaml:"memberlist"` - Tracing tracing.Config `yaml:"tracing"` - CompactorConfig compactor.Config `yaml:"compactor,omitempty"` - QueryScheduler scheduler.Config `yaml:"query_scheduler"` - UsageReport usagestats.Config `yaml:"analytics"` + TableManager index.TableManagerConfig `yaml:"table_manager,omitempty"` + MemberlistKV memberlist.KVConfig `yaml:"memberlist" doc:"hidden"` + + RuntimeConfig runtimeconfig.Config `yaml:"runtime_config,omitempty"` + Tracing tracing.Config `yaml:"tracing"` + UsageReport usagestats.Config `yaml:"analytics"` + + LegacyReadTarget bool `yaml:"legacy_read_target,omitempty" doc:"hidden"` + + Common common.Config `yaml:"common,omitempty"` } // RegisterFlags registers flag. 
@@ -107,12 +109,24 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { // Set the default module list to 'all' c.Target = []string{All} - f.Var(&c.Target, "target", "Comma-separated list of Loki modules to load. "+ - "The alias 'all' can be used in the list to load a number of core modules and will enable single-binary mode. "+ - "The aliases 'read' and 'write' can be used to only run components related to the read path or write path, respectively.") - f.BoolVar(&c.AuthEnabled, "auth.enabled", true, "Set to false to disable auth.") - f.IntVar(&c.BallastBytes, "config.ballast-bytes", 0, "The amount of virtual memory to reserve as a ballast in order to optimise "+ - "garbage collection. Larger ballasts result in fewer garbage collection passes, reducing compute overhead at the cost of memory usage.") + f.Var(&c.Target, "target", + "A comma-separated list of components to run. "+ + "The default value 'all' runs Loki in single binary mode. "+ + "The value 'read' is an alias to run only read-path related components such as the querier and query-frontend, but all in the same process. "+ + "The value 'write' is an alias to run only write-path related components such as the distributor and compactor, but all in the same process. "+ + "Supported values: all, compactor, distributor, ingester, querier, query-scheduler, ingester-querier, query-frontend, index-gateway, ruler, table-manager, read, write. "+ + "A full list of available targets can be printed when running Loki with the '-list-targets' command line flag. ", + ) + f.BoolVar(&c.AuthEnabled, "auth.enabled", true, + "Enables authentication through the X-Scope-OrgID header, which must be present if true. "+ + "If false, the OrgID will always be set to 'fake'.", + ) + f.IntVar(&c.BallastBytes, "config.ballast-bytes", 0, + "The amount of virtual memory in bytes to reserve as ballast in order to optimize garbage collection. 
"+ + "Larger ballasts result in fewer garbage collection passes, reducing CPU overhead at the cost of heap size. "+ + "The ballast will not consume physical memory, because it is never read from. "+ + "It will, however, distort metrics, because it is counted as live memory. ", + ) f.BoolVar(&c.UseBufferedLogger, "log.use-buffered", true, "Uses a line-buffered logger to improve performance.") f.BoolVar(&c.UseSyncLogger, "log.use-sync", true, "Forces all lines logged to hold a mutex to serialize writes.") diff --git a/pkg/lokifrontend/config.go b/pkg/lokifrontend/config.go index fefd6f6c56568..2d0a3f4c4e679 100644 --- a/pkg/lokifrontend/config.go +++ b/pkg/lokifrontend/config.go @@ -30,6 +30,6 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.TLS.RegisterFlagsWithPrefix("frontend.tail-tls-config", f) f.BoolVar(&cfg.CompressResponses, "querier.compress-http-responses", false, "Compress HTTP responses.") - f.StringVar(&cfg.DownstreamURL, "frontend.downstream-url", "", "URL of downstream Prometheus.") + f.StringVar(&cfg.DownstreamURL, "frontend.downstream-url", "", "URL of downstream Loki.") f.StringVar(&cfg.TailProxyURL, "frontend.tail-proxy-url", "", "URL of querier for tail proxy.") } diff --git a/pkg/lokifrontend/frontend/v1/frontend.go b/pkg/lokifrontend/frontend/v1/frontend.go index 4a2d0e7dda3c1..5e430e9436181 100644 --- a/pkg/lokifrontend/frontend/v1/frontend.go +++ b/pkg/lokifrontend/frontend/v1/frontend.go @@ -37,7 +37,7 @@ type Config struct { // RegisterFlags adds the flags required to config this to the given FlagSet. 
func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.IntVar(&cfg.MaxOutstandingPerTenant, "querier.max-outstanding-requests-per-tenant", 2048, "Maximum number of outstanding requests per tenant per frontend; requests beyond this error with HTTP 429.") - f.DurationVar(&cfg.QuerierForgetDelay, "query-frontend.querier-forget-delay", 0, "If a querier disconnects without sending notification about graceful shutdown, the query-frontend will keep the querier in the tenant's shard until the forget delay has passed. This feature is useful to reduce the blast radius when shuffle-sharding is enabled.") + f.DurationVar(&cfg.QuerierForgetDelay, "query-frontend.querier-forget-delay", 0, "In the event a tenant is repeatedly sending queries that lead the querier to crash or be killed due to an out-of-memory error, the crashed querier will be disconnected from the query frontend and a new querier will be immediately assigned to the tenant’s shard. This invalidates the assumption that shuffle sharding can be used to reduce the impact on tenants. This option mitigates the impact by configuring a delay between when a querier disconnects because of a crash and when the crashed querier is actually removed from the tenant's shard.") } type Limits interface { diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index 326083bd5397d..71a3bea70569e 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -54,19 +54,19 @@ type Config struct { QueryStoreOnly bool `yaml:"query_store_only"` QueryIngesterOnly bool `yaml:"query_ingester_only"` MultiTenantQueriesEnabled bool `yaml:"multi_tenant_queries_enabled"` - QueryTimeout time.Duration `yaml:"query_timeout"` + QueryTimeout time.Duration `yaml:"query_timeout" doc:"hidden"` } // RegisterFlags register flags. 
func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.Engine.RegisterFlagsWithPrefix("querier", f) - f.DurationVar(&cfg.TailMaxDuration, "querier.tail-max-duration", 1*time.Hour, "Limit the duration for which live tailing request would be served") + f.DurationVar(&cfg.TailMaxDuration, "querier.tail-max-duration", 1*time.Hour, "Maximum duration for which the live tailing requests should be served.") f.DurationVar(&cfg.ExtraQueryDelay, "querier.extra-query-delay", 0, "Time to wait before sending more than the minimum successful query requests.") f.DurationVar(&cfg.QueryIngestersWithin, "querier.query-ingesters-within", 3*time.Hour, "Maximum lookback beyond which queries are not sent to ingester. 0 means all queries are sent to ingester.") - f.IntVar(&cfg.MaxConcurrent, "querier.max-concurrent", 10, "The maximum number of concurrent queries.") - f.BoolVar(&cfg.QueryStoreOnly, "querier.query-store-only", false, "Queriers should only query the store and not try to query any ingesters") - f.BoolVar(&cfg.QueryIngesterOnly, "querier.query-ingester-only", false, "Queriers should only query the ingesters and not try to query any store") - f.BoolVar(&cfg.MultiTenantQueriesEnabled, "querier.multi-tenant-queries-enabled", false, "Enable queries across multiple tenants. (Experimental)") + f.IntVar(&cfg.MaxConcurrent, "querier.max-concurrent", 10, "The maximum number of concurrent queries allowed.") + f.BoolVar(&cfg.QueryStoreOnly, "querier.query-store-only", false, "Only query the store, and not attempt any ingesters. This is useful for running a standalone querier pool operating only against stored data.") + f.BoolVar(&cfg.QueryIngesterOnly, "querier.query-ingester-only", false, "When true, queriers only query the ingesters, and not stored data. 
This is useful when the object store is unavailable.") + f.BoolVar(&cfg.MultiTenantQueriesEnabled, "querier.multi-tenant-queries-enabled", false, "When true, allow queries to span multiple tenants.") } // Validate validates the config. diff --git a/pkg/querier/queryrange/queryrangebase/roundtrip.go b/pkg/querier/queryrange/queryrangebase/roundtrip.go index 611582f2b9042..c5c701346ce6d 100644 --- a/pkg/querier/queryrange/queryrangebase/roundtrip.go +++ b/pkg/querier/queryrange/queryrangebase/roundtrip.go @@ -39,7 +39,7 @@ var PassthroughMiddleware = MiddlewareFunc(func(next Handler) Handler { // Config for query_range middleware chain. type Config struct { // Deprecated: SplitQueriesByInterval will be removed in the next major release - SplitQueriesByInterval time.Duration `yaml:"split_queries_by_interval"` + SplitQueriesByInterval time.Duration `yaml:"split_queries_by_interval" doc:"deprecated|description=Use -querier.split-queries-by-interval instead. CLI flag: -querier.split-queries-by-day. Split queries by day and execute in parallel."` AlignQueriesWithStep bool `yaml:"align_queries_with_step"` ResultsCacheConfig `yaml:"results_cache"` diff --git a/pkg/ruler/base/ruler.go b/pkg/ruler/base/ruler.go index 1b71a26347fff..fd0ceb89feda4 100644 --- a/pkg/ruler/base/ruler.go +++ b/pkg/ruler/base/ruler.go @@ -80,7 +80,7 @@ type Config struct { // This is used for template expansion in alerts; must be a valid URL. ExternalURL flagext.URLValue `yaml:"external_url"` // Labels to add to all alerts - ExternalLabels labels.Labels `yaml:"external_labels,omitempty"` + ExternalLabels labels.Labels `yaml:"external_labels,omitempty" doc:"description=Labels to add to all alerts."` // GRPC Client configuration. ClientTLSConfig grpcclient.Config `yaml:"ruler_client"` // How frequently to evaluate rules by default. @@ -88,7 +88,7 @@ type Config struct { // How frequently to poll for updated rules. 
PollInterval time.Duration `yaml:"poll_interval"` // Rule Storage and Polling configuration. - StoreConfig RuleStoreConfig `yaml:"storage" doc:"description=Deprecated. Use -ruler-storage.* CLI flags and their respective YAML config options instead."` + StoreConfig RuleStoreConfig `yaml:"storage" doc:"deprecated|description=Use -ruler-storage. CLI flags and their respective YAML config options instead."` // Path to store rule files for prom manager. RulePath string `yaml:"rule_path"` @@ -106,7 +106,7 @@ type Config struct { EnableSharding bool `yaml:"enable_sharding"` ShardingStrategy string `yaml:"sharding_strategy"` SearchPendingFor time.Duration `yaml:"search_pending_for"` - Ring RingConfig `yaml:"ring"` + Ring RingConfig `yaml:"ring" doc:"description=Ring used by Loki ruler. The CLI flags prefix for this block configuration is 'ruler.ring'."` FlushCheckPeriod time.Duration `yaml:"flush_period"` EnableAPI bool `yaml:"enable_api"` @@ -157,10 +157,10 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.ExternalURL.URL, _ = url.Parse("") // Must be non-nil f.Var(&cfg.ExternalURL, "ruler.external.url", "URL of alerts return path.") - f.DurationVar(&cfg.EvaluationInterval, "ruler.evaluation-interval", 1*time.Minute, "How frequently to evaluate rules") - f.DurationVar(&cfg.PollInterval, "ruler.poll-interval", 1*time.Minute, "How frequently to poll for rule changes") + f.DurationVar(&cfg.EvaluationInterval, "ruler.evaluation-interval", 1*time.Minute, "How frequently to evaluate rules.") + f.DurationVar(&cfg.PollInterval, "ruler.poll-interval", 1*time.Minute, "How frequently to poll for rule changes.") - f.StringVar(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "", "Comma-separated list of URL(s) of the Alertmanager(s) to send notifications to. Each Alertmanager URL is treated as a separate group in the configuration. 
Multiple Alertmanagers in HA per group can be supported by using DNS resolution via -ruler.alertmanager-discovery.") + f.StringVar(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "", "Comma-separated list of Alertmanager URLs to send notifications to. Each Alertmanager URL is treated as a separate group in the configuration. Multiple Alertmanagers in HA per group can be supported by using DNS resolution via '-ruler.alertmanager-discovery'.") f.BoolVar(&cfg.AlertmanagerDiscovery, "ruler.alertmanager-discovery", false, "Use DNS SRV records to discover Alertmanager hosts.") f.DurationVar(&cfg.AlertmanagerRefreshInterval, "ruler.alertmanager-refresh-interval", alertmanagerRefreshIntervalDefault, "How long to wait between refreshing DNS resolutions of Alertmanager hosts.") f.BoolVar(&cfg.AlertmanangerEnableV2API, "ruler.alertmanager-use-v2", false, "If enabled requests to Alertmanager will utilize the V2 API.") @@ -168,20 +168,20 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&cfg.NotificationTimeout, "ruler.notification-timeout", alertmanagerNotificationTimeoutDefault, "HTTP timeout duration when sending notifications to the Alertmanager.") f.DurationVar(&cfg.SearchPendingFor, "ruler.search-pending-for", 5*time.Minute, "Time to spend searching for a pending ruler when shutting down.") - f.BoolVar(&cfg.EnableSharding, "ruler.enable-sharding", false, "Distribute rule evaluation using ring backend") + f.BoolVar(&cfg.EnableSharding, "ruler.enable-sharding", false, "Distribute rule evaluation using ring backend.") f.StringVar(&cfg.ShardingStrategy, "ruler.sharding-strategy", util.ShardingStrategyDefault, fmt.Sprintf("The sharding strategy to use. 
Supported values are: %s.", strings.Join(supportedShardingStrategies, ", "))) f.DurationVar(&cfg.FlushCheckPeriod, "ruler.flush-period", 1*time.Minute, "Period with which to attempt to flush rule groups.") - f.StringVar(&cfg.RulePath, "ruler.rule-path", "/rules", "file path to store temporary rule files for the prometheus rule managers") - f.BoolVar(&cfg.EnableAPI, "experimental.ruler.enable-api", false, "Enable the ruler api") + f.StringVar(&cfg.RulePath, "ruler.rule-path", "/rules", "File path to store temporary rule files.") + f.BoolVar(&cfg.EnableAPI, "experimental.ruler.enable-api", false, "Enable the ruler api.") f.DurationVar(&cfg.OutageTolerance, "ruler.for-outage-tolerance", time.Hour, `Max time to tolerate outage for restoring "for" state of alert.`) - f.DurationVar(&cfg.ForGracePeriod, "ruler.for-grace-period", 10*time.Minute, `Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period.`) + f.DurationVar(&cfg.ForGracePeriod, "ruler.for-grace-period", 10*time.Minute, `Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than the grace period.`) f.DurationVar(&cfg.ResendDelay, "ruler.resend-delay", time.Minute, `Minimum amount of time to wait before resending an alert to Alertmanager.`) f.Var(&cfg.EnabledTenants, "ruler.enabled-tenants", "Comma separated list of tenants whose rules this ruler can evaluate. If specified, only these tenants will be handled by ruler, otherwise this ruler can process rules from all tenants. Subject to sharding.") f.Var(&cfg.DisabledTenants, "ruler.disabled-tenants", "Comma separated list of tenants whose rules this ruler cannot evaluate. If specified, a ruler that would normally pick the specified tenant(s) for processing will ignore them instead. 
Subject to sharding.") f.BoolVar(&cfg.EnableQueryStats, "ruler.query-stats-enabled", false, "Report the wall time for ruler queries to complete as a per user metric and as an info level log message.") - f.BoolVar(&cfg.DisableRuleGroupLabel, "ruler.disable-rule-group-label", false, "Disable the rule_group label on exported metrics") + f.BoolVar(&cfg.DisableRuleGroupLabel, "ruler.disable-rule-group-label", false, "Disable the rule_group label on exported metrics.") cfg.RingCheckPeriod = 5 * time.Second } diff --git a/pkg/ruler/base/ruler_ring.go b/pkg/ruler/base/ruler_ring.go index 497ba61c8e470..21498fcf56c4a 100644 --- a/pkg/ruler/base/ruler_ring.go +++ b/pkg/ruler/base/ruler_ring.go @@ -60,16 +60,16 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) { // Ring flags cfg.KVStore.RegisterFlagsWithPrefix("ruler.ring.", "rulers/", f) - f.DurationVar(&cfg.HeartbeatPeriod, "ruler.ring.heartbeat-period", 5*time.Second, "Period at which to heartbeat to the ring. 0 = disabled.") - f.DurationVar(&cfg.HeartbeatTimeout, "ruler.ring.heartbeat-timeout", time.Minute, "The heartbeat timeout after which rulers are considered unhealthy within the ring. 0 = never (timeout disabled).") + f.DurationVar(&cfg.HeartbeatPeriod, "ruler.ring.heartbeat-period", 5*time.Second, "Interval between heartbeats sent to the ring. 0 = disabled.") + f.DurationVar(&cfg.HeartbeatTimeout, "ruler.ring.heartbeat-timeout", time.Minute, "The heartbeat timeout after which ruler ring members are considered unhealthy within the ring. 
0 = never (timeout disabled).") // Instance flags cfg.InstanceInterfaceNames = netutil.PrivateNetworkInterfacesWithFallback([]string{"eth0", "en0"}, util_log.Logger) - f.Var((*flagext.StringSlice)(&cfg.InstanceInterfaceNames), "ruler.ring.instance-interface-names", "Name of network interface to read address from.") + f.Var((*flagext.StringSlice)(&cfg.InstanceInterfaceNames), "ruler.ring.instance-interface-names", "Name of network interface to read addresses from.") f.StringVar(&cfg.InstanceAddr, "ruler.ring.instance-addr", "", "IP address to advertise in the ring.") f.IntVar(&cfg.InstancePort, "ruler.ring.instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).") f.StringVar(&cfg.InstanceID, "ruler.ring.instance-id", hostname, "Instance ID to register in the ring.") - f.IntVar(&cfg.NumTokens, "ruler.ring.num-tokens", 128, "Number of tokens for each ruler.") + f.IntVar(&cfg.NumTokens, "ruler.ring.num-tokens", 128, "The number of tokens the lifecycler will generate and put into the ring if it joined without transferring tokens from another lifecycler.") } // ToLifecyclerConfig returns a LifecyclerConfig based on the ruler diff --git a/pkg/ruler/base/storage.go b/pkg/ruler/base/storage.go index 00ff572568860..7e9f3cf28b50f 100644 --- a/pkg/ruler/base/storage.go +++ b/pkg/ruler/base/storage.go @@ -33,12 +33,12 @@ type RuleStoreConfig struct { Type string `yaml:"type"` // Object Storage Configs - Azure azure.BlobStorageConfig `yaml:"azure"` - GCS gcp.GCSConfig `yaml:"gcs"` - S3 aws.S3Config `yaml:"s3"` - BOS baidubce.BOSStorageConfig `yaml:"bos"` - Swift openstack.SwiftConfig `yaml:"swift"` - Local local.Config `yaml:"local"` + Azure azure.BlobStorageConfig `yaml:"azure" doc:"description=Configures backend rule storage for Azure."` + GCS gcp.GCSConfig `yaml:"gcs" doc:"description=Configures backend rule storage for GCS."` + S3 aws.S3Config `yaml:"s3" doc:"description=Configures backend rule storage for S3."` + BOS 
baidubce.BOSStorageConfig `yaml:"bos" doc:"description=Configures backend rule storage for Baidu Object Storage (BOS)."` + Swift openstack.SwiftConfig `yaml:"swift" doc:"description=Configures backend rule storage for Swift."` + Local local.Config `yaml:"local" doc:"description=Configures backend rule storage for a local file system directory."` mock rulestore.RuleStore `yaml:"-"` } @@ -51,7 +51,7 @@ func (cfg *RuleStoreConfig) RegisterFlags(f *flag.FlagSet) { cfg.Swift.RegisterFlagsWithPrefix("ruler.storage.", f) cfg.Local.RegisterFlagsWithPrefix("ruler.storage.", f) cfg.BOS.RegisterFlagsWithPrefix("ruler.storage.", f) - f.StringVar(&cfg.Type, "ruler.storage.type", "", "Method to use for backend rule storage (configdb, azure, gcs, s3, swift, local)") + f.StringVar(&cfg.Type, "ruler.storage.type", "", "Method to use for backend rule storage (configdb, azure, gcs, s3, swift, local, bos)") } // Validate config and returns error on failure diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index b4d66e0137b82..41eda7ff17c88 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -21,7 +21,7 @@ type Config struct { // we cannot define this in the WAL config since it creates an import cycle WALCleaner cleaner.Config `yaml:"wal_cleaner,omitempty"` - RemoteWrite RemoteWriteConfig `yaml:"remote_write,omitempty"` + RemoteWrite RemoteWriteConfig `yaml:"remote_write,omitempty" doc:"description=Remote-write configuration to send rule samples to a Prometheus remote-write endpoint."` } func (c *Config) RegisterFlags(f *flag.FlagSet) { @@ -31,10 +31,10 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { c.WALCleaner.RegisterFlags(f) // TODO(owen-d, 3.0.0): remove deprecated experimental prefix in Cortex if they'll accept it. 
- f.BoolVar(&c.Config.EnableAPI, "ruler.enable-api", true, "Enable the ruler api") + f.BoolVar(&c.Config.EnableAPI, "ruler.enable-api", true, "Enable the ruler api.") } -// Validate overrides the embedded cortex variant which expects a cortex limits struct. Instead copy the relevant bits over. +// Validate overrides the embedded cortex variant which expects a cortex limits struct. Instead, copy the relevant bits over. func (c *Config) Validate() error { if err := c.StoreConfig.Validate(); err != nil { return fmt.Errorf("invalid ruler store config: %w", err) @@ -48,8 +48,8 @@ func (c *Config) Validate() error { } type RemoteWriteConfig struct { - Client *config.RemoteWriteConfig `yaml:"client,omitempty"` - Clients map[string]config.RemoteWriteConfig `yaml:"clients,omitempty"` + Client *config.RemoteWriteConfig `yaml:"client,omitempty" doc:"deprecated|description=Use 'clients' instead. Configure remote write client."` + Clients map[string]config.RemoteWriteConfig `yaml:"clients,omitempty" doc:"description=Configure remote write clients. A map with remote client id as key."` Enabled bool `yaml:"enabled"` ConfigRefreshPeriod time.Duration `yaml:"config_refresh_period"` } @@ -104,7 +104,7 @@ func (c *RemoteWriteConfig) Clone() (*RemoteWriteConfig, error) { // RegisterFlags adds the flags required to config this to the given FlagSet. func (c *RemoteWriteConfig) RegisterFlags(f *flag.FlagSet) { - f.BoolVar(&c.Enabled, "ruler.remote-write.enabled", false, "Remote-write recording rule samples to Prometheus-compatible remote-write receiver.") + f.BoolVar(&c.Enabled, "ruler.remote-write.enabled", false, "Enable remote-write functionality.") f.DurationVar(&c.ConfigRefreshPeriod, "ruler.remote-write.config-refresh-period", 10*time.Second, "Minimum period to wait between refreshing remote-write reconfigurations. 
This should be greater than or equal to -limits.per-user-override-period.") if c.Clients == nil { diff --git a/pkg/ruler/config/alertmanager.go b/pkg/ruler/config/alertmanager.go index c8d5721db795b..d30eec8df2be3 100644 --- a/pkg/ruler/config/alertmanager.go +++ b/pkg/ruler/config/alertmanager.go @@ -20,7 +20,7 @@ type AlertManagerConfig struct { // Enables the ruler notifier to use the Alertmananger V2 API. AlertmanangerEnableV2API bool `yaml:"enable_alertmanager_v2"` // Configuration for alert relabeling. - AlertRelabelConfigs []*relabel.Config `yaml:"alert_relabel_configs,omitempty"` + AlertRelabelConfigs []*relabel.Config `yaml:"alert_relabel_configs,omitempty" doc:"description=List of alert relabel configs."` // Capacity of the queue for notifications to be sent to the Alertmanager. NotificationQueueCapacity int `yaml:"notification_queue_capacity"` // HTTP timeout duration when sending notifications to the Alertmanager. diff --git a/pkg/ruler/storage/cleaner/cleaner.go b/pkg/ruler/storage/cleaner/cleaner.go index 15ef4d3aefeeb..b1ad8e76caa0b 100644 --- a/pkg/ruler/storage/cleaner/cleaner.go +++ b/pkg/ruler/storage/cleaner/cleaner.go @@ -1,6 +1,7 @@ // This directory was copied and adapted from https://github.com/grafana/agent/tree/main/pkg/metrics. // We cannot vendor the agent in since the agent vendors loki in, which would cause a cyclic dependency. // NOTE: many changes have been made to the original code for our use-case. + package cleaner import ( diff --git a/pkg/ruler/storage/cleaner/config.go b/pkg/ruler/storage/cleaner/config.go index 6b3ab931d48ca..592de6e74c96c 100644 --- a/pkg/ruler/storage/cleaner/config.go +++ b/pkg/ruler/storage/cleaner/config.go @@ -1,6 +1,7 @@ // This directory was copied and adapted from https://github.com/grafana/agent/tree/main/pkg/metrics. // We cannot vendor the agent in since the agent vendors loki in, which would cause a cyclic dependency. // NOTE: many changes have been made to the original code for our use-case. 
+ package cleaner import ( @@ -14,7 +15,7 @@ type Config struct { Period time.Duration `yaml:"period,omitempty"` } -func (c Config) RegisterFlags(f *flag.FlagSet) { +func (c *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&c.MinAge, "ruler.wal-cleaner.min-age", DefaultCleanupAge, "The minimum age of a WAL to consider for cleaning.") - f.DurationVar(&c.Period, "ruler.wal-cleaer.period", DefaultCleanupPeriod, "How often to run the WAL cleaner.") + f.DurationVar(&c.Period, "ruler.wal-cleaer.period", DefaultCleanupPeriod, "How often to run the WAL cleaner. 0 = disabled.") } diff --git a/pkg/ruler/storage/instance/instance.go b/pkg/ruler/storage/instance/instance.go index 20ad6ea2b2c16..2d2e511c12d5a 100644 --- a/pkg/ruler/storage/instance/instance.go +++ b/pkg/ruler/storage/instance/instance.go @@ -53,9 +53,9 @@ var ( // Config is a specific agent that runs within the overall Prometheus // agent. It has its own set of scrape_configs and remote_write rules. type Config struct { - Tenant string - Name string - RemoteWrite []*config.RemoteWriteConfig + Tenant string `doc:"hidden"` + Name string `doc:"hidden"` + RemoteWrite []*config.RemoteWriteConfig `doc:"hidden"` Dir string `yaml:"dir"` @@ -66,7 +66,7 @@ type Config struct { MinAge time.Duration `yaml:"min_age,omitempty"` MaxAge time.Duration `yaml:"max_age,omitempty"` - RemoteFlushDeadline time.Duration `yaml:"remote_flush_deadline,omitempty"` + RemoteFlushDeadline time.Duration `yaml:"remote_flush_deadline,omitempty" doc:"hidden"` } // UnmarshalYAML implements yaml.Unmarshaler. 
@@ -143,8 +143,8 @@ func (c *Config) Clone() (Config, error) { } func (c *Config) RegisterFlags(f *flag.FlagSet) { - f.StringVar(&c.Dir, "ruler.wal.dir", DefaultConfig.Dir, "Directory to store the WAL and/or recover from WAL.") - f.DurationVar(&c.TruncateFrequency, "ruler.wal.truncate-frequency", DefaultConfig.TruncateFrequency, "How often to run the WAL truncation.") + f.StringVar(&c.Dir, "ruler.wal.dir", DefaultConfig.Dir, "The directory in which to write tenant WAL files. Each tenant will have its own directory one level below this directory.") + f.DurationVar(&c.TruncateFrequency, "ruler.wal.truncate-frequency", DefaultConfig.TruncateFrequency, "Frequency with which to run the WAL truncation process.") f.DurationVar(&c.MinAge, "ruler.wal.min-age", DefaultConfig.MinAge, "Minimum age that samples must exist in the WAL before being truncated.") f.DurationVar(&c.MaxAge, "ruler.wal.max-age", DefaultConfig.MaxAge, "Maximum age that samples must exist in the WAL before being truncated.") } diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 7701d8166049d..8b0dc6c405b8a 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -124,14 +124,14 @@ type Config struct { GRPCClientConfig grpcclient.Config `yaml:"grpc_client_config" doc:"description=This configures the gRPC client used to report errors back to the query-frontend."` // Schedulers ring UseSchedulerRing bool `yaml:"use_scheduler_ring"` - SchedulerRing lokiutil.RingConfig `yaml:"scheduler_ring,omitempty"` + SchedulerRing lokiutil.RingConfig `yaml:"scheduler_ring,omitempty" doc:"description=The hash ring configuration. This option is required only if use_scheduler_ring is true."` } func (cfg *Config) RegisterFlags(f *flag.FlagSet) { - f.IntVar(&cfg.MaxOutstandingPerTenant, "query-scheduler.max-outstanding-requests-per-tenant", 100, "Maximum number of outstanding requests per tenant per query scheduler. 
In-flight requests above this limit will fail with HTTP response status code 429.") + f.IntVar(&cfg.MaxOutstandingPerTenant, "query-scheduler.max-outstanding-requests-per-tenant", 100, "Maximum number of outstanding requests per tenant per query-scheduler. In-flight requests above this limit will fail with HTTP response status code 429.") f.DurationVar(&cfg.QuerierForgetDelay, "query-scheduler.querier-forget-delay", 0, "If a querier disconnects without sending notification about graceful shutdown, the query-scheduler will keep the querier in the tenant's shard until the forget delay has passed. This feature is useful to reduce the blast radius when shuffle-sharding is enabled.") cfg.GRPCClientConfig.RegisterFlagsWithPrefix("query-scheduler.grpc-client-config", f) - f.BoolVar(&cfg.UseSchedulerRing, "query-scheduler.use-scheduler-ring", false, "Set to true to have the query scheduler create a ring and the frontend and frontend_worker use this ring to get the addresses of the query schedulers. If frontend_address and scheduler_address are not present in the config this value will be toggle by Loki to true") + f.BoolVar(&cfg.UseSchedulerRing, "query-scheduler.use-scheduler-ring", false, "Set to true to have the query schedulers create and place themselves in a ring. 
If no frontend_address or scheduler_address are present anywhere else in the configuration, Loki will toggle this value to true.") cfg.SchedulerRing.RegisterFlagsWithPrefix("query-scheduler.", "collectors/", f) } diff --git a/pkg/storage/chunk/client/aws/s3_storage_client.go b/pkg/storage/chunk/client/aws/s3_storage_client.go index aca2e2306e8fb..7b23cecbb7df6 100644 --- a/pkg/storage/chunk/client/aws/s3_storage_client.go +++ b/pkg/storage/chunk/client/aws/s3_storage_client.go @@ -76,7 +76,7 @@ type S3Config struct { HTTPConfig HTTPConfig `yaml:"http_config"` SignatureVersion string `yaml:"signature_version"` SSEConfig bucket_s3.SSEConfig `yaml:"sse"` - BackoffConfig backoff.Config `yaml:"backoff_config"` + BackoffConfig backoff.Config `yaml:"backoff_config" doc:"description=Configures back off when S3 get Object."` Inject InjectRequestMiddleware `yaml:"-"` } diff --git a/pkg/storage/chunk/client/baidubce/bos_storage_client.go b/pkg/storage/chunk/client/baidubce/bos_storage_client.go index ab84c69a3116a..38050cf9b08f3 100644 --- a/pkg/storage/chunk/client/baidubce/bos_storage_client.go +++ b/pkg/storage/chunk/client/baidubce/bos_storage_client.go @@ -51,10 +51,10 @@ func (cfg *BOSStorageConfig) RegisterFlags(f *flag.FlagSet) { // RegisterFlagsWithPrefix adds the flags required to config this to the given FlagSet func (cfg *BOSStorageConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - f.StringVar(&cfg.BucketName, prefix+"baidubce.bucket-name", "", "Name of BOS bucket.") - f.StringVar(&cfg.Endpoint, prefix+"baidubce.endpoint", DefaultEndpoint, "BOS endpoint to connect to.") - f.StringVar(&cfg.AccessKeyID, prefix+"baidubce.access-key-id", "", "Baidu Cloud Engine (BCE) Access Key ID.") - f.Var(&cfg.SecretAccessKey, prefix+"baidubce.secret-access-key", "Baidu Cloud Engine (BCE) Secret Access Key.") + f.StringVar(&cfg.BucketName, prefix+"bos.bucket-name", "", "Name of BOS bucket.") + f.StringVar(&cfg.Endpoint, prefix+"bos.endpoint", DefaultEndpoint, "BOS 
endpoint to connect to.") + f.StringVar(&cfg.AccessKeyID, prefix+"bos.access-key-id", "", "Baidu Cloud Engine (BCE) Access Key ID.") + f.Var(&cfg.SecretAccessKey, prefix+"bos.secret-access-key", "Baidu Cloud Engine (BCE) Secret Access Key.") } type BOSObjectStorage struct { diff --git a/pkg/storage/chunk/client/hedging/hedging.go b/pkg/storage/chunk/client/hedging/hedging.go index 705008227d0af..23c39fab0e4c1 100644 --- a/pkg/storage/chunk/client/hedging/hedging.go +++ b/pkg/storage/chunk/client/hedging/hedging.go @@ -53,9 +53,9 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { // RegisterFlagsWithPrefix registers flags with prefix. func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - f.IntVar(&cfg.UpTo, prefix+"hedge-requests-up-to", 2, "The maximun of hedge requests allowed.") + f.IntVar(&cfg.UpTo, prefix+"hedge-requests-up-to", 2, "The maximum of hedge requests allowed.") f.DurationVar(&cfg.At, prefix+"hedge-requests-at", 0, "If set to a non-zero value a second request will be issued at the provided duration. Default is 0 (disabled)") - f.IntVar(&cfg.MaxPerSecond, prefix+"hedge-max-per-second", 5, "The maximun of hedge requests allowed per seconds.") + f.IntVar(&cfg.MaxPerSecond, prefix+"hedge-max-per-second", 5, "The maximum of hedge requests allowed per second.") } // Client returns a hedged http client. diff --git a/pkg/storage/config/schema_config.go b/pkg/storage/config/schema_config.go index adbf668f3c574..3e95804e28424 100644 --- a/pkg/storage/config/schema_config.go +++ b/pkg/storage/config/schema_config.go @@ -88,9 +88,12 @@ func (t TableRanges) ConfigForTableNumber(tableNumber int64) *PeriodConfig { // PeriodConfig defines the schema and tables to use for a period of time type PeriodConfig struct { - From DayTime `yaml:"from"` // used when working with config - IndexType string `yaml:"store"` // type of index client to use. 
- ObjectType string `yaml:"object_store"` // type of object client to use; if omitted, defaults to store. + // used when working with config + From DayTime `yaml:"from" doc:"description=The date of the first day that index buckets should be created. Use a date in the past if this is your only period_config, otherwise use a date when you want the schema to switch over. In YYYY-MM-DD format, for example: 2018-04-15."` + // type of index client to use. + IndexType string `yaml:"store"` + // type of object client to use; if omitted, defaults to store. + ObjectType string `yaml:"object_store"` Schema string `yaml:"schema"` IndexTables PeriodicTableConfig `yaml:"index"` ChunkTables PeriodicTableConfig `yaml:"chunks"` diff --git a/pkg/storage/factory.go b/pkg/storage/factory.go index 8157b0c714ca5..00c224ae0b6c3 100644 --- a/pkg/storage/factory.go +++ b/pkg/storage/factory.go @@ -57,14 +57,14 @@ type StoreLimits interface { // Config chooses which storage client to use. type Config struct { - AWSStorageConfig aws.StorageConfig `yaml:"aws"` + AWSStorageConfig aws.StorageConfig `yaml:"aws" doc:"description=Configures storing chunks in AWS. Required options only required when aws is present."` AzureStorageConfig azure.BlobStorageConfig `yaml:"azure"` BOSStorageConfig baidubce.BOSStorageConfig `yaml:"bos"` - GCPStorageConfig gcp.Config `yaml:"bigtable"` - GCSConfig gcp.GCSConfig `yaml:"gcs"` - CassandraStorageConfig cassandra.Config `yaml:"cassandra"` - BoltDBConfig local.BoltDBConfig `yaml:"boltdb"` - FSConfig local.FSConfig `yaml:"filesystem"` + GCPStorageConfig gcp.Config `yaml:"bigtable" doc:"description=Configures storing indexes in Bigtable. Required fields only required when bigtable is defined in config."` + GCSConfig gcp.GCSConfig `yaml:"gcs" doc:"description=Configures storing chunks in GCS. 
Required fields only required when gcs is defined in config."` + CassandraStorageConfig cassandra.Config `yaml:"cassandra" doc:"description=Configures storing chunks and/or the index in Cassandra."` + BoltDBConfig local.BoltDBConfig `yaml:"boltdb" doc:"description=Configures storing index in BoltDB. Required fields only required when boltdb is present in the configuration."` + FSConfig local.FSConfig `yaml:"filesystem" doc:"description=Configures storing the chunks on the local file system. Required fields only required when filesystem is present in the configuration."` Swift openstack.SwiftConfig `yaml:"swift"` GrpcConfig grpc.Config `yaml:"grpc_store"` Hedging hedging.Config `yaml:"hedging"` @@ -76,7 +76,7 @@ type Config struct { MaxParallelGetChunk int `yaml:"max_parallel_get_chunk"` MaxChunkBatchSize int `yaml:"max_chunk_batch_size"` - BoltDBShipperConfig shipper.Config `yaml:"boltdb_shipper"` + BoltDBShipperConfig shipper.Config `yaml:"boltdb_shipper" doc:"description=Configures storing index in an Object Store (GCS/S3/Azure/Swift/Filesystem) in the form of boltdb files. Required fields only required when boltdb-shipper is defined in config."` TSDBShipperConfig indexshipper.Config `yaml:"tsdb_shipper"` // Config for using AsyncStore when using async index stores like `boltdb-shipper`. 
diff --git a/pkg/storage/stores/indexshipper/compactor/compactor.go b/pkg/storage/stores/indexshipper/compactor/compactor.go index ae79832e7efb0..dcd3244623605 100644 --- a/pkg/storage/stores/indexshipper/compactor/compactor.go +++ b/pkg/storage/stores/indexshipper/compactor/compactor.go @@ -86,20 +86,20 @@ type Config struct { DeleteMaxInterval time.Duration `yaml:"delete_max_interval"` MaxCompactionParallelism int `yaml:"max_compaction_parallelism"` UploadParallelism int `yaml:"upload_parallelism"` - CompactorRing util.RingConfig `yaml:"compactor_ring,omitempty"` - RunOnce bool `yaml:"_"` + CompactorRing util.RingConfig `yaml:"compactor_ring,omitempty" doc:"description=The hash ring configuration used by compactors to elect a single instance for running compactions. The CLI flags prefix for this block config is: boltdb.shipper.compactor.ring"` + RunOnce bool `yaml:"_" doc:"hidden"` TablesToCompact int `yaml:"tables_to_compact"` SkipLatestNTables int `yaml:"skip_latest_n_tables"` // Deprecated - DeletionMode string `yaml:"deletion_mode"` + DeletionMode string `yaml:"deletion_mode" doc:"deprecated|description=Use deletion_mode per tenant configuration instead."` } // RegisterFlags registers flags. func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.StringVar(&cfg.WorkingDirectory, "boltdb.shipper.compactor.working-directory", "", "Directory where files can be downloaded for compaction.") - f.StringVar(&cfg.SharedStoreType, "boltdb.shipper.compactor.shared-store", "", "Shared store used for storing boltdb files. Supported types: gcs, s3, azure, swift, filesystem") - f.StringVar(&cfg.SharedStoreKeyPrefix, "boltdb.shipper.compactor.shared-store.key-prefix", "index/", "Prefix to add to Object Keys in Shared store. Path separator(if any) should always be a '/'. Prefix should never start with a separator but should always end with it.") + f.StringVar(&cfg.SharedStoreType, "boltdb.shipper.compactor.shared-store", "", "The shared store used for storing boltdb files. 
Supported types: gcs, s3, azure, swift, filesystem, bos.") + f.StringVar(&cfg.SharedStoreKeyPrefix, "boltdb.shipper.compactor.shared-store.key-prefix", "index/", "Prefix to add to object keys in shared store. Path separator(if any) should always be a '/'. Prefix should never start with a separator but should always end with it.") f.DurationVar(&cfg.CompactionInterval, "boltdb.shipper.compactor.compaction-interval", 10*time.Minute, "Interval at which to re-run the compaction operation.") f.DurationVar(&cfg.ApplyRetentionInterval, "boltdb.shipper.compactor.apply-retention-interval", 0, "Interval at which to apply/enforce retention. 0 means run at same interval as compaction. If non-zero, it should always be a multiple of compaction interval.") f.DurationVar(&cfg.RetentionDeleteDelay, "boltdb.shipper.compactor.retention-delete-delay", 2*time.Hour, "Delay after which chunks will be fully deleted during retention.") @@ -110,15 +110,15 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&cfg.DeleteMaxInterval, "boltdb.shipper.compactor.delete-max-interval", 0, "Constrain the size of any single delete request. When a delete request > delete_max_interval is input, the request is sharded into smaller requests of no more than delete_max_interval") f.DurationVar(&cfg.RetentionTableTimeout, "boltdb.shipper.compactor.retention-table-timeout", 0, "The maximum amount of time to spend running retention and deletion on any given table in the index.") f.IntVar(&cfg.MaxCompactionParallelism, "boltdb.shipper.compactor.max-compaction-parallelism", 1, "Maximum number of tables to compact in parallel. While increasing this value, please make sure compactor has enough disk space allocated to be able to store and compact as many tables.") - f.IntVar(&cfg.UploadParallelism, "boltdb.shipper.compactor.upload-parallelism", 10, "Number of upload/remove operations to execute in parallel when finalizing a compaction. 
") + f.IntVar(&cfg.UploadParallelism, "boltdb.shipper.compactor.upload-parallelism", 10, "Number of upload/remove operations to execute in parallel when finalizing a compaction. NOTE: This setting is per compaction operation, which can be executed in parallel. The upper bound on the number of concurrent uploads is upload_parallelism * max_compaction_parallelism.") f.BoolVar(&cfg.RunOnce, "boltdb.shipper.compactor.run-once", false, "Run the compactor one time to cleanup and compact index files only (no retention applied)") // Deprecated flagext.DeprecatedFlag(f, "boltdb.shipper.compactor.deletion-mode", "Deprecated. This has been moved to the deletion_mode per tenant configuration.", util_log.Logger) cfg.CompactorRing.RegisterFlagsWithPrefix("boltdb.shipper.compactor.", "collectors/", f) - f.IntVar(&cfg.TablesToCompact, "boltdb.shipper.compactor.tables-to-compact", 0, "The number of most recent tables to compact in a single run. Default: all") - f.IntVar(&cfg.SkipLatestNTables, "boltdb.shipper.compactor.skip-latest-n-tables", 0, "Skip compacting latest N tables") + f.IntVar(&cfg.TablesToCompact, "boltdb.shipper.compactor.tables-to-compact", 0, "Number of tables that compactor will try to compact. Newer tables are chosen when this is less than the number of tables available.") + f.IntVar(&cfg.SkipLatestNTables, "boltdb.shipper.compactor.skip-latest-n-tables", 0, "Do not compact N latest tables. 
Together with -boltdb.shipper.compactor.run-once and -boltdb.shipper.compactor.tables-to-compact, this is useful when clearing compactor backlogs.") } diff --git a/pkg/storage/stores/shipper/indexgateway/config.go b/pkg/storage/stores/shipper/indexgateway/config.go index 80beaceb33dbf..ab5e0085c4d70 100644 --- a/pkg/storage/stores/shipper/indexgateway/config.go +++ b/pkg/storage/stores/shipper/indexgateway/config.go @@ -63,7 +63,7 @@ type RingCfg struct { // RegisterFlagsWithPrefix register all Index Gateway flags related to its ring but with a proper store prefix to avoid conflicts. func (cfg *RingCfg) RegisterFlags(prefix, storePrefix string, f *flag.FlagSet) { cfg.RegisterFlagsWithPrefix(prefix, storePrefix, f) - f.IntVar(&cfg.ReplicationFactor, "replication-factor", 3, "how many index gateway instances are assigned to each tenant") + f.IntVar(&cfg.ReplicationFactor, "replication-factor", 3, "How many index gateway instances are assigned to each tenant.") } // Config configures an Index Gateway server. @@ -75,11 +75,11 @@ type Config struct { // // In case it isn't explicitly set, it follows the same behavior of the other rings (ex: using the common configuration // section and the ingester configuration by default). - Ring RingCfg `yaml:"ring,omitempty"` + Ring RingCfg `yaml:"ring,omitempty" doc:"description=Defines the ring to be used by the index gateway servers and clients in case the servers are configured to run in 'ring' mode. In case this isn't configured, this block supports inheriting configuration from the common ring section."` } // RegisterFlags register all IndexGatewayClientConfig flags and all the flags of its subconfigs but with a prefix (ex: shipper). 
func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.Ring.RegisterFlags("index-gateway.", "collectors/", f) - f.StringVar((*string)(&cfg.Mode), "index-gateway.mode", SimpleMode.String(), "mode in which the index gateway client will be running") + f.StringVar((*string)(&cfg.Mode), "index-gateway.mode", SimpleMode.String(), "Defines in which mode the index gateway server will operate (default to 'simple'). It supports two modes:\n- 'simple': an index gateway server instance is responsible for handling, storing and returning requests for all indices for all tenants.\n- 'ring': an index gateway server instance is responsible for a subset of tenants instead of all tenants.") } diff --git a/pkg/util/ring_config.go b/pkg/util/ring_config.go index bc879139c8f8c..0db22d9616829 100644 --- a/pkg/util/ring_config.go +++ b/pkg/util/ring_config.go @@ -29,7 +29,7 @@ type RingConfig struct { ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"` // Instance details - InstanceID string `yaml:"instance_id"` + InstanceID string `yaml:"instance_id" doc:"default="` InstanceInterfaceNames []string `yaml:"instance_interface_names" doc:"default=[]"` InstancePort int `yaml:"instance_port"` InstanceAddr string `yaml:"instance_addr"` diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 46fdada9f589e..4f9a88b94121f 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -88,7 +88,7 @@ type Limits struct { RulerTenantShardSize int `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"` RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` - RulerAlertManagerConfig *config.AlertManagerConfig `yaml:"ruler_alertmanager_config" json:"ruler_alertmanager_config"` + RulerAlertManagerConfig *config.AlertManagerConfig `yaml:"ruler_alertmanager_config" 
json:"ruler_alertmanager_config" doc:"hidden"` // Store-gateway. StoreGatewayTenantShardSize int `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"` diff --git a/pkg/validation/limits.go b/pkg/validation/limits.go index 1294c303cd128..0dd98a022129a 100644 --- a/pkg/validation/limits.go +++ b/pkg/validation/limits.go @@ -103,57 +103,57 @@ type Limits struct { RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` - RulerAlertManagerConfig *ruler_config.AlertManagerConfig `yaml:"ruler_alertmanager_config" json:"ruler_alertmanager_config"` + RulerAlertManagerConfig *ruler_config.AlertManagerConfig `yaml:"ruler_alertmanager_config" json:"ruler_alertmanager_config" doc:"hidden"` // TODO(dannyk): add HTTP client overrides (basic auth / tls config, etc) // Ruler remote-write limits. // this field is the inversion of the general remote_write.enabled because the zero value of a boolean is false, // and if it were ruler_remote_write_enabled, it would be impossible to know if the value was explicitly set or default - RulerRemoteWriteDisabled bool `yaml:"ruler_remote_write_disabled" json:"ruler_remote_write_disabled"` + RulerRemoteWriteDisabled bool `yaml:"ruler_remote_write_disabled" json:"ruler_remote_write_disabled" doc:"description=Disable recording rules remote-write."` // deprecated use RulerRemoteWriteConfig instead. - RulerRemoteWriteURL string `yaml:"ruler_remote_write_url" json:"ruler_remote_write_url"` + RulerRemoteWriteURL string `yaml:"ruler_remote_write_url" json:"ruler_remote_write_url" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. 
The URL of the endpoint to send samples to."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteTimeout time.Duration `yaml:"ruler_remote_write_timeout" json:"ruler_remote_write_timeout"` + RulerRemoteWriteTimeout time.Duration `yaml:"ruler_remote_write_timeout" json:"ruler_remote_write_timeout" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. Timeout for requests to the remote write endpoint."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteHeaders OverwriteMarshalingStringMap `yaml:"ruler_remote_write_headers" json:"ruler_remote_write_headers"` + RulerRemoteWriteHeaders OverwriteMarshalingStringMap `yaml:"ruler_remote_write_headers" json:"ruler_remote_write_headers" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. Custom HTTP headers to be sent along with each remote write request. Be aware that headers that are set by Loki itself can't be overwritten."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteRelabelConfigs []*util.RelabelConfig `yaml:"ruler_remote_write_relabel_configs,omitempty" json:"ruler_remote_write_relabel_configs,omitempty"` + RulerRemoteWriteRelabelConfigs []*util.RelabelConfig `yaml:"ruler_remote_write_relabel_configs,omitempty" json:"ruler_remote_write_relabel_configs,omitempty" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. List of remote write relabel configurations."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteQueueCapacity int `yaml:"ruler_remote_write_queue_capacity" json:"ruler_remote_write_queue_capacity"` + RulerRemoteWriteQueueCapacity int `yaml:"ruler_remote_write_queue_capacity" json:"ruler_remote_write_queue_capacity" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. Number of samples to buffer per shard before we block reading of more samples from the WAL. 
It is recommended to have enough capacity in each shard to buffer several requests to keep throughput up while processing occasional slow remote requests."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteQueueMinShards int `yaml:"ruler_remote_write_queue_min_shards" json:"ruler_remote_write_queue_min_shards"` + RulerRemoteWriteQueueMinShards int `yaml:"ruler_remote_write_queue_min_shards" json:"ruler_remote_write_queue_min_shards" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. Minimum number of shards, i.e. amount of concurrency."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteQueueMaxShards int `yaml:"ruler_remote_write_queue_max_shards" json:"ruler_remote_write_queue_max_shards"` + RulerRemoteWriteQueueMaxShards int `yaml:"ruler_remote_write_queue_max_shards" json:"ruler_remote_write_queue_max_shards" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. Maximum number of shards, i.e. amount of concurrency."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteQueueMaxSamplesPerSend int `yaml:"ruler_remote_write_queue_max_samples_per_send" json:"ruler_remote_write_queue_max_samples_per_send"` + RulerRemoteWriteQueueMaxSamplesPerSend int `yaml:"ruler_remote_write_queue_max_samples_per_send" json:"ruler_remote_write_queue_max_samples_per_send" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. Maximum number of samples per send."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteQueueBatchSendDeadline time.Duration `yaml:"ruler_remote_write_queue_batch_send_deadline" json:"ruler_remote_write_queue_batch_send_deadline"` + RulerRemoteWriteQueueBatchSendDeadline time.Duration `yaml:"ruler_remote_write_queue_batch_send_deadline" json:"ruler_remote_write_queue_batch_send_deadline" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. 
Maximum time a sample will wait in buffer."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteQueueMinBackoff time.Duration `yaml:"ruler_remote_write_queue_min_backoff" json:"ruler_remote_write_queue_min_backoff"` + RulerRemoteWriteQueueMinBackoff time.Duration `yaml:"ruler_remote_write_queue_min_backoff" json:"ruler_remote_write_queue_min_backoff" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. Initial retry delay. Gets doubled for every retry."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteQueueMaxBackoff time.Duration `yaml:"ruler_remote_write_queue_max_backoff" json:"ruler_remote_write_queue_max_backoff"` + RulerRemoteWriteQueueMaxBackoff time.Duration `yaml:"ruler_remote_write_queue_max_backoff" json:"ruler_remote_write_queue_max_backoff" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. Maximum retry delay."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteQueueRetryOnRateLimit bool `yaml:"ruler_remote_write_queue_retry_on_ratelimit" json:"ruler_remote_write_queue_retry_on_ratelimit"` + RulerRemoteWriteQueueRetryOnRateLimit bool `yaml:"ruler_remote_write_queue_retry_on_ratelimit" json:"ruler_remote_write_queue_retry_on_ratelimit" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. Retry upon receiving a 429 status code from the remote-write storage. This is experimental and might change in the future."` // deprecated use RulerRemoteWriteConfig instead - RulerRemoteWriteSigV4Config *sigv4.SigV4Config `yaml:"ruler_remote_write_sigv4_config" json:"ruler_remote_write_sigv4_config"` + RulerRemoteWriteSigV4Config *sigv4.SigV4Config `yaml:"ruler_remote_write_sigv4_config" json:"ruler_remote_write_sigv4_config" doc:"deprecated|description=Use 'ruler_remote_write_config' instead. 
Configures AWS's Signature Verification 4 signing process to sign every remote write request."` - RulerRemoteWriteConfig map[string]config.RemoteWriteConfig `yaml:"ruler_remote_write_config,omitempty" json:"ruler_remote_write_config,omitempty"` + RulerRemoteWriteConfig map[string]config.RemoteWriteConfig `yaml:"ruler_remote_write_config,omitempty" json:"ruler_remote_write_config,omitempty" doc:"description=Configures global and per-tenant limits for remote write clients. A map with remote client id as key."` // Global and per tenant deletion mode DeletionMode string `yaml:"deletion_mode" json:"deletion_mode"` // Global and per tenant retention RetentionPeriod model.Duration `yaml:"retention_period" json:"retention_period"` - StreamRetention []StreamRetention `yaml:"retention_stream,omitempty" json:"retention_stream,omitempty"` + StreamRetention []StreamRetention `yaml:"retention_stream,omitempty" json:"retention_stream,omitempty" doc:"description=Per-stream retention to apply, if the retention is enable on the compactor side.\nExample:\n retention_stream:\n - selector: '{namespace=\"dev\"}'\n priority: 1\n period: 24h\n- selector: '{container=\"nginx\"}'\n priority: 1\n period: 744h\nSelector is a Prometheus labels matchers that will apply the 'period' retention only if the stream is matching. In case multiple stream are matching, the highest priority will be picked. If no rule is matched the 'retention_period' is used."` // Config for overrides, convenient if it goes here. 
PerTenantOverrideConfig string `yaml:"per_tenant_override_config" json:"per_tenant_override_config"` PerTenantOverridePeriod model.Duration `yaml:"per_tenant_override_period" json:"per_tenant_override_period"` // Deprecated - CompactorDeletionEnabled bool `yaml:"allow_deletes" json:"allow_deletes"` + CompactorDeletionEnabled bool `yaml:"allow_deletes" json:"allow_deletes" doc:"deprecated|description=Use deletion_mode per tenant configuration instead."` ShardStreams *shardstreams.Config `yaml:"shard_streams" json:"shard_streams"` @@ -169,51 +169,51 @@ type StreamRetention struct { // RegisterFlags adds the flags required to config this to the given FlagSet func (l *Limits) RegisterFlags(f *flag.FlagSet) { - f.StringVar(&l.IngestionRateStrategy, "distributor.ingestion-rate-limit-strategy", "global", "Whether the ingestion rate limit should be applied individually to each distributor instance (local), or evenly shared across the cluster (global).") + f.StringVar(&l.IngestionRateStrategy, "distributor.ingestion-rate-limit-strategy", "global", "Whether the ingestion rate limit should be applied individually to each distributor instance (local), or evenly shared across the cluster (global). The ingestion rate strategy cannot be overridden on a per-tenant basis.\n- local: enforces the limit on a per distributor basis. The actual effective rate limit will be N times higher, where N is the number of distributor replicas.\n- global: enforces the limit globally, configuring a per-distributor local rate limiter as 'ingestion_rate / N', where N is the number of distributor replicas (it's automatically adjusted if the number of replicas change). The global strategy requires the distributors to form their own ring, which is used to keep track of the current number of healthy distributor replicas.") f.Float64Var(&l.IngestionRateMB, "distributor.ingestion-rate-limit-mb", 4, "Per-user ingestion rate limit in sample size per second. 
Units in MB.") - f.Float64Var(&l.IngestionBurstSizeMB, "distributor.ingestion-burst-size-mb", 6, "Per-user allowed ingestion burst size (in sample size). Units in MB.") - f.Var(&l.MaxLineSize, "distributor.max-line-size", "maximum line length allowed, i.e. 100mb. Default (0) means unlimited.") - f.BoolVar(&l.MaxLineSizeTruncate, "distributor.max-line-size-truncate", false, "Whether to truncate lines that exceed max_line_size") - f.IntVar(&l.MaxLabelNameLength, "validation.max-length-label-name", 1024, "Maximum length accepted for label names") - f.IntVar(&l.MaxLabelValueLength, "validation.max-length-label-value", 2048, "Maximum length accepted for label value. This setting also applies to the metric name") + f.Float64Var(&l.IngestionBurstSizeMB, "distributor.ingestion-burst-size-mb", 6, "Per-user allowed ingestion burst size (in sample size). Units in MB. The burst size refers to the per-distributor local rate limiter even in the case of the 'global' strategy, and should be set at least to the maximum logs size expected in a single push request.") + f.Var(&l.MaxLineSize, "distributor.max-line-size", "Maximum line size on ingestion path. Example: 256kb. There is no limit when unset or set to 0.") + f.BoolVar(&l.MaxLineSizeTruncate, "distributor.max-line-size-truncate", false, "Whether to truncate lines that exceed max_line_size.") + f.IntVar(&l.MaxLabelNameLength, "validation.max-length-label-name", 1024, "Maximum length accepted for label names.") + f.IntVar(&l.MaxLabelValueLength, "validation.max-length-label-value", 2048, "Maximum length accepted for label value. 
This setting also applies to the metric name.") f.IntVar(&l.MaxLabelNamesPerSeries, "validation.max-label-names-per-series", 30, "Maximum number of label names per series.") - f.BoolVar(&l.RejectOldSamples, "validation.reject-old-samples", true, "Reject old samples.") - f.BoolVar(&l.IncrementDuplicateTimestamp, "validation.increment-duplicate-timestamps", false, "Increment the timestamp of a log line by one nanosecond in the future from a previous entry for the same stream with the same timestamp; guarantees sort order at query time.") + f.BoolVar(&l.RejectOldSamples, "validation.reject-old-samples", true, "Whether or not old samples will be rejected.") + f.BoolVar(&l.IncrementDuplicateTimestamp, "validation.increment-duplicate-timestamps", false, "Alter the log line timestamp during ingestion when the timestamp is the same as the previous entry for the same stream. When enabled, if a log line in a push request has the same timestamp as the previous line for the same stream, one nanosecond is added to the log line. This will preserve the received order of log lines with the exact same timestamp when they are queried, by slightly altering their stored timestamp. 
NOTE: This is imperfect, because Loki accepts out of order writes, and another push request for the same stream could contain duplicate timestamps to existing entries and they will not be incremented.") _ = l.RejectOldSamplesMaxAge.Set("7d") f.Var(&l.RejectOldSamplesMaxAge, "validation.reject-old-samples.max-age", "Maximum accepted sample age before rejecting.") _ = l.CreationGracePeriod.Set("10m") f.Var(&l.CreationGracePeriod, "validation.create-grace-period", "Duration which table will be created/deleted before/after it's needed; we won't accept sample from before this time.") f.BoolVar(&l.EnforceMetricName, "validation.enforce-metric-name", true, "Enforce every sample has a metric name.") - f.IntVar(&l.MaxEntriesLimitPerQuery, "validation.max-entries-limit", 5000, "Per-user entries limit per query") + f.IntVar(&l.MaxEntriesLimitPerQuery, "validation.max-entries-limit", 5000, "Maximum number of log entries that will be returned for a query.") f.IntVar(&l.MaxLocalStreamsPerUser, "ingester.max-streams-per-user", 0, "Maximum number of active streams per user, per ingester. 0 to disable.") - f.IntVar(&l.MaxGlobalStreamsPerUser, "ingester.max-global-streams-per-user", 5000, "Maximum number of active streams per user, across the cluster. 0 to disable.") - f.BoolVar(&l.UnorderedWrites, "ingester.unordered-writes", true, "Allow out of order writes.") + f.IntVar(&l.MaxGlobalStreamsPerUser, "ingester.max-global-streams-per-user", 5000, "Maximum number of active streams per user, across the cluster. 0 to disable. 
When the global limit is enabled, each ingester is configured with a dynamic local limit based on the replication factor and the current number of healthy ingesters, and is kept updated whenever the number of ingesters change.") + f.BoolVar(&l.UnorderedWrites, "ingester.unordered-writes", true, "When true, out-of-order writes are accepted.") _ = l.PerStreamRateLimit.Set(strconv.Itoa(defaultPerStreamRateLimit)) f.Var(&l.PerStreamRateLimit, "ingester.per-stream-rate-limit", "Maximum byte rate per second per stream, also expressible in human readable forms (1MB, 256KB, etc).") _ = l.PerStreamRateLimitBurst.Set(strconv.Itoa(defaultPerStreamBurstLimit)) - f.Var(&l.PerStreamRateLimitBurst, "ingester.per-stream-rate-limit-burst", "Maximum burst bytes per stream, also expressible in human readable forms (1MB, 256KB, etc).") + f.Var(&l.PerStreamRateLimitBurst, "ingester.per-stream-rate-limit-burst", "Maximum burst bytes per stream, also expressible in human readable forms (1MB, 256KB, etc). This is how far above the rate limit a stream can 'burst' before the stream is limited.") f.IntVar(&l.MaxChunksPerQuery, "store.query-chunk-limit", 2e6, "Maximum number of chunks that can be fetched in a single query.") _ = l.MaxQueryLength.Set("721h") - f.Var(&l.MaxQueryLength, "store.max-query-length", "Limit to length of chunk store queries, 0 to disable.") - f.IntVar(&l.MaxQuerySeries, "querier.max-query-series", 500, "Limit the maximum of unique series returned by a metric query. When the limit is reached an error is returned.") + f.Var(&l.MaxQueryLength, "store.max-query-length", "The limit to length of chunk store queries. 0 to disable.") + f.IntVar(&l.MaxQuerySeries, "querier.max-query-series", 500, "Limit the maximum of unique series that is returned by a metric query. 
When the limit is reached an error is returned.") _ = l.QueryTimeout.Set(DefaultPerTenantQueryTimeout) f.Var(&l.QueryTimeout, "querier.query-timeout", "Timeout when querying backends (ingesters or storage) during the execution of a query request. If a specific per-tenant timeout is used, this timeout is ignored.") _ = l.MaxQueryLookback.Set("0s") - f.Var(&l.MaxQueryLookback, "querier.max-query-lookback", "Limit how long back data (series and metadata) can be queried, up until duration ago. This limit is enforced in the query-frontend, querier and ruler. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.") - f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 32, "Maximum number of queries will be scheduled in parallel by the frontend.") + f.Var(&l.MaxQueryLookback, "querier.max-query-lookback", "Limit how far back in time series data and metadata can be queried, up until lookback duration ago. This limit is enforced in the query frontend, the querier and the ruler. If the requested time range is outside the allowed range, the request will not fail, but will be modified to only query data within the allowed time range. 
The default value of 0 does not set a limit.") + f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 32, "Maximum number of queries that will be scheduled in parallel by the frontend.") f.IntVar(&l.TSDBMaxQueryParallelism, "querier.tsdb-max-query-parallelism", 512, "Maximum number of queries will be scheduled in parallel by the frontend for TSDB schemas.") f.IntVar(&l.CardinalityLimit, "store.cardinality-limit", 1e5, "Cardinality limit for index queries.") - f.IntVar(&l.MaxStreamsMatchersPerQuery, "querier.max-streams-matcher-per-query", 1000, "Limit the number of streams matchers per query") - f.IntVar(&l.MaxConcurrentTailRequests, "querier.max-concurrent-tail-requests", 10, "Limit the number of concurrent tail requests") + f.IntVar(&l.MaxStreamsMatchersPerQuery, "querier.max-streams-matcher-per-query", 1000, "Maximum number of stream matchers per query.") + f.IntVar(&l.MaxConcurrentTailRequests, "querier.max-concurrent-tail-requests", 10, "Maximum number of concurrent tail requests.") _ = l.MinShardingLookback.Set("0s") - f.Var(&l.MinShardingLookback, "frontend.min-sharding-lookback", "Limit the sharding time range.Queries with time range that fall between now and now minus the sharding lookback are not sharded. 0 to disable.") + f.Var(&l.MinShardingLookback, "frontend.min-sharding-lookback", "Limit queries that can be sharded. Queries within the time range of now and now minus this sharding lookback are not sharded. The default value of 0s disables the lookback, causing sharding of all queries at all times.") _ = l.MaxCacheFreshness.Set("1m") f.Var(&l.MaxCacheFreshness, "frontend.max-cache-freshness", "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.") @@ -227,17 +227,17 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 0, "Maximum number of rules per rule group per-tenant. 
0 to disable.") f.IntVar(&l.RulerMaxRuleGroupsPerTenant, "ruler.max-rule-groups-per-tenant", 0, "Maximum number of rule groups per-tenant. 0 to disable.") - f.StringVar(&l.PerTenantOverrideConfig, "limits.per-user-override-config", "", "File name of per-user overrides.") + f.StringVar(&l.PerTenantOverrideConfig, "limits.per-user-override-config", "", "Feature renamed to 'runtime configuration', flag deprecated in favor of -runtime-config.file (runtime_config.file in YAML).") _ = l.RetentionPeriod.Set("744h") - f.Var(&l.RetentionPeriod, "store.retention", "How long before chunks will be deleted from the store. (requires compactor retention enabled).") + f.Var(&l.RetentionPeriod, "store.retention", "Retention to apply for the store, if the retention is enabled on the compactor side.") _ = l.PerTenantOverridePeriod.Set("10s") - f.Var(&l.PerTenantOverridePeriod, "limits.per-user-override-period", "Period with this to reload the overrides.") + f.Var(&l.PerTenantOverridePeriod, "limits.per-user-override-period", "Feature renamed to 'runtime configuration'; flag deprecated in favor of -runtime-config.reload-period (runtime_config.period in YAML).") _ = l.QuerySplitDuration.Set("30m") - f.Var(&l.QuerySplitDuration, "querier.split-queries-by-interval", "Split queries by an interval and execute in parallel, 0 disables it. This also determines how cache keys are chosen when result caching is enabled") + f.Var(&l.QuerySplitDuration, "querier.split-queries-by-interval", "Split queries by a time interval and execute in parallel. The value 0 disables splitting by time. This also determines how cache keys are chosen when result caching is enabled.") - f.StringVar(&l.DeletionMode, "compactor.deletion-mode", "filter-and-delete", "Set the deletion mode for the user. Options are: disabled, filter-only, and filter-and-delete") + f.StringVar(&l.DeletionMode, "compactor.deletion-mode", "filter-and-delete", "Deletion mode. Can be one of 'disabled', 'filter-only', or 'filter-and-delete'. 
When set to 'filter-only' or 'filter-and-delete', and if retention_enabled is true, then the log entry deletion API endpoints are available.") // Deprecated dskit_flagext.DeprecatedFlag(f, "compactor.allow-deletes", "Deprecated. Instead, see compactor.deletion-mode which is another per tenant configuration", util_log.Logger) diff --git a/tools/doc-generator/main.go b/tools/doc-generator/main.go new file mode 100644 index 0000000000000..adc10051942f6 --- /dev/null +++ b/tools/doc-generator/main.go @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/tools/doc-generator/main.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Cortex Authors. + +package main + +import ( + "flag" + "fmt" + "os" + "path/filepath" + "strings" + "text/template" + + "github.com/grafana/loki/pkg/loki" + "github.com/grafana/loki/tools/doc-generator/parse" +) + +const ( + maxLineWidth = 80 + tabWidth = 2 +) + +func removeFlagPrefix(block *parse.ConfigBlock, prefix string) { + for _, entry := range block.Entries { + switch entry.Kind { + case parse.KindBlock: + // Skip root blocks + if !entry.Root { + removeFlagPrefix(entry.Block, prefix) + } + case parse.KindField: + if strings.HasPrefix(entry.FieldFlag, prefix) { + entry.FieldFlag = "" + entry.FieldFlag[len(prefix):] + } + } + } +} + +func annotateFlagPrefix(blocks []*parse.ConfigBlock) { + // Find duplicated blocks + groups := map[string][]*parse.ConfigBlock{} + for _, block := range blocks { + groups[block.Name] = append(groups[block.Name], block) + } + + // For each duplicated block, we need to fix the CLI flags, because + // in the documentation each block will be displayed only once but + // since they're duplicated they will have a different CLI flag + // prefix, which we want to correctly document. 
+ for _, group := range groups { + if len(group) == 1 { + continue + } + + // We need to find the CLI flags prefix of each config block. To do it, + // we pick the first entry from each config block and then find the + // different prefix across all of them. + var flags []string + for _, block := range group { + for _, entry := range block.Entries { + if entry.Kind == parse.KindField { + if len(entry.FieldFlag) > 0 { + flags = append(flags, entry.FieldFlag) + } + break + } + } + } + + var allPrefixes []string + for i, prefix := range parse.FindFlagsPrefix(flags) { + if len(prefix) > 0 { + group[i].FlagsPrefix = prefix + allPrefixes = append(allPrefixes, prefix) + } + } + + // Store all found prefixes into each block so that when we generate the + // markdown we also know which are all the prefixes for each root block. + for _, block := range group { + block.FlagsPrefixes = allPrefixes + } + } + + // Finally, we can remove the CLI flags prefix from the blocks + // which have one annotated. + for _, block := range blocks { + if block.FlagsPrefix != "" { + removeFlagPrefix(block, block.FlagsPrefix) + } + } +} + +func generateBlocksMarkdown(blocks []*parse.ConfigBlock) string { + md := &markdownWriter{} + md.writeConfigDoc(blocks) + return md.string() +} + +func generateBlockMarkdown(blocks []*parse.ConfigBlock, blockName, fieldName string) string { + // Look for the requested block. + for _, block := range blocks { + if block.Name != blockName { + continue + } + + md := &markdownWriter{} + + // Wrap the root block with another block, so that we can show the name of the + // root field containing the block specs. + md.writeConfigBlock(&parse.ConfigBlock{ + Name: blockName, + Desc: block.Desc, + Entries: []*parse.ConfigEntry{ + { + Kind: parse.KindBlock, + Name: fieldName, + Required: true, + Block: block, + BlockDesc: "", + Root: false, + }, + }, + }) + + return md.string() + } + + // If the block has not been found, we return an empty string. 
+ return "" +} + +func main() { + // Parse the generator flags. + flag.Parse() + if flag.NArg() != 1 { + fmt.Fprintf(os.Stderr, "Usage: doc-generator template-file") + os.Exit(1) + } + + templatePath := flag.Arg(0) + + // In order to match YAML config fields with CLI flags, we map + // the memory address of the CLI flag variables and match them with + // the config struct fields' addresses. + cfg := &loki.Config{} + flags := parse.Flags(cfg) + + // Parse the config, mapping each config field with the related CLI flag. + blocks, err := parse.Config(cfg, flags, parse.RootBlocks) + if err != nil { + fmt.Fprintf(os.Stderr, "An error occurred while generating the doc: %s\n", err.Error()) + os.Exit(1) + } + + // Annotate the flags prefix for each root block, and remove the + // prefix wherever encountered in the config blocks. + annotateFlagPrefix(blocks) + + // Generate documentation markdown. + data := struct { + ConfigFile string + GeneratedFileWarning string + }{ + GeneratedFileWarning: "", + ConfigFile: generateBlocksMarkdown(blocks), + } + + // Load the template file. + tpl := template.New(filepath.Base(templatePath)) + + tpl, err = tpl.ParseFiles(templatePath) + if err != nil { + fmt.Fprintf(os.Stderr, "An error occurred while loading the template %s: %s\n", templatePath, err.Error()) + os.Exit(1) + } + + // Execute the template to inject generated doc. 
+ if err := tpl.Execute(os.Stdout, data); err != nil { + fmt.Fprintf(os.Stderr, "An error occurred while executing the template %s: %s\n", templatePath, err.Error()) + os.Exit(1) + } +} diff --git a/tools/doc-generator/parse/parser.go b/tools/doc-generator/parse/parser.go new file mode 100644 index 0000000000000..37562a0413201 --- /dev/null +++ b/tools/doc-generator/parse/parser.go @@ -0,0 +1,645 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/tools/doc-generator/parser.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Cortex Authors. + +package parse + +import ( + "flag" + "fmt" + "net/url" + "reflect" + "strings" + "time" + "unicode" + + "github.com/grafana/dskit/flagext" + "github.com/grafana/regexp" + "github.com/pkg/errors" + "github.com/prometheus/common/model" + prometheus_config "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/model/relabel" + "github.com/weaveworks/common/logging" + + "github.com/grafana/loki/pkg/ruler/util" + storage_config "github.com/grafana/loki/pkg/storage/config" + util_validation "github.com/grafana/loki/pkg/util/validation" + "github.com/grafana/loki/pkg/validation" +) + +var ( + yamlFieldNameParser = regexp.MustCompile("^[^,]+") + yamlFieldInlineParser = regexp.MustCompile("^[^,]*,inline$") +) + +// ExamplerConfig can be implemented by configs to provide examples. +// If string is non-empty, it will be added as comment. +// If yaml value is non-empty, it will be marshaled as yaml under the same key as it would appear in config. 
+type ExamplerConfig interface { + ExampleDoc() (comment string, yaml interface{}) +} + +type FieldExample struct { + Comment string + Yaml interface{} +} + +type ConfigBlock struct { + Name string + Desc string + Entries []*ConfigEntry + FlagsPrefix string + FlagsPrefixes []string +} + +func (b *ConfigBlock) Add(entry *ConfigEntry) { + b.Entries = append(b.Entries, entry) +} + +type EntryKind string + +const ( + fieldString = "string" + fieldRelabelConfig = "relabel_config..." +) + +const ( + KindBlock EntryKind = "block" + KindField EntryKind = "field" + KindSlice EntryKind = "slice" + KindMap EntryKind = "map" +) + +type ConfigEntry struct { + Kind EntryKind + Name string + Required bool + + // In case the Kind is KindBlock + Block *ConfigBlock + BlockDesc string + Root bool + + // In case the Kind is KindField + FieldFlag string + FieldDesc string + FieldType string + FieldDefault string + FieldExample *FieldExample + + // In case the Kind is KindMap or KindSlice + Element *ConfigBlock +} + +func (e ConfigEntry) Description() string { + return e.FieldDesc +} + +type RootBlock struct { + Name string + Desc string + StructType reflect.Type +} + +func Flags(cfg flagext.Registerer) map[uintptr]*flag.Flag { + fs := flag.NewFlagSet("", flag.PanicOnError) + cfg.RegisterFlags(fs) + + flags := map[uintptr]*flag.Flag{} + fs.VisitAll(func(f *flag.Flag) { + // Skip deprecated flags + if f.Value.String() == "deprecated" { + return + } + + ptr := reflect.ValueOf(f.Value).Pointer() + flags[ptr] = f + }) + + return flags +} + +// Config returns a slice of ConfigBlocks. The first ConfigBlock is a recursively expanded cfg. +// The remaining entries in the slice are all (root or not) ConfigBlocks. 
+func Config(cfg interface{}, flags map[uintptr]*flag.Flag, rootBlocks []RootBlock) ([]*ConfigBlock, error) { + return config(nil, cfg, flags, rootBlocks) +} + +func config(block *ConfigBlock, cfg interface{}, flags map[uintptr]*flag.Flag, rootBlocks []RootBlock) ([]*ConfigBlock, error) { + var blocks []*ConfigBlock + + // If the input block is nil it means we're generating the doc for the top-level block + if block == nil { + block = &ConfigBlock{} + blocks = append(blocks, block) + } + + // The input config is expected to be addressable. + if reflect.TypeOf(cfg).Kind() != reflect.Ptr { + t := reflect.TypeOf(cfg) + return nil, fmt.Errorf("%s is a %s while a %s is expected", t, t.Kind(), reflect.Ptr) + } + + // The input config is expected to be a pointer to struct. + v := reflect.ValueOf(cfg).Elem() + t := v.Type() + + if v.Kind() != reflect.Struct { + return nil, fmt.Errorf("%s is a %s while a %s is expected", v, v.Kind(), reflect.Struct) + } + + for i := 0; i < t.NumField(); i++ { + field := t.Field(i) + fieldValue := v.FieldByIndex(field.Index) + + // Skip fields explicitly marked as "hidden" in the doc + if isFieldHidden(field) { + continue + } + + // Skip fields not exported via yaml (unless they're inline) + fieldName := getFieldName(field) + if fieldName == "" && !isFieldInline(field) { + continue + } + + // Skip field types which are non-configurable + if field.Type.Kind() == reflect.Func { + continue + } + + // Skip deprecated fields we're still keeping for backward compatibility + // reasons (by convention we prefix them by UnusedFlag) + if strings.HasPrefix(field.Name, "UnusedFlag") { + continue + } + + // Handle custom fields in vendored libs upon which we have no control. + fieldEntry, err := getCustomFieldEntry(cfg, field, fieldValue, flags) + if err != nil { + return nil, err + } + if fieldEntry != nil { + block.Add(fieldEntry) + continue + } + + // Recursively re-iterate if it's a struct, and it's not a custom type. 
+ if _, custom := getCustomFieldType(field.Type); (field.Type.Kind() == reflect.Struct || field.Type.Kind() == reflect.Ptr) && !custom { + // Check whether the sub-block is a root config block + rootName, rootDesc, isRoot := isRootBlock(field.Type, rootBlocks) + + // Since we're going to recursively iterate, we need to create a new sub + // block and pass it to the doc generation function. + var subBlock *ConfigBlock + + if !isFieldInline(field) { + var blockName string + var blockDesc string + + if isRoot { + blockName = rootName + + // Honor the custom description if available. + blockDesc = getFieldDescription(cfg, field, rootDesc) + } else { + blockName = fieldName + blockDesc = getFieldDescription(cfg, field, "") + } + + subBlock = &ConfigBlock{ + Name: blockName, + Desc: blockDesc, + } + + block.Add(&ConfigEntry{ + Kind: KindBlock, + Name: fieldName, + Required: isFieldRequired(field), + Block: subBlock, + BlockDesc: blockDesc, + Root: isRoot, + }) + + if isRoot { + blocks = append(blocks, subBlock) + } + } else { + subBlock = block + } + + if field.Type.Kind() == reflect.Ptr { + // If this is a pointer, it's probably nil, so we initialize it. + fieldValue = reflect.New(field.Type.Elem()) + } else if field.Type.Kind() == reflect.Struct { + fieldValue = fieldValue.Addr() + } + + // Recursively generate the doc for the sub-block + otherBlocks, err := config(subBlock, fieldValue.Interface(), flags, rootBlocks) + if err != nil { + return nil, err + } + + blocks = append(blocks, otherBlocks...) + continue + } + + var ( + element *ConfigBlock + kind = KindField + ) + { + // Add ConfigBlock for slices only if the field isn't a custom type, + // which shouldn't be inspected because doesn't have YAML tags, flag registrations, etc. 
+ _, isCustomType := getFieldCustomType(field.Type) + isSliceOfStructs := field.Type.Kind() == reflect.Slice && (field.Type.Elem().Kind() == reflect.Struct || field.Type.Elem().Kind() == reflect.Ptr) + if !isCustomType && isSliceOfStructs { + element = &ConfigBlock{ + Name: fieldName, + Desc: getFieldDescription(cfg, field, ""), + } + kind = KindSlice + + _, err = config(element, reflect.New(field.Type.Elem()).Interface(), flags, rootBlocks) + if err != nil { + return nil, errors.Wrapf(err, "couldn't inspect slice, element_type=%s", field.Type.Elem()) + } + } + } + + fieldType, err := getFieldType(field.Type) + if err != nil { + return nil, errors.Wrapf(err, "config=%s.%s", t.PkgPath(), t.Name()) + } + + fieldFlag, err := getFieldFlag(field, fieldValue, flags) + if err != nil { + return nil, errors.Wrapf(err, "config=%s.%s", t.PkgPath(), t.Name()) + } + if fieldFlag == nil { + block.Add(&ConfigEntry{ + Kind: kind, + Name: fieldName, + Required: isFieldRequired(field), + FieldDesc: getFieldDescription(cfg, field, ""), + FieldType: fieldType, + FieldExample: getFieldExample(fieldName, field.Type), + Element: element, + }) + continue + } + + block.Add(&ConfigEntry{ + Kind: kind, + Name: fieldName, + Required: isFieldRequired(field), + FieldFlag: fieldFlag.Name, + FieldDesc: getFieldDescription(cfg, field, fieldFlag.Usage), + FieldType: fieldType, + FieldDefault: getFieldDefault(field, fieldFlag.DefValue), + FieldExample: getFieldExample(fieldName, field.Type), + Element: element, + }) + } + + return blocks, nil +} + +func getFieldName(field reflect.StructField) string { + name := field.Name + tag := field.Tag.Get("yaml") + + // If the tag is not specified, then an exported field can be + // configured via the field name (lowercase), while an unexported + // field can't be configured. 
+ if tag == "" { + if unicode.IsLower(rune(name[0])) { + return "" + } + + return strings.ToLower(name) + } + + // Parse the field name + fieldName := yamlFieldNameParser.FindString(tag) + if fieldName == "-" { + return "" + } + + return fieldName +} + +func getFieldCustomType(t reflect.Type) (string, bool) { + // Handle custom data types used in the config + switch t.String() { + case reflect.TypeOf(&url.URL{}).String(): + return "url", true + case reflect.TypeOf(time.Duration(0)).String(): + return "duration", true + case reflect.TypeOf(flagext.StringSliceCSV{}).String(): + return fieldString, true + case reflect.TypeOf(flagext.CIDRSliceCSV{}).String(): + return fieldString, true + case reflect.TypeOf([]*util.RelabelConfig{}).String(): + return fieldRelabelConfig, true + case reflect.TypeOf([]*relabel.Config{}).String(): + return fieldRelabelConfig, true + case reflect.TypeOf([]*util_validation.BlockedQuery{}).String(): + return "blocked_query...", true + case reflect.TypeOf([]*prometheus_config.RemoteWriteConfig{}).String(): + return "remote_write_config...", true + case reflect.TypeOf(storage_config.PeriodConfig{}).String(): + return "period_config", true + case reflect.TypeOf(validation.OverwriteMarshalingStringMap{}).String(): + return "headers", true + default: + return "", false + } +} + +func getFieldType(t reflect.Type) (string, error) { + if typ, isCustom := getFieldCustomType(t); isCustom { + return typ, nil + } + + // Fallback to auto-detection of built-in data types + switch t.Kind() { + case reflect.Bool: + return "boolean", nil + case reflect.Int: + fallthrough + case reflect.Int8: + fallthrough + case reflect.Int16: + fallthrough + case reflect.Int32: + fallthrough + case reflect.Int64: + fallthrough + case reflect.Uint: + fallthrough + case reflect.Uint8: + fallthrough + case reflect.Uint16: + fallthrough + case reflect.Uint32: + fallthrough + case reflect.Uint64: + return "int", nil + case reflect.Float32: + fallthrough + case reflect.Float64: + 
return "float", nil + case reflect.String: + return fieldString, nil + case reflect.Slice: + // Get the type of elements + elemType, err := getFieldType(t.Elem()) + if err != nil { + return "", err + } + return "list of " + elemType + "s", nil + case reflect.Map: + return fmt.Sprintf("map of %s to %s", t.Key(), t.Elem().String()), nil + case reflect.Struct: + return t.Name(), nil + case reflect.Ptr: + return getFieldType(t.Elem()) + case reflect.Interface: + return t.Name(), nil + default: + return "", fmt.Errorf("unsupported data type %s", t.Kind()) + } +} + +func getCustomFieldType(t reflect.Type) (string, bool) { + // Handle custom data types used in the config + switch t.String() { + case reflect.TypeOf(&url.URL{}).String(): + return "url", true + case reflect.TypeOf(time.Duration(0)).String(): + return "duration", true + case reflect.TypeOf(flagext.StringSliceCSV{}).String(): + return fieldString, true + case reflect.TypeOf(flagext.CIDRSliceCSV{}).String(): + return fieldString, true + case reflect.TypeOf([]*relabel.Config{}).String(): + return fieldRelabelConfig, true + case reflect.TypeOf([]*util.RelabelConfig{}).String(): + return fieldRelabelConfig, true + case reflect.TypeOf(&prometheus_config.RemoteWriteConfig{}).String(): + return "remote_write_config...", true + case reflect.TypeOf(validation.OverwriteMarshalingStringMap{}).String(): + return "headers", true + default: + return "", false + } +} + +func getFieldFlag(field reflect.StructField, fieldValue reflect.Value, flags map[uintptr]*flag.Flag) (*flag.Flag, error) { + if isAbsentInCLI(field) { + return nil, nil + } + fieldPtr := fieldValue.Addr().Pointer() + fieldFlag, ok := flags[fieldPtr] + if !ok { + return nil, nil + } + + return fieldFlag, nil +} + +func getFieldExample(fieldKey string, fieldType reflect.Type) *FieldExample { + ex, ok := reflect.New(fieldType).Interface().(ExamplerConfig) + if !ok { + return nil + } + comment, yml := ex.ExampleDoc() + return &FieldExample{ + Comment: comment, + 
Yaml: map[string]interface{}{fieldKey: yml}, + } +} + +func getCustomFieldEntry(cfg interface{}, field reflect.StructField, fieldValue reflect.Value, flags map[uintptr]*flag.Flag) (*ConfigEntry, error) { + if field.Type == reflect.TypeOf(logging.Level{}) || field.Type == reflect.TypeOf(logging.Format{}) { + fieldFlag, err := getFieldFlag(field, fieldValue, flags) + if err != nil || fieldFlag == nil { + return nil, err + } + + return &ConfigEntry{ + Kind: KindField, + Name: getFieldName(field), + Required: isFieldRequired(field), + FieldFlag: fieldFlag.Name, + FieldDesc: getFieldDescription(cfg, field, fieldFlag.Usage), + FieldType: fieldString, + FieldDefault: getFieldDefault(field, fieldFlag.DefValue), + }, nil + } + if field.Type == reflect.TypeOf(flagext.URLValue{}) { + fieldFlag, err := getFieldFlag(field, fieldValue, flags) + if err != nil || fieldFlag == nil { + return nil, err + } + + return &ConfigEntry{ + Kind: KindField, + Name: getFieldName(field), + Required: isFieldRequired(field), + FieldFlag: fieldFlag.Name, + FieldDesc: getFieldDescription(cfg, field, fieldFlag.Usage), + FieldType: "url", + FieldDefault: getFieldDefault(field, fieldFlag.DefValue), + }, nil + } + if field.Type == reflect.TypeOf(flagext.Secret{}) { + fieldFlag, err := getFieldFlag(field, fieldValue, flags) + if err != nil || fieldFlag == nil { + return nil, err + } + + return &ConfigEntry{ + Kind: KindField, + Name: getFieldName(field), + Required: isFieldRequired(field), + FieldFlag: fieldFlag.Name, + FieldDesc: getFieldDescription(cfg, field, fieldFlag.Usage), + FieldType: fieldString, + FieldDefault: getFieldDefault(field, fieldFlag.DefValue), + }, nil + } + if field.Type == reflect.TypeOf(model.Duration(0)) { + fieldFlag, err := getFieldFlag(field, fieldValue, flags) + if err != nil || fieldFlag == nil { + return nil, err + } + + return &ConfigEntry{ + Kind: KindField, + Name: getFieldName(field), + Required: isFieldRequired(field), + FieldFlag: fieldFlag.Name, + FieldDesc: 
getFieldDescription(cfg, field, fieldFlag.Usage), + FieldType: "duration", + FieldDefault: getFieldDefault(field, fieldFlag.DefValue), + }, nil + } + if field.Type == reflect.TypeOf(flagext.Time{}) { + fieldFlag, err := getFieldFlag(field, fieldValue, flags) + if err != nil || fieldFlag == nil { + return nil, err + } + + return &ConfigEntry{ + Kind: KindField, + Name: getFieldName(field), + Required: isFieldRequired(field), + FieldFlag: fieldFlag.Name, + FieldDesc: getFieldDescription(cfg, field, fieldFlag.Usage), + FieldType: "time", + FieldDefault: getFieldDefault(field, fieldFlag.DefValue), + }, nil + } + + return nil, nil +} + +func getFieldDefault(field reflect.StructField, fallback string) string { + if v := getDocTagValue(field, "default"); v != "" { + return v + } + + return fallback +} + +func isFieldDeprecated(f reflect.StructField) bool { + return getDocTagFlag(f, "deprecated") +} + +func isFieldHidden(f reflect.StructField) bool { + return getDocTagFlag(f, "hidden") +} + +func isAbsentInCLI(f reflect.StructField) bool { + return getDocTagFlag(f, "nocli") +} + +func isFieldRequired(f reflect.StructField) bool { + return getDocTagFlag(f, "required") +} + +func isFieldInline(f reflect.StructField) bool { + return yamlFieldInlineParser.MatchString(f.Tag.Get("yaml")) +} + +func getFieldDescription(cfg interface{}, field reflect.StructField, fallback string) string { + // Set prefix + prefix := "" + if isFieldDeprecated(field) { + prefix += "Deprecated: " + } + + if desc := getDocTagValue(field, "description"); desc != "" { + return prefix + desc + } + + if methodName := getDocTagValue(field, "description_method"); methodName != "" { + structRef := reflect.ValueOf(cfg) + + if method, ok := structRef.Type().MethodByName(methodName); ok { + out := method.Func.Call([]reflect.Value{structRef}) + if len(out) == 1 { + return prefix + out[0].String() + } + } + } + + return prefix + fallback +} + +func isRootBlock(t reflect.Type, rootBlocks []RootBlock) (string, 
string, bool) { + for _, rootBlock := range rootBlocks { + if t == rootBlock.StructType { + return rootBlock.Name, rootBlock.Desc, true + } + } + + return "", "", false +} + +func getDocTagFlag(f reflect.StructField, name string) bool { + cfg := parseDocTag(f) + _, ok := cfg[name] + return ok +} + +func getDocTagValue(f reflect.StructField, name string) string { + cfg := parseDocTag(f) + return cfg[name] +} + +func parseDocTag(f reflect.StructField) map[string]string { + cfg := map[string]string{} + tag := f.Tag.Get("doc") + + if tag == "" { + return cfg + } + + for _, entry := range strings.Split(tag, "|") { + parts := strings.SplitN(entry, "=", 2) + + switch len(parts) { + case 1: + cfg[parts[0]] = "" + case 2: + cfg[parts[0]] = parts[1] + } + } + + return cfg +} diff --git a/tools/doc-generator/parse/root_blocks.go b/tools/doc-generator/parse/root_blocks.go new file mode 100644 index 0000000000000..4da57ebc836ce --- /dev/null +++ b/tools/doc-generator/parse/root_blocks.go @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: AGPL-3.0-only + +package parse + +import ( + "reflect" + + "github.com/grafana/dskit/crypto/tls" + "github.com/grafana/dskit/grpcclient" + "github.com/grafana/dskit/kv/consul" + "github.com/grafana/dskit/kv/etcd" + "github.com/grafana/dskit/runtimeconfig" + "github.com/weaveworks/common/server" + + "github.com/grafana/loki/pkg/distributor" + "github.com/grafana/loki/pkg/ingester" + ingester_client "github.com/grafana/loki/pkg/ingester/client" + "github.com/grafana/loki/pkg/loki/common" + frontend "github.com/grafana/loki/pkg/lokifrontend" + "github.com/grafana/loki/pkg/querier" + "github.com/grafana/loki/pkg/querier/queryrange" + querier_worker "github.com/grafana/loki/pkg/querier/worker" + "github.com/grafana/loki/pkg/ruler" + "github.com/grafana/loki/pkg/ruler/rulestore/local" + "github.com/grafana/loki/pkg/scheduler" + "github.com/grafana/loki/pkg/storage" + "github.com/grafana/loki/pkg/storage/chunk/cache" + 
"github.com/grafana/loki/pkg/storage/chunk/client/aws" + "github.com/grafana/loki/pkg/storage/chunk/client/azure" + "github.com/grafana/loki/pkg/storage/chunk/client/baidubce" + "github.com/grafana/loki/pkg/storage/chunk/client/gcp" + "github.com/grafana/loki/pkg/storage/chunk/client/openstack" + storage_config "github.com/grafana/loki/pkg/storage/config" + "github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor" + "github.com/grafana/loki/pkg/storage/stores/series/index" + "github.com/grafana/loki/pkg/storage/stores/shipper/indexgateway" + "github.com/grafana/loki/pkg/tracing" + "github.com/grafana/loki/pkg/usagestats" + "github.com/grafana/loki/pkg/validation" +) + +var ( + // RootBlocks is an ordered list of root blocks with their associated descriptions. + // The order is the same order that will follow the markdown generation. + // Root blocks map to the configuration variables defined in Config of pkg/loki/loki.go + RootBlocks = []RootBlock{ + { + Name: "server", + StructType: reflect.TypeOf(server.Config{}), + Desc: "Configures the server of the launched module(s).", + }, + { + Name: "distributor", + StructType: reflect.TypeOf(distributor.Config{}), + Desc: "Configures the distributor.", + }, + { + Name: "querier", + StructType: reflect.TypeOf(querier.Config{}), + Desc: "Configures the querier. Only appropriate when running all modules or just the querier.", + }, + { + Name: "query_scheduler", + StructType: reflect.TypeOf(scheduler.Config{}), + Desc: "The query_scheduler block configures the Loki query scheduler. 
When configured it separates the tenant query queues from the query-frontend.", + }, + { + Name: "frontend", + StructType: reflect.TypeOf(frontend.Config{}), + Desc: "The frontend block configures the Loki query-frontend.", + }, + { + Name: "query_range", + StructType: reflect.TypeOf(queryrange.Config{}), + Desc: "The query_range block configures the query splitting and caching in the Loki query-frontend.", + }, + { + Name: "ruler", + StructType: reflect.TypeOf(ruler.Config{}), + Desc: "The ruler block configures the Loki ruler.", + }, + { + Name: "ingester_client", + StructType: reflect.TypeOf(ingester_client.Config{}), + Desc: "The ingester_client block configures how the distributor will connect to ingesters. Only appropriate when running all components, the distributor, or the querier.", + }, + { + Name: "ingester", + StructType: reflect.TypeOf(ingester.Config{}), + Desc: "The ingester block configures the ingester and how the ingester will register itself to a key value store.", + }, + { + Name: "index_gateway", + StructType: reflect.TypeOf(indexgateway.Config{}), + Desc: "The index_gateway block configures the Loki index gateway server, responsible for serving index queries without the need to constantly interact with the object store.", + }, + { + Name: "storage_config", + StructType: reflect.TypeOf(storage.Config{}), + Desc: "The storage_config block configures one of many possible stores for both the index and chunks. 
Which configuration to be picked should be defined in schema_config block.", + }, + { + Name: "chunk_store_config", + StructType: reflect.TypeOf(storage_config.ChunkStoreConfig{}), + Desc: "The chunk_store_config block configures how chunks will be cached and how long to wait before saving them to the backing store.", + }, + { + Name: "schema_config", + StructType: reflect.TypeOf(storage_config.SchemaConfig{}), + Desc: "Configures the chunk index schema and where it is stored.", + }, + { + Name: "compactor", + StructType: reflect.TypeOf(compactor.Config{}), + Desc: "The compactor block configures the compactor component, which compacts index shards for performance.", + }, + { + Name: "limits_config", + StructType: reflect.TypeOf(validation.Limits{}), + Desc: "The limits_config block configures global and per-tenant limits in Loki.", + }, + { + Name: "frontend_worker", + StructType: reflect.TypeOf(querier_worker.Config{}), + Desc: "The frontend_worker configures the worker - running within the Loki querier - picking up and executing queries enqueued by the query-frontend.", + }, + { + Name: "table_manager", + StructType: reflect.TypeOf(index.TableManagerConfig{}), + Desc: "The table_manager block configures the table manager for retention.", + }, + + { + Name: "runtime_config", + StructType: reflect.TypeOf(runtimeconfig.Config{}), + Desc: "Configuration for 'runtime config' module, responsible for reloading runtime configuration file.", + }, + { + Name: "tracing", + StructType: reflect.TypeOf(tracing.Config{}), + Desc: "Configuration for tracing.", + }, + { + Name: "analytics", + StructType: reflect.TypeOf(usagestats.Config{}), + Desc: "Configuration for usage report.", + }, + + { + Name: "common", + StructType: reflect.TypeOf(common.Config{}), + Desc: "Common configuration to be shared between multiple modules. 
If a more specific configuration is given in other sections, the related configuration within this section will be ignored.", + }, + + // Non-root blocks + // StoreConfig dskit type: https://github.com/grafana/dskit/blob/main/kv/client.go#L44-L52 + { + Name: "consul", + StructType: reflect.TypeOf(consul.Config{}), + Desc: "Configuration for a Consul client. Only applies if store is consul.", + }, + { + Name: "etcd", + StructType: reflect.TypeOf(etcd.Config{}), + Desc: "Configuration for an ETCD v3 client. Only applies if store is etcd.", + }, + // GRPC client + { + Name: "grpc_client", + StructType: reflect.TypeOf(grpcclient.Config{}), + Desc: "The grpc_client block configures the gRPC client used to communicate between two Loki components.", + }, + // TLS config + { + Name: "tls_config", + StructType: reflect.TypeOf(tls.ClientConfig{}), + Desc: "The TLS configuration.", + }, + // Cache config + { + Name: "cache_config", + StructType: reflect.TypeOf(cache.Config{}), + Desc: "The cache block configures the cache backend.", + }, + // Schema periodic config + { + Name: "period_config", + StructType: reflect.TypeOf(storage_config.PeriodConfig{}), + Desc: "The period_config block configures what index schemas should be used for from specific time periods.", + }, + + // Storage config + { + Name: "azure_storage_config", + StructType: reflect.TypeOf(azure.BlobStorageConfig{}), + Desc: "The azure_storage_config block configures the connection to Azure object storage backend.", + }, + { + Name: "gcs_storage_config", + StructType: reflect.TypeOf(gcp.GCSConfig{}), + Desc: "The gcs_storage_config block configures the connection to Google Cloud Storage object storage backend.", + }, + { + Name: "s3_storage_config", + StructType: reflect.TypeOf(aws.S3Config{}), + Desc: "The s3_storage_config block configures the connection to Amazon S3 object storage backend.", + }, + { + Name: "bos_storage_config", + StructType: reflect.TypeOf(baidubce.BOSStorageConfig{}), + Desc: "The 
bos_storage_config block configures the connection to Baidu Object Storage (BOS) object storage backend.", + }, + { + Name: "swift_storage_config", + StructType: reflect.TypeOf(openstack.SwiftConfig{}), + Desc: "The swift_storage_config block configures the connection to OpenStack Object Storage (Swift) object storage backend.", + }, + { + Name: "local_storage_config", + StructType: reflect.TypeOf(local.Config{}), + Desc: "The local_storage_config block configures the usage of local file system as object storage backend.", + }, + } +) diff --git a/tools/doc-generator/parse/util.go b/tools/doc-generator/parse/util.go new file mode 100644 index 0000000000000..4d2deaf4fd8fa --- /dev/null +++ b/tools/doc-generator/parse/util.go @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/tools/doc-generator/util.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Cortex Authors. + +package parse + +import ( + "math" + "strings" +) + +func FindFlagsPrefix(flags []string) []string { + if len(flags) == 0 { + return flags + } + + // Split the input flags input tokens separated by "." + // because the want to find the prefix where segments + // are dot-separated. + var tokens [][]string + for _, flag := range flags { + tokens = append(tokens, strings.Split(flag, ".")) + } + + // Find the shortest tokens. + minLength := math.MaxInt32 + for _, t := range tokens { + if len(t) < minLength { + minLength = len(t) + } + } + + // We iterate backward to find common suffixes. Each time + // a common suffix is found, we remove it from the tokens. +outer: + for i := 0; i < minLength; i++ { + lastToken := tokens[0][len(tokens[0])-1] + + // Interrupt if the last token is different across the flags. 
+ for _, t := range tokens { + if t[len(t)-1] != lastToken { + break outer + } + } + + // The suffix token is equal across all flags, so we + // remove it from all of them and re-iterate. + for i, t := range tokens { + tokens[i] = t[:len(t)-1] + } + } + + // The remaining tokens are the different flags prefix, which we can + // now merge with the ".". + var prefixes []string + for _, t := range tokens { + prefixes = append(prefixes, strings.Join(t, ".")) + } + + return prefixes +} diff --git a/tools/doc-generator/parse/util_test.go b/tools/doc-generator/parse/util_test.go new file mode 100644 index 0000000000000..7cb123bcad2f8 --- /dev/null +++ b/tools/doc-generator/parse/util_test.go @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/tools/doc-generator/util_test.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Cortex Authors. + +package parse + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func Test_findFlagsPrefix(t *testing.T) { + tests := []struct { + input []string + expected []string + }{ + { + input: []string{}, + expected: []string{}, + }, + { + input: []string{""}, + expected: []string{""}, + }, + { + input: []string{"", ""}, + expected: []string{"", ""}, + }, + { + input: []string{"foo", "foo", "foo"}, + expected: []string{"", "", ""}, + }, + { + input: []string{"ruler.endpoint", "alertmanager.endpoint"}, + expected: []string{"ruler", "alertmanager"}, + }, + { + input: []string{"ruler.endpoint.address", "alertmanager.endpoint.address"}, + expected: []string{"ruler", "alertmanager"}, + }, + { + input: []string{"ruler.first.address", "ruler.second.address"}, + expected: []string{"ruler.first", "ruler.second"}, + }, + } + + for _, test := range tests { + assert.Equal(t, test.expected, FindFlagsPrefix(test.input)) + } +} diff --git a/tools/doc-generator/writer.go b/tools/doc-generator/writer.go new file 
mode 100644 index 0000000000000..b73877fc45f86 --- /dev/null +++ b/tools/doc-generator/writer.go @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: AGPL-3.0-only +// Provenance-includes-location: https://github.com/cortexproject/cortex/blob/master/tools/doc-generator/writer.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Cortex Authors. + +package main + +import ( + "fmt" + "sort" + "strconv" + "strings" + + "github.com/grafana/regexp" + "github.com/mitchellh/go-wordwrap" + "gopkg.in/yaml.v3" + + "github.com/grafana/loki/tools/doc-generator/parse" +) + +type specWriter struct { + out strings.Builder +} + +func (w *specWriter) writeConfigBlock(b *parse.ConfigBlock, indent int) { + if len(b.Entries) == 0 { + return + } + + for i, entry := range b.Entries { + // Add a new line to separate from the previous entry + if i > 0 { + w.out.WriteString("\n") + } + + w.writeConfigEntry(entry, indent) + } +} + +func (w *specWriter) writeConfigEntry(e *parse.ConfigEntry, indent int) { + if e.Kind == parse.KindBlock { + // If the block is a root block it will have its dedicated section in the doc, + // so here we've just to write down the reference without re-iterating on it. 
+ if e.Root { + // Description + w.writeComment(e.BlockDesc, indent, 0) + if e.Block.FlagsPrefix != "" { + w.writeComment(fmt.Sprintf("The CLI flags prefix for this block configuration is: %s", e.Block.FlagsPrefix), indent, 0) + } + + // Block reference without entries, because it's a root block + w.out.WriteString(pad(indent) + "[" + e.Name + ": <" + e.Block.Name + ">]\n") + } else { + // Description + w.writeComment(e.BlockDesc, indent, 0) + + // Name + w.out.WriteString(pad(indent) + e.Name + ":\n") + + // Entries + w.writeConfigBlock(e.Block, indent+tabWidth) + } + } + + if e.Kind == parse.KindField || e.Kind == parse.KindSlice || e.Kind == parse.KindMap { + // Description + w.writeComment(e.Description(), indent, 0) + w.writeExample(e.FieldExample, indent) + w.writeFlag(e.FieldFlag, indent) + + // Specification + fieldDefault := e.FieldDefault + if e.FieldType == "string" { + fieldDefault = strconv.Quote(fieldDefault) + } else if e.FieldType == "duration" { + fieldDefault = cleanupDuration(fieldDefault) + } + + if e.Required { + w.out.WriteString(pad(indent) + e.Name + ": <" + e.FieldType + "> | default = " + fieldDefault + "\n") + } else { + defaultValue := "" + if len(fieldDefault) > 0 { + defaultValue = " | default = " + fieldDefault + } + w.out.WriteString(pad(indent) + "[" + e.Name + ": <" + e.FieldType + ">" + defaultValue + "]\n") + } + } +} + +func (w *specWriter) writeFlag(name string, indent int) { + if name == "" { + return + } + + w.out.WriteString(pad(indent) + "# CLI flag: -" + name + "\n") +} + +func (w *specWriter) writeComment(comment string, indent, innerIndent int) { + if comment == "" { + return + } + + wrapped := wordwrap.WrapString(comment, uint(maxLineWidth-indent-innerIndent-2)) + w.writeWrappedString(wrapped, indent, innerIndent) +} + +func (w *specWriter) writeExample(example *parse.FieldExample, indent int) { + if example == nil { + return + } + + w.writeComment("Example:", indent, 0) + if example.Comment != "" { + 
w.writeComment(example.Comment, indent, 2) + } + + data, err := yaml.Marshal(example.Yaml) + if err != nil { + panic(fmt.Errorf("can't render example: %w", err)) + } + + w.writeWrappedString(string(data), indent, 2) +} + +func (w *specWriter) writeWrappedString(s string, indent, innerIndent int) { + lines := strings.Split(strings.TrimSpace(s), "\n") + for _, line := range lines { + w.out.WriteString(pad(indent) + "# " + pad(innerIndent) + line + "\n") + } +} + +func (w *specWriter) string() string { + return strings.TrimSpace(w.out.String()) +} + +type markdownWriter struct { + out strings.Builder +} + +func (w *markdownWriter) writeConfigDoc(blocks []*parse.ConfigBlock) { + // Deduplicate root blocks. + uniqueBlocks := map[string]*parse.ConfigBlock{} + for _, block := range blocks { + uniqueBlocks[block.Name] = block + } + + // Generate the markdown, honoring the root blocks order. + if topBlock, ok := uniqueBlocks[""]; ok { + w.writeConfigBlock(topBlock) + } + + for _, rootBlock := range parse.RootBlocks { + if block, ok := uniqueBlocks[rootBlock.Name]; ok { + // Keep the root block description. + blockToWrite := *block + blockToWrite.Desc = rootBlock.Desc + + w.writeConfigBlock(&blockToWrite) + } + } +} + +func (w *markdownWriter) writeConfigBlock(block *parse.ConfigBlock) { + // Title + if block.Name != "" { + w.out.WriteString("### " + block.Name + "\n") + w.out.WriteString("\n") + } + + // Description + if block.Desc != "" { + desc := block.Desc + + // Wrap first instance of the config block name with backticks + if block.Name != "" { + var matches int + nameRegexp := regexp.MustCompile(regexp.QuoteMeta(block.Name)) + desc = nameRegexp.ReplaceAllStringFunc(desc, func(input string) string { + if matches == 0 { + matches++ + return "`" + input + "`" + } + return input + }) + } + + // List of all prefixes used to reference this config block. 
+ if len(block.FlagsPrefixes) > 1 { + sortedPrefixes := sort.StringSlice(block.FlagsPrefixes) + sortedPrefixes.Sort() + + desc += " The supported CLI flags `` used to reference this configuration block are:\n\n" + + for _, prefix := range sortedPrefixes { + if prefix == "" { + desc += "- _no prefix_\n" + } else { + desc += fmt.Sprintf("- `%s`\n", prefix) + } + } + + // Unfortunately the markdown compiler used by the website generator has a bug + // when there's a list followed by a code block (no matter know many newlines + // in between). To workaround, we add a non-breaking space. + desc += "\n " + } + + w.out.WriteString(desc + "\n") + w.out.WriteString("\n") + } + + // Config specs + spec := &specWriter{} + spec.writeConfigBlock(block, 0) + + w.out.WriteString("```yaml\n") + w.out.WriteString(spec.string() + "\n") + w.out.WriteString("```\n") + w.out.WriteString("\n") +} + +func (w *markdownWriter) string() string { + return strings.TrimSpace(w.out.String()) +} + +func pad(length int) string { + return strings.Repeat(" ", length) +} + +func cleanupDuration(value string) string { + // This is the list of suffixes to remove from the duration if they're not + // the whole duration value. 
+ suffixes := []string{"0s", "0m"} + + for _, suffix := range suffixes { + re := regexp.MustCompile("(^.+\\D)" + suffix + "$") + + if groups := re.FindStringSubmatch(value); len(groups) == 2 { + value = groups[1] + } + } + + return value +} diff --git a/vendor/github.com/mitchellh/go-wordwrap/LICENSE.md b/vendor/github.com/mitchellh/go-wordwrap/LICENSE.md new file mode 100644 index 0000000000000..229851590442a --- /dev/null +++ b/vendor/github.com/mitchellh/go-wordwrap/LICENSE.md @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Mitchell Hashimoto + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/vendor/github.com/mitchellh/go-wordwrap/README.md b/vendor/github.com/mitchellh/go-wordwrap/README.md new file mode 100644 index 0000000000000..60ae3117008d9 --- /dev/null +++ b/vendor/github.com/mitchellh/go-wordwrap/README.md @@ -0,0 +1,39 @@ +# go-wordwrap + +`go-wordwrap` (Golang package: `wordwrap`) is a package for Go that +automatically wraps words into multiple lines. 
The primary use case for this +is in formatting CLI output, but of course word wrapping is a generally useful +thing to do. + +## Installation and Usage + +Install using `go get github.com/mitchellh/go-wordwrap`. + +Full documentation is available at +http://godoc.org/github.com/mitchellh/go-wordwrap + +Below is an example of its usage ignoring errors: + +```go +wrapped := wordwrap.WrapString("foo bar baz", 3) +fmt.Println(wrapped) +``` + +Would output: + +``` +foo +bar +baz +``` + +## Word Wrap Algorithm + +This library doesn't use any clever algorithm for word wrapping. The wrapping +is actually very naive: whenever there is whitespace or an explicit linebreak. +The goal of this library is for word wrapping CLI output, so the input is +typically pretty well controlled human language. Because of this, the naive +approach typically works just fine. + +In the future, we'd like to make the algorithm more advanced. We would do +so without breaking the API. diff --git a/vendor/github.com/mitchellh/go-wordwrap/wordwrap.go b/vendor/github.com/mitchellh/go-wordwrap/wordwrap.go new file mode 100644 index 0000000000000..ac67205bc2e5e --- /dev/null +++ b/vendor/github.com/mitchellh/go-wordwrap/wordwrap.go @@ -0,0 +1,73 @@ +package wordwrap + +import ( + "bytes" + "unicode" +) + +// WrapString wraps the given string within lim width in characters. +// +// Wrapping is currently naive and only happens at white-space. A future +// version of the library will implement smarter wrapping. This means that +// pathological cases can dramatically reach past the limit, such as a very +// long word. 
+func WrapString(s string, lim uint) string { + // Initialize a buffer with a slightly larger size to account for breaks + init := make([]byte, 0, len(s)) + buf := bytes.NewBuffer(init) + + var current uint + var wordBuf, spaceBuf bytes.Buffer + + for _, char := range s { + if char == '\n' { + if wordBuf.Len() == 0 { + if current+uint(spaceBuf.Len()) > lim { + current = 0 + } else { + current += uint(spaceBuf.Len()) + spaceBuf.WriteTo(buf) + } + spaceBuf.Reset() + } else { + current += uint(spaceBuf.Len() + wordBuf.Len()) + spaceBuf.WriteTo(buf) + spaceBuf.Reset() + wordBuf.WriteTo(buf) + wordBuf.Reset() + } + buf.WriteRune(char) + current = 0 + } else if unicode.IsSpace(char) { + if spaceBuf.Len() == 0 || wordBuf.Len() > 0 { + current += uint(spaceBuf.Len() + wordBuf.Len()) + spaceBuf.WriteTo(buf) + spaceBuf.Reset() + wordBuf.WriteTo(buf) + wordBuf.Reset() + } + + spaceBuf.WriteRune(char) + } else { + + wordBuf.WriteRune(char) + + if current+uint(spaceBuf.Len()+wordBuf.Len()) > lim && uint(wordBuf.Len()) < lim { + buf.WriteRune('\n') + current = 0 + spaceBuf.Reset() + } + } + } + + if wordBuf.Len() == 0 { + if current+uint(spaceBuf.Len()) <= lim { + spaceBuf.WriteTo(buf) + } + } else { + spaceBuf.WriteTo(buf) + wordBuf.WriteTo(buf) + } + + return buf.String() +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 74044d0a12514..9736aa89e7eb5 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -923,6 +923,9 @@ github.com/mitchellh/copystructure # github.com/mitchellh/go-homedir v1.1.0 ## explicit github.com/mitchellh/go-homedir +# github.com/mitchellh/go-wordwrap v1.0.0 +## explicit +github.com/mitchellh/go-wordwrap # github.com/mitchellh/mapstructure v1.5.0 ## explicit; go 1.14 github.com/mitchellh/mapstructure From 68a4d3271a006f0b057000ebf5dad0115d4ae26f Mon Sep 17 00:00:00 2001 From: Susana Ferreira Date: Wed, 14 Dec 2022 13:20:57 +0100 Subject: [PATCH 11/62] Fix documentation typos from #7916 (#7934) **What this PR does / why we need it**: Fix 
documentation typos from https://github.com/grafana/loki/pull/7916#pullrequestreview-1217192817 **Which issue(s) this PR fixes**: Fixes https://github.com/grafana/loki/pull/7916#pullrequestreview-1217192817 **Special notes for your reviewer**: **Checklist** - [ ] Reviewed the `CONTRIBUTING.md` guide - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/upgrading/_index.md` --- docs/sources/configuration/_index.md | 12 ++++++------ docs/sources/configuration/index.template | 6 +++--- pkg/ingester/wal.go | 2 +- pkg/querier/querier.go | 2 +- pkg/ruler/base/ruler.go | 2 +- pkg/ruler/config.go | 2 +- 6 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/sources/configuration/_index.md b/docs/sources/configuration/_index.md index 99af10a382407..e733aa2144555 100644 --- a/docs/sources/configuration/_index.md +++ b/docs/sources/configuration/_index.md @@ -15,7 +15,7 @@ depending on which mode Loki is launched in. Configuration examples can be found in the [Configuration Examples](examples/) document. -## Printing Loki Config At Runtime +## Printing Loki config at runtime If you pass Loki the flag `-print-config-stderr` or `-log-config-reverse-order`, (or `-print-config-stderr=true`) Loki will dump the entire config object it has created from the built-in defaults combined first with @@ -34,14 +34,14 @@ is especially useful in making sure your config files and flags are being read a `-log-config-reverse-order` is the flag we run Loki with in all our environments, the config entries are reversed so that the order of configs reads correctly top to bottom when viewed in Grafana's Explore. -## Reload At Runtime +## Reload at runtime Promtail can reload its configuration at runtime. If the new configuration is not well-formed, the changes will not be applied. 
A configuration reload is triggered by sending a `SIGHUP` to the Promtail process or sending a HTTP POST request to the `/reload` endpoint (when the `--server.enable-runtime-reload` flag is enabled). -## Configuration File Reference +## Configuration file reference To specify which configuration file to load, pass the `-config.file` flag at the command line. The value can be a list of comma separated paths, then the first @@ -460,7 +460,7 @@ rate_store: Configures the `querier`. Only appropriate when running all modules or just the querier. ```yaml -# Maximum duration for which the live tailing requests should be served. +# Maximum duration for which the live tailing requests are served. # CLI flag: -querier.tail-max-duration [tail_max_duration: | default = 1h] @@ -990,7 +990,7 @@ ring: # CLI flag: -ruler.flush-period [flush_period: | default = 1m] -# Enable the ruler api. +# Enable the ruler API. # CLI flag: -ruler.enable-api [enable_api: | default = true] @@ -1319,7 +1319,7 @@ wal: # CLI flag: -ingester.wal-enabled [enabled: | default = true] - # Directory where the WAL data should be stored and/or recovered from. + # Directory where the WAL data is stored and/or recovered from. # CLI flag: -ingester.wal-dir [dir: | default = "wal"] diff --git a/docs/sources/configuration/index.template b/docs/sources/configuration/index.template index f1ded57cabffd..89cabcbdec8f4 100644 --- a/docs/sources/configuration/index.template +++ b/docs/sources/configuration/index.template @@ -15,7 +15,7 @@ depending on which mode Loki is launched in. Configuration examples can be found in the [Configuration Examples](examples/) document. 
-## Printing Loki Config At Runtime +## Printing Loki config at runtime If you pass Loki the flag `-print-config-stderr` or `-log-config-reverse-order`, (or `-print-config-stderr=true`) Loki will dump the entire config object it has created from the built-in defaults combined first with @@ -34,14 +34,14 @@ is especially useful in making sure your config files and flags are being read a `-log-config-reverse-order` is the flag we run Loki with in all our environments, the config entries are reversed so that the order of configs reads correctly top to bottom when viewed in Grafana's Explore. -## Reload At Runtime +## Reload at runtime Promtail can reload its configuration at runtime. If the new configuration is not well-formed, the changes will not be applied. A configuration reload is triggered by sending a `SIGHUP` to the Promtail process or sending a HTTP POST request to the `/reload` endpoint (when the `--server.enable-runtime-reload` flag is enabled). -## Configuration File Reference +## Configuration file reference To specify which configuration file to load, pass the `-config.file` flag at the command line. 
The value can be a list of comma separated paths, then the first diff --git a/pkg/ingester/wal.go b/pkg/ingester/wal.go index 0db8066bbd202..56fcf4ceb265c 100644 --- a/pkg/ingester/wal.go +++ b/pkg/ingester/wal.go @@ -40,7 +40,7 @@ func (cfg *WALConfig) Validate() error { // RegisterFlags adds the flags required to config this to the given FlagSet func (cfg *WALConfig) RegisterFlags(f *flag.FlagSet) { - f.StringVar(&cfg.Dir, "ingester.wal-dir", "wal", "Directory where the WAL data should be stored and/or recovered from.") + f.StringVar(&cfg.Dir, "ingester.wal-dir", "wal", "Directory where the WAL data is stored and/or recovered from.") f.BoolVar(&cfg.Enabled, "ingester.wal-enabled", true, "Enable writing of ingested data into WAL.") f.DurationVar(&cfg.CheckpointDuration, "ingester.checkpoint-duration", 5*time.Minute, "Interval at which checkpoints should be created.") f.BoolVar(&cfg.FlushOnShutdown, "ingester.flush-on-shutdown", false, "When WAL is enabled, should chunks be flushed to long-term storage on shutdown.") diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index 71a3bea70569e..6ca088d35ec28 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -60,7 +60,7 @@ type Config struct { // RegisterFlags register flags. func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.Engine.RegisterFlagsWithPrefix("querier", f) - f.DurationVar(&cfg.TailMaxDuration, "querier.tail-max-duration", 1*time.Hour, "Maximum duration for which the live tailing requests should be served.") + f.DurationVar(&cfg.TailMaxDuration, "querier.tail-max-duration", 1*time.Hour, "Maximum duration for which the live tailing requests are served.") f.DurationVar(&cfg.ExtraQueryDelay, "querier.extra-query-delay", 0, "Time to wait before sending more than the minimum successful query requests.") f.DurationVar(&cfg.QueryIngestersWithin, "querier.query-ingesters-within", 3*time.Hour, "Maximum lookback beyond which queries are not sent to ingester. 
0 means all queries are sent to ingester.") f.IntVar(&cfg.MaxConcurrent, "querier.max-concurrent", 10, "The maximum number of concurrent queries allowed.") diff --git a/pkg/ruler/base/ruler.go b/pkg/ruler/base/ruler.go index fd0ceb89feda4..161c7dac2b634 100644 --- a/pkg/ruler/base/ruler.go +++ b/pkg/ruler/base/ruler.go @@ -172,7 +172,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.StringVar(&cfg.ShardingStrategy, "ruler.sharding-strategy", util.ShardingStrategyDefault, fmt.Sprintf("The sharding strategy to use. Supported values are: %s.", strings.Join(supportedShardingStrategies, ", "))) f.DurationVar(&cfg.FlushCheckPeriod, "ruler.flush-period", 1*time.Minute, "Period with which to attempt to flush rule groups.") f.StringVar(&cfg.RulePath, "ruler.rule-path", "/rules", "File path to store temporary rule files.") - f.BoolVar(&cfg.EnableAPI, "experimental.ruler.enable-api", false, "Enable the ruler api.") + f.BoolVar(&cfg.EnableAPI, "experimental.ruler.enable-api", false, "Enable the ruler API.") f.DurationVar(&cfg.OutageTolerance, "ruler.for-outage-tolerance", time.Hour, `Max time to tolerate outage for restoring "for" state of alert.`) f.DurationVar(&cfg.ForGracePeriod, "ruler.for-grace-period", 10*time.Minute, `Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than the grace period.`) f.DurationVar(&cfg.ResendDelay, "ruler.resend-delay", time.Minute, `Minimum amount of time to wait before resending an alert to Alertmanager.`) diff --git a/pkg/ruler/config.go b/pkg/ruler/config.go index 41eda7ff17c88..70ebcac7312fb 100644 --- a/pkg/ruler/config.go +++ b/pkg/ruler/config.go @@ -31,7 +31,7 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { c.WALCleaner.RegisterFlags(f) // TODO(owen-d, 3.0.0): remove deprecated experimental prefix in Cortex if they'll accept it. 
- f.BoolVar(&c.Config.EnableAPI, "ruler.enable-api", true, "Enable the ruler api.") + f.BoolVar(&c.Config.EnableAPI, "ruler.enable-api", true, "Enable the ruler API.") } // Validate overrides the embedded cortex variant which expects a cortex limits struct. Instead, copy the relevant bits over. From 8fe386fbb3787ad6534208ed687af68715dd95b3 Mon Sep 17 00:00:00 2001 From: Irina Date: Wed, 14 Dec 2022 13:41:51 +0000 Subject: [PATCH 12/62] Move to fork github.com/grafana/tail (#7931) --- clients/pkg/promtail/targets/file/tailer.go | 2 +- go.mod | 4 +-- go.sum | 6 ++-- .../github.com/grafana/dskit/flagext/bytes.go | 33 +++++++++++++++++++ .../{hpcloud => grafana}/tail/.gitignore | 0 .../{hpcloud => grafana}/tail/.travis.yml | 0 .../{hpcloud => grafana}/tail/CHANGES.md | 0 .../{hpcloud => grafana}/tail/Dockerfile | 0 .../{hpcloud => grafana}/tail/LICENSE.txt | 0 .../{hpcloud => grafana}/tail/Makefile | 0 .../{hpcloud => grafana}/tail/README.md | 0 .../{hpcloud => grafana}/tail/appveyor.yml | 0 .../tail/ratelimiter/Licence | 0 .../tail/ratelimiter/leakybucket.go | 0 .../tail/ratelimiter/memory.go | 0 .../tail/ratelimiter/storage.go | 0 .../{hpcloud => grafana}/tail/tail.go | 6 ++-- .../{hpcloud => grafana}/tail/tail_posix.go | 0 .../{hpcloud => grafana}/tail/tail_windows.go | 2 +- .../{hpcloud => grafana}/tail/util/util.go | 0 .../tail/watch/file_posix.go | 0 .../tail/watch/file_windows.go | 0 .../tail/watch/filechanges.go | 0 .../tail/watch/inotify.go | 2 +- .../tail/watch/inotify_tracker.go | 2 +- .../tail/watch/polling.go | 2 +- .../{hpcloud => grafana}/tail/watch/watch.go | 0 .../tail/winfile/winfile.go | 0 vendor/modules.txt | 15 ++++----- 29 files changed, 52 insertions(+), 22 deletions(-) create mode 100644 vendor/github.com/grafana/dskit/flagext/bytes.go rename vendor/github.com/{hpcloud => grafana}/tail/.gitignore (100%) rename vendor/github.com/{hpcloud => grafana}/tail/.travis.yml (100%) rename vendor/github.com/{hpcloud => grafana}/tail/CHANGES.md (100%) 
rename vendor/github.com/{hpcloud => grafana}/tail/Dockerfile (100%) rename vendor/github.com/{hpcloud => grafana}/tail/LICENSE.txt (100%) rename vendor/github.com/{hpcloud => grafana}/tail/Makefile (100%) rename vendor/github.com/{hpcloud => grafana}/tail/README.md (100%) rename vendor/github.com/{hpcloud => grafana}/tail/appveyor.yml (100%) rename vendor/github.com/{hpcloud => grafana}/tail/ratelimiter/Licence (100%) rename vendor/github.com/{hpcloud => grafana}/tail/ratelimiter/leakybucket.go (100%) rename vendor/github.com/{hpcloud => grafana}/tail/ratelimiter/memory.go (100%) rename vendor/github.com/{hpcloud => grafana}/tail/ratelimiter/storage.go (100%) rename vendor/github.com/{hpcloud => grafana}/tail/tail.go (99%) rename vendor/github.com/{hpcloud => grafana}/tail/tail_posix.go (100%) rename vendor/github.com/{hpcloud => grafana}/tail/tail_windows.go (81%) rename vendor/github.com/{hpcloud => grafana}/tail/util/util.go (100%) rename vendor/github.com/{hpcloud => grafana}/tail/watch/file_posix.go (100%) rename vendor/github.com/{hpcloud => grafana}/tail/watch/file_windows.go (100%) rename vendor/github.com/{hpcloud => grafana}/tail/watch/filechanges.go (100%) rename vendor/github.com/{hpcloud => grafana}/tail/watch/inotify.go (98%) rename vendor/github.com/{hpcloud => grafana}/tail/watch/inotify_tracker.go (99%) rename vendor/github.com/{hpcloud => grafana}/tail/watch/polling.go (98%) rename vendor/github.com/{hpcloud => grafana}/tail/watch/watch.go (100%) rename vendor/github.com/{hpcloud => grafana}/tail/winfile/winfile.go (100%) diff --git a/clients/pkg/promtail/targets/file/tailer.go b/clients/pkg/promtail/targets/file/tailer.go index 79605ca1f92fb..c9297cd04d235 100644 --- a/clients/pkg/promtail/targets/file/tailer.go +++ b/clients/pkg/promtail/targets/file/tailer.go @@ -8,7 +8,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/hpcloud/tail" + "github.com/grafana/tail" "github.com/pkg/errors" 
"github.com/prometheus/common/model" "go.uber.org/atomic" diff --git a/go.mod b/go.mod index cd016a7a3978b..1ae5ec1a58181 100644 --- a/go.mod +++ b/go.mod @@ -52,11 +52,11 @@ require ( github.com/grafana/dskit v0.0.0-20221212120341-3e308a49441b github.com/grafana/go-gelf/v2 v2.0.1 github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6 + github.com/grafana/tail v0.0.0-20221214082743-3a1c242a4d7b github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 github.com/grpc-ecosystem/grpc-opentracing v0.0.0-20180507213350-8e809c8a8645 github.com/hashicorp/consul/api v1.15.3 github.com/hashicorp/golang-lru v0.5.4 - github.com/hpcloud/tail v1.0.0 github.com/imdario/mergo v0.3.12 github.com/influxdata/go-syslog/v3 v3.0.1-0.20201128200927-a1889d947b48 github.com/influxdata/telegraf v1.16.3 @@ -309,8 +309,6 @@ require ( // Upgrade to run with gRPC 1.3.0 and above. replace github.com/sercand/kuberesolver => github.com/sercand/kuberesolver v2.4.0+incompatible -replace github.com/hpcloud/tail => github.com/grafana/tail v0.0.0-20221013162612-e88cf3c7a8f6 - replace github.com/Azure/azure-sdk-for-go => github.com/Azure/azure-sdk-for-go v36.2.0+incompatible replace github.com/Azure/azure-storage-blob-go => github.com/MasslessParticle/azure-storage-blob-go v0.14.1-0.20220216145902-b5e698eff68e diff --git a/go.sum b/go.sum index 686956b050cd7..4dd627e7aaee2 100644 --- a/go.sum +++ b/go.sum @@ -736,8 +736,8 @@ github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6 h1:A3dhViTeFDSQcGOXuUi6ukCQSMyDtDISBp2z6OOo2YM= github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6/go.mod h1:M5qHK+eWfAv8VR/265dIuEpL3fNfeC21tXXp9itM24A= -github.com/grafana/tail v0.0.0-20221013162612-e88cf3c7a8f6 h1:aHuSFz3MqN0+qU2Ss+C5BA1yh5a0+c3zF4OBvcVqcZg= -github.com/grafana/tail 
v0.0.0-20221013162612-e88cf3c7a8f6/go.mod h1:GIMXMPB/lRAllP5rVDvcGif87ryO2hgD7tCtHMdHrho= +github.com/grafana/tail v0.0.0-20221214082743-3a1c242a4d7b h1:lqdF6YhGFn2BJqTxbLMZM8UiDSEVF0434IDANpVblaY= +github.com/grafana/tail v0.0.0-20221214082743-3a1c242a4d7b/go.mod h1:7t5XR+2IA8P2qggOAHTj/GCZfoLBle3OvNSYh1VkRBU= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-middleware v1.1.0/go.mod h1:f5nM7jw/oeRSadq3xCzHAvxcr8HZnzsqU6ILg/0NiiE= @@ -838,6 +838,7 @@ github.com/heroku/rollrus v0.2.0/go.mod h1:B3MwEcr9nmf4xj0Sr5l9eSht7wLKMa1C+9ajg github.com/heroku/x v0.0.50 h1:CA0AXkSumucVJD+T+x+6c7X1iDEb+40F8GNgH5UjJwo= github.com/heroku/x v0.0.50/go.mod h1:vr+jORZ6sG3wgEq2FAS6UbOUrz9/DxpQGN/xPHVgbSM= github.com/hetznercloud/hcloud-go v1.35.3 h1:WCmFAhLRooih2QHAsbCbEdpIHnshQQmrPqsr3rHE1Ow= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huandu/xstrings v1.3.1 h1:4jgBlKK6tLKFvO8u5pmYjG91cqytmDCDvGh7ECVFfFs= github.com/huandu/xstrings v1.3.1/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/hudl/fargo v1.3.0/go.mod h1:y3CKSmjA+wD2gak7sUSXTAoopbhU08POFhmITJgmKTg= @@ -1720,7 +1721,6 @@ golang.org/x/sys v0.0.0-20191003212358-c178f38b412c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191008105621-543471e840be/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191112214154-59a1497f0cea/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191119060738-e882bf8e40c2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/vendor/github.com/grafana/dskit/flagext/bytes.go b/vendor/github.com/grafana/dskit/flagext/bytes.go new file mode 100644 index 0000000000000..dabb101b8e85b --- /dev/null +++ b/vendor/github.com/grafana/dskit/flagext/bytes.go @@ -0,0 +1,33 @@ +// Provenance-includes-location: https://github.com/thanos-io/thanos/blob/main/pkg/model/units.go +// Provenance-includes-license: Apache-2.0 +// Provenance-includes-copyright: The Thanos Authors. + +package flagext + +import ( + "github.com/alecthomas/units" +) + +// Bytes is a data type which supports yaml serialization/deserialization +// with units. +type Bytes uint64 + +func (b *Bytes) UnmarshalYAML(unmarshal func(interface{}) error) error { + var value string + err := unmarshal(&value) + if err != nil { + return err + } + + bytes, err := units.ParseBase2Bytes(value) + if err != nil { + return err + } + + *b = Bytes(bytes) + return nil +} + +func (b *Bytes) MarshalYAML() (interface{}, error) { + return units.Base2Bytes(*b).String(), nil +} diff --git a/vendor/github.com/hpcloud/tail/.gitignore b/vendor/github.com/grafana/tail/.gitignore similarity index 100% rename from vendor/github.com/hpcloud/tail/.gitignore rename to vendor/github.com/grafana/tail/.gitignore diff --git a/vendor/github.com/hpcloud/tail/.travis.yml b/vendor/github.com/grafana/tail/.travis.yml similarity index 100% rename from vendor/github.com/hpcloud/tail/.travis.yml rename to vendor/github.com/grafana/tail/.travis.yml diff --git a/vendor/github.com/hpcloud/tail/CHANGES.md b/vendor/github.com/grafana/tail/CHANGES.md similarity index 100% rename from vendor/github.com/hpcloud/tail/CHANGES.md rename to vendor/github.com/grafana/tail/CHANGES.md diff --git 
a/vendor/github.com/hpcloud/tail/Dockerfile b/vendor/github.com/grafana/tail/Dockerfile similarity index 100% rename from vendor/github.com/hpcloud/tail/Dockerfile rename to vendor/github.com/grafana/tail/Dockerfile diff --git a/vendor/github.com/hpcloud/tail/LICENSE.txt b/vendor/github.com/grafana/tail/LICENSE.txt similarity index 100% rename from vendor/github.com/hpcloud/tail/LICENSE.txt rename to vendor/github.com/grafana/tail/LICENSE.txt diff --git a/vendor/github.com/hpcloud/tail/Makefile b/vendor/github.com/grafana/tail/Makefile similarity index 100% rename from vendor/github.com/hpcloud/tail/Makefile rename to vendor/github.com/grafana/tail/Makefile diff --git a/vendor/github.com/hpcloud/tail/README.md b/vendor/github.com/grafana/tail/README.md similarity index 100% rename from vendor/github.com/hpcloud/tail/README.md rename to vendor/github.com/grafana/tail/README.md diff --git a/vendor/github.com/hpcloud/tail/appveyor.yml b/vendor/github.com/grafana/tail/appveyor.yml similarity index 100% rename from vendor/github.com/hpcloud/tail/appveyor.yml rename to vendor/github.com/grafana/tail/appveyor.yml diff --git a/vendor/github.com/hpcloud/tail/ratelimiter/Licence b/vendor/github.com/grafana/tail/ratelimiter/Licence similarity index 100% rename from vendor/github.com/hpcloud/tail/ratelimiter/Licence rename to vendor/github.com/grafana/tail/ratelimiter/Licence diff --git a/vendor/github.com/hpcloud/tail/ratelimiter/leakybucket.go b/vendor/github.com/grafana/tail/ratelimiter/leakybucket.go similarity index 100% rename from vendor/github.com/hpcloud/tail/ratelimiter/leakybucket.go rename to vendor/github.com/grafana/tail/ratelimiter/leakybucket.go diff --git a/vendor/github.com/hpcloud/tail/ratelimiter/memory.go b/vendor/github.com/grafana/tail/ratelimiter/memory.go similarity index 100% rename from vendor/github.com/hpcloud/tail/ratelimiter/memory.go rename to vendor/github.com/grafana/tail/ratelimiter/memory.go diff --git 
a/vendor/github.com/hpcloud/tail/ratelimiter/storage.go b/vendor/github.com/grafana/tail/ratelimiter/storage.go similarity index 100% rename from vendor/github.com/hpcloud/tail/ratelimiter/storage.go rename to vendor/github.com/grafana/tail/ratelimiter/storage.go diff --git a/vendor/github.com/hpcloud/tail/tail.go b/vendor/github.com/grafana/tail/tail.go similarity index 99% rename from vendor/github.com/hpcloud/tail/tail.go rename to vendor/github.com/grafana/tail/tail.go index b41e2b32c9023..92fec824f14fb 100644 --- a/vendor/github.com/hpcloud/tail/tail.go +++ b/vendor/github.com/grafana/tail/tail.go @@ -15,9 +15,9 @@ import ( "sync" "time" - "github.com/hpcloud/tail/ratelimiter" - "github.com/hpcloud/tail/util" - "github.com/hpcloud/tail/watch" + "github.com/grafana/tail/ratelimiter" + "github.com/grafana/tail/util" + "github.com/grafana/tail/watch" "gopkg.in/tomb.v1" ) diff --git a/vendor/github.com/hpcloud/tail/tail_posix.go b/vendor/github.com/grafana/tail/tail_posix.go similarity index 100% rename from vendor/github.com/hpcloud/tail/tail_posix.go rename to vendor/github.com/grafana/tail/tail_posix.go diff --git a/vendor/github.com/hpcloud/tail/tail_windows.go b/vendor/github.com/grafana/tail/tail_windows.go similarity index 81% rename from vendor/github.com/hpcloud/tail/tail_windows.go rename to vendor/github.com/grafana/tail/tail_windows.go index ef2cfca1b74b4..36e197bc451e8 100644 --- a/vendor/github.com/hpcloud/tail/tail_windows.go +++ b/vendor/github.com/grafana/tail/tail_windows.go @@ -3,7 +3,7 @@ package tail import ( - "github.com/hpcloud/tail/winfile" + "github.com/grafana/tail/winfile" "os" ) diff --git a/vendor/github.com/hpcloud/tail/util/util.go b/vendor/github.com/grafana/tail/util/util.go similarity index 100% rename from vendor/github.com/hpcloud/tail/util/util.go rename to vendor/github.com/grafana/tail/util/util.go diff --git a/vendor/github.com/hpcloud/tail/watch/file_posix.go b/vendor/github.com/grafana/tail/watch/file_posix.go similarity 
index 100% rename from vendor/github.com/hpcloud/tail/watch/file_posix.go rename to vendor/github.com/grafana/tail/watch/file_posix.go diff --git a/vendor/github.com/hpcloud/tail/watch/file_windows.go b/vendor/github.com/grafana/tail/watch/file_windows.go similarity index 100% rename from vendor/github.com/hpcloud/tail/watch/file_windows.go rename to vendor/github.com/grafana/tail/watch/file_windows.go diff --git a/vendor/github.com/hpcloud/tail/watch/filechanges.go b/vendor/github.com/grafana/tail/watch/filechanges.go similarity index 100% rename from vendor/github.com/hpcloud/tail/watch/filechanges.go rename to vendor/github.com/grafana/tail/watch/filechanges.go diff --git a/vendor/github.com/hpcloud/tail/watch/inotify.go b/vendor/github.com/grafana/tail/watch/inotify.go similarity index 98% rename from vendor/github.com/hpcloud/tail/watch/inotify.go rename to vendor/github.com/grafana/tail/watch/inotify.go index 8d6bbf729a149..f72d2ca22784a 100644 --- a/vendor/github.com/hpcloud/tail/watch/inotify.go +++ b/vendor/github.com/grafana/tail/watch/inotify.go @@ -8,7 +8,7 @@ import ( "os" "path/filepath" - "github.com/hpcloud/tail/util" + "github.com/grafana/tail/util" "gopkg.in/fsnotify/fsnotify.v1" "gopkg.in/tomb.v1" diff --git a/vendor/github.com/hpcloud/tail/watch/inotify_tracker.go b/vendor/github.com/grafana/tail/watch/inotify_tracker.go similarity index 99% rename from vendor/github.com/hpcloud/tail/watch/inotify_tracker.go rename to vendor/github.com/grafana/tail/watch/inotify_tracker.go index 739b3c2abf8e6..0e02d2d5075b1 100644 --- a/vendor/github.com/hpcloud/tail/watch/inotify_tracker.go +++ b/vendor/github.com/grafana/tail/watch/inotify_tracker.go @@ -10,7 +10,7 @@ import ( "sync" "syscall" - "github.com/hpcloud/tail/util" + "github.com/grafana/tail/util" "gopkg.in/fsnotify/fsnotify.v1" ) diff --git a/vendor/github.com/hpcloud/tail/watch/polling.go b/vendor/github.com/grafana/tail/watch/polling.go similarity index 98% rename from 
vendor/github.com/hpcloud/tail/watch/polling.go rename to vendor/github.com/grafana/tail/watch/polling.go index 4f05e3abfbdaf..8ab6446a78773 100644 --- a/vendor/github.com/hpcloud/tail/watch/polling.go +++ b/vendor/github.com/grafana/tail/watch/polling.go @@ -4,7 +4,7 @@ package watch import ( - "github.com/hpcloud/tail/util" + "github.com/grafana/tail/util" "gopkg.in/tomb.v1" "os" "runtime" diff --git a/vendor/github.com/hpcloud/tail/watch/watch.go b/vendor/github.com/grafana/tail/watch/watch.go similarity index 100% rename from vendor/github.com/hpcloud/tail/watch/watch.go rename to vendor/github.com/grafana/tail/watch/watch.go diff --git a/vendor/github.com/hpcloud/tail/winfile/winfile.go b/vendor/github.com/grafana/tail/winfile/winfile.go similarity index 100% rename from vendor/github.com/hpcloud/tail/winfile/winfile.go rename to vendor/github.com/grafana/tail/winfile/winfile.go diff --git a/vendor/modules.txt b/vendor/modules.txt index 9736aa89e7eb5..d6e290d7c6dd7 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -715,6 +715,13 @@ github.com/grafana/go-gelf/v2/gelf ## explicit; go 1.17 github.com/grafana/regexp github.com/grafana/regexp/syntax +# github.com/grafana/tail v0.0.0-20221214082743-3a1c242a4d7b +## explicit; go 1.13 +github.com/grafana/tail +github.com/grafana/tail/ratelimiter +github.com/grafana/tail/util +github.com/grafana/tail/watch +github.com/grafana/tail/winfile # github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 ## explicit; go 1.14 github.com/grpc-ecosystem/go-grpc-middleware @@ -768,13 +775,6 @@ github.com/hashicorp/serf/coordinate # github.com/heroku/x v0.0.50 ## explicit; go 1.12 github.com/heroku/x/logplex/encoding -# github.com/hpcloud/tail v1.0.0 => github.com/grafana/tail v0.0.0-20221013162612-e88cf3c7a8f6 -## explicit; go 1.13 -github.com/hpcloud/tail -github.com/hpcloud/tail/ratelimiter -github.com/hpcloud/tail/util -github.com/hpcloud/tail/watch -github.com/hpcloud/tail/winfile # github.com/huandu/xstrings v1.3.1 ## 
explicit; go 1.12 github.com/huandu/xstrings @@ -1949,7 +1949,6 @@ sigs.k8s.io/structured-merge-diff/v4/value ## explicit; go 1.12 sigs.k8s.io/yaml # github.com/sercand/kuberesolver => github.com/sercand/kuberesolver v2.4.0+incompatible -# github.com/hpcloud/tail => github.com/grafana/tail v0.0.0-20221013162612-e88cf3c7a8f6 # github.com/Azure/azure-sdk-for-go => github.com/Azure/azure-sdk-for-go v36.2.0+incompatible # github.com/Azure/azure-storage-blob-go => github.com/MasslessParticle/azure-storage-blob-go v0.14.1-0.20220216145902-b5e698eff68e # github.com/hashicorp/consul => github.com/hashicorp/consul v1.5.1 From 10b869c82e2a903898ce3a9462f3fef0d2dd60a5 Mon Sep 17 00:00:00 2001 From: Irina Date: Wed, 14 Dec 2022 13:48:05 +0000 Subject: [PATCH 13/62] Move to fork github.com/grafana/gomemcache (#7936) --- go.mod | 6 +- go.sum | 4 +- pkg/storage/chunk/cache/memcached.go | 4 +- pkg/storage/chunk/cache/memcached_client.go | 2 +- .../chunk/cache/memcached_client_selector.go | 4 +- .../cache/memcached_client_selector_test.go | 2 +- .../chunk/cache/memcached_client_test.go | 2 +- pkg/storage/chunk/cache/memcached_test.go | 2 +- .../{bradfitz => grafana}/gomemcache/LICENSE | 0 .../gomemcache/memcache/memcache.go | 150 ++++++++++-------- .../gomemcache/memcache/selector.go | 0 vendor/modules.txt | 7 +- 12 files changed, 102 insertions(+), 81 deletions(-) rename vendor/github.com/{bradfitz => grafana}/gomemcache/LICENSE (100%) rename vendor/github.com/{bradfitz => grafana}/gomemcache/memcache/memcache.go (88%) rename vendor/github.com/{bradfitz => grafana}/gomemcache/memcache/selector.go (100%) diff --git a/go.mod b/go.mod index 1ae5ec1a58181..919e92898e12d 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,6 @@ require ( github.com/aws/aws-sdk-go v1.44.128 github.com/baidubce/bce-sdk-go v0.9.111 github.com/bmatcuk/doublestar v1.2.2 - github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b github.com/buger/jsonparser v1.1.1 github.com/c2h5oh/datasize 
v0.0.0-20200112174442-28bbd4740fee github.com/cespare/xxhash v1.1.0 @@ -51,6 +50,7 @@ require ( github.com/gorilla/websocket v1.5.0 github.com/grafana/dskit v0.0.0-20221212120341-3e308a49441b github.com/grafana/go-gelf/v2 v2.0.1 + github.com/grafana/gomemcache v0.0.0-20221213170046-b5da8a745d41 github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6 github.com/grafana/tail v0.0.0-20221214082743-3a1c242a4d7b github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 @@ -318,10 +318,6 @@ replace github.com/hashicorp/consul => github.com/hashicorp/consul v1.5.1 // Use fork of gocql that has gokit logs and Prometheus metrics. replace github.com/gocql/gocql => github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85 -// Same as Cortex -// Using a 3rd-party branch for custom dialer - see https://github.com/bradfitz/gomemcache/pull/86 -replace github.com/bradfitz/gomemcache => github.com/owen-d/gomemcache v0.0.0-20220719101501-ce4268ea75ae - replace github.com/cloudflare/cloudflare-go => github.com/cyriltovena/cloudflare-go v0.27.1-0.20211118103540-ff77400bcb93 exclude k8s.io/client-go v8.0.0+incompatible diff --git a/go.sum b/go.sum index 4dd627e7aaee2..8117378cb54bb 100644 --- a/go.sum +++ b/go.sum @@ -732,6 +732,8 @@ github.com/grafana/go-gelf/v2 v2.0.1 h1:BOChP0h/jLeD+7F9mL7tq10xVkDG15he3T1zHuQa github.com/grafana/go-gelf/v2 v2.0.1/go.mod h1:lexHie0xzYGwCgiRGcvZ723bSNyNI8ZRD4s0CLobh90= github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85 h1:xLuzPoOzdfNb/RF/IENCw+oLVdZB4G21VPhkHBgwSHY= github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85/go.mod h1:crI9WX6p0IhrqB+DqIUHulRW853PaNFf7o4UprV//3I= +github.com/grafana/gomemcache v0.0.0-20221213170046-b5da8a745d41 h1:YxVdHh0Erfya/wb4mzy/JkTxtmefBICE6gAwSkS+61I= +github.com/grafana/gomemcache v0.0.0-20221213170046-b5da8a745d41/go.mod h1:6fkC8bkriadatJOc7Pvjcvqr2xh9C79BYRRfE3WWoo0= github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe h1:yIXAAbLswn7VNWBIvM71O2QsgfgW9fRXZNR0DXe6pDU= 
github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE= github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6 h1:A3dhViTeFDSQcGOXuUi6ukCQSMyDtDISBp2z6OOo2YM= @@ -1127,8 +1129,6 @@ github.com/openzipkin/zipkin-go v0.2.2/go.mod h1:NaW6tEwdmWMaCDZzg8sh+IBNOxHMPnh github.com/openzipkin/zipkin-go-opentracing v0.3.4/go.mod h1:js2AbwmHW0YD9DwIw2JhQWmbfFi/UnWyYwdVhqbCDOE= github.com/ory/dockertest v3.3.4+incompatible/go.mod h1:1vX4m9wsvi00u5bseYwXaSnhNrne+V0E6LAcBILJdPs= github.com/ovh/go-ovh v1.1.0 h1:bHXZmw8nTgZin4Nv7JuaLs0KG5x54EQR7migYTd1zrk= -github.com/owen-d/gomemcache v0.0.0-20220719101501-ce4268ea75ae h1:NkG2GIrREfX6FfB8D1QCLcFqpE5xHL7rbUGwXB9pvwM= -github.com/owen-d/gomemcache v0.0.0-20220719101501-ce4268ea75ae/go.mod h1:H0wQNHz2YrLsuXOZozoeDmnHXkNCRmMW0gwFWDfEZDA= github.com/packethost/packngo v0.1.1-0.20180711074735-b9cb5096f54c/go.mod h1:otzZQXgoO96RTzDB/Hycg0qZcXZsWJGJRSXbmEIJ+4M= github.com/pact-foundation/pact-go v1.0.4/go.mod h1:uExwJY4kCzNPcHRj+hCR/HBbOOIwwtUjcrb0b5/5kLM= github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= diff --git a/pkg/storage/chunk/cache/memcached.go b/pkg/storage/chunk/cache/memcached.go index 025a46cee0e4f..4256baf8b0d61 100644 --- a/pkg/storage/chunk/cache/memcached.go +++ b/pkg/storage/chunk/cache/memcached.go @@ -8,8 +8,8 @@ import ( "sync" "time" - "github.com/bradfitz/gomemcache/memcache" "github.com/go-kit/log" + "github.com/grafana/gomemcache/memcache" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" instr "github.com/weaveworks/common/instrument" @@ -113,7 +113,7 @@ type result struct { } func memcacheStatusCode(err error) string { - // See https://godoc.org/github.com/bradfitz/gomemcache/memcache#pkg-variables + // See https://godoc.org/github.com/grafana/gomemcache/memcache#pkg-variables switch err { case nil: return "200" diff 
--git a/pkg/storage/chunk/cache/memcached_client.go b/pkg/storage/chunk/cache/memcached_client.go index 4d80587a7262c..874eb295b69b4 100644 --- a/pkg/storage/chunk/cache/memcached_client.go +++ b/pkg/storage/chunk/cache/memcached_client.go @@ -10,9 +10,9 @@ import ( "sync" "time" - "github.com/bradfitz/gomemcache/memcache" "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/grafana/gomemcache/memcache" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" diff --git a/pkg/storage/chunk/cache/memcached_client_selector.go b/pkg/storage/chunk/cache/memcached_client_selector.go index e13adca2b4393..e56a1d2af8a99 100644 --- a/pkg/storage/chunk/cache/memcached_client_selector.go +++ b/pkg/storage/chunk/cache/memcached_client_selector.go @@ -5,9 +5,9 @@ import ( "strings" "sync" - "github.com/bradfitz/gomemcache/memcache" "github.com/cespare/xxhash" "github.com/facette/natsort" + "github.com/grafana/gomemcache/memcache" ) // MemcachedJumpHashSelector implements the memcache.ServerSelector @@ -47,7 +47,7 @@ func DefaultMemcachedJumpHashSelector() *MemcachedJumpHashSelector { // staticAddr caches the Network() and String() values from // any net.Addr. // -// Copied from github.com/bradfitz/gomemcache/selector.go. +// Copied from github.com/grafana/gomemcache/selector.go. 
type staticAddr struct { network, str string } diff --git a/pkg/storage/chunk/cache/memcached_client_selector_test.go b/pkg/storage/chunk/cache/memcached_client_selector_test.go index 6a0f34e8b9fda..2a3f28709549c 100644 --- a/pkg/storage/chunk/cache/memcached_client_selector_test.go +++ b/pkg/storage/chunk/cache/memcached_client_selector_test.go @@ -5,8 +5,8 @@ import ( "net" "testing" - "github.com/bradfitz/gomemcache/memcache" "github.com/facette/natsort" + "github.com/grafana/gomemcache/memcache" "github.com/stretchr/testify/require" "github.com/grafana/loki/pkg/storage/chunk/cache" diff --git a/pkg/storage/chunk/cache/memcached_client_test.go b/pkg/storage/chunk/cache/memcached_client_test.go index 028fba8ef4609..32413ea221d6d 100644 --- a/pkg/storage/chunk/cache/memcached_client_test.go +++ b/pkg/storage/chunk/cache/memcached_client_test.go @@ -3,7 +3,7 @@ package cache_test import ( "sync" - "github.com/bradfitz/gomemcache/memcache" + "github.com/grafana/gomemcache/memcache" ) type mockMemcache struct { diff --git a/pkg/storage/chunk/cache/memcached_test.go b/pkg/storage/chunk/cache/memcached_test.go index b8b5810459246..e544d40044c2f 100644 --- a/pkg/storage/chunk/cache/memcached_test.go +++ b/pkg/storage/chunk/cache/memcached_test.go @@ -7,8 +7,8 @@ import ( "sync" "testing" - "github.com/bradfitz/gomemcache/memcache" "github.com/go-kit/log" + "github.com/grafana/gomemcache/memcache" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/atomic" diff --git a/vendor/github.com/bradfitz/gomemcache/LICENSE b/vendor/github.com/grafana/gomemcache/LICENSE similarity index 100% rename from vendor/github.com/bradfitz/gomemcache/LICENSE rename to vendor/github.com/grafana/gomemcache/LICENSE diff --git a/vendor/github.com/bradfitz/gomemcache/memcache/memcache.go b/vendor/github.com/grafana/gomemcache/memcache/memcache.go similarity index 88% rename from vendor/github.com/bradfitz/gomemcache/memcache/memcache.go rename to 
vendor/github.com/grafana/gomemcache/memcache/memcache.go index d0e70d3023429..7f2d5a6269d07 100644 --- a/vendor/github.com/bradfitz/gomemcache/memcache/memcache.go +++ b/vendor/github.com/grafana/gomemcache/memcache/memcache.go @@ -22,7 +22,6 @@ import ( "bytes" "errors" "fmt" - "hash/fnv" "io" "net" @@ -127,24 +126,14 @@ func New(server ...string) *Client { // NewFromSelector returns a new Client using the provided ServerSelector. func NewFromSelector(ss ServerSelector) *Client { - c := Client{selector: ss} - - // TODO: make configurable - shards := 128 - - c.connMaps = make([]*connMap, 0, shards) - for i := 0; i < shards; i++ { - c.connMaps = append(c.connMaps, &connMap{}) - } - return &c + return &Client{selector: ss} } // Client is a memcache client. // It is safe for unlocked use by multiple concurrent goroutines. type Client struct { - // Dialer specifies a custom dialer used to dial new connections to a server. + // DialTimeout specifies a custom dialer used to dial new connections to a server. DialTimeout func(network, address string, timeout time.Duration) (net.Conn, error) - // Timeout specifies the socket read/write timeout. // If zero, DefaultTimeout is used. Timeout time.Duration @@ -157,12 +146,10 @@ type Client struct { // be set to a number higher than your peak parallel requests. MaxIdleConns int - selector ServerSelector + Pool BytesPool - connMaps []*connMap -} + selector ServerSelector -type connMap struct { lk sync.Mutex freeconn map[string][]*conn } @@ -196,6 +183,15 @@ type conn struct { c *Client } +// BytesPool is a pool of bytes that can be reused. +type BytesPool interface { + // Get returns a new byte slice that has a capacity at least the same as the + // requested size. + Get(sz int) (*[]byte, error) + // Put returns a byte slice to the pool. 
+ Put(b *[]byte) +} + // release returns this connection back to the client's free pool func (cn *conn) release() { cn.c.putFreeConn(cn.addr, cn) @@ -217,47 +213,32 @@ func (cn *conn) condRelease(err *error) { } } -func (c *Client) cmFor(addr string) *connMap { - return c.connMaps[int(hash(addr))%len(c.connMaps)] -} - func (c *Client) putFreeConn(addr net.Addr, cn *conn) { - cm := c.cmFor(addr.String()) - cm.lk.Lock() - defer cm.lk.Unlock() - - if cm.freeconn == nil { - cm.freeconn = make(map[string][]*conn) + c.lk.Lock() + defer c.lk.Unlock() + if c.freeconn == nil { + c.freeconn = make(map[string][]*conn) } - freelist := cm.freeconn[addr.String()] + freelist := c.freeconn[addr.String()] if len(freelist) >= c.maxIdleConns() { cn.nc.Close() return } - cm.freeconn[addr.String()] = append(freelist, cn) -} - -func hash(s string) uint32 { - h := fnv.New32a() - h.Write([]byte(s)) - return h.Sum32() + c.freeconn[addr.String()] = append(freelist, cn) } func (c *Client) getFreeConn(addr net.Addr) (cn *conn, ok bool) { - cm := c.cmFor(addr.String()) - cm.lk.Lock() - defer cm.lk.Unlock() - - if cm.freeconn == nil { + c.lk.Lock() + defer c.lk.Unlock() + if c.freeconn == nil { return nil, false } - - freelist, ok := cm.freeconn[addr.String()] + freelist, ok := c.freeconn[addr.String()] if !ok || len(freelist) == 0 { return nil, false } cn = freelist[len(freelist)-1] - cm.freeconn[addr.String()] = freelist[:len(freelist)-1] + c.freeconn[addr.String()] = freelist[:len(freelist)-1] return cn, true } @@ -287,11 +268,11 @@ func (cte *ConnectTimeoutError) Error() string { } func (c *Client) dial(addr net.Addr) (net.Conn, error) { - if c.DialTimeout == nil { - c.DialTimeout = net.DialTimeout + dialTimeout := c.DialTimeout + if dialTimeout == nil { + dialTimeout = net.DialTimeout } - - nc, err := net.DialTimeout(addr.Network(), addr.String(), c.netTimeout()) + nc, err := dialTimeout(addr.Network(), addr.String(), c.netTimeout()) if err == nil { return nc, nil } @@ -400,7 +381,7 @@ 
func (c *Client) getFromAddr(addr net.Addr, keys []string, cb func(*Item)) error if err := rw.Flush(); err != nil { return err } - if err := parseGetResponse(rw.Reader, cb); err != nil { + if err := c.parseGetResponse(rw.Reader, cb); err != nil { return err } return nil @@ -513,7 +494,7 @@ func (c *Client) GetMulti(keys []string) (map[string]*Item, error) { } var err error - for range keyMap { + for _ = range keyMap { if ge := <-ch; ge != nil { err = ge } @@ -523,7 +504,7 @@ func (c *Client) GetMulti(keys []string) (map[string]*Item, error) { // parseGetResponse reads a GET response from r and calls cb for each // read and allocated Item -func parseGetResponse(r *bufio.Reader, cb func(*Item)) error { +func (c *Client) parseGetResponse(r *bufio.Reader, cb func(*Item)) error { for { line, err := r.ReadSlice('\n') if err != nil { @@ -537,14 +518,27 @@ func parseGetResponse(r *bufio.Reader, cb func(*Item)) error { if err != nil { return err } - it.Value = make([]byte, size+2) + buffSize := size + 2 + if c.Pool != nil { + v, err := c.Pool.Get(buffSize) + if err != nil { + return err + } + it.Value = (*v)[:buffSize] + } else { + it.Value = make([]byte, buffSize) + } _, err = io.ReadFull(r, it.Value) if err != nil { - it.Value = nil + if c.Pool != nil { + c.Pool.Put(&it.Value) + } return err } if !bytes.HasSuffix(it.Value, crlf) { - it.Value = nil + if c.Pool != nil { + c.Pool.Put(&it.Value) + } return fmt.Errorf("memcache: corrupt get result read") } it.Value = it.Value[:size] @@ -555,17 +549,49 @@ func parseGetResponse(r *bufio.Reader, cb func(*Item)) error { // scanGetResponseLine populates it and returns the declared size of the item. // It does not read the bytes of the item. 
func scanGetResponseLine(line []byte, it *Item) (size int, err error) { - pattern := "VALUE %s %d %d %d\r\n" - dest := []interface{}{&it.Key, &it.Flags, &size, &it.casid} - if bytes.Count(line, space) == 3 { - pattern = "VALUE %s %d %d\r\n" - dest = dest[:3] - } - n, err := fmt.Sscanf(string(line), pattern, dest...) - if err != nil || n != len(dest) { + errf := func(line []byte) (int, error) { return -1, fmt.Errorf("memcache: unexpected line in get response: %q", line) } - return size, nil + if !bytes.HasPrefix(line, []byte("VALUE ")) || !bytes.HasSuffix(line, []byte("\r\n")) { + return errf(line) + } + s := string(line[6 : len(line)-2]) + var rest string + var found bool + it.Key, rest, found = cut(s, ' ') + if !found { + return errf(line) + } + val, rest, found := cut(rest, ' ') + if !found { + return errf(line) + } + flags64, err := strconv.ParseUint(val, 10, 32) + if err != nil { + return errf(line) + } + it.Flags = uint32(flags64) + val, rest, found = cut(rest, ' ') + size64, err := strconv.ParseUint(val, 10, 32) + if err != nil { + return errf(line) + } + if !found { // final CAS ID is optional. + return int(size64), nil + } + it.casid, err = strconv.ParseUint(rest, 10, 64) + if err != nil { + return errf(line) + } + return int(size64), nil +} + +// Similar to strings.Cut in Go 1.18, but sep can only be 1 byte. +func cut(s string, sep byte) (before, after string, found bool) { + if i := strings.IndexByte(s, sep); i >= 0 { + return s[:i], s[i+1:], true + } + return s, "", false } // Set writes the given item, unconditionally. 
diff --git a/vendor/github.com/bradfitz/gomemcache/memcache/selector.go b/vendor/github.com/grafana/gomemcache/memcache/selector.go similarity index 100% rename from vendor/github.com/bradfitz/gomemcache/memcache/selector.go rename to vendor/github.com/grafana/gomemcache/memcache/selector.go diff --git a/vendor/modules.txt b/vendor/modules.txt index d6e290d7c6dd7..a62489467aae3 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -280,9 +280,6 @@ github.com/beorn7/perks/quantile # github.com/bmatcuk/doublestar v1.2.2 ## explicit; go 1.12 github.com/bmatcuk/doublestar -# github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b => github.com/owen-d/gomemcache v0.0.0-20220719101501-ce4268ea75ae -## explicit; go 1.12 -github.com/bradfitz/gomemcache/memcache # github.com/buger/jsonparser v1.1.1 ## explicit; go 1.13 github.com/buger/jsonparser @@ -711,6 +708,9 @@ github.com/grafana/dskit/tenant # github.com/grafana/go-gelf/v2 v2.0.1 ## explicit; go 1.17 github.com/grafana/go-gelf/v2/gelf +# github.com/grafana/gomemcache v0.0.0-20221213170046-b5da8a745d41 +## explicit; go 1.12 +github.com/grafana/gomemcache/memcache # github.com/grafana/regexp v0.0.0-20221005093135-b4c2bcb0a4b6 ## explicit; go 1.17 github.com/grafana/regexp @@ -1953,7 +1953,6 @@ sigs.k8s.io/yaml # github.com/Azure/azure-storage-blob-go => github.com/MasslessParticle/azure-storage-blob-go v0.14.1-0.20220216145902-b5e698eff68e # github.com/hashicorp/consul => github.com/hashicorp/consul v1.5.1 # github.com/gocql/gocql => github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85 -# github.com/bradfitz/gomemcache => github.com/owen-d/gomemcache v0.0.0-20220719101501-ce4268ea75ae # github.com/cloudflare/cloudflare-go => github.com/cyriltovena/cloudflare-go v0.27.1-0.20211118103540-ff77400bcb93 # google.golang.org/grpc => google.golang.org/grpc v1.45.0 # github.com/hashicorp/memberlist => github.com/grafana/memberlist v0.3.1-0.20220714140823-09ffed8adbbe From 
c71620ae9437aa23142cdc410115f28ae8f29997 Mon Sep 17 00:00:00 2001 From: Karsten Jeschkies Date: Wed, 14 Dec 2022 14:58:03 +0100 Subject: [PATCH 14/62] Flush buffered logger on exit. (#7924) **What this PR does / why we need it**: I ran into this issue several times. No error logs were printed or no generated token for GEL. This is because crashing runs can be much faster than the flush period. **Special notes for your reviewer**: **Checklist** - [ ] Reviewed the `CONTRIBUTING.md` guide - [ ] Documentation added - [ ] Tests updated - [ ] `CHANGELOG.md` updated - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/upgrading/_index.md` --- cmd/loki/main.go | 13 +++++++++---- pkg/util/log/log.go | 5 ++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/cmd/loki/main.go b/cmd/loki/main.go index 86c09ba3a7798..b1ead7dde6e84 100644 --- a/cmd/loki/main.go +++ b/cmd/loki/main.go @@ -21,6 +21,11 @@ import ( "github.com/grafana/loki/pkg/validation" ) +func exit(code int) { + util_log.Flush() + os.Exit(code) +} + func main() { var config loki.ConfigWrapper @@ -41,7 +46,7 @@ func main() { // Init the logger which will honor the log level set in config.Server if reflect.DeepEqual(&config.Server.LogLevel, &logging.Level{}) { level.Error(util_log.Logger).Log("msg", "invalid log level") - os.Exit(1) + exit(1) } util_log.InitLogger(&config.Server, prometheus.DefaultRegisterer, config.UseBufferedLogger, config.UseSyncLogger) @@ -49,7 +54,7 @@ func main() { // and CLI flags parsed. 
if err := config.Validate(); err != nil { level.Error(util_log.Logger).Log("msg", "validating config", "err", err.Error()) - os.Exit(1) + exit(1) } if config.PrintConfig { @@ -66,7 +71,7 @@ func main() { if config.VerifyConfig { level.Info(util_log.Logger).Log("msg", "config is valid") - os.Exit(0) + exit(0) } if config.Tracing.Enabled { @@ -97,7 +102,7 @@ func main() { if config.ListTargets { t.ListTargets() - os.Exit(0) + exit(0) } level.Info(util_log.Logger).Log("msg", "Starting Loki", "version", version.Info()) diff --git a/pkg/util/log/log.go b/pkg/util/log/log.go index 4a0030b880076..5a46a0f5ba74c 100644 --- a/pkg/util/log/log.go +++ b/pkg/util/log/log.go @@ -92,9 +92,6 @@ func newPrometheusLogger(l logging.Level, format logging.Format, reg prometheus. var writer io.Writer if buffered { - // TODO: it's technically possible here to lose logs between the 100ms flush and the process being killed - // => call buf.Flush() in a signal handler if this is a concern, but this is unlikely to be a problem - // retain a reference to this logger because it doesn't conform to the standard Logger interface, // and we can't unwrap it to get the underlying logger when we flush on shutdown bufferedLogger = log.NewLineBufferedLogger(os.Stderr, logEntries, @@ -172,6 +169,8 @@ func CheckFatal(location string, err error, logger log.Logger) { fmt.Fprintln(os.Stderr, errStr) logger.Log("err", errStr) + err = Flush() + fmt.Fprintln(os.Stderr, "Could not flush logger", err) os.Exit(1) } From cbd6ec15ce5616cc7ff31b3d41f54156d0e99ff7 Mon Sep 17 00:00:00 2001 From: Kaviraj Kanagaraj Date: Wed, 14 Dec 2022 16:19:56 +0100 Subject: [PATCH 15/62] feat(logql): Supporting prettifying LogQL expressions (#7906) **What this PR does / why we need it**: Changes 1. Added `Pretty()` method to `Expr` interface. So that every expression would know how to render themselfs as pretty :) 2. Implemented `Pretty()` for every single LogQL expressions 3. 
Exposed `/api/v1/format_query` endpoint that takes `query` argument and returns formatted version. 4. Integerated into `logcli`. echo `'' | logcli fmt` would format the query. Why? 1. Readability, helpful in debugging and **more importantly, to understand the execution flow** example **before** ``` quantile_over_time(0.99,{container="ingress-nginx",service="hosted-grafana"}| json| unwrap response_latency_seconds| __error__=""[1m]) by (cluster) ``` ``` sum(rate({job="loki", namespace="loki", cluster="loki-dev-us"} |= "err" [5m])) + sum(rate({job="loki-dev", namespace="loki", cluster="loki-dev-eu"}|logfmt | level != "info" [5m])) / sum(rate({job="loki-prod", namespace="loki", cluster="loki-prod-us"} |logfmt | level="error"[5m])) ``` ``` label_replace(rate({job="api-server",service="a:c"}|= "err" [5m]), "foo", "$1", "service", "(.*):.*") ``` **after** ``` quantile_over_time( 0.99, {container="ingress-nginx", service="hosted-grafana"} | json | unwrap response_latency_seconds | __error__="" [1m] ) by (cluster) ``` ``` sum( rate( {job="loki", namespace="loki", cluster="loki-dev-us"} |= "err" [5m] ) ) + sum( rate( {job="loki-dev", namespace="loki", cluster="loki-dev-eu"} | logfmt | level!="info" [5m] ) ) / sum( rate( {job="loki-prod", namespace="loki", cluster="loki-prod-us"} | logfmt | level="error" [5m] ) ) ``` ``` label_replace( rate({job="api-server", service="a:c"} |= "err"[5m]), "foo", "$1", "service", "(.*):.*" ) ``` You can find more examples in the `prettier_test.go` Future plans * Integerate into LogQL analyzer * Integrate into Grafana UI. 
**Which issue(s) this PR fixes**: Fixes # NA **Special notes for your reviewer**: This whole idea was inspired from last [PromCon lighting talk](https://youtu.be/pjkWzDVxWk4?t=24469) **Checklist** - [x] Reviewed the `CONTRIBUTING.md` guide - [x] Documentation added - [x] Tests updated - [x] `CHANGELOG.md` updated Signed-off-by: Kaviraj Co-authored-by: Christian Haudum Co-authored-by: Christian Simon --- CHANGELOG.md | 1 + cmd/logcli/main.go | 25 ++ docs/sources/api/_index.md | 13 + pkg/logql/syntax/ast.go | 5 +- pkg/logql/syntax/prettier.go | 406 ++++++++++++++++++++++++++++++ pkg/logql/syntax/prettier_test.go | 406 ++++++++++++++++++++++++++++++ pkg/loki/format_query_handler.go | 24 ++ pkg/loki/loki.go | 1 + 8 files changed, 880 insertions(+), 1 deletion(-) create mode 100644 pkg/logql/syntax/prettier.go create mode 100644 pkg/logql/syntax/prettier_test.go create mode 100644 pkg/loki/format_query_handler.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 37c57665bfd36..cbadbbfdf6a96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ * [7708](https://github.com/grafana/loki/pull/7708) **DylanGuedes**: Fix multitenant querying. * [7784](https://github.com/grafana/loki/pull/7784) **isodude**: Fix default values of connect addresses for compactor and querier workers to work with IPv6. * [7880](https://github.com/grafana/loki/pull/7880) **sandeepsukhani**: consider range and offset in queries while looking for schema config for query sharding. +* [7906](https://github.com/grafana/loki/pull/7906) **kavirajk**: Add API endpoint that formats LogQL expressions and support new `fmt` subcommand in `logcli` to format LogQL query. 
##### Changes diff --git a/cmd/logcli/main.go b/cmd/logcli/main.go index f34aa808ca021..5c71e413bf464 100644 --- a/cmd/logcli/main.go +++ b/cmd/logcli/main.go @@ -1,6 +1,8 @@ package main import ( + "fmt" + "io" "log" "math" "net/url" @@ -18,6 +20,7 @@ import ( "github.com/grafana/loki/pkg/logcli/output" "github.com/grafana/loki/pkg/logcli/query" "github.com/grafana/loki/pkg/logcli/seriesquery" + "github.com/grafana/loki/pkg/logql/syntax" _ "github.com/grafana/loki/pkg/util/build" ) @@ -109,6 +112,8 @@ Use the --analyze-labels flag to get a summary of the labels found in all stream This is helpful to find high cardinality labels. `) seriesQuery = newSeriesQuery(seriesCmd) + + fmtCmd = app.Command("fmt", "Formats a LogQL query.") ) func main() { @@ -213,7 +218,27 @@ func main() { labelsQuery.DoLabels(queryClient) case seriesCmd.FullCommand(): seriesQuery.DoSeries(queryClient) + case fmtCmd.FullCommand(): + if err := formatLogQL(os.Stdin, os.Stdout); err != nil { + log.Fatalf("unable to format logql: %s", err) + } + } +} + +func formatLogQL(r io.Reader, w io.Writer) error { + b, err := io.ReadAll(r) + if err != nil { + return err } + + expr, err := syntax.ParseExpr(string(b)) + if err != nil { + return fmt.Errorf("failed to parse the query: %w", err) + } + + fmt.Fprintf(w, "%s\n", syntax.Prettify(expr)) + + return nil } func newQueryClient(app *kingpin.Application) client.Client { diff --git a/docs/sources/api/_index.md b/docs/sources/api/_index.md index c20e426fb6aff..4f00864a6c0a5 100644 --- a/docs/sources/api/_index.md +++ b/docs/sources/api/_index.md @@ -23,6 +23,7 @@ These endpoints are exposed by all components: - [`GET /config`](#list-current-configuration) - [`GET /services`](#list-running-services) - [`GET /loki/api/v1/status/buildinfo`](#list-build-information) +- [`GET /loki/api/v1/format_query`](#format-query) These endpoints are exposed by the querier and the query frontend: @@ -703,6 +704,18 @@ GET /loki/api/v1/status/buildinfo 
`/loki/api/v1/status/buildinfo` exposes the build information in a JSON object. The fields are `version`, `revision`, `branch`, `buildDate`, `buildUser`, and `goVersion`. +## Format query + +``` +GET /loki/api/v1/format_query +POST /loki/api/v1/format_query +``` + +Params: +- `query`: A LogQL query string. Can be passed as URL param (`?query=`) in case of both `GET` and `POST`. Or as form value in case of `POST`. + +The `/loki/api/v1/format_query` endpoint allows to format LogQL queries. It returns an error if the passed LogQL is invalid. It is exposed by all Loki components and helps to improve readability and the debugging experience of LogQL queries. + ## List series The Series API is available under the following: diff --git a/pkg/logql/syntax/ast.go b/pkg/logql/syntax/ast.go index 6faca21f0dc69..6cb9e138665a6 100644 --- a/pkg/logql/syntax/ast.go +++ b/pkg/logql/syntax/ast.go @@ -23,6 +23,9 @@ type Expr interface { Shardable() bool // A recursive check on the AST to see if it's shardable. Walkable fmt.Stringer + + // Pretty prettyfies any LogQL expression at given `level` of the whole LogQL query. + Pretty(level int) string } func Clone(e Expr) (Expr, error) { @@ -658,7 +661,7 @@ const ( OpTypeAnd = "and" OpTypeUnless = "unless" - // binops - operations + // binops - arithmetic OpTypeAdd = "+" OpTypeSub = "-" OpTypeMul = "*" diff --git a/pkg/logql/syntax/prettier.go b/pkg/logql/syntax/prettier.go new file mode 100644 index 0000000000000..2c8e70dfb4a13 --- /dev/null +++ b/pkg/logql/syntax/prettier.go @@ -0,0 +1,406 @@ +// LogQL formatter is inspired from PromQL formatter +// https://github.com/prometheus/prometheus/blob/release-2.40/promql/parser/prettier.go +// https://youtu.be/pjkWzDVxWk4?t=24469 + +package syntax + +import ( + "fmt" + "strconv" + "strings" + + "github.com/prometheus/common/model" +) + +// How LogQL formatter works? 
+// ========================= +// General idea is to parse the LogQL query(string) and converts it into AST(expressions) first, then format each expression from bottom up (from leaf expressions to the root expression). Every expression in AST has a level/depth (distance from the root), that is passed by it's parent. +// +// While prettifying an expression, we consider two things: +// 1. Did the current expression's parent add a new line? +// 2. Does the current expression exceeds `maxCharsPerLine` limit? +// +// The level of a expression determines if it should be indented or not. +// The answer to the 1 is NO if the level passed is 0. This means, the +// parent expression did not apply a new line, so the current Node must not +// apply any indentation as prefix. +// If level > 1, a new line is applied by the parent. So, the current expression +// should prefix an indentation before writing any of its content. This indentation +// will be ([level/depth of current expression] * " "). +// +// The answer to 2 is YES if the normalized length of the current expression exceeds +// the `maxCharsPerLine` limit. Hence, it applies the indentation equal to +// its depth and increments the level by 1 before passing down the child. +// If the answer is NO, the current expression returns the normalized string value of itself. +// + +var ( + // maxCharsPerLine is used to qualify whether some LogQL expressions are worth `splitting` into new lines. 
+	maxCharsPerLine = 100
+)
+
+func Prettify(e Expr) string {
+	return e.Pretty(0)
+}
+
+// e.g: `{foo="bar"}`
+func (e *MatchersExpr) Pretty(level int) string {
+	return commonPrefixIndent(level, e)
+}
+
+// e.g: `{foo="bar"} | logfmt | level="error"`
+// Here, left = `{foo="bar"}` and multistages would be the collection of each stage in the pipeline, here `logfmt` and `level="error"`
+func (e *PipelineExpr) Pretty(level int) string {
+	if !needSplit(e) {
+		return indent(level) + e.String()
+	}
+
+	s := fmt.Sprintf("%s\n", e.Left.Pretty(level))
+	for i, ms := range e.MultiStages {
+		s += ms.Pretty(level + 1)
+		// NOTE: Needed because we format multiple stages in a pipeline with each stage on its own line.
+		// e.g:
+		// | logfmt
+		// | level = "error"
+		// But all the stages will have the same indent level. So here we don't increase the level.
+		if i < len(e.MultiStages)-1 {
+			s += "\n"
+		}
+	}
+	return s
+}
+
+// e.g: `|= "error" != "memcached" |= ip("192.168.0.1")`
+// NOTE: here `ip` is Op in this expression.
+func (e *LineFilterExpr) Pretty(level int) string {
+	if !needSplit(e) {
+		return indent(level) + e.String()
+	}
+
+	var s string
+
+	if e.Left != nil {
+		// s += indent(level)
+		s += e.Left.Pretty(level)
+		// NOTE: Similar to PipelineExpr, we also have to format every LineFilterExpr on a new line, but with the same indentation level.
+		// e.g:
+		// |= "error"
+		// != "memcached"
+		// |= ip("192.168.0.1")
+		s += "\n"
+	}
+
+	s += indent(level)
+
+	// We re-use LineFilterExpr's String() implementation to avoid duplication.
+	// We create a new LineFilterExpr without `Left`.
+	ne := newLineFilterExpr(e.Ty, e.Op, e.Match)
+	s += ne.String()
+
+	return s
+}
+
+// e.g:
+// `| logfmt`
+// `| json`
+// `| regexp`
+// `| pattern`
+// `| unpack`
+func (e *LabelParserExpr) Pretty(level int) string {
+	return commonPrefixIndent(level, e)
+}
+
+// e.g: | level!="error"
+func (e *LabelFilterExpr) Pretty(level int) string {
+	return commonPrefixIndent(level, e)
+}
+
+// e.g: | line_format "{{ .label }}"
+func (e *LineFmtExpr) Pretty(level int) string {
+	return commonPrefixIndent(level, e)
+}
+
+// e.g: | decolorize
+func (e *DecolorizeExpr) Pretty(level int) string {
+	return e.String()
+}
+
+// e.g: | label_format dst="{{ .src }}"
+func (e *LabelFmtExpr) Pretty(level int) string {
+	return commonPrefixIndent(level, e)
+}
+
+// e.g: | json label="expression", another="expression"
+func (e *JSONExpressionParser) Pretty(level int) string {
+	return commonPrefixIndent(level, e)
+}
+
+// e.g: sum_over_time({foo="bar"} | logfmt | unwrap bytes_processed [5m])
+func (e *UnwrapExpr) Pretty(level int) string {
+	s := indent(level)
+
+	if e.Operation != "" {
+		s += fmt.Sprintf("%s %s %s(%s)", OpPipe, OpUnwrap, e.Operation, e.Identifier)
+	} else {
+		s += fmt.Sprintf("%s %s %s", OpPipe, OpUnwrap, e.Identifier)
+	}
+	for _, f := range e.PostFilters {
+		s += fmt.Sprintf("\n%s%s %s", indent(level), OpPipe, f)
+	}
+	return s
+}
+
+// e.g: `{foo="bar"}|logfmt[5m]`
+// TODO(kavi): Rename `LogRange` -> `LogRangeExpr` (to be consistent with other expressions?)
+func (e *LogRange) Pretty(level int) string {
+	s := e.Left.Pretty(level)
+
+	if e.Unwrap != nil {
+		// NOTE: | unwrap should go on a new line.
+		s += "\n"
+		s += e.Unwrap.Pretty(level + 1)
+	}
+
+	// TODO: this will put [1m] on the same line, not on a new line as people are used to.
+ s = fmt.Sprintf("%s [%s]", s, model.Duration(e.Interval)) + + if e.Offset != 0 { + oe := OffsetExpr{Offset: e.Offset} + s += oe.Pretty(level) + } + + return s +} + +// e.g: count_over_time({foo="bar"}[5m] offset 3h) +// TODO(kavi): why does offset not work in log queries? e.g: `{foo="bar"} offset 1h`? is it bug? or anything else? +// NOTE: Also offset expression never to be indented. It always goes with its parent expression (usually RangeExpr). +func (e *OffsetExpr) Pretty(level int) string { + // using `model.Duration` as it can format ignoring zero units. + // e.g: time.Duration(2 * Hour) -> "2h0m0s" + // but model.Duration(2 * Hour) -> "2h" + return fmt.Sprintf(" %s %s", OpOffset, model.Duration(e.Offset)) +} + +// e.g: count_over_time({foo="bar"}[5m]) +func (e *RangeAggregationExpr) Pretty(level int) string { + s := indent(level) + if !needSplit(e) { + return s + e.String() + } + + s += e.Operation // e.g: quantile_over_time + + s += "(\n" + + // print args to the function. + if e.Params != nil { + s = fmt.Sprintf("%s%s%s,", s, indent(level+1), fmt.Sprint(*e.Params)) + s += "\n" + } + + s += e.Left.Pretty(level + 1) + + s += "\n" + indent(level) + ")" + + if e.Grouping != nil { + s += e.Grouping.Pretty(level) + } + + return s +} + +// e.g: +// sum(count_over_time({foo="bar"}[5m])) by (container) +// topk(10, count_over_time({foo="bar"}[5m])) by (container) + +// Syntax: ([parameter,] ) [without|by (