cortexproject · pracucci · Dec 11, 2020 · Dec 9, 2020 · Dec 10, 2020 · Dec 10, 2020
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,13 @@
 
 ## master / unreleased
 
+* [CHANGE] Blocks storage: compactor is now required when running a Cortex cluster with the blocks storage, because it also keeps the bucket index updated. #3583
+* [CHANGE] Blocks storage: block deletion marks are now also stored in a per-tenant global markers/ location, in addition to the block location. The compactor, at startup, will copy deletion marks from the block location to the global location. This migration is required only once, so you can safely disable it via `-compactor.block-deletion-marks-migration-enabled=false` after the new compactor has successfully started at least once in your cluster. #3583
+* [ENHANCEMENT] Blocks storage: introduced a per-tenant bucket index, periodically updated by the compactor, used to avoid full bucket scanning done by queriers and store-gateways. The bucket index is updated by the compactor during blocks cleanup, on every `-compactor.cleanup-interval`. #3553 #3555 #3561 #3583
+* [ENHANCEMENT] Compactor: exported the following metrics. #3583
+  * `cortex_bucket_blocks_count`: Total number of blocks per tenant in the bucket. Includes blocks marked for deletion.
+  * `cortex_bucket_blocks_marked_for_deletion_count`: Total number of blocks per tenant marked for deletion in the bucket.
+  * `cortex_bucket_index_last_successful_update_timestamp_seconds`: Timestamp of the last successful update of a tenant's bucket index.
 * [ENHANCEMENT] Ruler: Add `cortex_prometheus_last_evaluation_samples` to expose the number of samples generated by a rule group per tenant. #3582
 * [ENHANCEMENT] Memberlist: add status page (/memberlist) with available details about memberlist-based KV store and memberlist cluster. It's also possible to view KV values in Go struct or JSON format, or download for inspection. #3575
 * [ENHANCEMENT] Memberlist: client can now keep a size-bounded buffer with sent and received messages and display them in the admin UI (/memberlist) for troubleshooting. #3581

diff --git a/development/tsdb-blocks-storage-s3/docker-compose.yml b/development/tsdb-blocks-storage-s3/docker-compose.yml
@@ -277,7 +277,7 @@ services:
       context:    .
       dockerfile: dev.dockerfile
     image: cortex
-    command: ["sh", "-c", "sleep 3 && exec ./dlv exec ./cortex --listen=:18011 --headless=true --api-version=2 --accept-multiclient --continue -- -config.file=./config/cortex.yaml -target=query-scheduler -server.http-listen-port=8011 -server.grpc-listen-port=9011 -store.max-query-length=8760h -log.level=debug"]
+    command: ["sh", "-c", "sleep 3 && exec ./dlv exec ./cortex --listen=:18011 --headless=true --api-version=2 --accept-multiclient --continue -- -config.file=./config/cortex.yaml -target=query-scheduler -server.http-listen-port=8011 -server.grpc-listen-port=9011 -store.max-query-length=8760h"]
     depends_on:
       - consul
       - minio
@@ -299,7 +299,7 @@ services:
       context:    .
       dockerfile: dev.dockerfile
     image: cortex
-    command: ["sh", "-c", "sleep 3 && exec ./dlv exec ./cortex --listen=:18012 --headless=true --api-version=2 --accept-multiclient --continue -- -config.file=./config/cortex.yaml -target=query-frontend -server.http-listen-port=8012 -server.grpc-listen-port=9012 -store.max-query-length=8760h -frontend.scheduler-address=query-scheduler:9011 -log.level=debug"]
+    command: ["sh", "-c", "sleep 3 && exec ./dlv exec ./cortex --listen=:18012 --headless=true --api-version=2 --accept-multiclient --continue -- -config.file=./config/cortex.yaml -target=query-frontend -server.http-listen-port=8012 -server.grpc-listen-port=9012 -store.max-query-length=8760h -frontend.scheduler-address=query-scheduler:9011"]
     depends_on:
       - consul
       - minio
@@ -321,7 +321,7 @@ services:
       context:    .
       dockerfile: dev.dockerfile
     image: cortex
-    command: ["sh", "-c", "sleep 3 && exec ./dlv exec ./cortex --listen=:18013 --headless=true --api-version=2 --accept-multiclient --continue -- -config.file=./config/cortex.yaml -target=querier -server.http-listen-port=8013 -server.grpc-listen-port=9013 -querier.scheduler-address=query-scheduler:9011 -querier.frontend-address= -log.level=debug"]
+    command: ["sh", "-c", "sleep 3 && exec ./dlv exec ./cortex --listen=:18013 --headless=true --api-version=2 --accept-multiclient --continue -- -config.file=./config/cortex.yaml -target=querier -server.http-listen-port=8013 -server.grpc-listen-port=9013 -querier.scheduler-address=query-scheduler:9011 -querier.frontend-address="]
     depends_on:
       - consul
       - minio

diff --git a/docs/blocks-storage/_index.md b/docs/blocks-storage/_index.md
@@ -29,7 +29,7 @@ When running the Cortex blocks storage, the Cortex architecture doesn't signific
 
 The **[store-gateway](./store-gateway.md)** is responsible to query blocks and is used by the [querier](./querier.md) at query time. The store-gateway is required when running the blocks storage.
 
-The **[compactor](./compactor.md)** is responsible to merge and deduplicate smaller blocks into larger ones, in order to reduce the number of blocks stored in the long-term storage for a given tenant and query them more efficiently. The compactor is optional but highly recommended.
+The **[compactor](./compactor.md)** is responsible to merge and deduplicate smaller blocks into larger ones, in order to reduce the number of blocks stored in the long-term storage for a given tenant and query them more efficiently. It also keeps the bucket index updated and, for this reason, it's a required component.
 
 Finally, the [**table-manager**](../chunks-storage/table-manager.md) and the [**schema config**](../chunks-storage/schema-config.md) are **not used** by the blocks storage.
 

diff --git a/docs/blocks-storage/compactor.md b/docs/blocks-storage/compactor.md
@@ -7,13 +7,16 @@ slug: compactor
 
 <!-- DO NOT EDIT THIS FILE - This file has been automatically generated from its .template -->
 
-The **compactor** is an optional service which compacts multiple blocks of a given tenant into a single optimized larger block. Running compactor is **highly recommended** to reduce storage costs (deduplication, index size reduction), and increase query speed (querying fewer blocks is faster).
+The **compactor** is a service which is responsible to:
+
+- Compact multiple blocks of a given tenant into a single optimized larger block. This helps to reduce storage costs (deduplication, index size reduction), and increase query speed (querying fewer blocks is faster).
+- Keep the per-tenant bucket index updated. The bucket index is used by [queriers](./querier.md) and [store-gateways](./store-gateway.md) to discover new blocks in the storage.
 
 The compactor is **stateless**.
 
-## How it works
+## How compaction works
 
-The compactor has two main benefits:
+The blocks compaction has two main benefits:
 
 1. Vertically compact blocks uploaded by all ingesters for the same time range
 2. Horizontally compact blocks with small time ranges into a single larger block
@@ -122,21 +125,31 @@ compactor:
   # CLI flag: -compactor.compaction-concurrency
   [compaction_concurrency: <int> | default = 1]
 
-  # Max number of tenants for which blocks should be cleaned up concurrently
-  # (deletion of blocks previously marked for deletion).
+  # How frequently compactor should run blocks cleanup and maintenance, as well
+  # as update the bucket index.
+  # CLI flag: -compactor.cleanup-interval
+  [cleanup_interval: <duration> | default = 15m]
+
+  # Max number of tenants for which blocks cleanup and maintenance should run
+  # concurrently.
   # CLI flag: -compactor.cleanup-concurrency
   [cleanup_concurrency: <int> | default = 20]
 
   # Time before a block marked for deletion is deleted from bucket. If not 0,
-  # blocks will be marked for deletion and compactor component will delete
-  # blocks marked for deletion from the bucket. If delete-delay is 0, blocks
-  # will be deleted straight away. Note that deleting blocks immediately can
-  # cause query failures, if store gateway still has the block loaded, or
-  # compactor is ignoring the deletion because it's compacting the block at the
-  # same time.
+  # blocks will be marked for deletion and compactor component will permanently
+  # delete blocks marked for deletion from the bucket. If 0, blocks will be
+  # deleted straight away. Note that deleting blocks immediately can cause query
+  # failures.
   # CLI flag: -compactor.deletion-delay
   [deletion_delay: <duration> | default = 12h]
 
+  # When enabled, at compactor startup the bucket will be scanned and all found
+  # deletion marks inside the block location will be copied to the markers
+  # global location too. This option can (and should) be safely disabled as soon
+  # as the compactor has successfully run at least once.
+  # CLI flag: -compactor.block-deletion-marks-migration-enabled
+  [block_deletion_marks_migration_enabled: <boolean> | default = true]
+
   # Comma separated list of tenants that can be compacted. If specified, only
   # these tenants will be compacted by compactor, otherwise all tenants can be
   # compacted. Subject to sharding.

diff --git a/docs/blocks-storage/compactor.template b/docs/blocks-storage/compactor.template
@@ -7,13 +7,16 @@ slug: compactor
 
 {{ .GeneratedFileWarning }}
 
-The **compactor** is an optional service which compacts multiple blocks of a given tenant into a single optimized larger block. Running compactor is **highly recommended** to reduce storage costs (deduplication, index size reduction), and increase query speed (querying fewer blocks is faster).
+The **compactor** is a service which is responsible to:
+
+- Compact multiple blocks of a given tenant into a single optimized larger block. This helps to reduce storage costs (deduplication, index size reduction), and increase query speed (querying fewer blocks is faster).
+- Keep the per-tenant bucket index updated. The bucket index is used by [queriers](./querier.md) and [store-gateways](./store-gateway.md) to discover new blocks in the storage.
 
 The compactor is **stateless**.
 
-## How it works
+## How compaction works
 
-The compactor has two main benefits:
+The blocks compaction has two main benefits:
 
 1. Vertically compact blocks uploaded by all ingesters for the same time range
 2. Horizontally compact blocks with small time ranges into a single larger block

diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md
@@ -3995,20 +3995,31 @@ The `compactor_config` configures the compactor for the blocks storage.
 # CLI flag: -compactor.compaction-concurrency
 [compaction_concurrency: <int> | default = 1]
 
-# Max number of tenants for which blocks should be cleaned up concurrently
-# (deletion of blocks previously marked for deletion).
+# How frequently compactor should run blocks cleanup and maintenance, as well as
+# update the bucket index.
+# CLI flag: -compactor.cleanup-interval
+[cleanup_interval: <duration> | default = 15m]
+
+# Max number of tenants for which blocks cleanup and maintenance should run
+# concurrently.
 # CLI flag: -compactor.cleanup-concurrency
 [cleanup_concurrency: <int> | default = 20]
 
 # Time before a block marked for deletion is deleted from bucket. If not 0,
-# blocks will be marked for deletion and compactor component will delete blocks
-# marked for deletion from the bucket. If delete-delay is 0, blocks will be
+# blocks will be marked for deletion and compactor component will permanently
+# delete blocks marked for deletion from the bucket. If 0, blocks will be
 # deleted straight away. Note that deleting blocks immediately can cause query
-# failures, if store gateway still has the block loaded, or compactor is
-# ignoring the deletion because it's compacting the block at the same time.
+# failures.
 # CLI flag: -compactor.deletion-delay
 [deletion_delay: <duration> | default = 12h]
 
+# When enabled, at compactor startup the bucket will be scanned and all found
+# deletion marks inside the block location will be copied to the markers global
+# location too. This option can (and should) be safely disabled as soon as the
+# compactor has successfully run at least once.
+# CLI flag: -compactor.block-deletion-marks-migration-enabled
+[block_deletion_marks_migration_enabled: <boolean> | default = true]
+
 # Comma separated list of tenants that can be compacted. If specified, only
 # these tenants will be compacted by compactor, otherwise all tenants can be
 # compacted. Subject to sharding.

diff --git a/docs/configuration/v1-guarantees.md b/docs/configuration/v1-guarantees.md
@@ -63,3 +63,5 @@ Currently experimental features are:
 - Ingester: close idle TSDB and remove them from local disk (`-blocks-storage.tsdb.close-idle-tsdb-timeout`)
 - Tenant Deletion in Purger, for blocks storage.
 - Query-frontend: query stats tracking (`-frontend.query-stats-enabled`)
+- Blocks storage bucket index
+  - The block deletion marks migration support in the compactor (`-compactor.block-deletion-marks-migration-enabled`) is temporary and will be removed in future versions