Skip to content

Commit

Permalink
Make encryption persistence timeout configurable via env var (#25636)
Browse files Browse the repository at this point in the history
* Make the encryption tracking persistence timeout configurable via env

* docs

* changelog

* Update vault/barrier_aes_gcm.go

Co-authored-by: Steven Clark <steven.clark@hashicorp.com>

* use ParseDurationSecond

---------

Co-authored-by: Steven Clark <steven.clark@hashicorp.com>
  • Loading branch information
sgmiller and stevendpclark authored Feb 27, 2024
1 parent 2cf10b3 commit da21b85
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 2 deletions.
3 changes: 3 additions & 0 deletions changelog/25636.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
core: make the best-effort timeout for encryption count tracking persistence configurable via an environment variable.
```
20 changes: 18 additions & 2 deletions vault/barrier_aes_gcm.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ import (
"fmt"
"io"
"math"
"os"
"strconv"
"strings"
"sync"
"time"

"github.com/hashicorp/go-secure-stdlib/parseutil"

"github.com/armon/go-metrics"
"github.com/hashicorp/go-secure-stdlib/strutil"
"github.com/hashicorp/vault/sdk/helper/jsonutil"
Expand All @@ -38,7 +41,8 @@ const (
autoRotateCheckInterval = 5 * time.Minute
legacyRotateReason = "legacy rotation"
// The keyring is persisted before the root key.
keyringTimeout = 1 * time.Second
defaultKeyringTimeout = 1 * time.Second
bestEffortKeyringTimeoutOverride = "VAULT_ENCRYPTION_COUNT_PERSIST_TIMEOUT"
)

// Versions of the AESGCM storage methodology
Expand Down Expand Up @@ -91,6 +95,8 @@ type AESGCMBarrier struct {
// Used only for testing
RemoteEncryptions *atomic.Int64
totalLocalEncryptions *atomic.Int64

bestEffortKeyringTimeout time.Duration
}

func (b *AESGCMBarrier) RotationConfig() (kc KeyRotationConfig, err error) {
Expand All @@ -115,6 +121,15 @@ func (b *AESGCMBarrier) SetRotationConfig(ctx context.Context, rotConfig KeyRota
// NewAESGCMBarrier is used to construct a new barrier that uses
// the provided physical backend for storage.
func NewAESGCMBarrier(physical physical.Backend) (*AESGCMBarrier, error) {
keyringTimeout := defaultKeyringTimeout
keyringTimeoutStr := os.Getenv(bestEffortKeyringTimeoutOverride)
if keyringTimeoutStr != "" {
t, err := parseutil.ParseDurationSecond(keyringTimeoutStr)
if err != nil {
return nil, fmt.Errorf("failed parsing %s environment variable: %w", bestEffortKeyringTimeoutOverride, err)
}
keyringTimeout = t
}
b := &AESGCMBarrier{
backend: physical,
sealed: true,
Expand All @@ -123,6 +138,7 @@ func NewAESGCMBarrier(physical physical.Backend) (*AESGCMBarrier, error) {
UnaccountedEncryptions: atomic.NewInt64(0),
RemoteEncryptions: atomic.NewInt64(0),
totalLocalEncryptions: atomic.NewInt64(0),
bestEffortKeyringTimeout: keyringTimeout,
}
return b, nil
}
Expand Down Expand Up @@ -256,7 +272,7 @@ func (b *AESGCMBarrier) persistKeyringInternal(ctx context.Context, keyring *Key
// We reduce the timeout on the initial 'put' but if this succeeds we will
// allow longer later on when we try to persist the root key.
var cancelKeyring func()
ctxKeyring, cancelKeyring = context.WithTimeout(ctx, keyringTimeout)
ctxKeyring, cancelKeyring = context.WithTimeout(ctx, b.bestEffortKeyringTimeout)
defer cancelKeyring()
}

Expand Down
8 changes: 8 additions & 0 deletions website/content/docs/internals/rotation.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,11 @@ Operators can estimate the number of encryptions by summing the following:
- The `vault.token.creation` metric where the `token_type` label is `batch`.
- The `merkle.flushDirty.num_pages` metric.
- The WAL index.

Vault periodically persists the number of encryptions to support rotation.
This save operation has a 1-second timeout to avoid degrading performance
when Vault is under heavy load. Because persisting the encryption count involves
the seal backend (if seal wrap is enabled), some seals (such as HSMs) may
regularly take longer than 1 second to respond. If this is the case, operators
may override the timeout by setting the environment variable
`VAULT_ENCRYPTION_COUNT_PERSIST_TIMEOUT` to a larger value, such as "5s".

0 comments on commit da21b85

Please sign in to comment.