Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions api.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,32 @@ type SafeFloat = i.SafeFloat
// SafeRune aliases rune. See the explanation for SafeString.
type SafeRune = i.SafeRune

// HashValue is a marker interface to be implemented by types whose
// values should be hashed when redacted, instead of being replaced
// with the redaction marker.
type HashValue = i.HashValue

// HashString represents a string that should be hashed when redacted.
type HashString = i.HashString

// HashInt represents an integer that should be hashed when redacted.
type HashInt = i.HashInt

// HashUint represents an unsigned integer that should be hashed when redacted.
type HashUint = i.HashUint

// HashFloat represents a floating-point value that should be hashed when redacted.
type HashFloat = i.HashFloat

// HashRune represents a rune that should be hashed when redacted.
type HashRune = i.HashRune

// HashByte represents a byte that should be hashed when redacted.
type HashByte = i.HashByte

// HashBytes represents a byte slice that should be hashed when redacted.
type HashBytes = i.HashBytes

// RedactableString is a string that contains a mix of safe and unsafe
// bits of data, but where it is known that unsafe bits are enclosed
// by redaction markers ‹ and ›, and occurrences of the markers
Expand Down Expand Up @@ -173,3 +199,11 @@ type StringBuilder = builder.StringBuilder
func RegisterSafeType(t reflect.Type) {
ifmt.RegisterSafeType(t)
}

// EnableHashing enables hash-based redaction with an optional salt.
// Hash markers (‹†value›) will be replaced with hashes instead of being fully redacted.
// When salt is nil, hash markers use plain SHA1.
// When salt is provided, hash markers use HMAC-SHA1 for better security.
func EnableHashing(salt []byte) {
m.EnableHashing(salt)
}
55 changes: 55 additions & 0 deletions interfaces/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,61 @@ type SafeValue interface {
SafeValue()
}

// HashValue is a marker interface to be implemented by types whose
// values should be hashed when redacted, instead of being replaced
// with the redaction marker.
//
// This allows correlation between log entries while maintaining privacy.
// The hash is computed during the Redact() call, not at log creation time.
//
// Types implementing this interface should alias base Go types.
// The hash is applied to the string representation of the value.
type HashValue interface {
HashValue()
}

// HashString represents a string that should be hashed when redacted.
type HashString string

// HashValue makes HashString a HashValue.
func (HashString) HashValue() {}

// HashInt represents an integer that should be hashed when redacted.
type HashInt int64

// HashValue makes HashInt a HashValue.
func (HashInt) HashValue() {}

// HashUint represents an unsigned integer that should be hashed when redacted.
type HashUint uint64

// HashValue makes HashUint a HashValue.
func (HashUint) HashValue() {}

// HashFloat represents a floating-point value that should be hashed when redacted.
type HashFloat float64

// HashValue makes HashFloat a HashValue.
func (HashFloat) HashValue() {}

// HashRune represents a rune that should be hashed when redacted.
type HashRune rune

// HashValue makes HashRune a HashValue.
func (HashRune) HashValue() {}

// HashByte represents a byte that should be hashed when redacted.
type HashByte byte

// HashValue makes HashByte a HashValue.
func (HashByte) HashValue() {}

// HashBytes represents a byte slice that should be hashed when redacted.
type HashBytes []byte

// HashValue makes HashBytes a HashValue.
func (HashBytes) HashValue() {}

// SafeMessager is an alternative to SafeFormatter used in previous
// versions of CockroachDB.
// NB: this interface is obsolete. Use SafeFormatter instead.
Expand Down
6 changes: 4 additions & 2 deletions internal/markers/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ const (
EscapeMark = '?'
EscapeMarkS = string(EscapeMark)
RedactedS = StartS + "×" + EndS
HashPrefix = '†'
HashPrefixS = string(HashPrefix)
)

// Internal variables.
Expand All @@ -35,6 +37,6 @@ var (
EndBytes = []byte(EndS)
EscapeMarkBytes = []byte(EscapeMarkS)
RedactedBytes = []byte(RedactedS)
ReStripSensitive = regexp.MustCompile(StartS + "[^" + StartS + EndS + "]*" + EndS)
ReStripMarkers = regexp.MustCompile("[" + StartS + EndS + "]")
ReStripSensitive = regexp.MustCompile(StartS + HashPrefixS + "?" + "[^" + StartS + EndS + "]*" + EndS)
ReStripMarkers = regexp.MustCompile("[" + StartS + EndS + HashPrefixS + "]")
)
124 changes: 124 additions & 0 deletions internal/markers/hash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Copyright 2025 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package markers

import (
"crypto/hmac"
"crypto/sha1"
"encoding/hex"
"sync"
)

/*
Hash function implementation notes:

We use SHA-1 because it provides a good balance of performance and output size
for the log correlation use case. SHA-1 is fast and produces compact hashes,
making it well-suited for high-throughput logging scenarios.

When a salt is provided via EnableHashing(), we use HMAC-SHA1 which provides
additional security properties and domain separation.

The hash output is truncated to 8 hex characters (32 bits) to keep log output
concise while still providing sufficient collision resistance for typical logging
workloads.
*/

// defaultHashLength is the number of hex characters to use from the SHA-1 hash.
// 8 hex chars = 32 bits = ~4.3 billion unique values.
// This provides a good balance between collision resistance and output brevity.
// For typical logging scenarios with fewer unique sensitive values per analysis window,
// this collision risk should be acceptable. If not, we can make this configurable in the future.
const defaultHashLength = 8

var hashConfig = struct {
sync.RWMutex
enabled bool
salt []byte
}{
enabled: false,
salt: nil,
}

// EnableHashing enables hash-based redaction with an optional salt.
// When salt is nil, hash markers use plain SHA1.
// When salt is provided, hash markers use HMAC-SHA1 for better security.
func EnableHashing(salt []byte) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any chance we can make this configurable concurrently? would be nice to put behind a cluster setting. but also, log redaction is something that's configured via logging config which is not hot reloadable so maybe it's fine.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cost of making it concurrency safe seemed reasonable. So, went ahead and made the change

image

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cost of this lock/unlock isn't reflected in the benchmark because it will only manifest when there are many concurrent redactions happening. On its own it's not very costly.

In this scenario when there's just a single boolean that needs to be accessed concurrently, a lock is too costly and complex. Can you just use a single atomic boolean here instead? Then there's no locking, no defer.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The salt variable can be a string stored in an atomic.Value. I don't see a need to keep the values of enabled and salt locked together.

hashConfig.Lock()
defer hashConfig.Unlock()
hashConfig.enabled = true
hashConfig.salt = salt
}

// IsHashingEnabled returns true if hash-based redaction is enabled.
func IsHashingEnabled() bool {
hashConfig.RLock()
defer hashConfig.RUnlock()
return hashConfig.enabled
}

// hashString computes a truncated hash of the input string.
// Uses HMAC-SHA1 if salt is set, otherwise plain SHA1.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think HMAC-SHA1 should be good enough to start of with. Has a good balance of speed and "secure enough"

// Must only be called when hashing is enabled (IsHashingEnabled() == true).
func hashString(value string) string {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

would be nice to know via benchmarks what the relative cost of hashing is vs just redacting.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a screenshot in the PR description

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. do you know what the 4 additional allocations are here?

hashConfig.RLock()
salt := hashConfig.salt
hashConfig.RUnlock()

var h []byte
if len(salt) > 0 {
mac := hmac.New(sha1.New, salt)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like an opportunity for pooling or pre-allocating. If you can reuse hash instances from the pool and call Reset on them between calls if the salt hasn't changed.

mac.Write([]byte(value))
h = mac.Sum(nil)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you can allocate the byte array of the correct size here and below and pass in to the Sum, you can avoid an allocation here I think. If you allocate in this function and pass it in, the slice will stay on the stack and be collected immediately, whereas if the Sum function needs to allocate and return to you, that variable ends up on the heap.

} else {
hasher := sha1.New()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here as above. sha1 can be pooled and you can reduce allocations.

hasher.Write([]byte(value))
h = hasher.Sum(nil)
}

fullHash := hex.EncodeToString(h)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think here you can pass a pre-allocated array as well as your destination instead of making hex allocate for you.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should be able to use escape analysis to see where the heap allocs are happening.

if len(fullHash) > defaultHashLength {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this conditional necessary? if we can guarantee that defaultHashLength isn't greater than the hash length, we can just return fullHash[:defaultHashLength] no?

return fullHash[:defaultHashLength]
}
return fullHash
}

// hashBytes computes a truncated hash of the input byte slice.
// Uses HMAC-SHA1 if salt is set, otherwise plain SHA1.
// Must only be called when hashing is enabled (IsHashingEnabled() == true).
func hashBytes(value []byte) []byte {
hashConfig.RLock()
salt := hashConfig.salt
hashConfig.RUnlock()

var h []byte
if len(salt) > 0 {
mac := hmac.New(sha1.New, salt)
mac.Write(value)
h = mac.Sum(nil)
} else {
hasher := sha1.New()
hasher.Write(value)
h = hasher.Sum(nil)
}

fullHash := make([]byte, sha1.Size*2)
_ = hex.Encode(fullHash, h)

if len(fullHash) > defaultHashLength {
return fullHash[:defaultHashLength]
}
return fullHash
}
Loading