Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Fixed upstream TLS client session caching so sessions are scoped by the effective SNI used
for the connection. This prevents a session learned for one upstream SNI from being offered
on a connection using a different SNI. The existing ``max_session_keys`` setting now limits
cached sessions per SNI bucket, and Envoy bounds the number of SNI buckets internally. This
behavior can be temporarily reverted by setting runtime guard
``envoy.reloadable_features.scope_upstream_tls_session_cache_by_sni`` to ``false``.
1 change: 1 addition & 0 deletions source/common/runtime/runtime_features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ RUNTIME_GUARD(envoy_reloadable_features_report_load_when_rq_active_is_non_zero);
RUNTIME_GUARD(envoy_reloadable_features_reset_ignore_upstream_reason);
RUNTIME_GUARD(envoy_reloadable_features_reset_with_error);
RUNTIME_GUARD(envoy_reloadable_features_safe_http2_options);
RUNTIME_GUARD(envoy_reloadable_features_scope_upstream_tls_session_cache_by_sni);
RUNTIME_GUARD(envoy_reloadable_features_skip_dns_lookup_for_proxied_requests);
RUNTIME_GUARD(envoy_reloadable_features_skip_pending_overflow_count_on_active_rq);
RUNTIME_GUARD(envoy_reloadable_features_ssl_socket_report_connection_reset);
Expand Down
2 changes: 2 additions & 0 deletions source/common/tls/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,9 @@ envoy_cc_library(
"//source/common/stats:utility_lib",
"//source/common/tls/cert_validator:cert_validator_lib",
"//source/common/tls/private_key:private_key_manager_lib",
"@abseil-cpp//absl/container:flat_hash_map",
"@abseil-cpp//absl/container:node_hash_set",
"@abseil-cpp//absl/strings:string_view",
"@abseil-cpp//absl/synchronization",
"@envoy_api//envoy/admin/v3:pkg_cc_proto",
"@envoy_api//envoy/type/matcher/v3:pkg_cc_proto",
Expand Down
198 changes: 158 additions & 40 deletions source/common/tls/client_context_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ ClientContextImpl::ClientContextImpl(
static_cast<ContextImpl*>(SSL_CTX_get_app_data(SSL_get_SSL_CTX(ssl)));
ClientContextImpl* client_context_impl = dynamic_cast<ClientContextImpl*>(context_impl);
RELEASE_ASSERT(client_context_impl != nullptr, ""); // for Coverity
return client_context_impl->newSessionKey(session);
return client_context_impl->newSessionKey(ssl, session);
});
}

Expand Down Expand Up @@ -127,14 +127,7 @@ ClientContextImpl::newSsl(const Network::TransportSocketOptionsConstSharedPtr& o

bssl::UniquePtr<SSL> ssl_con = std::move(ssl_con_or_status.value());

std::string server_name_indication;
if (options && options->serverNameOverride().has_value()) {
server_name_indication = options->serverNameOverride().value();
} else if (auto_host_sni_ && host != nullptr && !host->hostname().empty()) {
server_name_indication = host->hostname();
} else {
server_name_indication = server_name_indication_;
}
const std::string server_name_indication = effectiveSni(options, host);

if (!server_name_indication.empty()) {
const int rc = SSL_set_tlsext_host_name(ssl_con.get(), server_name_indication.c_str());
Expand All @@ -143,6 +136,18 @@ ClientContextImpl::newSsl(const Network::TransportSocketOptionsConstSharedPtr& o
absl::StrCat("Failed to create upstream TLS due to failure setting SNI: ",
Utility::getLastCryptoError().value_or("unknown")));
}

// BoringSSL does not expose the callback's original SNI key when it later
// returns a new session. Store Envoy's effective SNI on this SSL object so
// the new-session callback can cache the ticket under the same name that
// was sent in the ClientHello.
auto effective_sni = std::make_unique<std::string>(server_name_indication);
if (SSL_set_ex_data(ssl_con.get(), sslEffectiveSniIndex(), effective_sni.get()) != 1) {
return absl::InvalidArgumentError(
absl::StrCat("Failed to create upstream TLS due to failure storing SNI: ",
Utility::getLastCryptoError().value_or("unknown")));
}
effective_sni.release();
}

if (options && !options->verifySubjectAltNameListOverride().empty()) {
Expand Down Expand Up @@ -176,50 +181,163 @@ ClientContextImpl::newSsl(const Network::TransportSocketOptionsConstSharedPtr& o
}

if (max_session_keys_ > 0) {
if (session_keys_single_use_) {
// Stored single-use session keys, use write/write locks.
absl::WriterMutexLock l(session_keys_mu_);
if (!session_keys_.empty()) {
// Use the most recently stored session key, since it has the highest
// probability of still being recognized/accepted by the server.
SSL_SESSION* session = session_keys_.front().get();
SSL_set_session(ssl_con.get(), session);
// Remove single-use session key (TLS 1.3) after first use.
if (SSL_SESSION_should_be_single_use(session)) {
session_keys_.pop_front();
}
}
if (scopeUpstreamTlsSessionCacheBySni()) {
setSessionForSni(ssl_con.get(), server_name_indication);
} else {
// Never stored single-use session keys, use read/write locks.
absl::ReaderMutexLock l(session_keys_mu_);
if (!session_keys_.empty()) {
// Use the most recently stored session key, since it has the highest
// probability of still being recognized/accepted by the server.
SSL_SESSION* session = session_keys_.front().get();
SSL_set_session(ssl_con.get(), session);
}
setSessionFromContextCache(ssl_con.get());
}
}

return ssl_con;
}

int ClientContextImpl::newSessionKey(SSL_SESSION* session) {
// In case we ever store single-use session key (TLS 1.3),
// we need to switch to using write/write locks.
int ClientContextImpl::sslEffectiveSniIndex() {
CONSTRUCT_ON_FIRST_USE(int, []() -> int {
// BoringSSL ex-data is per-SSL application storage. Envoy installs the
// effective SNI string in newSsl() so the later new-session callback can
// recover the same cache key from the SSL*. The ex-data free callback owns
// and deletes that string when BoringSSL frees the SSL object.
// See BoringSSL API-CONVENTIONS.md, "ex_data", for this callback-state
// pattern:
// https://boringssl.googlesource.com/boringssl/+/HEAD/API-CONVENTIONS.md
int ssl_effective_sni_index = SSL_get_ex_new_index(
0, nullptr, nullptr, nullptr, [](void*, void* ptr, CRYPTO_EX_DATA*, int, long, void*) {
delete static_cast<std::string*>(ptr);
});
RELEASE_ASSERT(ssl_effective_sni_index >= 0, "");
return ssl_effective_sni_index;
}());
}

std::string
ClientContextImpl::effectiveSni(const Network::TransportSocketOptionsConstSharedPtr& options,
Upstream::HostDescriptionConstSharedPtr host) const {
// Keep the cache key in lock-step with the SNI selection used for the actual
// ClientHello. Reusing sessions across these names can resume the wrong TLS
// identity when multiple upstream logical hosts share a ClientContextImpl.
if (options && options->serverNameOverride().has_value()) {
return options->serverNameOverride().value();
}
if (auto_host_sni_ && host != nullptr && !host->hostname().empty()) {
return host->hostname();
}
return server_name_indication_;
}

void ClientContextImpl::touchSessionBucket(
absl::flat_hash_map<std::string, SniSessionBucket>::iterator it) {
session_sni_lru_.splice(session_sni_lru_.begin(), session_sni_lru_, it->second.lru_it);
it->second.lru_it = session_sni_lru_.begin();
}

void ClientContextImpl::setSessionForSni(SSL* ssl, absl::string_view sni) {
// No SNI means there is no safe logical host boundary for session reuse.
if (sni.empty()) {
return;
}

absl::WriterMutexLock lock(session_keys_mu_);
auto it = session_keys_by_sni_.find(std::string(sni));
if (it == session_keys_by_sni_.end() || it->second.sessions.empty()) {
return;
}

touchSessionBucket(it);

// Use the newest SSL_SESSION for this SNI. In TLS 1.3, BoringSSL represents
// resumption tickets as SSL_SESSION objects, and those tickets can be
// single-use, so remove them immediately after installing them on the new SSL
// object.
SSL_SESSION* session = it->second.sessions.front().get();
SSL_set_session(ssl, session);

if (SSL_SESSION_should_be_single_use(session)) {
it->second.sessions.pop_front();
if (it->second.sessions.empty()) {
session_sni_lru_.erase(it->second.lru_it);
session_keys_by_sni_.erase(it);
}
}
}

void ClientContextImpl::setSessionFromContextCache(SSL* ssl) {
absl::WriterMutexLock lock(session_keys_mu_);
if (session_keys_.empty()) {
return;
}

// Runtime-guarded rollback path for the previous context-wide cache
// behavior. This deliberately ignores SNI and should only be used while the
// reloadable feature remains available.
SSL_SESSION* session = session_keys_.front().get();
SSL_set_session(ssl, session);

if (SSL_SESSION_should_be_single_use(session)) {
session_keys_single_use_ = true;
session_keys_.pop_front();
}
}

int ClientContextImpl::newSessionKey(SSL* ssl, SSL_SESSION* session) {
// BoringSSL transfers ownership of |session| to Envoy when this callback
// returns 1. If Envoy cannot cache it, free it here and still report success.
if (max_session_keys_ == 0) {
SSL_SESSION_free(session);
return 1;
}

if (!scopeUpstreamTlsSessionCacheBySni()) {
absl::WriterMutexLock lock(session_keys_mu_);
while (session_keys_.size() >= max_session_keys_) {
session_keys_.pop_back();
}
session_keys_.push_front(bssl::UniquePtr<SSL_SESSION>(session));
return 1; // Tell BoringSSL that we took ownership of the session.
}
absl::WriterMutexLock l(session_keys_mu_);
// Evict oldest entries.
while (session_keys_.size() >= max_session_keys_) {
session_keys_.pop_back();

const auto* effective_sni =
static_cast<const std::string*>(SSL_get_ex_data(ssl, sslEffectiveSniIndex()));
if (effective_sni == nullptr || effective_sni->empty()) {
SSL_SESSION_free(session);
return 1;
}

absl::WriterMutexLock lock(session_keys_mu_);
const std::string& sni = *effective_sni;
auto [it, inserted] = session_keys_by_sni_.try_emplace(sni);
if (inserted) {
// New SNI bucket: place it at the front of the global SNI LRU.
session_sni_lru_.push_front(sni);
it->second.lru_it = session_sni_lru_.begin();
} else {
touchSessionBucket(it);
}
// Add new session key at the front of the queue, so that it's used first.
session_keys_.push_front(bssl::UniquePtr<SSL_SESSION>(session));

auto& sessions = it->second.sessions;
// max_session_keys_ is interpreted per effective SNI bucket. The separate
// MaxSniSessionCacheEntries cap keeps the total number of buckets bounded.
// The newest sessions are stored at the front and oldest entries are
// discarded first.
while (sessions.size() >= max_session_keys_) {
sessions.pop_back();
}
sessions.push_front(bssl::UniquePtr<SSL_SESSION>(session));

// Bound the number of distinct SNI buckets so a long-lived context cannot
// grow without limit when upstream hostnames are highly variable.
while (session_keys_by_sni_.size() > MaxSniSessionCacheEntries) {
const std::string evict = session_sni_lru_.back();
session_sni_lru_.pop_back();
session_keys_by_sni_.erase(evict);
}

return 1; // Tell BoringSSL that we took ownership of the session.
}

bool ClientContextImpl::scopeUpstreamTlsSessionCacheBySni() const {
return Runtime::runtimeFeatureEnabled(
"envoy.reloadable_features.scope_upstream_tls_session_cache_by_sni");
}

// This callback should return 1 on success, 0 on internal error, and negative number
// on failure or pause a handshake.
int ClientContextImpl::selectTlsContext(SSL* ssl) {
Expand Down
36 changes: 34 additions & 2 deletions source/common/tls/client_context_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <array>
#include <deque>
#include <functional>
#include <list>
#include <memory>
#include <string>
#include <vector>
Expand All @@ -24,6 +25,8 @@
#include "source/common/tls/context_manager_impl.h"
#include "source/common/tls/stats.h"

#include "absl/container/flat_hash_map.h"
#include "absl/strings/string_view.h"
#include "absl/synchronization/mutex.h"
#include "openssl/ssl.h"
#include "openssl/x509v3.h"
Expand Down Expand Up @@ -62,7 +65,34 @@ class ClientContextImpl : public ContextImpl,
absl::Status& creation_status);

private:
int newSessionKey(SSL_SESSION* session);
friend class ClientContextImplPeer;

struct SniSessionBucket {
// Sessions are newest-first so the next connection uses the ticket most
// likely to still be accepted by the upstream.
std::deque<bssl::UniquePtr<SSL_SESSION>> sessions;
// Iterator into session_sni_lru_ for O(1) promotion and eviction.
std::list<std::string>::iterator lru_it;
};

// max_session_keys_ limits sessions within each effective SNI bucket. This
// fixed implementation cap bounds the number of distinct SNI buckets. If
// operators need tuning beyond this conservative bound, a config field can
// expose the bucket limit independently from max_session_keys_.
// TODO(dio): Consider exposing this bucket limit as config if operators need
// to tune memory policy independently from per-SNI session count.
static constexpr size_t MaxSniSessionCacheEntries = 128;

static int sslEffectiveSniIndex();

int newSessionKey(SSL* ssl, SSL_SESSION* session);
std::string effectiveSni(const Network::TransportSocketOptionsConstSharedPtr& options,
Upstream::HostDescriptionConstSharedPtr host) const;
void setSessionForSni(SSL* ssl, absl::string_view sni);
void setSessionFromContextCache(SSL* ssl);
void touchSessionBucket(absl::flat_hash_map<std::string, SniSessionBucket>::iterator it)
ABSL_EXCLUSIVE_LOCKS_REQUIRED(session_keys_mu_);
bool scopeUpstreamTlsSessionCacheBySni() const;

const std::string server_name_indication_;
const bool auto_host_sni_;
Expand All @@ -71,7 +101,9 @@ class ClientContextImpl : public ContextImpl,
const size_t max_session_keys_;
absl::Mutex session_keys_mu_;
std::deque<bssl::UniquePtr<SSL_SESSION>> session_keys_ ABSL_GUARDED_BY(session_keys_mu_);
bool session_keys_single_use_{false};
std::list<std::string> session_sni_lru_ ABSL_GUARDED_BY(session_keys_mu_);
absl::flat_hash_map<std::string, SniSessionBucket>
session_keys_by_sni_ ABSL_GUARDED_BY(session_keys_mu_);
Ssl::UpstreamTlsCertificateSelectorPtr tls_certificate_selector_;
};

Expand Down
2 changes: 2 additions & 0 deletions test/common/tls/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,15 @@ envoy_cc_test(
"//test/mocks/runtime:runtime_mocks",
"//test/mocks/server:server_mocks",
"//test/mocks/ssl:ssl_mocks",
"//test/mocks/upstream:host_mocks",
"//test/test_common:environment_lib",
"//test/test_common:logging_lib",
"//test/test_common:network_utility_lib",
"//test/test_common:registry_lib",
"//test/test_common:simulated_time_system_lib",
"//test/test_common:test_runtime_lib",
"//test/test_common:utility_lib",
"@abseil-cpp//absl/strings",
"@envoy_api//envoy/config/listener/v3:pkg_cc_proto",
"@envoy_api//envoy/extensions/transport_sockets/tls/v3:pkg_cc_proto",
],
Expand Down
Loading
Loading