Skip to content

Commit f4fb622

Browse files
committed
network: implement early close detection for OS X
Implements early close detection on OS X by inspecting the TCP state using getsockopt. *Risk Level*: low (conditional compilation) *Testing*: integration tests pass (tested against envoyproxy#4232) *Docs Changes*: n/a *Release Notes*: n/a *Fixes*: envoyproxy#4294 Signed-off-by: Stephan Zuercher <stephan@turbinelabs.io>
1 parent 0057e22 commit f4fb622

File tree

11 files changed

+154
-14
lines changed

11 files changed

+154
-14
lines changed

include/envoy/api/os_sys_calls.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ class OsSysCalls {
119119
virtual SysCallIntResult getsockopt(int sockfd, int level, int optname, void* optval,
120120
socklen_t* optlen) PURE;
121121

122+
/**
123+
* @see man 2 getsockname
124+
*/
125+
virtual SysCallIntResult getsockname(int sockfd, sockaddr* addr, socklen_t* addrlen) PURE;
126+
122127
/**
123128
* @see man 2 socket
124129
*/

source/common/api/os_sys_calls_impl.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,11 @@ SysCallIntResult OsSysCallsImpl::getsockopt(int sockfd, int level, int optname,
8686
return {rc, errno};
8787
}
8888

89+
SysCallIntResult OsSysCallsImpl::getsockname(int sockfd, sockaddr* addr, socklen_t* addrlen) {
90+
const int rc = ::getsockname(sockfd, addr, addrlen);
91+
return {rc, errno};
92+
}
93+
8994
SysCallIntResult OsSysCallsImpl::socket(int domain, int type, int protocol) {
9095
const int rc = ::socket(domain, type, protocol);
9196
return {rc, errno};

source/common/api/os_sys_calls_impl.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class OsSysCallsImpl : public OsSysCalls {
2828
socklen_t optlen) override;
2929
SysCallIntResult getsockopt(int sockfd, int level, int optname, void* optval,
3030
socklen_t* optlen) override;
31+
SysCallIntResult getsockname(int sockfd, sockaddr* addr, socklen_t* addrlen) override;
3132
SysCallIntResult socket(int domain, int type, int protocol) override;
3233
};
3334

source/common/network/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ envoy_cc_library(
5050
"//include/envoy/event:timer_interface",
5151
"//include/envoy/network:connection_interface",
5252
"//include/envoy/network:filter_interface",
53+
"//source/common/api:os_sys_calls_lib",
5354
"//source/common/buffer:buffer_lib",
5455
"//source/common/buffer:watermark_buffer_lib",
5556
"//source/common/common:assert_lib",

source/common/network/connection_impl.cc

Lines changed: 90 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#include "common/network/connection_impl.h"
22

33
#include <netinet/tcp.h>
4+
5+
#ifdef __APPLE__
6+
#include <netinet/tcp_fsm.h>
7+
#endif
48
#include <sys/socket.h>
59
#include <sys/types.h>
610
#include <unistd.h>
@@ -12,6 +16,7 @@
1216
#include "envoy/event/timer.h"
1317
#include "envoy/network/filter.h"
1418

19+
#include "common/api/os_sys_calls_impl.h"
1520
#include "common/common/assert.h"
1621
#include "common/common/empty_string.h"
1722
#include "common/common/enum_to_int.h"
@@ -65,6 +70,13 @@ ConnectionImpl::ConnectionImpl(Event::Dispatcher& dispatcher, ConnectionSocketPt
6570
Event::FileReadyType::Read | Event::FileReadyType::Write);
6671

6772
transport_socket_->setTransportSocketCallbacks(*this);
73+
74+
sockaddr addr;
75+
socklen_t len = sizeof(addr);
76+
auto& os_syscalls = Api::OsSysCallsSingleton::get();
77+
const Api::SysCallIntResult result = os_syscalls.getsockname(fd(), &addr, &len);
78+
RELEASE_ASSERT(result.rc_ == 0, "");
79+
is_uds_ = addr.sa_family == AF_UNIX;
6880
}
6981

7082
ConnectionImpl::~ConnectionImpl() {
@@ -159,27 +171,24 @@ void ConnectionImpl::noDelay(bool enable) {
159171
}
160172

161173
// Don't set NODELAY for unix domain sockets
162-
sockaddr addr;
163-
socklen_t len = sizeof(addr);
164-
int rc = getsockname(fd(), &addr, &len);
165-
RELEASE_ASSERT(rc == 0, "");
166-
167-
if (addr.sa_family == AF_UNIX) {
174+
if (is_uds_) {
168175
return;
169176
}
170177

171178
// Set NODELAY
172179
int new_value = enable;
173-
rc = setsockopt(fd(), IPPROTO_TCP, TCP_NODELAY, &new_value, sizeof(new_value));
180+
auto& os_syscalls = Api::OsSysCallsSingleton::get();
181+
const Api::SysCallIntResult result =
182+
os_syscalls.setsockopt(fd(), IPPROTO_TCP, TCP_NODELAY, &new_value, sizeof(new_value));
174183
#ifdef __APPLE__
175-
if (-1 == rc && errno == EINVAL) {
184+
if (-1 == result.rc_ && result.errno_ == EINVAL) {
176185
// Sometimes occurs when the connection is not yet fully formed. Empirically, TCP_NODELAY is
177186
// enabled despite this result.
178187
return;
179188
}
180189
#endif
181190

182-
RELEASE_ASSERT(0 == rc, "");
191+
RELEASE_ASSERT(0 == result.rc_, "");
183192
}
184193

185194
uint64_t ConnectionImpl::id() const { return id_; }
@@ -249,7 +258,14 @@ void ConnectionImpl::readDisable(bool disable) {
249258
// If half-close semantics are enabled, we never want early close notifications; we
250259
// always want to read all avaiable data, even if the other side has closed.
251260
if (detect_early_close_ && !enable_half_close_) {
261+
#ifdef __APPLE__
262+
// libevent only supports detecting early close with epoll, so we leave read events enabled
263+
// and check the connection state on read, tracking real read events in
264+
// disabled_read_pending_.
265+
file_event_->setEnabled(Event::FileReadyType::Write | Event::FileReadyType::Read);
266+
#else
252267
file_event_->setEnabled(Event::FileReadyType::Write | Event::FileReadyType::Closed);
268+
#endif // __APPLE__
253269
} else {
254270
file_event_->setEnabled(Event::FileReadyType::Write);
255271
}
@@ -263,6 +279,17 @@ void ConnectionImpl::readDisable(bool disable) {
263279
// We never ask for both early close and read at the same time. If we are reading, we want to
264280
// consume all available data.
265281
file_event_->setEnabled(Event::FileReadyType::Read | Event::FileReadyType::Write);
282+
283+
#ifdef __APPLE__
284+
if (disabled_read_pending_) {
285+
// An actual read event occurred while reads were disabled (see above).
286+
ENVOY_CONN_LOG(trace, "readDisable trigger pending read", *this);
287+
disabled_read_pending_ = false;
288+
file_event_->activate(Event::FileReadyType::Read);
289+
return;
290+
}
291+
#endif
292+
266293
// If the connection has data buffered there's no guarantee there's also data in the kernel
267294
// which will kick off the filter chain. Instead fake an event to make sure the buffered data
268295
// gets processed regardless.
@@ -403,6 +430,28 @@ void ConnectionImpl::onFileEvent(uint32_t events) {
403430
return;
404431
}
405432

433+
#ifdef __APPLE__
434+
// Because OSX doesn't support early close detection via events, we leave the read event enabled
435+
// even when reads are disabled. Detect it here and convert it to a Closed event if we can detect
436+
// the connection is closed. See https://github.com/envoyproxy/envoy/issues/4294.
437+
if (detect_early_close_ && !read_enabled_ && (events & Event::FileReadyType::Read) != 0) {
438+
if (detectEarlyClose()) {
439+
// No longer connected. Convert to a closed event.
440+
ENVOY_CONN_LOG(trace, "early close detection triggered", *this);
441+
events |= Event::FileReadyType::Closed;
442+
} else {
443+
ENVOY_CONN_LOG(trace, "pending read in early close detection", *this);
444+
disabled_read_pending_ = true;
445+
}
446+
447+
// Reads are disabled, so never pass the read event along.
448+
events &= ~Event::FileReadyType::Read;
449+
if (!events) {
450+
return;
451+
}
452+
}
453+
#endif
454+
406455
if (events & Event::FileReadyType::Closed) {
407456
// We never ask for both early close and read at the same time. If we are reading, we want to
408457
// consume all available data.
@@ -459,8 +508,10 @@ void ConnectionImpl::onWriteReady() {
459508
if (connecting_) {
460509
int error;
461510
socklen_t error_size = sizeof(error);
462-
int rc = getsockopt(fd(), SOL_SOCKET, SO_ERROR, &error, &error_size);
463-
ASSERT(0 == rc);
511+
auto& os_syscalls = Api::OsSysCallsSingleton::get();
512+
const Api::SysCallIntResult result =
513+
os_syscalls.getsockopt(fd(), SOL_SOCKET, SO_ERROR, &error, &error_size);
514+
ASSERT(0 == result.rc_);
464515

465516
if (error == 0) {
466517
ENVOY_CONN_LOG(debug, "connected", *this);
@@ -535,6 +586,34 @@ bool ConnectionImpl::bothSidesHalfClosed() {
535586
return read_end_stream_ && write_end_stream_ && write_buffer_->length() == 0;
536587
}
537588

589+
#ifdef __APPLE__
590+
bool ConnectionImpl::detectEarlyClose() {
591+
auto& os_syscalls = Api::OsSysCallsSingleton::get();
592+
593+
if (is_uds_) {
594+
ENVOY_CONN_LOG(trace, "checking for UDS early close with read disabled", *this);
595+
596+
int bytes;
597+
socklen_t bytes_size = sizeof(int);
598+
const Api::SysCallIntResult result =
599+
os_syscalls.getsockopt(fd(), SOL_SOCKET, SO_NREAD, &bytes, &bytes_size);
600+
ASSERT(0 == result.rc_);
601+
602+
return bytes == 0;
603+
}
604+
605+
ENVOY_CONN_LOG(trace, "checking for TCP early close with read disabled", *this);
606+
tcp_connection_info info;
607+
socklen_t info_size = sizeof(tcp_connection_info);
608+
const Api::SysCallIntResult result =
609+
os_syscalls.getsockopt(fd(), IPPROTO_TCP, TCP_CONNECTION_INFO, &info, &info_size);
610+
ASSERT(0 == result.rc_);
611+
612+
return info.tcpi_state == TCPS_CLOSED ||
613+
(info.tcpi_state >= TCPS_CLOSE_WAIT && info.tcpi_state <= TCPS_TIME_WAIT);
614+
}
615+
#endif
616+
538617
ClientConnectionImpl::ClientConnectionImpl(
539618
Event::Dispatcher& dispatcher, const Address::InstanceConstSharedPtr& remote_address,
540619
const Network::Address::InstanceConstSharedPtr& source_address,

source/common/network/connection_impl.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#pragma once
22

3+
#include <netinet/tcp.h>
4+
35
#include <atomic>
46
#include <cstdint>
57
#include <list>
@@ -129,7 +131,6 @@ class ConnectionImpl : public virtual Connection,
129131
Buffer::InstancePtr write_buffer_;
130132
uint32_t read_buffer_limit_ = 0;
131133

132-
protected:
133134
bool connecting_{false};
134135
ConnectionEvent immediate_error_event_{ConnectionEvent::Connected};
135136
bool bind_error_{false};
@@ -146,6 +147,10 @@ class ConnectionImpl : public virtual Connection,
146147
// Returns true iff end of stream has been both written and read.
147148
bool bothSidesHalfClosed();
148149

150+
#ifdef __APPLE__
151+
bool detectEarlyClose();
152+
#endif
153+
149154
static std::atomic<uint64_t> next_global_id_;
150155

151156
Event::Dispatcher& dispatcher_;
@@ -161,6 +166,10 @@ class ConnectionImpl : public virtual Connection,
161166
bool read_end_stream_{false};
162167
bool write_end_stream_{false};
163168
bool current_write_end_stream_{false};
169+
#ifdef __APPLE__
170+
bool disabled_read_pending_{false};
171+
#endif
172+
bool is_uds_{false};
164173
Buffer::Instance* current_write_buffer_{};
165174
uint64_t last_read_buffer_size_{};
166175
uint64_t last_write_buffer_size_{};

test/common/network/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ envoy_cc_test(
5252
"//test/test_common:environment_lib",
5353
"//test/test_common:network_utility_lib",
5454
"//test/test_common:test_time_lib",
55+
"//test/test_common:threadsafe_singleton_injector_lib",
5556
],
5657
)
5758

test/common/network/connection_impl_test.cc

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "test/test_common/network_utility.h"
2222
#include "test/test_common/printers.h"
2323
#include "test/test_common/test_time.h"
24+
#include "test/test_common/threadsafe_singleton_injector.h"
2425
#include "test/test_common/utility.h"
2526

2627
#include "gmock/gmock.h"
@@ -887,6 +888,13 @@ class MockTransportConnectionImplTest : public testing::Test {
887888
.WillOnce(Invoke([this](TransportSocketCallbacks& callbacks) {
888889
transport_socket_callbacks_ = &callbacks;
889890
}));
891+
EXPECT_CALL(os_sys_calls_, getsockname(_, _, _))
892+
.WillOnce(Invoke([](int, sockaddr* addr, socklen_t* addrlen) -> Api::SysCallIntResult {
893+
EXPECT_NE(nullptr, addr);
894+
EXPECT_NE(nullptr, addrlen);
895+
addr->sa_family = AF_INET;
896+
return {0, 0};
897+
}));
890898
connection_.reset(
891899
new ConnectionImpl(dispatcher_, std::make_unique<ConnectionSocketImpl>(0, nullptr, nullptr),
892900
TransportSocketPtr(transport_socket_), true));
@@ -903,6 +911,9 @@ class MockTransportConnectionImplTest : public testing::Test {
903911
return {PostIoAction::KeepOpen, size, false};
904912
}
905913

914+
NiceMock<Api::MockOsSysCalls> os_sys_calls_;
915+
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls_{&os_sys_calls_};
916+
906917
std::unique_ptr<ConnectionImpl> connection_;
907918
Event::MockDispatcher dispatcher_;
908919
NiceMock<MockConnectionCallbacks> callbacks_;

test/extensions/filters/listener/proxy_protocol/proxy_protocol_test.cc

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,12 @@ TEST_P(ProxyProtocolTest, errorRecv_2) {
260260
'r', 'e', ' ', 'd', 'a', 't', 'a'};
261261
Api::MockOsSysCalls os_sys_calls;
262262
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls);
263+
264+
// Pass through sys calls made by ConnectionImpl.
265+
EXPECT_CALL(os_sys_calls, getsockopt_(_, _, _, _, _))
266+
.Times(AnyNumber())
267+
.WillRepeatedly(Invoke(::getsockopt));
268+
263269
EXPECT_CALL(os_sys_calls, recv(_, _, _, _))
264270
.Times(AnyNumber())
265271
.WillOnce(Return(Api::SysCallSizeResult{-1, 0}));
@@ -297,6 +303,12 @@ TEST_P(ProxyProtocolTest, errorFIONREAD_1) {
297303
'r', 'e', ' ', 'd', 'a', 't', 'a'};
298304
Api::MockOsSysCalls os_sys_calls;
299305
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls);
306+
307+
// Pass through sys calls made by ConnectionImpl.
308+
EXPECT_CALL(os_sys_calls, getsockopt_(_, _, _, _, _))
309+
.Times(AnyNumber())
310+
.WillRepeatedly(Invoke(::getsockopt));
311+
300312
EXPECT_CALL(os_sys_calls, ioctl(_, FIONREAD, _)).WillOnce(Return(Api::SysCallIntResult{-1, 0}));
301313
EXPECT_CALL(os_sys_calls, writev(_, _, _))
302314
.Times(AnyNumber())
@@ -490,6 +502,11 @@ TEST_P(ProxyProtocolTest, v2ParseExtensionsIoctlError) {
490502
Api::MockOsSysCalls os_sys_calls;
491503
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls);
492504

505+
// Pass through sys calls made by ConnectionImpl.
506+
EXPECT_CALL(os_sys_calls, getsockopt_(_, _, _, _, _))
507+
.Times(AnyNumber())
508+
.WillRepeatedly(Invoke(::getsockopt));
509+
493510
EXPECT_CALL(os_sys_calls, ioctl(_, FIONREAD, _))
494511
.Times(AnyNumber())
495512
.WillRepeatedly(Invoke([](int fd, unsigned long int request, void* argp) {
@@ -621,6 +638,11 @@ TEST_P(ProxyProtocolTest, v2Fragmented3Error) {
621638
Api::MockOsSysCalls os_sys_calls;
622639
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls);
623640

641+
// Pass through sys calls made by ConnectionImpl.
642+
EXPECT_CALL(os_sys_calls, getsockopt_(_, _, _, _, _))
643+
.Times(AnyNumber())
644+
.WillRepeatedly(Invoke(::getsockopt));
645+
624646
EXPECT_CALL(os_sys_calls, recv(_, _, _, _))
625647
.Times(AnyNumber())
626648
.WillRepeatedly(Invoke([](int fd, void* buf, size_t len, int flags) {
@@ -667,6 +689,11 @@ TEST_P(ProxyProtocolTest, v2Fragmented4Error) {
667689
Api::MockOsSysCalls os_sys_calls;
668690
TestThreadsafeSingletonInjector<Api::OsSysCallsImpl> os_calls(&os_sys_calls);
669691

692+
// Pass through sys calls made by ConnectionImpl.
693+
EXPECT_CALL(os_sys_calls, getsockopt_(_, _, _, _, _))
694+
.Times(AnyNumber())
695+
.WillRepeatedly(Invoke(::getsockopt));
696+
670697
EXPECT_CALL(os_sys_calls, recv(_, _, _, _))
671698
.Times(AnyNumber())
672699
.WillRepeatedly(Invoke([](int fd, void* buf, size_t len, int flags) {

test/mocks/api/mocks.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ SysCallIntResult MockOsSysCalls::setsockopt(int sockfd, int level, int optname,
4646

4747
// Allow mocking system call failure.
4848
if (setsockopt_(sockfd, level, optname, optval, optlen) != 0) {
49-
return SysCallIntResult{-1, 0};
49+
return SysCallIntResult{-1, errno};
5050
}
5151

5252
boolsockopts_[SockOptKey(sockfd, level, optname)] = !!*reinterpret_cast<const int*>(optval);
@@ -63,7 +63,7 @@ SysCallIntResult MockOsSysCalls::getsockopt(int sockfd, int level, int optname,
6363
}
6464
// Allow mocking system call failure.
6565
if (getsockopt_(sockfd, level, optname, optval, optlen) != 0) {
66-
return {-1, 0};
66+
return {-1, errno};
6767
}
6868
*reinterpret_cast<int*>(optval) = val;
6969
return {0, 0};

0 commit comments

Comments
 (0)