11#include " common/network/connection_impl.h"
22
33#include < netinet/tcp.h>
4+
5+ #ifdef __APPLE__
6+ #include < netinet/tcp_fsm.h>
7+ #endif
48#include < sys/socket.h>
59#include < sys/types.h>
610#include < unistd.h>
1216#include " envoy/event/timer.h"
1317#include " envoy/network/filter.h"
1418
19+ #include " common/api/os_sys_calls_impl.h"
1520#include " common/common/assert.h"
1621#include " common/common/empty_string.h"
1722#include " common/common/enum_to_int.h"
@@ -65,6 +70,13 @@ ConnectionImpl::ConnectionImpl(Event::Dispatcher& dispatcher, ConnectionSocketPt
6570 Event::FileReadyType::Read | Event::FileReadyType::Write);
6671
6772 transport_socket_->setTransportSocketCallbacks (*this );
73+
74+ sockaddr addr;
75+ socklen_t len = sizeof (addr);
76+ auto & os_syscalls = Api::OsSysCallsSingleton::get ();
77+ const Api::SysCallIntResult result = os_syscalls.getsockname (fd (), &addr, &len);
78+ RELEASE_ASSERT (result.rc_ == 0 , " " );
79+ is_uds_ = addr.sa_family == AF_UNIX;
6880}
6981
7082ConnectionImpl::~ConnectionImpl () {
@@ -159,27 +171,24 @@ void ConnectionImpl::noDelay(bool enable) {
159171 }
160172
161173 // Don't set NODELAY for unix domain sockets
162- sockaddr addr;
163- socklen_t len = sizeof (addr);
164- int rc = getsockname (fd (), &addr, &len);
165- RELEASE_ASSERT (rc == 0 , " " );
166-
167- if (addr.sa_family == AF_UNIX) {
174+ if (is_uds_) {
168175 return ;
169176 }
170177
171178 // Set NODELAY
172179 int new_value = enable;
173- rc = setsockopt (fd (), IPPROTO_TCP, TCP_NODELAY, &new_value, sizeof (new_value));
180+ auto & os_syscalls = Api::OsSysCallsSingleton::get ();
181+ const Api::SysCallIntResult result =
182+ os_syscalls.setsockopt (fd (), IPPROTO_TCP, TCP_NODELAY, &new_value, sizeof (new_value));
174183#ifdef __APPLE__
175- if (-1 == rc && errno == EINVAL) {
184+ if (-1 == result. rc_ && result. errno_ == EINVAL) {
176185 // Sometimes occurs when the connection is not yet fully formed. Empirically, TCP_NODELAY is
177186 // enabled despite this result.
178187 return ;
179188 }
180189#endif
181190
182- RELEASE_ASSERT (0 == rc , " " );
191+ RELEASE_ASSERT (0 == result. rc_ , " " );
183192}
184193
185194uint64_t ConnectionImpl::id () const { return id_; }
@@ -249,7 +258,14 @@ void ConnectionImpl::readDisable(bool disable) {
249258 // If half-close semantics are enabled, we never want early close notifications; we
250259 // always want to read all available data, even if the other side has closed.
251260 if (detect_early_close_ && !enable_half_close_) {
261+ #ifdef __APPLE__
262+ // libevent only supports detecting early close with epoll, so we leave read events enabled
263+ // and check the connection state on read, tracking real read events in
264+ // disabled_read_pending_.
265+ file_event_->setEnabled (Event::FileReadyType::Write | Event::FileReadyType::Read);
266+ #else
252267 file_event_->setEnabled (Event::FileReadyType::Write | Event::FileReadyType::Closed);
268+ #endif // __APPLE__
253269 } else {
254270 file_event_->setEnabled (Event::FileReadyType::Write);
255271 }
@@ -263,6 +279,17 @@ void ConnectionImpl::readDisable(bool disable) {
263279 // We never ask for both early close and read at the same time. If we are reading, we want to
264280 // consume all available data.
265281 file_event_->setEnabled (Event::FileReadyType::Read | Event::FileReadyType::Write);
282+
283+ #ifdef __APPLE__
284+ if (disabled_read_pending_) {
285+ // An actual read event occurred while reads were disabled (see above).
286+ ENVOY_CONN_LOG (trace, " readDisable trigger pending read" , *this );
287+ disabled_read_pending_ = false ;
288+ file_event_->activate (Event::FileReadyType::Read);
289+ return ;
290+ }
291+ #endif
292+
266293 // If the connection has data buffered there's no guarantee there's also data in the kernel
267294 // which will kick off the filter chain. Instead fake an event to make sure the buffered data
268295 // gets processed regardless.
@@ -403,6 +430,28 @@ void ConnectionImpl::onFileEvent(uint32_t events) {
403430 return ;
404431 }
405432
433+ #ifdef __APPLE__
434+ // Because OSX doesn't support early close detection via events, we leave the read event enabled
435+ // even when reads are disabled. Detect it here and convert it to a Closed event if we can detect
436+ // the connection is closed. See https://github.com/envoyproxy/envoy/issues/4294.
437+ if (detect_early_close_ && !read_enabled_ && (events & Event::FileReadyType::Read) != 0 ) {
438+ if (detectEarlyClose ()) {
439+ // No longer connected. Convert to a closed event.
440+ ENVOY_CONN_LOG (trace, " early close detection triggered" , *this );
441+ events |= Event::FileReadyType::Closed;
442+ } else {
443+ ENVOY_CONN_LOG (trace, " pending read in early close detection" , *this );
444+ disabled_read_pending_ = true ;
445+ }
446+
447+ // Reads are disabled, so never pass the read event along.
448+ events &= ~Event::FileReadyType::Read;
449+ if (!events) {
450+ return ;
451+ }
452+ }
453+ #endif
454+
406455 if (events & Event::FileReadyType::Closed) {
407456 // We never ask for both early close and read at the same time. If we are reading, we want to
408457 // consume all available data.
@@ -459,8 +508,10 @@ void ConnectionImpl::onWriteReady() {
459508 if (connecting_) {
460509 int error;
461510 socklen_t error_size = sizeof (error);
462- int rc = getsockopt (fd (), SOL_SOCKET, SO_ERROR, &error, &error_size);
463- ASSERT (0 == rc);
511+ auto & os_syscalls = Api::OsSysCallsSingleton::get ();
512+ const Api::SysCallIntResult result =
513+ os_syscalls.getsockopt (fd (), SOL_SOCKET, SO_ERROR, &error, &error_size);
514+ ASSERT (0 == result.rc_ );
464515
465516 if (error == 0 ) {
466517 ENVOY_CONN_LOG (debug, " connected" , *this );
@@ -535,6 +586,34 @@ bool ConnectionImpl::bothSidesHalfClosed() {
535586 return read_end_stream_ && write_end_stream_ && write_buffer_->length () == 0 ;
536587}
537588
#ifdef __APPLE__
// Reports whether the socket looks closed by the peer, for use when a read event fires while
// reads are disabled.
//
// Unix domain sockets: queries SO_NREAD and treats "zero bytes readable" as closed.
// TCP sockets: queries TCP_CONNECTION_INFO and treats TCPS_CLOSED, or any state in the range
// [TCPS_CLOSE_WAIT, TCPS_TIME_WAIT], as closed.
//
// @return true if the connection is considered closed, false otherwise.
bool ConnectionImpl::detectEarlyClose() {
  auto& os_syscalls = Api::OsSysCallsSingleton::get();

  if (is_uds_) {
    ENVOY_CONN_LOG(trace, "checking for UDS early close with read disabled", *this);

    // Zero-initialized so that the comparison below never reads an indeterminate value if
    // getsockopt() fails and the ASSERT is not active (this file uses RELEASE_ASSERT elsewhere,
    // so ASSERT is presumably debug-only -- confirm). A failed query then reports "closed".
    int bytes = 0;
    socklen_t bytes_size = sizeof(bytes);
    const Api::SysCallIntResult result =
        os_syscalls.getsockopt(fd(), SOL_SOCKET, SO_NREAD, &bytes, &bytes_size);
    ASSERT(0 == result.rc_);

    return bytes == 0;
  }

  ENVOY_CONN_LOG(trace, "checking for TCP early close with read disabled", *this);

  // Value-initialized for the same reason as above: a failed getsockopt() must not leave
  // tcpi_state indeterminate. NOTE(review): a zeroed tcpi_state is expected to equal
  // TCPS_CLOSED, i.e. a failed query reports "closed" -- confirm against <netinet/tcp_fsm.h>.
  tcp_connection_info info{};
  socklen_t info_size = sizeof(info);
  const Api::SysCallIntResult result =
      os_syscalls.getsockopt(fd(), IPPROTO_TCP, TCP_CONNECTION_INFO, &info, &info_size);
  ASSERT(0 == result.rc_);

  return info.tcpi_state == TCPS_CLOSED ||
         (info.tcpi_state >= TCPS_CLOSE_WAIT && info.tcpi_state <= TCPS_TIME_WAIT);
}
#endif
616+
538617ClientConnectionImpl::ClientConnectionImpl (
539618 Event::Dispatcher& dispatcher, const Address::InstanceConstSharedPtr& remote_address,
540619 const Network::Address::InstanceConstSharedPtr& source_address,
0 commit comments