Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog.d/23922_websocket_source_reconnect.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fixed the `websocket` source entering a "zombie" state when the `connect_timeout_secs` threshold was reached with multiple sources running. The connection timeout is now applied per connect attempt with indefinite retries, rather than as a total timeout limit.

authors: benjamin-awd
32 changes: 32 additions & 0 deletions src/common/websocket.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ pub enum WebSocketError {
DnsError { source: dns::DnsError },
#[snafu(display("No addresses returned."))]
NoAddresses,
#[snafu(display("Connection attempt timed out"))]
ConnectionTimedOut,
}

#[derive(Clone)]
Expand Down Expand Up @@ -143,6 +145,36 @@ impl WebSocketConnector {
}
}

/// Establishes a WebSocket connection, retrying indefinitely with backoff between attempts.
///
/// `timeout_duration` bounds each individual call to `connect()`; it is not a budget for
/// the whole retry loop. An attempt that exceeds it is reported as
/// `WebSocketError::ConnectionTimedOut` and retried like any other failure.
///
/// Emits `WebSocketConnectionEstablished` once an attempt succeeds and
/// `WebSocketConnectionFailedError` for every failed or timed-out attempt.
///
/// This function only returns once a connection has been established.
///
/// # Panics
///
/// Panics if the backoff iterator returned by `fresh_backoff()` yields `None`
/// (presumably it is unbounded — confirm against `fresh_backoff`).
pub(crate) async fn connect_backoff_with_timeout(
    &self,
    timeout_duration: Duration,
) -> WebSocketStream<MaybeTlsStream<TcpStream>> {
    let mut retry_delays = Self::fresh_backoff();

    loop {
        // Outer `Result` reports whether the timer elapsed; inner `Result` is the
        // outcome of the connection attempt itself.
        let attempt = time::timeout(timeout_duration, self.connect()).await;

        match attempt {
            Err(_) => {
                emit!(WebSocketConnectionFailedError {
                    error: Box::new(WebSocketError::ConnectionTimedOut),
                });
            }
            Ok(Err(error)) => {
                emit!(WebSocketConnectionFailedError {
                    error: Box::new(error)
                });
            }
            Ok(Ok(ws_stream)) => {
                emit!(WebSocketConnectionEstablished {});
                return ws_stream;
            }
        }

        // Only reached after a failed or timed-out attempt: wait before retrying.
        let delay = retry_delays.next().unwrap();
        time::sleep(delay).await;
    }
}

#[cfg(feature = "sinks-websocket")]
pub(crate) async fn healthcheck(&self) -> crate::Result<()> {
self.connect().await.map(|_| ()).map_err(Into::into)
Expand Down
28 changes: 8 additions & 20 deletions src/sources/websocket/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ use crate::{
common::websocket::{PingInterval, WebSocketConnector, is_closed},
config::SourceContext,
internal_events::{
ConnectionOpen, OpenGauge, PROTOCOL, WebSocketBytesReceived, WebSocketConnectionError,
WebSocketConnectionEstablished, WebSocketConnectionFailedError,
WebSocketConnectionShutdown, WebSocketKind, WebSocketMessageReceived,
WebSocketReceiveError, WebSocketSendError,
ConnectionOpen, OpenGauge, PROTOCOL, WebSocketBytesReceived,
WebSocketConnectionFailedError, WebSocketConnectionShutdown, WebSocketKind,
WebSocketMessageReceived, WebSocketReceiveError, WebSocketSendError,
},
sources::websocket::config::WebSocketConfig,
vector_lib::codecs::StreamDecodingError,
Expand Down Expand Up @@ -310,23 +309,12 @@ impl WebSocketSource {
async fn try_create_sink_and_stream(
&self,
) -> Result<(WebSocketSink, WebSocketStream), WebSocketSourceError> {
let connect_future = self.params.connector.connect_backoff();
let timeout = self.config.connect_timeout_secs;

let ws_stream = match time::timeout(timeout, connect_future).await {
Ok(ws) => ws,
Err(_) => {
emit!(WebSocketConnectionError {
error: TungsteniteError::Io(std::io::Error::new(
std::io::ErrorKind::TimedOut,
"Connection attempt timed out",
))
});
return Err(WebSocketSourceError::ConnectTimeout);
}
};
let ws_stream = self
.params
.connector
.connect_backoff_with_timeout(self.config.connect_timeout_secs)
.await;

emit!(WebSocketConnectionEstablished {});
let (sink, stream) = ws_stream.split();

Ok((Box::pin(sink), Box::pin(stream)))
Expand Down
Loading