Skip to content

Commit

Permalink
fix(object-store): Consider some token source errors transient (#2331)
Browse files Browse the repository at this point in the history
## What ❔

Considers some token source GCS errors transient.

## Why ❔

Considering errors as transient leads to less abnormal application
terminations.

## Checklist

- [x] PR title corresponds to the body of PR (we generate changelog
entries from PRs).
- [x] Documentation comments have been added / updated.
- [x] Code has been formatted via `zk fmt` and `zk lint`.
  • Loading branch information
slowli authored Jun 27, 2024
1 parent ef75292 commit 85386d3
Showing 1 changed file with 22 additions and 14 deletions.
36 changes: 22 additions & 14 deletions core/lib/object_store/src/gcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,21 +107,22 @@ fn is_transient_http_error(err: &reqwest::Error) -> bool {
|| err.status() == Some(StatusCode::SERVICE_UNAVAILABLE)
}

fn has_transient_io_source(mut err: &(dyn StdError + 'static)) -> bool {
fn get_source<'a, T: StdError + 'static>(mut err: &'a (dyn StdError + 'static)) -> Option<&'a T> {
loop {
if err.is::<io::Error>() {
// We treat any I/O errors as transient. This isn't always true, but frequently occurring I/O errors
// (e.g., "connection reset by peer") *are* transient, and treating an error as transient is a safer option,
// even if it can lead to unnecessary retries.
return true;
if let Some(err) = err.downcast_ref::<T>() {
return Some(err);
}
err = match err.source() {
Some(source) => source,
None => return false,
};
err = err.source()?;
}
}

fn has_transient_io_source(err: &(dyn StdError + 'static)) -> bool {
// We treat any I/O errors as transient. This isn't always true, but frequently occurring I/O errors
// (e.g., "connection reset by peer") *are* transient, and treating an error as transient is a safer option,
// even if it can lead to unnecessary retries.
get_source::<io::Error>(err).is_some()
}

impl From<HttpError> for ObjectStoreError {
fn from(err: HttpError) -> Self {
let is_not_found = match &err {
Expand All @@ -135,10 +136,17 @@ impl From<HttpError> for ObjectStoreError {
if is_not_found {
ObjectStoreError::KeyNotFound(err.into())
} else {
let is_transient = matches!(
&err,
HttpError::HttpClient(err) if is_transient_http_error(err)
);
let is_transient = match &err {
HttpError::HttpClient(err) => is_transient_http_error(err),
HttpError::TokenSource(err) => {
// Token sources are mostly based on the `reqwest` HTTP client, so transient error detection
// can reuse the same logic.
let err = err.as_ref();
has_transient_io_source(err)
|| get_source::<reqwest::Error>(err).is_some_and(is_transient_http_error)
}
HttpError::Response(_) => false,
};
ObjectStoreError::Other {
is_transient,
source: err.into(),
Expand Down

0 comments on commit 85386d3

Please sign in to comment.