-
Notifications
You must be signed in to change notification settings - Fork 100
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(panic): Stop panicking on async task cancellation on shutdown in …
…network and state futures (#7219) * Add an async-error feature and an initial module structure * Implement checking for panics in OS threads and async tasks * Implement waiting for panics in OS threads and async tasks * Add a TODO to simplify some state request error handling * Use the new panic-checking methods in zebra-state * Use new panic-checking methods in zebra-network * fixup! Implement waiting for panics in OS threads and async tasks * Replace existing async code with generic panic-checking methods * Simplify trait to a single method * Move thread panic code into generic trait impls * Simplify option handling Co-authored-by: Arya <aryasolhi@gmail.com> * Fix comment Co-authored-by: Arya <aryasolhi@gmail.com> * Add missing track_caller --------- Co-authored-by: Arya <aryasolhi@gmail.com>
- Loading branch information
Showing
12 changed files
with
367 additions
and
232 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
//! Diagnostic types and functions for Zebra tasks: | ||
//! - OS thread handling | ||
//! - async future task handling | ||
//! - errors and panics | ||
|
||
#[cfg(feature = "async-error")] | ||
pub mod future; | ||
|
||
pub mod thread; | ||
|
||
/// A trait that checks a task's return value for panics. | ||
pub trait CheckForPanics { | ||
/// The output type, after removing panics from `Self`. | ||
type Output; | ||
|
||
/// Check if `self` contains a panic payload or an unexpected termination, then panic. | ||
/// Otherwise, return the non-panic part of `self`. | ||
/// | ||
/// # Panics | ||
/// | ||
/// If `self` contains a panic payload or an unexpected termination. | ||
#[track_caller] | ||
fn check_for_panics(self) -> Self::Output; | ||
} | ||
|
||
/// A trait that waits for a task to finish, then handles panics and cancellations. | ||
pub trait WaitForPanics { | ||
/// The underlying task output, after removing panics and unwrapping termination results. | ||
type Output; | ||
|
||
/// Waits for `self` to finish, then check if its output is: | ||
/// - a panic payload: resume that panic, | ||
/// - an unexpected termination: panic with that error, | ||
/// - an expected termination: hang waiting for shutdown. | ||
/// | ||
/// Otherwise, returns the task return value of `self`. | ||
/// | ||
/// # Panics | ||
/// | ||
/// If `self` contains a panic payload or an unexpected termination. | ||
/// | ||
/// # Hangs | ||
/// | ||
/// If `self` contains an expected termination, and we're shutting down anyway. | ||
#[track_caller] | ||
fn wait_for_panics(self) -> Self::Output; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
//! Diagnostic types and functions for Zebra async future tasks: | ||
//! - task handles | ||
//! - errors and panics | ||
|
||
use std::{future, panic}; | ||
|
||
use futures::future::{BoxFuture, FutureExt}; | ||
use tokio::task::{JoinError, JoinHandle}; | ||
|
||
use crate::shutdown::is_shutting_down; | ||
|
||
use super::{CheckForPanics, WaitForPanics}; | ||
|
||
/// This is the return type of the [`JoinHandle`] future. | ||
impl<T> CheckForPanics for Result<T, JoinError> { | ||
/// The [`JoinHandle`]'s task output, after resuming any panics, | ||
/// and ignoring task cancellations on shutdown. | ||
type Output = Result<T, JoinError>; | ||
|
||
/// Returns the task result if the task finished normally. | ||
/// Otherwise, resumes any panics, logs unexpected errors, and ignores any expected errors. | ||
/// | ||
/// If the task finished normally, returns `Some(T)`. | ||
/// If the task was cancelled, returns `None`. | ||
#[track_caller] | ||
fn check_for_panics(self) -> Self::Output { | ||
match self { | ||
Ok(task_output) => Ok(task_output), | ||
Err(join_error) => Err(join_error.check_for_panics()), | ||
} | ||
} | ||
} | ||
|
||
impl CheckForPanics for JoinError { | ||
/// The [`JoinError`] after resuming any panics, and logging any unexpected task cancellations. | ||
type Output = JoinError; | ||
|
||
/// Resume any panics and panic on unexpected task cancellations. | ||
/// Always returns [`JoinError::Cancelled`](JoinError::is_cancelled). | ||
#[track_caller] | ||
fn check_for_panics(self) -> Self::Output { | ||
match self.try_into_panic() { | ||
Ok(panic_payload) => panic::resume_unwind(panic_payload), | ||
|
||
// We could ignore this error, but then we'd have to change the return type. | ||
Err(task_cancelled) if is_shutting_down() => { | ||
debug!( | ||
?task_cancelled, | ||
"ignoring cancelled task because Zebra is shutting down" | ||
); | ||
|
||
task_cancelled | ||
} | ||
|
||
Err(task_cancelled) => { | ||
panic!("task cancelled during normal Zebra operation: {task_cancelled:?}"); | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl<T> WaitForPanics for JoinHandle<T> | ||
where | ||
T: Send + 'static, | ||
{ | ||
type Output = BoxFuture<'static, T>; | ||
|
||
/// Returns a future which waits for `self` to finish, then checks if its output is: | ||
/// - a panic payload: resume that panic, | ||
/// - an unexpected termination: panic with that error, | ||
/// - an expected termination: hang waiting for shutdown. | ||
/// | ||
/// Otherwise, returns the task return value of `self`. | ||
/// | ||
/// # Panics | ||
/// | ||
/// If `self` contains a panic payload, or [`JoinHandle::abort()`] has been called on `self`. | ||
/// | ||
/// # Hangs | ||
/// | ||
/// If `self` contains an expected termination, and we're shutting down anyway. | ||
/// Futures hang by returning `Pending` and not setting a waker, so this uses minimal resources. | ||
#[track_caller] | ||
fn wait_for_panics(self) -> Self::Output { | ||
async move { | ||
match self.await.check_for_panics() { | ||
Ok(task_output) => task_output, | ||
Err(_expected_cancel_error) => future::pending().await, | ||
} | ||
} | ||
.boxed() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
//! Diagnostic types and functions for Zebra OS thread tasks: | ||
//! - task handles | ||
//! - errors and panics | ||
|
||
use std::{ | ||
panic, | ||
sync::Arc, | ||
thread::{self, JoinHandle}, | ||
}; | ||
|
||
use super::{CheckForPanics, WaitForPanics}; | ||
|
||
impl<T> CheckForPanics for thread::Result<T> { | ||
type Output = T; | ||
|
||
/// Panics if the thread panicked. | ||
/// | ||
/// Threads can't be cancelled except by using a panic, so there are no thread errors here. | ||
#[track_caller] | ||
fn check_for_panics(self) -> Self::Output { | ||
match self { | ||
// The value returned by the thread when it finished. | ||
Ok(thread_output) => thread_output, | ||
|
||
// A thread error is always a panic. | ||
Err(panic_payload) => panic::resume_unwind(panic_payload), | ||
} | ||
} | ||
} | ||
|
||
impl<T> WaitForPanics for JoinHandle<T> { | ||
type Output = T; | ||
|
||
/// Waits for the thread to finish, then panics if the thread panicked. | ||
#[track_caller] | ||
fn wait_for_panics(self) -> Self::Output { | ||
self.join().check_for_panics() | ||
} | ||
} | ||
|
||
impl<T> WaitForPanics for Arc<JoinHandle<T>> { | ||
type Output = Option<T>; | ||
|
||
/// If this is the final `Arc`, waits for the thread to finish, then panics if the thread | ||
/// panicked. Otherwise, returns the thread's return value. | ||
/// | ||
/// If this is not the final `Arc`, drops the handle and immediately returns `None`. | ||
#[track_caller] | ||
fn wait_for_panics(self) -> Self::Output { | ||
// If we are the last Arc with a reference to this handle, | ||
// we can wait for it and propagate any panics. | ||
// | ||
// We use into_inner() because it guarantees that exactly one of the tasks gets the | ||
// JoinHandle. try_unwrap() lets us keep the JoinHandle, but it can also miss panics. | ||
// | ||
// This is more readable as an expanded statement. | ||
#[allow(clippy::manual_map)] | ||
if let Some(handle) = Arc::into_inner(self) { | ||
Some(handle.wait_for_panics()) | ||
} else { | ||
None | ||
} | ||
} | ||
} | ||
|
||
impl<T> CheckForPanics for &mut Option<Arc<JoinHandle<T>>> { | ||
type Output = Option<T>; | ||
|
||
/// If this is the final `Arc`, checks if the thread has finished, then panics if the thread | ||
/// panicked. Otherwise, returns the thread's return value. | ||
/// | ||
/// If the thread has not finished, or this is not the final `Arc`, returns `None`. | ||
#[track_caller] | ||
fn check_for_panics(self) -> Self::Output { | ||
let handle = self.take()?; | ||
|
||
if handle.is_finished() { | ||
// This is the same as calling `self.wait_for_panics()`, but we can't do that, | ||
// because we've taken `self`. | ||
#[allow(clippy::manual_map)] | ||
return handle.wait_for_panics(); | ||
} | ||
|
||
*self = Some(handle); | ||
|
||
None | ||
} | ||
} | ||
|
||
impl<T> WaitForPanics for &mut Option<Arc<JoinHandle<T>>> { | ||
type Output = Option<T>; | ||
|
||
/// If this is the final `Arc`, waits for the thread to finish, then panics if the thread | ||
/// panicked. Otherwise, returns the thread's return value. | ||
/// | ||
/// If this is not the final `Arc`, drops the handle and returns `None`. | ||
#[track_caller] | ||
fn wait_for_panics(self) -> Self::Output { | ||
// This is more readable as an expanded statement. | ||
#[allow(clippy::manual_map)] | ||
if let Some(output) = self.take()?.wait_for_panics() { | ||
Some(output) | ||
} else { | ||
// Some other task has a reference, so we should give up ours to let them use it. | ||
None | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.