Skip to content

Commit 7660a59

Browse files
committed
fix: Ensure to exit with error if any vcpu exits with error
Previously, when a VM exited, we looked for the first vcpu that reported an exit status, and then indiscriminately reported that back. However, it is possible to one vcpu to exit successfully while another exits with an error, and this could lead us to report "Firecracker Exited Successfully" even though it did not. Now, we explicitly look for the status code of all vcpus. If any of them report an error, this takes precedence over non-error status codes. Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
1 parent 885bbc7 commit 7660a59

File tree

1 file changed

+20
-16
lines changed

1 file changed

+20
-16
lines changed

src/vmm/src/lib.rs

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ pub mod vstate;
109109
use std::collections::HashMap;
110110
use std::io;
111111
use std::os::unix::io::AsRawFd;
112-
use std::sync::mpsc::{RecvTimeoutError, TryRecvError};
112+
use std::sync::mpsc::RecvTimeoutError;
113113
use std::sync::{Arc, Barrier, Mutex};
114114
use std::time::Duration;
115115

@@ -915,23 +915,27 @@ impl MutEventSubscriber for Vmm {
915915
// Exit event handling should never do anything more than call 'self.stop()'.
916916
let _ = self.vcpus_exit_evt.read();
917917

918-
let mut exit_code = None;
919-
// Query each vcpu for their exit_code.
920-
for handle in &self.vcpus_handles {
921-
match handle.response_receiver().try_recv() {
922-
Ok(VcpuResponse::Exited(status)) => {
923-
exit_code = Some(status);
924-
// Just use the first encountered exit-code.
925-
break;
926-
}
927-
Ok(_response) => {} // Don't care about these, we are exiting.
928-
Err(TryRecvError::Empty) => {} // Nothing pending in channel
929-
Err(err) => {
930-
panic!("Error while looking for VCPU exit status: {}", err);
918+
let exit_code = 'exit_code: {
919+
// Query each vcpu for their exit_code.
920+
for handle in &self.vcpus_handles {
921+
// Drain all vcpu responses that are pending from this vcpu until we find an
922+
// exit status.
923+
for response in handle.response_receiver().try_iter() {
924+
if let VcpuResponse::Exited(status) = response {
925+
// It could be that some vcpus exited successfully while others
926+
// errored out. Thus make sure that error exits from one vcpu always
927+
// takes precedence over "ok" exits
928+
if status != FcExitCode::Ok {
929+
break 'exit_code status;
930+
}
931+
}
931932
}
932933
}
933-
}
934-
self.stop(exit_code.unwrap_or(FcExitCode::Ok));
934+
935+
// No CPUs exited with error status code, report "Ok"
936+
FcExitCode::Ok
937+
};
938+
self.stop(exit_code);
935939
} else {
936940
error!("Spurious EventManager event for handler: Vmm");
937941
}

0 commit comments

Comments
 (0)