Skip to content

Commit abd5ed0

Browse files
authored
Only query available SPs for task dumps (#8621)
The MGS `sp_all_ids` method returns every SP that could potentially be present, which resulted in noise in the Nexus logs from attempts to connect to SPs in empty cubbies. Switch to the `ignition_list` method, which allows us to filter for SPs that are present, powered on, and have not faulted.
1 parent 1c53bcd commit abd5ed0

File tree

1 file changed

+26
-6
lines changed

1 file changed

+26
-6
lines changed

nexus/src/app/background/tasks/support_bundle_collector.rs

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use futures::future::BoxFuture;
1919
use futures::stream::FuturesUnordered;
2020
use gateway_client::Client as MgsClient;
2121
use gateway_client::types::SpIdentifier;
22+
use gateway_client::types::SpIgnition;
2223
use internal_dns_resolver::Resolver;
2324
use internal_dns_types::names::ServiceName;
2425
use nexus_db_model::SupportBundle;
@@ -1100,14 +1101,10 @@ async fn save_all_sp_dumps(
11001101
})
11011102
.context("failed to resolve address of MGS")?;
11021103

1103-
let all_sps = mgs_client
1104-
.sp_all_ids()
1105-
.await
1106-
.context("failed to get list of SPs from MGS")?
1107-
.into_inner();
1104+
let available_sps = get_available_sps(&mgs_client).await?;
11081105

11091106
let mut tasks = ParallelTaskSet::new();
1110-
for sp in all_sps {
1107+
for sp in available_sps {
11111108
let mgs_client = mgs_client.clone();
11121109
let sp_dumps_dir = sp_dumps_dir.to_owned();
11131110

@@ -1132,6 +1129,29 @@ async fn save_all_sp_dumps(
11321129
Ok(())
11331130
}
11341131

1132+
/// Use MGS ignition info to find active SPs.
1133+
async fn get_available_sps(
1134+
mgs_client: &MgsClient,
1135+
) -> anyhow::Result<Vec<SpIdentifier>> {
1136+
let ignition_info = mgs_client
1137+
.ignition_list()
1138+
.await
1139+
.context("failed to get ignition info from MGS")?
1140+
.into_inner();
1141+
1142+
let mut active_sps = Vec::new();
1143+
for info in ignition_info {
1144+
if let SpIgnition::Yes { power, flt_sp, .. } = info.details {
1145+
// Only return SPs that are powered on and are not in a faulted state.
1146+
if power && !flt_sp {
1147+
active_sps.push(info.id);
1148+
}
1149+
}
1150+
}
1151+
1152+
Ok(active_sps)
1153+
}
1154+
11351155
/// Fetch and save task dumps from a single SP.
11361156
async fn save_sp_dumps(
11371157
mgs_client: MgsClient,

0 commit comments

Comments
 (0)