Skip to content

Commit 64ea520

Browse files
authored
mgmt-gateway: update host flash (#762)
* mgmt-gateway: update host flash This tracks the corresponding changes to MGS messaging merged in oxidecomputer/omicron#1684: 1. `UpdateStart` has been broken into `UpdatePrepare` and `UpdatePrepareStatus` (which MGS will continue to send periodically until we respond that preparation is done), allowing for updates that have a potentially long-running prep step (like updating host flash, which can take up to several minutes to erase!). 2. Update messages now include a stream ID that we use to correlate related messages; we reject update messages that don't match our current stream ID. 3. Add handling for the new `UpdateAbort` message to cancel an in-progress update. By far the most complex bit of this is item 1: I've moved setting the system timer out of `mgs_gimlet` (which previously set it only in relation to flushing serial console UART packets out to MGS) and into `main`: now the MGS handler only returns the deadline it wants `main` to set. If we're in the process of prepping for a host flash update (i.e., we need to erase the host flash), we'll set our deadline to 1 tick from now. When it fires, we'll erase 8 sectors (takes about 1 second, worst case), then return and allow `main` to check for other work. This allows us to continue to be responsive to incoming notifications, most importantly network requests (allowing us to respond to the `UpdatePrepareStatus` messages in a timely way!).
1 parent 07bfc33 commit 64ea520

File tree

10 files changed

+588
-60
lines changed

10 files changed

+588
-60
lines changed

Cargo.lock

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

app/gimlet/rev-b.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@ uses = [
231231
"usart1",
232232
"system_flash", # TODO also used by `net`, both to read the stm32 uid
233233
]
234-
task-slots = ["jefe", "net", "update_server", "sys"]
234+
task-slots = ["jefe", "net", "update_server", "sys", "hf"]
235235
features = ["gimlet", "usart1", "vlan"]
236236
interrupts = {"usart1.irq" = 0b10}
237237

app/gimletlet/app.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ uses = [
148148
"usart1",
149149
"system_flash", # TODO also used by `net`, both to read the stm32 uid
150150
]
151-
task-slots = ["jefe", "net", "update_server", "sys"]
151+
task-slots = ["jefe", "net", "update_server", "sys", "hf"]
152152
features = ["gimlet", "usart1", "vlan"]
153153
interrupts = {"usart1.irq" = 0b10}
154154

task/mgmt-gateway/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ num-traits = {version = "0.2", default-features = false}
1010
serde = {version = "1", default-features = false, features = ["derive"]}
1111
ssmarshal = {version = "1", default-features = false}
1212

13+
drv-gimlet-hf-api = {path = "../../drv/gimlet-hf-api"}
1314
drv-stm32h7-usart = {path = "../../drv/stm32h7-usart", features = ["h753"]}
1415
drv-stm32xx-uid = {path = "../../drv/stm32xx-uid", features = ["family-stm32h7"]}
1516
drv-update-api = {path = "../../drv/update-api"}
@@ -19,7 +20,7 @@ task-jefe-api = {path = "../jefe-api"}
1920
task-net-api = {path = "../net-api", features = ["use-smoltcp"]}
2021
userlib = {path = "../../sys/userlib", features = ["panic-messages"]}
2122

22-
gateway-messages = {git = "https://github.com/oxidecomputer/omicron", rev = "f2e6237e57a36873fc748b6ecd9e42b8ef208c88"}
23+
gateway-messages = {git = "https://github.com/oxidecomputer/omicron", rev = "e6eccffb236d0c242b5f90d30d01daa47aa9d89f"}
2324

2425
[features]
2526
gimlet = []

task/mgmt-gateway/src/main.rs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,16 @@
66
#![no_main]
77

88
use gateway_messages::{
9-
sp_impl, sp_impl::Error as MgsDispatchError, IgnitionCommand, SpPort,
9+
sp_impl, sp_impl::Error as MgsDispatchError, IgnitionCommand, SpComponent,
10+
SpPort,
1011
};
1112
use mutable_statics::mutable_statics;
1213
use ringbuf::{ringbuf, ringbuf_entry};
1314
use task_net_api::{
1415
Address, LargePayloadBehavior, Net, RecvError, SendError, SocketName,
1516
UdpMetadata,
1617
};
17-
use userlib::{sys_recv_closed, task_slot, TaskId, UnwrapLite};
18+
use userlib::{sys_recv_closed, sys_set_timer, task_slot, TaskId, UnwrapLite};
1819

1920
mod mgs_common;
2021
mod update_buffer;
@@ -53,6 +54,7 @@ enum Log {
5354
SerialConsoleSend { buffered: usize },
5455
UpdatePartial { bytes_written: usize },
5556
UpdateComplete,
57+
HostFlashSectorsErased { num_sectors: usize },
5658
}
5759

5860
#[derive(Debug, Clone, Copy, PartialEq)]
@@ -73,12 +75,23 @@ enum MgsMessage {
7375
length: u16,
7476
},
7577
SerialConsoleDetach,
76-
UpdateStart {
78+
UpdatePrepare {
79+
component: SpComponent,
80+
stream_id: u64,
7781
length: u32,
82+
slot: u16,
83+
},
84+
UpdatePrepareStatus {
85+
component: SpComponent,
86+
stream_id: u64,
7887
},
7988
UpdateChunk {
89+
component: SpComponent,
8090
offset: u32,
8191
},
92+
UpdateAbort {
93+
component: SpComponent,
94+
},
8295
SysResetPrepare,
8396
}
8497

@@ -99,6 +112,8 @@ fn main() {
99112
let mut net_handler = NetHandler::claim_static_resources();
100113

101114
loop {
115+
sys_set_timer(mgs_handler.timer_deadline(), TIMER_IRQ);
116+
102117
let note = sys_recv_closed(
103118
&mut [],
104119
NET_IRQ | USART_IRQ | TIMER_IRQ,
@@ -112,6 +127,10 @@ fn main() {
112127
mgs_handler.drive_usart();
113128
}
114129

130+
if (note & TIMER_IRQ) != 0 {
131+
mgs_handler.handle_timer_fired();
132+
}
133+
115134
if (note & NET_IRQ) != 0 || mgs_handler.wants_to_send_packet_to_mgs() {
116135
net_handler.run_until_blocked(&mut mgs_handler);
117136
}

task/mgmt-gateway/src/mgs_common.rs

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ use core::convert::Infallible;
77
use drv_update_api::stm32h7::BLOCK_SIZE_BYTES;
88
use drv_update_api::{Update, UpdateTarget};
99
use gateway_messages::{
10-
DiscoverResponse, ResponseError, SpPort, SpState, UpdateChunk, UpdateStart,
10+
DiscoverResponse, ResponseError, SpComponent, SpPort, SpState, UpdateChunk,
11+
UpdatePrepare, UpdatePrepareStatusRequest, UpdatePrepareStatusResponse,
1112
};
1213
use ringbuf::ringbuf_entry_root;
1314

@@ -73,36 +74,68 @@ impl MgsCommon {
7374
})
7475
}
7576

76-
pub(crate) fn update_start(
77+
pub(crate) fn update_prepare(
7778
&mut self,
78-
update: UpdateStart,
79+
update: UpdatePrepare,
7980
) -> Result<(), ResponseError> {
80-
ringbuf_entry_root!(Log::MgsMessage(MgsMessage::UpdateStart {
81-
length: update.total_size
82-
}));
81+
// We should only be called to update the SP itself.
82+
if update.component != SpComponent::SP_ITSELF {
83+
panic!();
84+
}
85+
86+
// SP only has one "slot" (the alternate bank).
87+
if update.slot != 0 {
88+
return Err(ResponseError::InvalidSlotForComponent);
89+
}
8390

8491
self.update_buf.ensure_no_update_in_progress()?;
8592

8693
self.update_task
8794
.prep_image_update(UpdateTarget::Alternate)
8895
.map_err(|err| ResponseError::UpdateFailed(err as u32))?;
8996

90-
self.update_buf.start(update.total_size as usize);
97+
self.update_buf
98+
.start(update.stream_id, update.total_size as usize);
9199

92100
Ok(())
93101
}
94102

103+
pub(crate) fn update_prepare_status(
104+
&mut self,
105+
request: UpdatePrepareStatusRequest,
106+
) -> Result<UpdatePrepareStatusResponse, ResponseError> {
107+
self.update_buf
108+
.ensure_matching_stream_id(request.stream_id)?;
109+
110+
// We immediately prepare for update in `update_prepare()`
111+
// and have no followup work to do; if this stream ID
112+
// matches, we're already prepared.
113+
Ok(UpdatePrepareStatusResponse { done: true })
114+
}
115+
95116
pub(crate) fn update_chunk(
96117
&mut self,
97118
chunk: UpdateChunk,
98119
data: &[u8],
99120
) -> Result<(), ResponseError> {
100-
ringbuf_entry_root!(Log::MgsMessage(MgsMessage::UpdateChunk {
101-
offset: chunk.offset,
102-
}));
103-
104121
self.update_buf
105-
.ingest_chunk(&self.update_task, chunk.offset, data)
122+
.ingest_chunk(
123+
chunk.stream_id,
124+
&self.update_task,
125+
chunk.offset,
126+
data,
127+
)
128+
.map(|_progress| ())
129+
}
130+
131+
pub(crate) fn update_abort(&mut self) -> Result<(), ResponseError> {
132+
self.update_task
133+
.abort_update()
134+
.map_err(|err| ResponseError::UpdateFailed(err as u32))?;
135+
136+
self.update_buf.reset();
137+
138+
Ok(())
106139
}
107140

108141
pub(crate) fn reset_prepare(&mut self) -> Result<(), ResponseError> {
@@ -130,7 +163,6 @@ impl MgsCommon {
130163
}
131164
}
132165

133-
/// Grabs reference to a static `UpdateBuffer`. Can only be called once!
134166
fn claim_update_buffer_static(
135167
) -> &'static mut heapless::Vec<u8, BLOCK_SIZE_BYTES> {
136168
use core::sync::atomic::{AtomicBool, Ordering};

0 commit comments

Comments
 (0)