diff --git a/Cargo.lock b/Cargo.lock index 3bf1cf0cf..5e164766b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -143,6 +143,12 @@ dependencies = [ "nodrop", ] +[[package]] +name = "array-init" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d62b7694a562cdf5a74227903507c56ab2cc8bdd1f781ed5cb4cf9c9f810bfc" + [[package]] name = "arrayvec" version = "0.7.4" @@ -1753,6 +1759,21 @@ dependencies = [ "userlib", ] +[[package]] +name = "drv-psc-psu-update" +version = "0.1.0" +dependencies = [ + "array-init 2.1.0", + "build-i2c", + "build-util", + "counters", + "drv-i2c-api", + "drv-i2c-devices", + "ringbuf", + "static-cell", + "userlib", +] + [[package]] name = "drv-psc-seq-api" version = "0.1.0" @@ -3875,8 +3896,8 @@ checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" [[package]] name = "pmbus" -version = "0.1.1" -source = "git+https://github.com/oxidecomputer/pmbus#735a70bbc90707a963ec1731c8a0fa427a013f21" +version = "0.1.4" +source = "git+https://github.com/oxidecomputer/pmbus#44568ce7eb86fe0b03dd088a75ad76ea0d8529bb" dependencies = [ "anyhow", "convert_case", @@ -4342,7 +4363,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca37e3e4d1b39afd7ff11ee4e947efae85adfddf4841787bfa47c470e96dc26d" dependencies = [ - "array-init", + "array-init 0.0.4", "serde", "smallvec 0.6.14", ] diff --git a/Cargo.toml b/Cargo.toml index 1c376057b..80a83aaf7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ path = "lib/counters" [workspace.dependencies] anyhow = { version = "1.0.31", default-features = false, features = ["std"] } +array-init = { version = "2.1.0" } arrayvec = { version = "0.7.4", default-features = false } atty = { version = "0.2", default-features = false } bitfield = { version = "0.13", default-features = false } diff --git a/app/psc/base.toml b/app/psc/base.toml index 55396ba01..9a303fa2e 100644 --- a/app/psc/base.toml +++ b/app/psc/base.toml 
@@ -6,7 +6,7 @@ fwid = true [kernel] name = "psc" -requires = {flash = 32868, ram = 5216} +requires = {flash = 32868, ram = 6000} features = ["dump"] [caboose] @@ -298,6 +298,13 @@ max-sizes = {flash = 16384, ram = 2048 } start = true task-slots = ["i2c_driver", "sensor"] +[tasks.psu_update] +name = "drv-psc-psu-update" +priority = 4 +max-sizes = {flash = 65536, ram = 8192 } +start = true +task-slots = ["i2c_driver"] + [tasks.dump_agent] name = "task-dump-agent" priority = 5 diff --git a/drv/i2c-devices/src/mwocp68.rs b/drv/i2c-devices/src/mwocp68.rs index 5dccabb02..516612014 100644 --- a/drv/i2c-devices/src/mwocp68.rs +++ b/drv/i2c-devices/src/mwocp68.rs @@ -27,13 +27,68 @@ pub struct Mwocp68 { mode: Cell>, } +#[derive(Copy, Clone, PartialEq)] +pub struct FirmwareRev(pub [u8; 4]); + +#[derive(Copy, Clone, PartialEq, Default)] +pub struct SerialNumber(pub [u8; 12]); + +// +// The boot loader command -- sent via BOOT_LOADER_CMD -- is unfortunately odd +// in that its command code is overloaded with BOOT_LOADER_STATUS. (That is, +// a read to the command code is BOOT_LOADER_STATUS, a write is +// BOOT_LOADER_CMD.) This is behavior that the PMBus crate didn't necessarily +// envision, so it can't necessarily help us out; we define the single-byte +// payload codes here rather than declaratively in the PMBus crate. 
+// +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[repr(u8)] +pub enum BootLoaderCommand { + ClearStatus = 0x00, + RestartProgramming = 0x01, + BootPrimary = 0x12, + BootSecondary = 0x02, + BootPSUFirmware = 0x03, +} + #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum Error { - BadRead { cmd: u8, code: ResponseCode }, - BadWrite { cmd: u8, code: ResponseCode }, - BadData { cmd: u8 }, - BadValidation { cmd: u8, code: ResponseCode }, - InvalidData { err: pmbus::Error }, + BadRead { + cmd: u8, + code: ResponseCode, + }, + BadWrite { + cmd: u8, + code: ResponseCode, + }, + BadData { + cmd: u8, + }, + BadValidation { + cmd: u8, + code: ResponseCode, + }, + InvalidData { + err: pmbus::Error, + }, + BadFirmwareRevRead { + code: ResponseCode, + }, + BadFirmwareRev { + index: u8, + }, + BadFirmwareRevLength, + UpdateInBootLoader, + UpdateNotInBootLoader, + UpdateAlreadySuccessful, + BadBootLoaderStatus { + data: u8, + }, + BadBootLoaderCommand { + cmd: BootLoaderCommand, + code: ResponseCode, + }, + ChecksumNotSuccessful, } impl From for Error { @@ -62,6 +117,66 @@ impl From for Error { } } +/// +/// Defines the state of the firmware update. Once `UpdateSuccessful` +/// has been returned, the update is complete. 
+/// +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum UpdateState { + /// The boot loader key has been written + WroteBootLoaderKey, + + /// The product key has been written + WroteProductKey, + + /// The boot loader has been booted + BootedBootLoader, + + /// Programming of firmware has been indicated to have started + StartedProgramming, + + /// A block has been written; the next offset is at [`offset`], and the + /// running checksum is in [`checksum`] + WroteBlock { offset: usize, checksum: u64 }, + + /// The last block has been written; the checksum is in [`checksum`] + WroteLastBlock { checksum: u64 }, + + /// The checksum has been sent for verification + SentChecksum, + + /// The checksum has been verified + VerifiedChecksum, + + /// The PSU has been rebooted + RebootedPSU, + + /// The entire update is complete and successful + UpdateSuccessful, +} + +impl UpdateState { + /// + /// Return the milliseconds of delay associated with the current state. + /// Note that some of these values differ slightly from Murata's "PSU + /// Firmware Update Process" document in that they reflect revised + /// guidance from Murata. + /// + fn delay_ms(&self) -> u64 { + match self { + Self::WroteBootLoaderKey => 3_000, + Self::WroteProductKey => 3_000, + Self::BootedBootLoader => 1_000, + Self::StartedProgramming => 2_000, + Self::WroteBlock { .. } | Self::WroteLastBlock { .. 
} => 100, + Self::SentChecksum => 2_000, + Self::VerifiedChecksum => 4_000, + Self::RebootedPSU => 5_000, + Self::UpdateSuccessful => 0, + } + } +} + impl Mwocp68 { pub fn new(device: &I2cDevice, index: u8) -> Self { Mwocp68 { @@ -350,6 +465,297 @@ impl Mwocp68 { Ok(val) } + /// Will return true if the device is present and valid -- false otherwise + pub fn present(&self) -> bool { + Mwocp68::validate(&self.device).unwrap_or_default() + } + + pub fn power_good(&self) -> Result { + use commands::mwocp68::STATUS_WORD::*; + + let status = pmbus_read!(self.device, STATUS_WORD)?; + Ok(status.get_power_good_status() == Some(PowerGoodStatus::PowerGood)) + } + + /// + /// Returns the firmware revision of the primary MCU (AC input side). + /// + pub fn firmware_revision(&self) -> Result { + const REVISION_LEN: usize = 14; + + let mut data = [0u8; REVISION_LEN]; + let expected = b"XXXX-YYYY-0000"; + + let len = self + .device + .read_block(CommandCode::MFR_REVISION as u8, &mut data) + .map_err(|code| Error::BadFirmwareRevRead { code })?; + + // + // Per ACAN-114, we are expecting this to be of the format: + // + // XXXX-YYYY-0000 + // + // Where XXXX is the firmware revision on the primary MCU (AC input + // side) and YYYY is the firmware revision on the secondary MCU (DC + // output side). We aren't going to be rigid about the format of + // either revision, but we will be rigid about the rest of the format. + // + if len != REVISION_LEN { + return Err(Error::BadFirmwareRevLength); + } + + for index in 0..len { + if expected[index] == b'X' || expected[index] == b'Y' { + continue; + } + + if data[index] != expected[index] { + return Err(Error::BadFirmwareRev { index: index as u8 }); + } + } + + // + // Return the primary MCU version + // + Ok(FirmwareRev([data[0], data[1], data[2], data[3]])) + } + + /// + /// Returns the serial number of the PSU. 
+ /// + pub fn serial_number(&self) -> Result { + let mut serial = SerialNumber::default(); + + let _ = self + .device + .read_block(CommandCode::MFR_SERIAL as u8, &mut serial.0) + .map_err(|code| Error::BadFirmwareRevRead { code })?; + + Ok(serial) + } + + fn get_boot_loader_status( + &self, + ) -> Result { + use pmbus::commands::mwocp68::CommandCode; + let cmd = CommandCode::BOOT_LOADER_STATUS as u8; + let mut data = [0u8]; + + match self.device.read_block(cmd, &mut data) { + Ok(1) => Ok(()), + Ok(len) => Err(Error::BadBootLoaderStatus { data: len as u8 }), + Err(code) => Err(Error::BadRead { cmd, code }), + }?; + + match BOOT_LOADER_STATUS::CommandData::from_slice(&data[0..]) { + Some(status) => Ok(status), + None => Err(Error::BadBootLoaderStatus { data: data[0] }), + } + } + + fn get_boot_loader_mode(&self) -> Result { + // + // This unwrap is safe because the boot loader mode is a single bit. + // + Ok(self.get_boot_loader_status()?.get_mode().unwrap()) + } + + fn boot_loader_command(&self, cmd: BootLoaderCommand) -> Result<(), Error> { + use pmbus::commands::mwocp68::CommandCode; + + // + // The great unfortunateness: BOOT_LOADER_STATUS is overloaded to + // be BOOT_LOADER_CMD on a write. + // + let data = [CommandCode::BOOT_LOADER_STATUS as u8, 1, cmd as u8]; + + self.device + .write(&data) + .map_err(|code| Error::BadBootLoaderCommand { cmd, code })?; + + Ok(()) + } + + /// + /// Perform a firmware update, implementing the procedure contained + /// within Murata's "PSU Firmware Update Process" document. Note that + /// this function must be called initially with a state of `None`; it will + /// return either an error, or the next state in the update process, + /// along with a specified delay in milliseconds. It is up to the caller + /// to ensure that the returned delay has been observed before calling + /// back in to continue the update.
+ /// + pub fn update( + &self, + state: Option, + payload: &[u8], + ) -> Result<(UpdateState, u64), Error> { + use pmbus::commands::mwocp68::CommandCode; + use BOOT_LOADER_STATUS::Mode; + + let write_boot_loader_key = || -> Result { + const MWOCP68_BOOT_LOADER_KEY: &[u8] = b"InVe"; + let mut data = [0u8; MWOCP68_BOOT_LOADER_KEY.len() + 2]; + + data[0] = CommandCode::BOOT_LOADER_KEY as u8; + data[1] = MWOCP68_BOOT_LOADER_KEY.len() as u8; + data[2..].copy_from_slice(MWOCP68_BOOT_LOADER_KEY); + + self.device + .write(&data) + .map_err(|code| Error::BadWrite { cmd: data[0], code })?; + + Ok(UpdateState::WroteBootLoaderKey) + }; + + let write_product_key = || -> Result { + const MWOCP68_PRODUCT_KEY: &[u8] = b"M5813-0000000000"; + let mut data = [0u8; MWOCP68_PRODUCT_KEY.len() + 1]; + + data[0] = CommandCode::BOOT_LOADER_PRODUCT_KEY as u8; + data[1..].copy_from_slice(MWOCP68_PRODUCT_KEY); + + self.device + .write(&data) + .map_err(|code| Error::BadWrite { cmd: data[0], code })?; + + Ok(UpdateState::WroteProductKey) + }; + + let boot_boot_loader = || -> Result { + self.boot_loader_command(BootLoaderCommand::BootPrimary)?; + Ok(UpdateState::BootedBootLoader) + }; + + let start_programming = || -> Result { + self.boot_loader_command(BootLoaderCommand::RestartProgramming)?; + Ok(UpdateState::StartedProgramming) + }; + + let write_block = || -> Result { + const BLOCK_LEN: usize = 32; + + let (mut offset, mut checksum) = match state { + Some(UpdateState::WroteBlock { offset, checksum }) => { + (offset, checksum) + } + Some(UpdateState::StartedProgramming) => (0, 0), + _ => panic!(), + }; + + let mut data = [0u8; BLOCK_LEN + 1]; + data[0] = CommandCode::BOOT_LOADER_MEMORY_BLOCK as u8; + data[1..].copy_from_slice(&payload[offset..offset + BLOCK_LEN]); + + self.device + .write(&data) + .map_err(|code| Error::BadWrite { cmd: data[0], code })?; + + checksum = data[1..] 
+ .iter() + .fold(checksum, |c, &d| c.wrapping_add(d.into())); + offset += BLOCK_LEN; + + if offset >= payload.len() { + Ok(UpdateState::WroteLastBlock { checksum }) + } else { + Ok(UpdateState::WroteBlock { offset, checksum }) + } + }; + + let send_checksum = || -> Result { + let Some(UpdateState::WroteLastBlock { checksum }) = state else { + panic!(); + }; + + let data = [ + CommandCode::IMAGE_CHECKSUM as u8, + 2, + (checksum & 0xff) as u8, + ((checksum >> 8) & 0xff) as u8, + ]; + + self.device + .write(&data) + .map_err(|code| Error::BadWrite { cmd: data[0], code })?; + + Ok(UpdateState::SentChecksum) + }; + + let verify_checksum = || -> Result { + use BOOT_LOADER_STATUS::ChecksumSuccessful; + + let status = self.get_boot_loader_status()?; + + match status.get_checksum_successful() { + Some(ChecksumSuccessful::Successful) => { + Ok(UpdateState::VerifiedChecksum) + } + Some(ChecksumSuccessful::NotSuccessful) | None => { + Err(Error::ChecksumNotSuccessful) + } + } + }; + + let reboot_psu = || -> Result { + self.boot_loader_command(BootLoaderCommand::BootPSUFirmware)?; + Ok(UpdateState::RebootedPSU) + }; + + let verify_success = || -> Result { + Ok(UpdateState::UpdateSuccessful) + }; + + // + // We want to confirm that our boot loader is in the state that + // we think it should be in. On the one hand, this will fail in + // a non-totally-unreasonable fashion if we don't check this -- but + // we have an opportunity to assert our in-device state and fail + // cleanly if it doesn't match, and it feels like we should take it. + // + let expected = match state { + None + | Some(UpdateState::WroteBootLoaderKey) + | Some(UpdateState::WroteProductKey) + | Some(UpdateState::RebootedPSU) => Mode::NotBootLoader, + + Some(UpdateState::BootedBootLoader) + | Some(UpdateState::StartedProgramming) + | Some(UpdateState::WroteBlock { .. }) + | Some(UpdateState::WroteLastBlock { .. 
}) + | Some(UpdateState::SentChecksum) + | Some(UpdateState::VerifiedChecksum) => Mode::BootLoader, + + Some(UpdateState::UpdateSuccessful) => { + return Err(Error::UpdateAlreadySuccessful); + } + }; + + if self.get_boot_loader_mode()? != expected { + return Err(match expected { + Mode::BootLoader => Error::UpdateNotInBootLoader, + Mode::NotBootLoader => Error::UpdateInBootLoader, + }); + } + + let next = match state { + None => write_boot_loader_key()?, + Some(UpdateState::WroteBootLoaderKey) => write_product_key()?, + Some(UpdateState::WroteProductKey) => boot_boot_loader()?, + Some(UpdateState::BootedBootLoader) => start_programming()?, + Some(UpdateState::StartedProgramming) + | Some(UpdateState::WroteBlock { .. }) => write_block()?, + Some(UpdateState::WroteLastBlock { .. }) => send_checksum()?, + Some(UpdateState::SentChecksum) => verify_checksum()?, + Some(UpdateState::VerifiedChecksum) => reboot_psu()?, + Some(UpdateState::RebootedPSU) => verify_success()?, + Some(UpdateState::UpdateSuccessful) => panic!(), + }; + + Ok((next, next.delay_ms())) + } + pub fn i2c_device(&self) -> &I2cDevice { &self.device } diff --git a/drv/psc-psu-update/Cargo.toml b/drv/psc-psu-update/Cargo.toml new file mode 100644 index 000000000..bfc2b02a9 --- /dev/null +++ b/drv/psc-psu-update/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "drv-psc-psu-update" +description = "Task for PSU firmware update" +version = "0.1.0" +edition = "2021" + +[dependencies] +drv-i2c-api = { path = "../i2c-api" } +drv-i2c-devices = { path = "../i2c-devices" } +counters = { path = "../../lib/counters" } +ringbuf = { path = "../../lib/ringbuf", features = ["counters"] } +userlib = { path = "../../sys/userlib", features = ["panic-messages"] } +static-cell = { path = "../../lib/static-cell" } +array-init.workspace = true + +[build-dependencies] +build-util = {path = "../../build/util"} +build-i2c = { path = "../../build/i2c" } + +[[bin]] +name = "drv-psc-psu-update" +test = false +doctest = false +bench = 
false + +[lints] +workspace = true diff --git a/drv/psc-psu-update/build.rs b/drv/psc-psu-update/build.rs new file mode 100644 index 000000000..660f92b3f --- /dev/null +++ b/drv/psc-psu-update/build.rs @@ -0,0 +1,9 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +fn main() -> Result<(), Box> { + build_util::expose_target_board(); + build_i2c::codegen(build_i2c::Disposition::Devices)?; + Ok(()) +} diff --git a/drv/psc-psu-update/src/main.rs b/drv/psc-psu-update/src/main.rs new file mode 100644 index 000000000..5584d6675 --- /dev/null +++ b/drv/psc-psu-update/src/main.rs @@ -0,0 +1,390 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Server for updating all PSUs to the contained binary payload. +//! +//! We have the capacity to dynamically update the MWOCP68 power supply units +//! connected to the PSC. This update does not involve any interruption of the +//! PSU while it is being performed, but necessitates a reset of the PSU once +//! completed. We want these updates to be automatic and autonomous; there is +//! little that the control plane can know that we do not know -- and even less +//! for the operator. +//! +//! This task contains within it a payload that is the desired firmware image +//! (`MWOCP68_FIRMWARE_PAYLOAD`), along with the `MFR_REVISION` that that +//! payload represents (`MWOCP68_FIRMWARE_REV`). This task will check +//! every PSU periodically to see if the PSU's firmware revision matches the +//! revision specified as corresponding to the payload; if they don't match (or +//! rather, until they do), an attempt will be made to update the PSU. Each +//!
PSU will be updated sequentially: while we can expect a properly configured +//! and operating rack to support the loss of any one PSU, we do not want to +//! induce the loss of more than one simultaneously due to update. If an +//! update fails, the update of that PSU will be exponentially backed off and +//! repeated (up to a backoff of about once per day). Note that we will +//! continue to check PSUs that we have already updated should they be replaced +//! with a PSU with downrev firmware. The state of this task can be +//! ascertained by looking at the `PSU` variable (which contains all of the +//! per-PSU state) as well as the ring buffer. +//! + +#![no_std] +#![no_main] + +use drv_i2c_api::*; +use drv_i2c_devices::mwocp68::{ + Error as Mwocp68Error, FirmwareRev, Mwocp68, SerialNumber, UpdateState, +}; +use ringbuf::*; +use static_cell::ClaimOnceCell; +use userlib::*; + +use core::ops::Add; + +task_slot!(I2C, i2c_driver); + +const TIMER_INTERVAL_MS: u64 = 10_000; + +use i2c_config::devices; + +#[cfg(any(target_board = "psc-b", target_board = "psc-c"))] +static DEVICES: [fn(TaskId) -> I2cDevice; 6] = [ + devices::mwocp68_psu0mcu, + devices::mwocp68_psu1mcu, + devices::mwocp68_psu2mcu, + devices::mwocp68_psu3mcu, + devices::mwocp68_psu4mcu, + devices::mwocp68_psu5mcu, +]; + +static PSU: ClaimOnceCell<[Psu; 6]> = ClaimOnceCell::new( + [Psu { + last_checked: None, + present: None, + power_good: None, + serial_number: None, + firmware_matches: None, + firmware_revision: None, + update_started: None, + update_succeeded: None, + update_failure: None, + update_backoff: None, + }; 6], +); + +#[derive(Copy, Clone, Debug, PartialEq, counters::Count)] +enum Trace { + #[count(skip)] + None, + PowerGoodFailed(u8, drv_i2c_devices::mwocp68::Error), + FirmwareRevFailed(u8, drv_i2c_devices::mwocp68::Error), + AttemptingUpdate(u8), + BackingOff(u8), + UpdateFailed, + UpdateFailedState(Option), + UpdateFailure(Mwocp68Error), + UpdateState(UpdateState), + WroteBlock, + 
UpdateSucceeded(u8), + UpdateDelay(u64), + PSUReplaced(u8), + SerialNumberError(u8, drv_i2c_devices::mwocp68::Error), + PGError(u8, drv_i2c_devices::mwocp68::Error), + PowerNotGood(u8), +} + +// +// The actual firmware revision and payload. It is very important that the +// revision match the revision contained within the payload, lest we will +// believe that the update has failed when it has in fact succeeded! +// +const MWOCP68_FIRMWARE_REV: FirmwareRev = FirmwareRev(*b"0762"); +const MWOCP68_FIRMWARE_PAYLOAD: &[u8] = include_bytes!("mwocp68-0762.bin"); + +counted_ringbuf!(Trace, 64, Trace::None); + +#[derive(Copy, Clone, PartialOrd, PartialEq)] +struct Ticks(u64); + +impl Ticks { + fn now() -> Self { + Self(sys_get_timer().now) + } +} + +impl Add for Ticks { + type Output = Self; + + fn add(self, other: Self) -> Self { + Self(self.0 + other.0) + } +} + +#[derive(Copy, Clone, Default)] +struct Psu { + /// When did we last check this device? + last_checked: Option, + + /// Is the device physically present? + present: Option, + + /// Is the device on and with POWER_GOOD set? + power_good: Option, + + /// The last serial number read + serial_number: Option, + + /// The last firmware revision read + firmware_revision: Option, + + /// Does the firmware we have match the firmware here? + firmware_matches: Option, + + /// What time did we start an update? + update_started: Option, + + /// What time did the update complete? + update_succeeded: Option, + + /// What time did the update last fail, if any? + update_failure: Option<(Ticks, Option, Option)>, + + /// How long should the next update backoff, if at all? (In ticks.) 
+ update_backoff: Option, +} + +impl Psu { + fn update_should_be_attempted(&mut self, dev: &Mwocp68, ndx: u8) -> bool { + let now = Ticks::now(); + + self.last_checked = Some(now); + self.power_good = None; + self.firmware_matches = None; + self.firmware_revision = None; + + if !dev.present() { + self.present = Some(false); + + // + // If we are seeing our device as not present, we will clear our + // backoff value: if/when a PSU is plugged back in, we want to + // attempt to update it immediately if the firmware revision + // doesn't match our payload. + // + self.update_backoff = None; + return false; + } + + self.present = Some(true); + + // + // If we can read the serial number, we're going to store it -- and + // if we previously stored one and it DOESN'T match, we want to + // clear our backoff value so we don't delay at all in potentially + // trying to update the firmware of the (replaced) PSU. (If we can't + // read the serial number at all, we want to continue to potentially + // update the firmware.) 
+ // + match (dev.serial_number(), self.serial_number) { + (Ok(read), Some(stored)) if read != stored => { + ringbuf_entry!(Trace::PSUReplaced(ndx)); + self.update_backoff = None; + self.serial_number = Some(read); + } + (Ok(_), Some(_)) => {} + (Ok(read), None) => { + self.serial_number = Some(read); + } + (Err(code), _) => { + ringbuf_entry!(Trace::SerialNumberError(ndx, code)); + } + } + + match dev.power_good() { + Ok(power_good) => { + self.power_good = Some(power_good); + + if !power_good { + return false; + } + } + Err(err) => { + ringbuf_entry!(Trace::PowerGoodFailed(ndx, err)); + return false; + } + } + + match dev.firmware_revision() { + Ok(revision) => { + self.firmware_revision = Some(revision); + + if revision == MWOCP68_FIRMWARE_REV { + self.firmware_matches = Some(true); + return false; + } + + self.firmware_matches = Some(false); + } + Err(err) => { + ringbuf_entry!(Trace::FirmwareRevFailed(ndx, err)); + return false; + } + } + + if let (Some(started), Some(backoff)) = + (self.update_started, self.update_backoff) + { + if started + backoff > now { + // + // Indicate we are backing off, but in a way that won't flood + // the ring buffer with the backing off of a single PSU. + // + ringbuf_entry!(Trace::BackingOff(ndx)); + return false; + } + } + + true + } + + fn update_firmware(&mut self, dev: &Mwocp68, ndx: u8) { + ringbuf_entry!(Trace::AttemptingUpdate(ndx)); + self.update_started = Some(Ticks::now()); + + // + // Before we start, update our backoff. We'll double our backoff, up + // to a cap of around a day. + // + self.update_backoff = match self.update_backoff { + Some(backoff) if backoff.0 < 86_400_000 => { + Some(Ticks(backoff.0 * 2)) + } + Some(backoff) => Some(backoff), + None => Some(Ticks(75_000)), + }; + + let mut state = None; + + let mut update_failed = |state, err| { + // + // We failed. Record everything we can! 
+ // + if let Some(err) = err { + ringbuf_entry!(Trace::UpdateFailure(err)); + } + + ringbuf_entry!(Trace::UpdateFailed); + ringbuf_entry!(Trace::UpdateFailedState(state)); + self.update_failure = Some((Ticks::now(), state, err)); + }; + + loop { + match dev.update(state, MWOCP68_FIRMWARE_PAYLOAD) { + Err(err) => { + update_failed(state, Some(err)); + break; + } + + Ok((UpdateState::UpdateSuccessful, _)) => { + let state = Some(UpdateState::UpdateSuccessful); + + // + // We should be back up! As a final measure, we are going + // to check that the firmware revision matches the + // revision we think we just wrote. If it doesn't, there + // is something amiss: it may be that the image is + // corrupt or that the version doesn't otherwise match. + // Regardless, we consider that to be an update failure. + // + match dev.firmware_revision() { + Ok(revision) if revision != MWOCP68_FIRMWARE_REV => { + update_failed(state, None); + break; + } + + Err(err) => { + update_failed(state, Some(err)); + break; + } + + Ok(_) => {} + } + + // + // We're on the new firmware! And now, a final final + // check: make sure that we are power-good. It is very + // unclear what to do here if we are NOT power-good: we know + // that we WERE power-good before we started, so it + // certainly seems possible that we have put a firmware + // update on this PSU which has somehow incapacitated it. + // We would rather not put the system in a compromised + // state by continuing to potentially brick PSUs -- but we + // also want to ensure that we make progress should this + // ever resolve (e.g., by pulling the bricked PSU). We will + // remain here until we see the updated PSU go power-good; + // if it never does, we will at least not attempt to put + // the (potentially) bad update anywhere else!
+ // + loop { + match dev.power_good() { + Ok(power_good) if power_good => break, + Ok(_) => { + ringbuf_entry!(Trace::PowerNotGood(ndx)); + } + Err(err) => { + ringbuf_entry!(Trace::PGError(ndx, err)); + } + } + + hl::sleep_for(TIMER_INTERVAL_MS); + } + + ringbuf_entry!(Trace::UpdateSucceeded(ndx)); + self.update_succeeded = Some(Ticks::now()); + self.update_backoff = None; + break; + } + + Ok((next, delay)) => { + match next { + UpdateState::WroteBlock { .. } => { + ringbuf_entry!(Trace::WroteBlock); + } + _ => { + ringbuf_entry!(Trace::UpdateState(next)); + ringbuf_entry!(Trace::UpdateDelay(delay)); + } + } + + hl::sleep_for(delay); + state = Some(next); + } + } + } + } +} + +#[export_name = "main"] +fn main() -> ! { + let i2c_task = I2C.get_task_id(); + + let psus = PSU.claim(); + + let devs: [Mwocp68; 6] = array_init::array_init(|ndx: usize| { + Mwocp68::new(&DEVICES[ndx](i2c_task), 0) + }); + + loop { + hl::sleep_for(TIMER_INTERVAL_MS); + + for (ndx, psu) in psus.iter_mut().enumerate() { + let dev = &devs[ndx]; + + if psu.update_should_be_attempted(dev, ndx as u8) { + psu.update_firmware(dev, ndx as u8); + } + } + } +} + +include!(concat!(env!("OUT_DIR"), "/i2c_config.rs")); diff --git a/drv/psc-psu-update/src/mwocp68-0701.bin b/drv/psc-psu-update/src/mwocp68-0701.bin new file mode 100644 index 000000000..c22f31d65 Binary files /dev/null and b/drv/psc-psu-update/src/mwocp68-0701.bin differ diff --git a/drv/psc-psu-update/src/mwocp68-0762.bin b/drv/psc-psu-update/src/mwocp68-0762.bin new file mode 100644 index 000000000..baa93ca9b Binary files /dev/null and b/drv/psc-psu-update/src/mwocp68-0762.bin differ diff --git a/task/sensor-polling/src/main.rs b/task/sensor-polling/src/main.rs index aef3d21fa..9da0ce043 100644 --- a/task/sensor-polling/src/main.rs +++ b/task/sensor-polling/src/main.rs @@ -36,6 +36,7 @@ impl From for task_sensor_api::NoData { | Mwocp68Error::BadValidation { code, .. } => code.into(), Mwocp68Error::BadData { .. 
} | Mwocp68Error::InvalidData { .. } => Self::DeviceError, + _ => Self::DeviceError, }, } }