From 77bb6c4cc459268d6b95b4c543a6638b873a6af4 Mon Sep 17 00:00:00 2001 From: Yuchen Liang <70461588+yliang412@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:04:19 -0400 Subject: [PATCH] feat(pageserver): add direct io pageserver config (#8622) Part of #8130, [RFC: Direct IO For Pageserver](https://github.com/neondatabase/neon/blob/problame/direct-io-rfc/docs/rfcs/034-direct-io-for-pageserver.md) ## Description Add a pageserver config option for evaluating/enabling direct IO. - Disabled: the current default; uses buffered IO as is. - Evaluate: still uses buffered IO, but can optionally perform alignment checks and performance simulation (padding latency by issuing direct IO reads/writes to a fake file). - Enabled: uses direct IO; the behavior on alignment error is configurable. Signed-off-by: Yuchen Liang --- libs/pageserver_api/src/models.rs | 49 +++++++++++++++++++++++++++++++ pageserver/src/bin/pageserver.rs | 1 + pageserver/src/config.rs | 17 +++++++++++ pageserver/src/virtual_file.rs | 1 + 4 files changed, 68 insertions(+) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index b541bba6a17e..ab4adfbebe02 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -947,6 +947,8 @@ pub struct TopTenantShardsResponse { } pub mod virtual_file { + use std::path::PathBuf; + #[derive( Copy, Clone, @@ -965,6 +967,53 @@ pub mod virtual_file { #[cfg(target_os = "linux")] TokioEpollUring, } + + /// Direct IO modes for a pageserver. + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)] + #[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)] + pub enum DirectIoMode { + /// Direct IO disabled (uses usual buffered IO). + #[default] + Disabled, + /// Direct IO disabled (performs checks and perf simulations). + Evaluate { + /// Alignment check level + alignment_check: DirectIoAlignmentCheckLevel, + /// Latency padded for performance simulation. + latency_padding: DirectIoLatencyPadding, + }, + /// Direct IO enabled. 
+ Enabled { + /// Actions to perform on alignment error. + on_alignment_error: DirectIoOnAlignmentErrorAction, + }, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)] + #[serde(rename_all = "kebab-case")] + pub enum DirectIoAlignmentCheckLevel { + #[default] + Error, + Log, + None, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)] + #[serde(rename_all = "kebab-case")] + pub enum DirectIoOnAlignmentErrorAction { + Error, + #[default] + FallbackToBuffered, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)] + #[serde(tag = "type", rename_all = "kebab-case")] + pub enum DirectIoLatencyPadding { + /// Pad virtual file operations with IO to a fake file. + FakeFileRW { path: PathBuf }, + #[default] + None, + } } // Wrapped in libpq CopyData diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 5ebd6511acdf..932918410c79 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -123,6 +123,7 @@ fn main() -> anyhow::Result<()> { // after setting up logging, log the effective IO engine choice and read path implementations info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine"); + info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings"); info!(?conf.get_impl, "starting with get page implementation"); info!(?conf.get_vectored_impl, "starting with vectored get page implementation"); info!(?conf.compact_level0_phase1_value_access, "starting with setting for compact_level0_phase1_value_access"); diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 41c2fe0af35a..f4c367bd4d57 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -300,6 +300,9 @@ pub struct PageServerConf { /// This flag is temporary and will be removed after gradual rollout. /// See . 
pub compact_level0_phase1_value_access: CompactL0Phase1ValueAccess, + + /// Direct IO settings + pub virtual_file_direct_io: virtual_file::DirectIoMode, } /// We do not want to store this in a PageServerConf because the latter may be logged @@ -408,6 +411,8 @@ struct PageServerConfigBuilder { l0_flush: BuilderValue, compact_level0_phase1_value_access: BuilderValue, + + virtual_file_direct_io: BuilderValue, } impl PageServerConfigBuilder { @@ -498,6 +503,7 @@ impl PageServerConfigBuilder { ephemeral_bytes_per_memory_kb: Set(DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB), l0_flush: Set(L0FlushConfig::default()), compact_level0_phase1_value_access: Set(CompactL0Phase1ValueAccess::default()), + virtual_file_direct_io: Set(virtual_file::DirectIoMode::default()), } } } @@ -685,6 +691,10 @@ impl PageServerConfigBuilder { self.compact_level0_phase1_value_access = BuilderValue::Set(value); } + pub fn virtual_file_direct_io(&mut self, value: virtual_file::DirectIoMode) { + self.virtual_file_direct_io = BuilderValue::Set(value); + } + pub fn build(self, id: NodeId) -> anyhow::Result { let default = Self::default_values(); @@ -743,6 +753,7 @@ impl PageServerConfigBuilder { ephemeral_bytes_per_memory_kb, l0_flush, compact_level0_phase1_value_access, + virtual_file_direct_io, } CUSTOM LOGIC { @@ -1018,6 +1029,9 @@ impl PageServerConf { "compact_level0_phase1_value_access" => { builder.compact_level0_phase1_value_access(utils::toml_edit_ext::deserialize_item(item).context("compact_level0_phase1_value_access")?) } + "virtual_file_direct_io" => { + builder.virtual_file_direct_io(utils::toml_edit_ext::deserialize_item(item).context("virtual_file_direct_io")?) 
+ } _ => bail!("unrecognized pageserver option '{key}'"), } } @@ -1103,6 +1117,7 @@ impl PageServerConf { ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB, l0_flush: L0FlushConfig::default(), compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(), + virtual_file_direct_io: virtual_file::DirectIoMode::default(), } } } @@ -1345,6 +1360,7 @@ background_task_maximum_delay = '334 s' ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB, l0_flush: L0FlushConfig::default(), compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(), + virtual_file_direct_io: virtual_file::DirectIoMode::default(), }, "Correct defaults should be used when no config values are provided" ); @@ -1420,6 +1436,7 @@ background_task_maximum_delay = '334 s' ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB, l0_flush: L0FlushConfig::default(), compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(), + virtual_file_direct_io: virtual_file::DirectIoMode::default(), }, "Should be able to parse all basic config values correctly" ); diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index 51b0c420c346..8de646469ebb 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -34,6 +34,7 @@ pub use io_engine::FeatureTestResult as IoEngineFeatureTestResult; mod metadata; mod open_options; use self::owned_buffers_io::write::OwnedAsyncWriter; +pub(crate) use api::DirectIoMode; pub(crate) use io_engine::IoEngineKind; pub(crate) use metadata::Metadata; pub(crate) use open_options::*;