diff --git a/Cargo.toml b/Cargo.toml index 963841e340979..e038c0b4ffc3b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -113,7 +113,7 @@ md5 = "0.7.0" measured = { version = "0.0.22", features=["lasso"] } measured-process = { version = "0.0.22" } memoffset = "0.8" -nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] } +nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] } notify = "6.0.0" num_cpus = "1.15" num-traits = "0.2.15" diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index da0c11d9bf0ab..594fed5c18d81 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -126,10 +126,53 @@ fn main() -> anyhow::Result<()> { info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings"); info!(?conf.compact_level0_phase1_value_access, "starting with setting for compact_level0_phase1_value_access"); + // The tenants directory contains all the pageserver local disk state. + // Create if not exists and make sure all the contents are durable before proceeding. + // Ensuring durability eliminates a whole bug class where we come up after an unclean shutdown. + // After unclea shutdown, we don't know if all the filesystem content we can read via syscalls is actually durable or not. + // Examples for that: OOM kill, systemd killing us during shutdown, self abort due to unrecoverable IO error. let tenants_path = conf.tenants_path(); - if !tenants_path.exists() { - utils::crashsafe::create_dir_all(conf.tenants_path()) - .with_context(|| format!("Failed to create tenants root dir at '{tenants_path}'"))?; + { + let open = || { + nix::dir::Dir::open( + tenants_path.as_std_path(), + nix::fcntl::OFlag::O_DIRECTORY | nix::fcntl::OFlag::O_RDONLY, + nix::sys::stat::Mode::empty(), + ) + }; + let dirfd = match open() { + Ok(dirfd) => dirfd, + Err(e) => match e { + nix::errno::Errno::ENOENT => { + utils::crashsafe::create_dir_all(&tenants_path).with_context(|| { + format!("Failed to create tenants root dir at '{tenants_path}'") + })?; + open().context("open tenants dir after creating it")? + } + e => anyhow::bail!(e), + }, + }; + + let started = Instant::now(); + // Linux guarantees durability for syncfs. + // POSIX doesn't have syncfs, and further does not actually guarantee durability of sync(). + #[cfg(target_os = "linux")] + nix::unistd::syncfs(dirfd.as_raw_fd()).context("syncfs")?; + #[cfg(target_os = "macos")] + { + // macOS is not a production platform for Neon, don't even bother. + drop(dirfd); + } + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + compile_error!("Unsupported OS"); + } + + let elapsed = started.elapsed(); + info!( + elapsed_ms = elapsed.as_millis(), + "made tenant directory contents durable" + ); } // Initialize up failpoints support