diff --git a/Cargo.lock b/Cargo.lock index d9ac167042ad..2e671aa48af1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3945,6 +3945,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "serde_with", + "serial_test", "smallvec", "storage_broker", "strum", @@ -5612,6 +5613,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scc" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94b13f8ea6177672c49d12ed964cca44836f59621981b04a3e26b87e675181de" +dependencies = [ + "sdd", +] + [[package]] name = "schannel" version = "0.1.23" @@ -5652,6 +5662,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "621e3680f3e07db4c9c2c3fb07c6223ab2fab2e54bd3c04c3ae037990f428c32" +[[package]] +name = "sdd" +version = "3.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "478f121bb72bbf63c52c93011ea1791dca40140dfe13f8336c4c5ac952c33aa9" + [[package]] name = "sec1" version = "0.3.0" @@ -5941,6 +5957,31 @@ dependencies = [ "serde", ] +[[package]] +name = "serial_test" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b258109f244e1d6891bf1053a55d63a5cd4f8f4c30cf9a1280989f80e7a1fa9" +dependencies = [ + "futures", + "log", + "once_cell", + "parking_lot 0.12.1", + "scc", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "sha1" version = "0.10.5" diff --git a/Cargo.toml b/Cargo.toml index 885f02ba8190..6997db2c02fb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -161,6 +161,7 @@ serde_json = "1" serde_path_to_error = "0.1" serde_with = { version = "2.0", features = [ "base64" ] } serde_assert = "0.5.0" +serial_test = "3.2.0" sha2 = "0.10.2" signal-hook = "0.3" smallvec = "1.11" diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 140b287ccc18..790543ae2639 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -94,6 +94,7 @@ procfs.workspace = true [dev-dependencies] criterion.workspace = true hex-literal.workspace = true +serial_test.workspace = true tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time", "test-util"] } indoc.workspace = true diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index 8933e8ceb13e..2606de0d17f0 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -1,5 +1,6 @@ use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; +use once_cell::sync::Lazy; use pageserver_api::keyspace::KeySpace; use pageserver_api::models::HistoricLayerInfo; use pageserver_api::shard::{ShardIdentity, ShardIndex, TenantShardId}; @@ -2096,6 +2097,36 @@ impl Default for LayerImplMetrics { } impl LayerImplMetrics { + /// Resets the layer metrics to 0, for use in tests. Since this is a global static, metrics will + /// be shared across tests, and must be reset in each test case. + #[cfg(test)] + fn reset(&self) { + // Destructure to error on new fields. + let LayerImplMetrics { + started_evictions, + completed_evictions, + cancelled_evictions, + started_deletes, + completed_deletes, + failed_deletes, + rare_counters, + inits_cancelled, + redownload_after, + time_to_evict, + } = self; + + started_evictions.reset(); + completed_evictions.reset(); + cancelled_evictions.values().for_each(|c| c.reset()); + started_deletes.reset(); + completed_deletes.reset(); + failed_deletes.values().for_each(|c| c.reset()); + rare_counters.values().for_each(|c| c.reset()); + inits_cancelled.reset(); + redownload_after.local().clear(); + time_to_evict.local().clear(); + } + fn inc_started_evictions(&self) { self.started_evictions.inc(); } @@ -2247,5 +2278,4 @@ impl RareEvent { } } -pub(crate) static LAYER_IMPL_METRICS: once_cell::sync::Lazy = - once_cell::sync::Lazy::new(LayerImplMetrics::default); +pub(crate) static LAYER_IMPL_METRICS: Lazy = Lazy::new(LayerImplMetrics::default); diff --git a/pageserver/src/tenant/storage_layer/layer/tests.rs b/pageserver/src/tenant/storage_layer/layer/tests.rs index 36dcc8d8050c..ba51a2618629 100644 --- a/pageserver/src/tenant/storage_layer/layer/tests.rs +++ b/pageserver/src/tenant/storage_layer/layer/tests.rs @@ -1,6 +1,7 @@ use std::time::UNIX_EPOCH; use pageserver_api::key::CONTROLFILE_KEY; +use serial_test::serial; use tokio::task::JoinSet; use utils::{ completion::{self, Completion}, @@ -21,7 +22,10 @@ const FOREVER: std::time::Duration = std::time::Duration::from_secs(ADVANCE.as_s /// Demonstrate the API and resident -> evicted -> resident -> deleted transitions. #[tokio::test] +#[serial] async fn smoke_test() { + LAYER_IMPL_METRICS.reset(); + let handle = tokio::runtime::Handle::current(); let h = TenantHarness::create("smoke_test").await.unwrap(); @@ -198,7 +202,10 @@ async fn smoke_test() { /// This test demonstrates a previous hang when a eviction and deletion were requested at the same /// time. Now both of them complete per Arc drop semantics. #[tokio::test(start_paused = true)] +#[serial] async fn evict_and_wait_on_wanted_deleted() { + LAYER_IMPL_METRICS.reset(); + // this is the runtime on which Layer spawns the blocking tasks on let handle = tokio::runtime::Handle::current(); @@ -275,7 +282,10 @@ async fn evict_and_wait_on_wanted_deleted() { /// This test ensures we are able to read the layer while the layer eviction has been /// started but not completed. #[test] +#[serial] fn read_wins_pending_eviction() { + LAYER_IMPL_METRICS.reset(); + let rt = tokio::runtime::Builder::new_current_thread() .max_blocking_threads(1) .enable_all() @@ -395,6 +405,7 @@ fn read_wins_pending_eviction() { /// Use failpoint to delay an eviction starting to get a VersionCheckFailed. #[test] +#[serial] fn multiple_pending_evictions_in_order() { let name = "multiple_pending_evictions_in_order"; let in_order = true; @@ -403,6 +414,7 @@ fn multiple_pending_evictions_in_order() { /// Use failpoint to reorder later eviction before first to get a UnexpectedEvictedState. #[test] +#[serial] fn multiple_pending_evictions_out_of_order() { let name = "multiple_pending_evictions_out_of_order"; let in_order = false; @@ -410,6 +422,8 @@ fn multiple_pending_evictions_out_of_order() { } fn multiple_pending_evictions_scenario(name: &'static str, in_order: bool) { + LAYER_IMPL_METRICS.reset(); + let rt = tokio::runtime::Builder::new_current_thread() .max_blocking_threads(1) .enable_all() @@ -587,7 +601,10 @@ fn multiple_pending_evictions_scenario(name: &'static str, in_order: bool) { /// disk but the layer internal state says it has not been initialized. Futhermore, it allows us to /// have non-repairing `Layer::is_likely_resident`. #[tokio::test(start_paused = true)] +#[serial] async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() { + LAYER_IMPL_METRICS.reset(); + let handle = tokio::runtime::Handle::current(); let h = TenantHarness::create("cancelled_get_or_maybe_download_does_not_cancel_eviction") .await @@ -665,8 +682,8 @@ async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() { } #[tokio::test(start_paused = true)] +#[serial] async fn evict_and_wait_does_not_wait_for_download() { - // let handle = tokio::runtime::Handle::current(); let h = TenantHarness::create("evict_and_wait_does_not_wait_for_download") .await .unwrap(); @@ -759,10 +776,18 @@ async fn evict_and_wait_does_not_wait_for_download() { /// /// Also checks that the same does not happen on a non-evicted layer (regression test). #[tokio::test(start_paused = true)] +#[serial] async fn eviction_cancellation_on_drop() { use bytes::Bytes; use pageserver_api::value::Value; + LAYER_IMPL_METRICS.reset(); + + assert_eq!( + 0, + LAYER_IMPL_METRICS.cancelled_evictions[EvictionCancelled::LayerGone].get() + ); + // this is the runtime on which Layer spawns the blocking tasks on let handle = tokio::runtime::Handle::current();