Skip to content

Commit ee095a6

Browse files
authored
(7/5) [nexus-db-queries] Benchmark for VMM reservation (#7498)
Following up on the affinity work, I wanted to validate that the additional logic for affinity groups does not make the performance of the instance reservation query any worse than it was *before*.
1 parent 07a3a50 commit ee095a6

File tree

19 files changed

+1672
-716
lines changed

19 files changed

+1672
-716
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

common/src/api/external/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1291,7 +1291,9 @@ pub enum InstanceAutoRestartPolicy {
12911291
/// Affinity policy used to describe "what to do when a request cannot be satisfied"
12921292
///
12931293
/// Used for both Affinity and Anti-Affinity Groups
1294-
#[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, JsonSchema)]
1294+
#[derive(
1295+
Clone, Copy, Debug, Deserialize, Hash, Eq, Serialize, PartialEq, JsonSchema,
1296+
)]
12951297
#[serde(rename_all = "snake_case")]
12961298
pub enum AffinityPolicy {
12971299
/// If the affinity request cannot be satisfied, allow it anyway.

nexus/db-queries/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ testing = ["omicron-test-utils"]
7575
[dev-dependencies]
7676
assert_matches.workspace = true
7777
camino-tempfile.workspace = true
78+
criterion.workspace = true
7879
expectorate.workspace = true
7980
hyper-rustls.workspace = true
8081
gateway-client.workspace = true
@@ -85,7 +86,7 @@ nexus-inventory.workspace = true
8586
nexus-reconfigurator-planning.workspace = true
8687
nexus-test-utils.workspace = true
8788
omicron-sled-agent.workspace = true
88-
omicron-test-utils.workspace = true
89+
omicron-test-utils = { workspace = true, features = ["seed-gen"] }
8990
openapiv3.workspace = true
9091
oso.workspace = true
9192
pem.workspace = true
@@ -97,3 +98,7 @@ regex.workspace = true
9798
rustls.workspace = true
9899
subprocess.workspace = true
99100
term.workspace = true
101+
102+
[[bench]]
103+
name = "sled_reservation"
104+
harness = false
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
:showtitle:
2+
:toc: left
3+
:icons: font
4+
5+
= Benchmarks
6+
7+
This directory contains benchmarks for database queries.
8+
9+
These benchmarks can be run with:
10+
11+
[source,bash]
12+
----
13+
cargo bench -p nexus-db-queries
14+
----
15+
16+
Additionally, the "SHOW_CONTENTION" environment variable can be set to display
17+
extra data from CockroachDB tables about contention statistics, if they
18+
are available.
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
//! Database test helpers
6+
//!
7+
//! These are largely ripped out of "nexus/db-queries/src/db/datastore".
8+
//!
9+
//! Benchmarks are compiled as external binaries from library crates, so we
10+
//! can only access `pub` code.
11+
//!
12+
//! It may be worth refactoring some of these functions to a test utility
13+
//! crate to avoid the duplication.
14+
15+
use anyhow::Context;
16+
use anyhow::Result;
17+
use nexus_db_model::Sled;
18+
use nexus_db_model::SledReservationConstraintBuilder;
19+
use nexus_db_model::SledUpdate;
20+
use nexus_db_queries::context::OpContext;
21+
use nexus_db_queries::db::DataStore;
22+
use nexus_db_queries::db::pub_test_utils::helpers::SledUpdateBuilder;
23+
use nexus_db_queries::db::pub_test_utils::helpers::small_resource_request;
24+
use omicron_uuid_kinds::InstanceUuid;
25+
use omicron_uuid_kinds::PropolisUuid;
26+
use uuid::Uuid;
27+
28+
pub fn rack_id() -> Uuid {
29+
Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap()
30+
}
31+
32+
/// Number of usable hardware threads configured on every sled built by
/// `test_new_sled_update`; also used by `max_resource_request_count` to
/// compute how many reservations should fit.
const USABLE_HARDWARE_THREADS: u32 = 32;
33+
34+
pub fn test_new_sled_update() -> SledUpdate {
35+
let mut sled = SledUpdateBuilder::new();
36+
sled.rack_id(rack_id())
37+
.hardware()
38+
.usable_hardware_threads(USABLE_HARDWARE_THREADS);
39+
sled.build()
40+
}
41+
42+
pub async fn create_sleds(datastore: &DataStore, count: usize) -> Vec<Sled> {
43+
let mut sleds = vec![];
44+
for _ in 0..count {
45+
let (sled, _) =
46+
datastore.sled_upsert(test_new_sled_update()).await.unwrap();
47+
sleds.push(sled);
48+
}
49+
sleds
50+
}
51+
52+
/// Given a `sled_count`, returns the number of times a call to
53+
/// `create_reservation` should succeed.
54+
///
55+
/// This can be used to validate parameters before running benchmarks.
56+
pub fn max_resource_request_count(sled_count: usize) -> usize {
57+
let threads_per_request: usize =
58+
small_resource_request().hardware_threads.0.try_into().unwrap();
59+
let threads_per_sled: usize = USABLE_HARDWARE_THREADS.try_into().unwrap();
60+
61+
threads_per_sled * sled_count / threads_per_request
62+
}
63+
64+
pub async fn create_reservation(
65+
opctx: &OpContext,
66+
db: &DataStore,
67+
instance_id: InstanceUuid,
68+
) -> Result<PropolisUuid> {
69+
let vmm_id = PropolisUuid::new_v4();
70+
71+
loop {
72+
match db
73+
.sled_reservation_create(
74+
&opctx,
75+
instance_id,
76+
vmm_id,
77+
small_resource_request(),
78+
SledReservationConstraintBuilder::new().build(),
79+
)
80+
.await
81+
{
82+
Ok(_) => break,
83+
Err(err) => {
84+
// This condition is bad - it would result in a user-visible
85+
// error, in most cases - but it's also an indication of failure
86+
// due to contention. We normally bubble this out to users,
87+
// rather than stalling the request, but in this particular
88+
// case, we choose to retry immediately.
89+
if err.to_string().contains("restart transaction") {
90+
continue;
91+
}
92+
return Err(err).context("Failed to create reservation");
93+
}
94+
}
95+
}
96+
Ok(vmm_id)
97+
}
98+
99+
pub async fn delete_reservation(
100+
opctx: &OpContext,
101+
db: &DataStore,
102+
vmm_id: PropolisUuid,
103+
) -> Result<()> {
104+
db.sled_reservation_delete(&opctx, vmm_id)
105+
.await
106+
.context("Failed to delete reservation")?;
107+
Ok(())
108+
}

0 commit comments

Comments
 (0)