Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
cf76a91
Region snapshot replacement for read-only regions
jmpesp Jan 27, 2025
54cd824
remember, the background tasks just start the process! still need to …
jmpesp Jan 30, 2025
dce5030
correctly report which replacement was requested
jmpesp Jan 30, 2025
51c7f74
wait more so that conflicts are not generated
jmpesp Jan 31, 2025
3ec5341
emit more information during background tasks
jmpesp Feb 5, 2025
fcd95ac
Merge branch 'main' into replace_read_only_regions
jmpesp Feb 6, 2025
63c13b0
fix after merge
jmpesp Feb 6, 2025
46e7e04
try running replacement tasks more frequently
jmpesp Feb 6, 2025
f1e0f3a
deal with remove-rop race for region snapshot replacement steps
jmpesp Feb 7, 2025
b8b0959
Merge branch 'main' into replace_read_only_regions
jmpesp Feb 7, 2025
071295a
CI is slow, wait double the time
jmpesp Feb 10, 2025
0e8c590
Merge branch 'main' into replace_read_only_regions
jmpesp Feb 10, 2025
a1392c9
run replacement tasks inside wait_for_all_replacements
jmpesp Feb 10, 2025
e5d74e5
Merge remote-tracking branch 'upstream/main' into replace_read_only_r…
jmpesp Feb 11, 2025
81f2117
add missing comment
jmpesp Feb 11, 2025
f8ccab5
only assert if expected end state is different
jmpesp Feb 11, 2025
752a482
Shorter poll intervals
jmpesp Feb 11, 2025
572806f
also wait_for_all_replacements in volume_management tests
jmpesp Feb 12, 2025
6dce585
bump from 10 second wait to 30 and 60
jmpesp Feb 12, 2025
8e376cc
more wait_for_all_replacements after run_replacement_tasks_to_completion
jmpesp Feb 12, 2025
af1143e
combine run_replacement_tasks_to_completion and wait_for_all_replacem…
jmpesp Feb 12, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 45 additions & 14 deletions dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ use nexus_db_model::NetworkInterfaceKind;
use nexus_db_model::PhysicalDisk;
use nexus_db_model::Probe;
use nexus_db_model::Project;
use nexus_db_model::ReadOnlyTargetReplacement;
use nexus_db_model::Region;
use nexus_db_model::RegionReplacement;
use nexus_db_model::RegionReplacementState;
Expand Down Expand Up @@ -3068,11 +3069,19 @@ async fn cmd_db_region_replacement_request(
) -> Result<(), anyhow::Error> {
let region = datastore.get_region(args.region_id).await?;

let request_id = datastore
.create_region_replacement_request_for_region(opctx, &region)
.await?;
if region.read_only() {
let request_id = datastore
.create_read_only_region_replacement_request(opctx, region.id())
.await?;

println!("region snapshot replacement {request_id} created");
} else {
let request_id = datastore
.create_region_replacement_request_for_region(opctx, &region)
.await?;

println!("region replacement {request_id} created");
println!("region replacement {request_id} created");
}

Ok(())
}
Expand Down Expand Up @@ -4448,12 +4457,22 @@ async fn cmd_db_region_snapshot_replacement_status(
" state: {:?}",
request.replacement_state
);
println!(
" region snapshot: {} {} {}",
request.old_dataset_id,
request.old_region_id,
request.old_snapshot_id,
);
match request.replacement_type() {
ReadOnlyTargetReplacement::RegionSnapshot {
dataset_id,
region_id,
snapshot_id,
} => {
println!(
" region snapshot: {} {} {}",
dataset_id, region_id, snapshot_id,
);
}

ReadOnlyTargetReplacement::ReadOnlyRegion { region_id } => {
println!(" read-only region: {}", region_id);
}
}
println!(" new region id: {:?}", request.new_region_id);
println!(" in-progress steps left: {:?}", steps_left);
println!();
Expand Down Expand Up @@ -4485,10 +4504,22 @@ async fn cmd_db_region_snapshot_replacement_info(

println!(" started: {}", request.request_time);
println!(" state: {:?}", request.replacement_state);
println!(
" region snapshot: {} {} {}",
request.old_dataset_id, request.old_region_id, request.old_snapshot_id,
);
match request.replacement_type() {
ReadOnlyTargetReplacement::RegionSnapshot {
dataset_id,
region_id,
snapshot_id,
} => {
println!(
" region snapshot: {} {} {}",
dataset_id, region_id, snapshot_id,
);
}

ReadOnlyTargetReplacement::ReadOnlyRegion { region_id } => {
println!(" read-only region: {}", region_id);
}
}
println!(" new region id: {:?}", request.new_region_id);
println!(" in-progress steps left: {:?}", steps_left);
println!();
Expand Down
30 changes: 30 additions & 0 deletions dev-tools/omdb/src/bin/omdb/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ use nexus_types::internal_api::background::BlueprintRendezvousStatus;
use nexus_types::internal_api::background::InstanceReincarnationStatus;
use nexus_types::internal_api::background::InstanceUpdaterStatus;
use nexus_types::internal_api::background::LookupRegionPortStatus;
use nexus_types::internal_api::background::ReadOnlyRegionReplacementStartStatus;
use nexus_types::internal_api::background::RegionReplacementDriverStatus;
use nexus_types::internal_api::background::RegionReplacementStatus;
use nexus_types::internal_api::background::RegionSnapshotReplacementFinishStatus;
Expand Down Expand Up @@ -928,6 +929,9 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) {
"phantom_disks" => {
print_task_phantom_disks(details);
}
"read_only_region_replacement_start" => {
print_task_read_only_region_replacement_start(details);
}
"region_replacement" => {
print_task_region_replacement(details);
}
Expand Down Expand Up @@ -1724,6 +1728,32 @@ fn print_task_phantom_disks(details: &serde_json::Value) {
};
}

/// Print the details reported by the `read_only_region_replacement_start`
/// background task.
///
/// `details` is deserialized into a `ReadOnlyRegionReplacementStartStatus`.
/// If that fails, a warning (including the raw JSON) is written to stderr and
/// nothing else is printed. Otherwise the count and each entry of
/// `requests_created_ok` are printed, followed by the count and each entry of
/// `errors`.
fn print_task_read_only_region_replacement_start(details: &serde_json::Value) {
    // `from_value` consumes its argument, so deserialize from a copy and keep
    // the borrowed original available for the warning message.
    let parsed: Result<ReadOnlyRegionReplacementStartStatus, _> =
        serde_json::from_value(details.clone());

    let status = match parsed {
        Ok(status) => status,
        Err(error) => {
            eprintln!(
                "warning: failed to interpret task details: {:?}: {:?}",
                error, details
            );
            return;
        }
    };

    let created = &status.requests_created_ok;
    println!(" total requests created ok: {}", created.len());
    for line in created {
        println!(" > {line}");
    }

    let errors = &status.errors;
    println!(" errors: {}", errors.len());
    for line in errors {
        println!(" > {line}");
    }
}

fn print_task_region_replacement(details: &serde_json::Value) {
match serde_json::from_value::<RegionReplacementStatus>(details.clone()) {
Err(error) => eprintln!(
Expand Down
12 changes: 12 additions & 0 deletions dev-tools/omdb/tests/env.out
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ task: "physical_disk_adoption"
ensure new physical disks are automatically marked in-service


task: "read_only_region_replacement_start"
detect if read-only regions need replacement and begin the process


task: "region_replacement"
detects if a region requires replacing and begins the process

Expand Down Expand Up @@ -313,6 +317,10 @@ task: "physical_disk_adoption"
ensure new physical disks are automatically marked in-service


task: "read_only_region_replacement_start"
detect if read-only regions need replacement and begin the process


task: "region_replacement"
detects if a region requires replacing and begins the process

Expand Down Expand Up @@ -484,6 +492,10 @@ task: "physical_disk_adoption"
ensure new physical disks are automatically marked in-service


task: "read_only_region_replacement_start"
detect if read-only regions need replacement and begin the process


task: "region_replacement"
detects if a region requires replacing and begins the process

Expand Down
20 changes: 20 additions & 0 deletions dev-tools/omdb/tests/successes.out
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,10 @@ task: "physical_disk_adoption"
ensure new physical disks are automatically marked in-service


task: "read_only_region_replacement_start"
detect if read-only regions need replacement and begin the process


task: "region_replacement"
detects if a region requires replacing and begins the process

Expand Down Expand Up @@ -613,6 +617,14 @@ task: "physical_disk_adoption"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: task disabled

task: "read_only_region_replacement_start"
configured period: every <REDACTED_DURATION>m
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
total requests created ok: 0
errors: 0

task: "region_replacement"
configured period: every <REDACTED_DURATION>m
currently executing: no
Expand Down Expand Up @@ -1104,6 +1116,14 @@ task: "physical_disk_adoption"
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
last completion reported error: task disabled

task: "read_only_region_replacement_start"
configured period: every <REDACTED_DURATION>m
currently executing: no
last completed activation: <REDACTED ITERATIONS>, triggered by a periodic timer firing
started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
total requests created ok: 0
errors: 0

task: "region_replacement"
configured period: every <REDACTED_DURATION>m
currently executing: no
Expand Down
17 changes: 17 additions & 0 deletions nexus-config/src/nexus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,9 @@ pub struct BackgroundTaskConfig {
RegionSnapshotReplacementFinishConfig,
/// configuration for TUF artifact replication task
pub tuf_artifact_replication: TufArtifactReplicationConfig,
/// configuration for read-only region replacement start task
pub read_only_region_replacement_start:
ReadOnlyRegionReplacementStartConfig,
}

#[serde_as]
Expand Down Expand Up @@ -735,6 +738,14 @@ pub struct TufArtifactReplicationConfig {
pub min_sled_replication: usize,
}

/// Configuration for the read-only region replacement start background task.
///
/// The only setting is how often the task is periodically activated.
#[serde_as]
#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)]
pub struct ReadOnlyRegionReplacementStartConfig {
/// period (in seconds) for periodic activations of this background task
// (De)serialized via `DurationSeconds<u64>`, i.e. as a whole number of seconds.
#[serde_as(as = "DurationSeconds<u64>")]
pub period_secs: Duration,
}

/// Configuration for a nexus server
#[derive(Clone, Debug, Deserialize, PartialEq, Serialize)]
pub struct PackageConfig {
Expand Down Expand Up @@ -993,6 +1004,7 @@ mod test {
region_snapshot_replacement_finish.period_secs = 30
tuf_artifact_replication.period_secs = 300
tuf_artifact_replication.min_sled_replication = 3
read_only_region_replacement_start.period_secs = 30
[default_region_allocation_strategy]
type = "random"
seed = 0
Expand Down Expand Up @@ -1194,6 +1206,10 @@ mod test {
period_secs: Duration::from_secs(300),
min_sled_replication: 3,
},
read_only_region_replacement_start:
ReadOnlyRegionReplacementStartConfig {
period_secs: Duration::from_secs(30),
},
},
default_region_allocation_strategy:
crate::nexus_config::RegionAllocationStrategy::Random {
Expand Down Expand Up @@ -1279,6 +1295,7 @@ mod test {
region_snapshot_replacement_finish.period_secs = 30
tuf_artifact_replication.period_secs = 300
tuf_artifact_replication.min_sled_replication = 3
read_only_region_replacement_start.period_secs = 30
[default_region_allocation_strategy]
type = "random"
"##,
Expand Down
Loading
Loading