Skip to content

Commit ee3c964

Browse files
committed
(part 2 of 2) add test that Nexus propagates DNS on startup/rack initialization
1 parent b395959 commit ee3c964

File tree

10 files changed

+216
-71
lines changed

10 files changed

+216
-71
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

nexus/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ oximeter-producer.workspace = true
9090
[dev-dependencies]
9191
assert_matches.workspace = true
9292
criterion.workspace = true
93+
dns-server.workspace = true
9394
expectorate.workspace = true
9495
hyper-rustls.workspace = true
9596
itertools.workspace = true

nexus/db-queries/src/db/datastore/mod.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,8 +1041,9 @@ mod test {
10411041
// Create a sled on which the service should exist.
10421042
let sled_id = create_test_sled(&datastore).await;
10431043

1044-
// Create a new service to exist on this sled.
1045-
let service_id = Uuid::new_v4();
1044+
// Create a few new service to exist on this sled.
1045+
let service1_id =
1046+
"ab7bd7fd-7c37-48ab-a84a-9c09a90c4c7f".parse().unwrap();
10461047
let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 123, 0, 0);
10471048
let kind = ServiceKind::Nexus;
10481049

@@ -1053,7 +1054,8 @@ mod test {
10531054
assert_eq!(service1.ip, result.ip);
10541055
assert_eq!(service1.kind, result.kind);
10551056

1056-
let service2_id = Uuid::new_v4();
1057+
let service2_id =
1058+
"fe5b6e3d-dfee-47b4-8719-c54f78912c0b".parse().unwrap();
10571059
let service2 = Service::new(service2_id, sled_id, addr, kind);
10581060
let result =
10591061
datastore.service_upsert(&opctx, service2.clone()).await.unwrap();

nexus/src/app/background/init.rs

Lines changed: 129 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,26 @@ pub fn init(
2424
opctx: &OpContext,
2525
datastore: Arc<DataStore>,
2626
config: &BackgroundTaskConfig,
27-
) -> (common::Driver, common::TaskHandle) {
27+
) -> (common::Driver, common::TaskHandle, common::TaskHandle, common::TaskHandle)
28+
{
2829
let mut driver = common::Driver::new();
2930

30-
let rv = init_dns(
31+
let (task_config, task_servers) = init_dns(
3132
&mut driver,
3233
opctx,
3334
datastore.clone(),
3435
DnsGroup::Internal,
3536
&config.dns_internal,
3637
);
37-
let _ = init_dns(
38+
let (_, external_task_servers) = init_dns(
3839
&mut driver,
3940
opctx,
4041
datastore,
4142
DnsGroup::External,
4243
&config.dns_external,
4344
);
4445

45-
(driver, rv)
46+
(driver, task_config, task_servers, external_task_servers)
4647
}
4748

4849
fn init_dns(
@@ -51,28 +52,26 @@ fn init_dns(
5152
datastore: Arc<DataStore>,
5253
dns_group: DnsGroup,
5354
config: &DnsTasksConfig,
54-
) -> common::TaskHandle {
55+
) -> (common::TaskHandle, common::TaskHandle) {
5556
let dns_group_name = dns_group.to_string();
5657
let metadata = BTreeMap::from([("dns_group".to_string(), dns_group_name)]);
5758

5859
// Background task: DNS config watcher
5960
let dns_config =
6061
dns_config::DnsConfigWatcher::new(Arc::clone(&datastore), dns_group);
6162
let dns_config_watcher = dns_config.watcher();
62-
let rv = driver
63-
.register(
64-
format!("dns_config_{}", dns_group),
65-
config.period_secs_config,
66-
Box::new(dns_config),
67-
opctx.child(metadata.clone()),
68-
vec![],
69-
)
70-
.clone();
63+
let task_config = driver.register(
64+
format!("dns_config_{}", dns_group),
65+
config.period_secs_config,
66+
Box::new(dns_config),
67+
opctx.child(metadata.clone()),
68+
vec![],
69+
);
7170

7271
// Background task: DNS server list watcher
7372
let dns_servers = dns_servers::DnsServersWatcher::new(datastore, dns_group);
7473
let dns_servers_watcher = dns_servers.watcher();
75-
driver.register(
74+
let task_servers = driver.register(
7675
format!("dns_servers_{}", dns_group),
7776
config.period_secs_servers,
7877
Box::new(dns_servers),
@@ -94,7 +93,7 @@ fn init_dns(
9493
vec![Box::new(dns_config_watcher), Box::new(dns_servers_watcher)],
9594
);
9695

97-
rv
96+
(task_config, task_servers)
9897
}
9998

10099
#[cfg(test)]
@@ -107,21 +106,28 @@ mod test {
107106
use nexus_db_queries::context::OpContext;
108107
use nexus_test_utils_macros::nexus_test;
109108
use nexus_types::internal_api::params as nexus_params;
109+
use nexus_types::internal_api::params::ServiceKind;
110110
use omicron_common::api::external::DataPageParams;
111111
use omicron_test_utils::dev::poll;
112+
use std::net::SocketAddr;
112113
use std::num::NonZeroU32;
113114
use std::time::Duration;
115+
use tempfile::TempDir;
116+
use uuid::Uuid;
114117

115118
type ControlPlaneTestContext =
116119
nexus_test_utils::ControlPlaneTestContext<crate::Server>;
117120

121+
// Nexus is supposed to automatically propagate DNS configuration to all the
122+
// DNS servers it knows about. We'll test two things here:
123+
//
124+
// (1) create a new DNS server and ensure that it promptly gets the
125+
// existing DNS configuration
126+
//
127+
// (2) create a new configuration and ensure that both servers promptly get
128+
// the new DNS configuration
118129
#[nexus_test(server = crate::Server)]
119130
async fn test_dns_propagation_basic(cptestctx: &ControlPlaneTestContext) {
120-
// Nexus is supposed to automatically propagate DNS configuration to all
121-
// the DNS servers it knows about. To test that, we'll first write an
122-
// initial DNS configuration to the datastore. Then we'll wake up the
123-
// background task responsible for monitoring that. We should shortly
124-
// see that DNS configuration propagated to the existing DNS server.
125131
let nexus = &cptestctx.server.apictx().nexus;
126132
let datastore = nexus.datastore();
127133
let opctx = OpContext::for_tests(
@@ -142,9 +148,10 @@ mod test {
142148
// Verify that the DNS server is on version 1. This should already be
143149
// the case because it was configured with version 1 when the simulated
144150
// sled agent started up.
145-
let dns_dropshot_server = &cptestctx.sled_agent.dns_dropshot_server;
151+
let initial_dns_dropshot_server =
152+
&cptestctx.sled_agent.dns_dropshot_server;
146153
let dns_config_client = dns_service_client::Client::new(
147-
&format!("http://{}", dns_dropshot_server.local_addr()),
154+
&format!("http://{}", initial_dns_dropshot_server.local_addr()),
148155
cptestctx.logctx.log.clone(),
149156
);
150157
let config = dns_config_client
@@ -173,6 +180,65 @@ mod test {
173180
);
174181
let internal_dns_zone_id = dns_zones[0].id;
175182

183+
// Now spin up another DNS server, add it to the list of servers, and
184+
// make sure that DNS gets propagated to it. Note that we shouldn't
185+
// have to explicitly activate the background task because inserting a
186+
// new service ought to do that for us.
187+
let log = &cptestctx.logctx.log;
188+
let storage_path =
189+
TempDir::new().expect("Failed to create temporary directory");
190+
let config_store = dns_server::storage::Config {
191+
keep_old_generations: 3,
192+
storage_path: storage_path
193+
.path()
194+
.to_string_lossy()
195+
.into_owned()
196+
.into(),
197+
};
198+
let store = dns_server::storage::Store::new(
199+
log.new(o!("component" => "DnsStore")),
200+
&config_store,
201+
)
202+
.unwrap();
203+
204+
let (_, new_dns_dropshot_server) = dns_server::start_servers(
205+
log.clone(),
206+
store,
207+
&dns_server::dns_server::Config {
208+
bind_address: "[::1]:0".parse().unwrap(),
209+
},
210+
&dropshot::ConfigDropshot {
211+
bind_address: "[::1]:0".parse().unwrap(),
212+
request_body_max_bytes: 8 * 1024,
213+
..Default::default()
214+
},
215+
)
216+
.await
217+
.unwrap();
218+
219+
let new_dns_addr = match new_dns_dropshot_server.local_addr() {
220+
SocketAddr::V4(_) => panic!("expected v6 address"),
221+
SocketAddr::V6(a) => a,
222+
};
223+
nexus
224+
.upsert_service(
225+
&opctx,
226+
Uuid::new_v4(),
227+
cptestctx.sled_agent.sled_agent.id,
228+
new_dns_addr,
229+
ServiceKind::InternalDNSConfig.into(),
230+
)
231+
.await
232+
.unwrap();
233+
234+
wait_propagate_dns(
235+
&cptestctx.logctx.log,
236+
"new",
237+
new_dns_dropshot_server.local_addr(),
238+
1,
239+
)
240+
.await;
241+
176242
// Now, write version 2 of the internal DNS configuration with one
177243
// additional record.
178244
type TxnError = TransactionError<()>;
@@ -225,17 +291,52 @@ mod test {
225291
}
226292

227293
// Activate the internal DNS propagation pipeline.
228-
nexus._background_tasks.activate(&nexus._task_internal_dns_config);
294+
nexus.background_tasks.activate(&nexus.task_internal_dns_config);
295+
296+
// Wait for the new generation to get propagated to both servers.
297+
wait_propagate_dns(
298+
&cptestctx.logctx.log,
299+
"initial",
300+
initial_dns_dropshot_server.local_addr(),
301+
2,
302+
)
303+
.await;
229304

305+
wait_propagate_dns(
306+
&cptestctx.logctx.log,
307+
"new",
308+
new_dns_dropshot_server.local_addr(),
309+
2,
310+
)
311+
.await;
312+
}
313+
314+
/// Verify that DNS gets propagated to the specified server
315+
async fn wait_propagate_dns(
316+
log: &slog::Logger,
317+
label: &str,
318+
addr: SocketAddr,
319+
generation: u64,
320+
) {
321+
println!(
322+
"waiting for propagation of generation {} to {} DNS server ({})",
323+
generation, label, addr
324+
);
325+
326+
let client = dns_service_client::Client::new(
327+
&format!("http://{}", addr),
328+
log.clone(),
329+
);
230330
poll::wait_for_condition(
231331
|| async {
232-
match dns_config_client.dns_config_get().await {
332+
match client.dns_config_get().await {
233333
Err(error) => {
234-
// The DNS server is already up. This shouldn't happen.
334+
// The DNS server is already up. This shouldn't
335+
// happen.
235336
Err(poll::CondCheckError::Failed(error))
236337
}
237338
Ok(config) => {
238-
if config.generation == 2 {
339+
if config.generation == generation {
239340
Ok(())
240341
} else {
241342
Err(poll::CondCheckError::NotYet)
@@ -244,12 +345,9 @@ mod test {
244345
}
245346
},
246347
&Duration::from_millis(50),
247-
&Duration::from_secs(15),
348+
&Duration::from_secs(30),
248349
)
249350
.await
250351
.expect("DNS config not propagated in expected time");
251-
252-
// XXX-dap then add a second part to the test that adds a new DNS
253-
// server.
254352
}
255353
}

nexus/src/app/mod.rs

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -168,12 +168,16 @@ pub struct Nexus {
168168
dpd_client: Arc<dpd_client::Client>,
169169

170170
/// Background tasks
171-
// This field is not currently used, but will be when we start activating
172-
// background tasks from elsewhere in Nexus.
173-
_background_tasks: background::Driver,
171+
background_tasks: background::Driver,
174172

175173
/// task handle for the internal DNS config background task
176-
_task_internal_dns_config: background::TaskHandle,
174+
task_internal_dns_config: background::TaskHandle,
175+
176+
/// task handle for the internal DNS servers background task
177+
task_internal_dns_servers: background::TaskHandle,
178+
179+
/// task handle for the external DNS servers background task
180+
task_external_dns_servers: background::TaskHandle,
177181
}
178182

179183
impl Nexus {
@@ -253,7 +257,12 @@ impl Nexus {
253257
authn::Context::internal_api(),
254258
Arc::clone(&db_datastore),
255259
);
256-
let (background_tasks, task_internal_dns_config) = background::init(
260+
let (
261+
background_tasks,
262+
task_internal_dns_config,
263+
task_internal_dns_servers,
264+
task_external_dns_servers,
265+
) = background::init(
257266
&background_ctx,
258267
Arc::clone(&db_datastore),
259268
&config.pkg.background_tasks,
@@ -291,8 +300,10 @@ impl Nexus {
291300
samael_max_issue_delay: std::sync::Mutex::new(None),
292301
resolver,
293302
dpd_client,
294-
_background_tasks: background_tasks,
295-
_task_internal_dns_config: task_internal_dns_config,
303+
background_tasks,
304+
task_internal_dns_config,
305+
task_internal_dns_servers,
306+
task_external_dns_servers,
296307
};
297308

298309
// TODO-cleanup all the extra Arcs here seems wrong
@@ -318,6 +329,31 @@ impl Nexus {
318329
);
319330

320331
*nexus.recovery_task.lock().unwrap() = Some(recovery_task);
332+
333+
// Kick all background tasks once the populate step finishes. Among
334+
// other things, the populate step installs role assignments for
335+
// internal identities that are used by the background tasks. If we
336+
// don't do this here, those tasks might fail spuriously on startup and
337+
// not be retried for a while.
338+
let task_nexus = nexus.clone();
339+
let task_log = nexus.log.clone();
340+
tokio::spawn(async move {
341+
match task_nexus.wait_for_populate().await {
342+
Ok(_) => {
343+
info!(
344+
task_log,
345+
"populate complete; activating background tasks"
346+
);
347+
for task in task_nexus.background_tasks.tasks() {
348+
task_nexus.background_tasks.activate(task);
349+
}
350+
}
351+
Err(_) => {
352+
error!(task_log, "populate failed");
353+
}
354+
}
355+
});
356+
321357
nexus
322358
}
323359

nexus/src/app/rack.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,11 @@ impl super::Nexus {
155155
)
156156
.await?;
157157

158+
// We've potentially updated both the list of DNS servers and the DNS
159+
// configuration. Activate both background tasks.
160+
self.background_tasks.activate(&self.task_internal_dns_config);
161+
self.background_tasks.activate(&self.task_internal_dns_servers);
162+
158163
Ok(())
159164
}
160165

0 commit comments

Comments
 (0)