Skip to content

Commit 9c7bbf7

Browse files
committed
Merge branch 'main' into dap/multi-crdb
2 parents 5a7ecec + b884c76 commit 9c7bbf7

File tree

17 files changed

+633
-133
lines changed

17 files changed

+633
-133
lines changed

Cargo.lock

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ members = [
2323
"installinator-artifactd",
2424
"installinator-common",
2525
"internal-dns",
26+
"internal-dns-cli",
2627
"ipcc-key-value",
2728
"key-manager",
2829
"nexus",
@@ -83,6 +84,7 @@ default-members = [
8384
"installinator-artifactd",
8485
"installinator-common",
8586
"internal-dns",
87+
"internal-dns-cli",
8688
"ipcc-key-value",
8789
"key-manager",
8890
"nexus",

dns-server/src/dns_server.rs

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,7 @@ use anyhow::anyhow;
1515
use anyhow::Context;
1616
use pretty_hex::*;
1717
use serde::Deserialize;
18-
use slog::info;
19-
use slog::{debug, error, o, Logger};
18+
use slog::{debug, error, info, o, trace, Logger};
2019
use std::net::SocketAddr;
2120
use std::str::FromStr;
2221
use std::sync::Arc;
@@ -150,7 +149,7 @@ async fn handle_dns_packet(request: Request) {
150149
let log = &request.log;
151150
let buf = &request.packet;
152151

153-
debug!(&log, "buffer"; "buffer" => ?buf.hex_dump());
152+
trace!(&log, "buffer"; "buffer" => ?buf.hex_dump());
154153

155154
// Decode the message.
156155
let mut dec = BinDecoder::new(&buf);
@@ -270,6 +269,12 @@ async fn handle_dns_message(
270269
}
271270
})
272271
.collect::<Result<Vec<_>, RequestError>>()?;
272+
debug!(
273+
&log,
274+
"dns response";
275+
"query" => ?query,
276+
"records" => ?&response_records
277+
);
273278
respond_records(request, rb, header, &response_records).await
274279
}
275280

illumos-utils/src/running_zone.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,10 @@ impl RunningZone {
260260
self.inner.zonepath.join("root")
261261
}
262262

263+
pub fn control_interface(&self) -> AddrObject {
264+
AddrObject::new(self.inner.get_control_vnic_name(), "omicron6").unwrap()
265+
}
266+
263267
/// Runs a command within the Zone, return the output.
264268
//
265269
// NOTE: It's important that this function is synchronous.

internal-dns-cli/Cargo.toml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[package]
2+
name = "internal-dns-cli"
3+
version = "0.1.0"
4+
edition = "2021"
5+
license = "MPL-2.0"
6+
7+
[dependencies]
8+
anyhow.workspace = true
9+
clap.workspace = true
10+
dropshot.workspace = true
11+
internal-dns.workspace = true
12+
omicron-common.workspace = true
13+
slog.workspace = true
14+
tokio.workspace = true
15+
trust-dns-resolver.workspace = true
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// This Source Code Form is subject to the terms of the Mozilla Public
2+
// License, v. 2.0. If a copy of the MPL was not distributed with this
3+
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
4+
5+
//! Resolves DNS names within the Oxide control plane
6+
7+
use anyhow::Context;
8+
use anyhow::Result;
9+
use clap::Parser;
10+
use clap::ValueEnum;
11+
use internal_dns::resolver::ResolveError;
12+
use internal_dns::resolver::Resolver;
13+
use slog::{info, warn};
14+
use std::net::SocketAddr;
15+
16+
#[derive(Debug, Parser)]
17+
#[clap(name = "dnswait", about = "Resolves DNS names in the control plane")]
18+
struct Opt {
19+
/// Nameserver(s) to query
20+
///
21+
/// If unspecified, uses the system configuration (usually the nameservers
22+
/// configured in /etc/resolv.conf).
23+
#[clap(long, action)]
24+
nameserver_addresses: Vec<SocketAddr>,
25+
26+
/// service name to be resolved (should be the target of a DNS name)
27+
#[arg(value_enum)]
28+
srv_name: ServiceName,
29+
}
30+
31+
#[derive(Debug, Clone, Copy, ValueEnum)]
32+
#[value(rename_all = "kebab-case")]
33+
enum ServiceName {
34+
Cockroach,
35+
}
36+
37+
impl From<ServiceName> for internal_dns::ServiceName {
38+
fn from(value: ServiceName) -> Self {
39+
match value {
40+
ServiceName::Cockroach => internal_dns::ServiceName::Cockroach,
41+
}
42+
}
43+
}
44+
45+
#[tokio::main]
46+
async fn main() -> Result<()> {
47+
let opt = Opt::parse();
48+
let log = dropshot::ConfigLogging::File {
49+
path: "/dev/stderr".into(),
50+
level: dropshot::ConfigLoggingLevel::Info,
51+
if_exists: dropshot::ConfigLoggingIfExists::Append,
52+
}
53+
.to_logger("dnswait")
54+
.context("creating log")?;
55+
56+
let resolver = if opt.nameserver_addresses.is_empty() {
57+
info!(&log, "using system configuration");
58+
let async_resolver =
59+
trust_dns_resolver::AsyncResolver::tokio_from_system_conf()
60+
.context("initializing resolver from system configuration")?;
61+
Resolver::new_with_resolver(log.clone(), async_resolver)
62+
} else {
63+
let addrs = opt.nameserver_addresses;
64+
info!(&log, "using explicit nameservers"; "nameservers" => ?addrs);
65+
Resolver::new_from_addrs(log.clone(), addrs)
66+
.context("creating resolver with explicit nameserver addresses")?
67+
};
68+
69+
let result = omicron_common::backoff::retry_notify(
70+
omicron_common::backoff::retry_policy_internal_service(),
71+
|| async {
72+
let dns_name = internal_dns::ServiceName::from(opt.srv_name);
73+
resolver.lookup_srv(dns_name).await.map_err(|error| match error {
74+
ResolveError::Resolve(_)
75+
| ResolveError::NotFound(_)
76+
| ResolveError::NotFoundByString(_) => {
77+
omicron_common::backoff::BackoffError::transient(error)
78+
}
79+
})
80+
},
81+
|error, delay| {
82+
warn!(
83+
&log,
84+
"DNS query failed; will try again";
85+
"error" => format!("{:#}", error),
86+
"delay" => ?delay,
87+
);
88+
},
89+
)
90+
.await
91+
.context("unexpectedly gave up")?;
92+
93+
for ip in result {
94+
println!("{}", ip)
95+
}
96+
97+
Ok(())
98+
}

internal-dns/src/resolver.rs

Lines changed: 122 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use crate::DNS_ZONE;
66
use omicron_common::address::{
77
Ipv6Subnet, ReservedRackSubnet, AZ_PREFIX, DNS_PORT,
88
};
9-
use slog::{debug, info};
9+
use slog::{debug, info, trace};
1010
use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6};
1111
use trust_dns_proto::rr::record_type::RecordType;
1212
use trust_dns_resolver::config::{
@@ -70,6 +70,15 @@ impl Resolver {
7070
Self::new_from_subnet(log, subnet)
7171
}
7272

73+
/// Return a resolver that wraps the provided `TokioAsyncResolver`, such as
74+
/// one built from the system configuration (usually /etc/resolv.conf).
75+
pub fn new_with_resolver(
76+
log: slog::Logger,
77+
tokio_resolver: TokioAsyncResolver,
78+
) -> Self {
79+
Resolver { log, inner: Box::new(tokio_resolver) }
80+
}
81+
7382
// TODO-correctness This function and its callers make assumptions about how
7483
// many internal DNS servers there are on the subnet and where they are. Is
7584
// that okay? It would seem more flexible not to assume this. Instead, we
@@ -121,6 +130,107 @@ impl Resolver {
121130
Ok(*address)
122131
}
123132

133+
/// Returns the targets of the SRV records for a DNS name
134+
///
135+
/// The returned values are generally other DNS names that themselves would
136+
/// need to be looked up to find A/AAAA records.
137+
pub async fn lookup_srv(
138+
&self,
139+
srv: crate::ServiceName,
140+
) -> Result<Vec<String>, ResolveError> {
141+
let name = format!("{}.{}", srv.dns_name(), DNS_ZONE);
142+
trace!(self.log, "lookup_srv"; "dns_name" => &name);
143+
let response = self.inner.srv_lookup(&name).await?;
144+
debug!(
145+
self.log,
146+
"lookup_srv";
147+
"dns_name" => &name,
148+
"response" => ?response
149+
);
150+
151+
Ok(response.into_iter().map(|srv| srv.target().to_string()).collect())
152+
}
153+
154+
pub async fn lookup_all_ipv6(
155+
&self,
156+
srv: crate::ServiceName,
157+
) -> Result<Vec<Ipv6Addr>, ResolveError> {
158+
let name = format!("{}.{}", srv.dns_name(), DNS_ZONE);
159+
trace!(self.log, "lookup_all_ipv6 srv"; "dns_name" => &name);
160+
let response = self.inner.srv_lookup(&name).await?;
161+
debug!(
162+
self.log,
163+
"lookup_ipv6 srv";
164+
"dns_name" => &name,
165+
"response" => ?response
166+
);
167+
168+
// SRV records have a target, which is itself another DNS name that
169+
// needs to be looked up in order to get to the actual IP addresses.
170+
// Many DNS servers return these IP addresses directly in the response
171+
// to the SRV query as Additional records. Ours does not. See
172+
// omicron#3434. So we need to do another round of lookups separately.
173+
//
174+
// According to the docs for
175+
// `trust_dns_resolver::lookup::SrvLookup::ip_iter()`, it sounds like
176+
// trust-dns would have done this for us. It doesn't. See
177+
// bluejekyll/trust-dns#1980.
178+
//
179+
// So if we have gotten any IPs, then we assume that one of the above
180+
// issues has been addressed and so we have all the IPs and we're done.
181+
// Otherwise, explicitly do the extra lookups.
182+
let addresses: Vec<Ipv6Addr> = response
183+
.ip_iter()
184+
.filter_map(|addr| match addr {
185+
IpAddr::V4(_) => None,
186+
IpAddr::V6(addr) => Some(addr),
187+
})
188+
.collect();
189+
if !addresses.is_empty() {
190+
return Ok(addresses);
191+
}
192+
193+
// What do we do if some of these queries succeed while others fail? We
194+
// may have some addresses, but the list might be incomplete. That
195+
// might be okay for some use cases but not others. For now, we do the
196+
// simple thing. In the future, we'll want a more cueball-like resolver
197+
// interface that better deals with these cases.
198+
let log = &self.log;
199+
let futures = response.iter().map(|srv| async {
200+
let target = srv.target();
201+
trace!(
202+
log,
203+
"lookup_all_ipv6: looking up SRV target";
204+
"name" => ?target,
205+
);
206+
self.inner.ipv6_lookup(target.clone()).await
207+
});
208+
let results = futures::future::try_join_all(futures).await?;
209+
let results = results
210+
.into_iter()
211+
.flat_map(|ipv6| ipv6.into_iter())
212+
.collect::<Vec<_>>();
213+
if results.is_empty() {
214+
Err(ResolveError::NotFound(srv))
215+
} else {
216+
Ok(results)
217+
}
218+
}
219+
220+
pub async fn lookup_ip(
221+
&self,
222+
srv: crate::ServiceName,
223+
) -> Result<IpAddr, ResolveError> {
224+
let name = format!("{}.{}", srv.dns_name(), DNS_ZONE);
225+
debug!(self.log, "lookup srv"; "dns_name" => &name);
226+
let response = self.inner.lookup_ip(&name).await?;
227+
let address = response
228+
.iter()
229+
.next()
230+
.ok_or_else(|| ResolveError::NotFound(srv))?;
231+
Ok(address)
232+
}
233+
124234
/// Looks up a single [`SocketAddrV6`] based on the SRV name
125235
/// Returns an error if the record does not exist.
126236
pub async fn lookup_socket_v6(
@@ -156,20 +266,6 @@ impl Resolver {
156266
}
157267
})
158268
}
159-
160-
pub async fn lookup_ip(
161-
&self,
162-
srv: crate::ServiceName,
163-
) -> Result<IpAddr, ResolveError> {
164-
let name = format!("{}.{}", srv.dns_name(), DNS_ZONE);
165-
debug!(self.log, "lookup srv"; "dns_name" => &name);
166-
let response = self.inner.lookup_ip(&name).await?;
167-
let address = response
168-
.iter()
169-
.next()
170-
.ok_or_else(|| ResolveError::NotFound(srv))?;
171-
Ok(address)
172-
}
173269
}
174270

175271
#[cfg(test)]
@@ -427,6 +523,17 @@ mod test {
427523
.expect("Should have been able to look up IP address");
428524
assert!(cockroach_addrs.iter().any(|addr| addr.ip() == &ip));
429525

526+
// Look up all the Cockroach addresses.
527+
let mut ips =
528+
resolver.lookup_all_ipv6(ServiceName::Cockroach).await.expect(
529+
"Should have been able to look up all CockroachDB addresses",
530+
);
531+
ips.sort();
532+
assert_eq!(
533+
ips,
534+
cockroach_addrs.iter().map(|s| *s.ip()).collect::<Vec<_>>()
535+
);
536+
430537
// Look up Clickhouse
431538
let ip = resolver
432539
.lookup_ipv6(ServiceName::Clickhouse)

openapi/sled-agent.json

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,23 @@
1010
"version": "0.0.1"
1111
},
1212
"paths": {
13+
"/cockroachdb": {
14+
"post": {
15+
"summary": "Initializes a CockroachDB cluster",
16+
"operationId": "cockroachdb_init",
17+
"responses": {
18+
"204": {
19+
"description": "resource updated"
20+
},
21+
"4XX": {
22+
"$ref": "#/components/responses/Error"
23+
},
24+
"5XX": {
25+
"$ref": "#/components/responses/Error"
26+
}
27+
}
28+
}
29+
},
1330
"/disks/{disk_id}": {
1431
"put": {
1532
"operationId": "disk_put",

0 commit comments

Comments
 (0)