Skip to content

Commit 0ae9c04

Browse files
authored
Better cleanup of networking resources (#1352)
* Better cleanup of networking resources - Remove IP addresses, VNICs, and etherstub during `omicron-package uninstall` - Fail `destroy_virtual_hardware.sh` if Omicron zones are still installed - In `destroy_virtual_hardware.sh`, also unload xde driver and delete interfaces over the simulated Chelsio VNICs - Update how-to-run.adoc. * Appease clippy * Remove cruft comment * fix merge conflicts with main * Simplify failure detection / handling in scripts
1 parent a81daa0 commit 0ae9c04

File tree

10 files changed

+245
-80
lines changed

10 files changed

+245
-80
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/how-to-run.adoc

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Any additional prerequisite software may be installed with the following script:
2525
$ ./tools/install_prerequisites.sh
2626
----
2727

28-
=== Make me a Gimlet!
28+
=== Make (or unmake) me a Gimlet!
2929

3030
The sled agent expects to manage a real Gimlet. However, until those are built,
3131
developers generally make do with something else, usually a commodity machine.
@@ -35,13 +35,11 @@ file-based ZFS vdevs and ZFS zpools on top of those, and a couple of VNICs. The
3535
vdevs model the actual U.2s that will be in a Gimlet, and the VNICs model the
3636
two Chelsio NIC ports.
3737

38-
You can clean up these resources with `./tools/destroy_virtual_hardware.sh`. Be
39-
aware that this is a best effort script. If the sled agent or other Omicron
40-
zones are still running, the zpools cannot be deleted. The script will warn you
41-
about the things it cannot remove, so you can do so yourself. Also note that all
42-
the networking bits are temporary, so a reboot should always clear them.
43-
44-
Both scripts must be run as root, e.g, `pfexec ./tools/create_virtual_hardware.sh`.
38+
You can clean up these resources with `./tools/destroy_virtual_hardware.sh`.
39+
This script requires Omicron be uninstalled, e.g., with `pfexec
40+
./target/release/omicron-package uninstall`, and a warning will be printed if
41+
that is not the case. The script will then remove the file-based vdevs and the
42+
VNICs created by `create_virtual_hardware.sh`.
4543

4644
== Deploying Omicron
4745

package/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"
1919
ring = "0.16"
2020
serde = { version = "1.0", features = [ "derive" ] }
2121
serde_derive = "1.0"
22+
slog = { version = "2.5", features = [ "max_level_trace", "release_max_level_debug"] }
23+
slog-async = "2.7"
24+
slog-term = "2.9"
2225
smf = "0.2"
2326
tar = "0.4"
2427
tempfile = "3.3"

package/src/bin/omicron-package.rs

Lines changed: 45 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use clap::{Parser, Subcommand};
99
use futures::stream::{self, StreamExt, TryStreamExt};
1010
use indicatif::{MultiProgress, ProgressBar, ProgressStyle};
1111
use omicron_package::{parse, BuildCommand, DeployCommand};
12+
use omicron_sled_agent::cleanup_networking_resources;
1213
use omicron_sled_agent::zone;
1314
use omicron_zone_package::config::Config;
1415
use omicron_zone_package::config::ExternalPackage;
@@ -17,6 +18,10 @@ use omicron_zone_package::package::Package;
1718
use omicron_zone_package::package::Progress;
1819
use rayon::prelude::*;
1920
use ring::digest::{Context as DigestContext, Digest, SHA256};
21+
use slog::info;
22+
use slog::o;
23+
use slog::Drain;
24+
use slog::Logger;
2025
use std::env;
2126
use std::fs::create_dir_all;
2227
use std::path::{Path, PathBuf};
@@ -298,6 +303,7 @@ fn do_install(
298303
config: &Config,
299304
artifact_dir: &Path,
300305
install_dir: &Path,
306+
log: &Logger,
301307
) -> Result<()> {
302308
create_dir_all(&install_dir).map_err(|err| {
303309
anyhow!("Cannot create installation directory: {}", err)
@@ -323,10 +329,11 @@ fn do_install(
323329

324330
let src = tarfile.as_path();
325331
let dst = install_dir.join(src.strip_prefix(artifact_dir)?);
326-
println!(
327-
"Installing Service: {} -> {}",
328-
src.to_string_lossy(),
329-
dst.to_string_lossy()
332+
info!(
333+
log,
334+
"Installing service";
335+
"src" => %src.to_string_lossy(),
336+
"dst" => %dst.to_string_lossy(),
330337
);
331338
std::fs::copy(&src, &dst)?;
332339
Ok(())
@@ -346,10 +353,11 @@ fn do_install(
346353
for service_name in global_zone_service_names {
347354
let tar_path = install_dir.join(format!("{}.tar", service_name));
348355
let service_path = install_dir.join(service_name);
349-
println!(
350-
"Unpacking {} to {}",
351-
tar_path.to_string_lossy(),
352-
service_path.to_string_lossy()
356+
info!(
357+
log,
358+
"Unpacking service tarball";
359+
"tar_path" => %tar_path.to_string_lossy(),
360+
"service_path" => %service_path.to_string_lossy(),
353361
);
354362

355363
let tar_file = std::fs::File::open(&tar_path)?;
@@ -366,8 +374,9 @@ fn do_install(
366374
.join(&package.service_name)
367375
.join("pkg")
368376
.join("manifest.xml");
369-
println!(
370-
"Installing bootstrap service from {}",
377+
info!(
378+
log,
379+
"Installing boostrap service from {}",
371380
manifest_path.to_string_lossy()
372381
);
373382
smf::Config::import().run(&manifest_path)?;
@@ -425,7 +434,11 @@ fn remove_all_unless_already_removed<P: AsRef<Path>>(path: P) -> Result<()> {
425434
Ok(())
426435
}
427436

428-
fn remove_all_except<P: AsRef<Path>>(path: P, to_keep: &[&str]) -> Result<()> {
437+
fn remove_all_except<P: AsRef<Path>>(
438+
path: P,
439+
to_keep: &[&str],
440+
log: &Logger,
441+
) -> Result<()> {
429442
let dir = match path.as_ref().read_dir() {
430443
Ok(dir) => dir,
431444
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(()),
@@ -434,9 +447,9 @@ fn remove_all_except<P: AsRef<Path>>(path: P, to_keep: &[&str]) -> Result<()> {
434447
for entry in dir {
435448
let entry = entry?;
436449
if to_keep.contains(&&*(entry.file_name().to_string_lossy())) {
437-
println!(" Keeping: '{}'", entry.path().to_string_lossy());
450+
info!(log, "Keeping: '{}'", entry.path().to_string_lossy());
438451
} else {
439-
println!(" Removing: '{}'", entry.path().to_string_lossy());
452+
info!(log, "Removing: '{}'", entry.path().to_string_lossy());
440453
if entry.metadata()?.is_dir() {
441454
remove_all_unless_already_removed(entry.path())?;
442455
} else {
@@ -447,27 +460,32 @@ fn remove_all_except<P: AsRef<Path>>(path: P, to_keep: &[&str]) -> Result<()> {
447460
Ok(())
448461
}
449462

450-
fn do_uninstall(
463+
async fn do_uninstall(
451464
config: &Config,
452465
artifact_dir: &Path,
453466
install_dir: &Path,
467+
log: &Logger,
454468
) -> Result<()> {
455-
println!("Removing all Omicron zones");
469+
info!(log, "Removing all Omicron zones");
456470
uninstall_all_omicron_zones()?;
457-
println!("Uninstalling all packages");
471+
info!(log, "Uninstalling all packages");
458472
uninstall_all_packages(config);
459-
println!("Removing artifacts in: {}", artifact_dir.to_string_lossy());
473+
info!(log, "Removing artifacts in: {}", artifact_dir.to_string_lossy());
460474

461475
const ARTIFACTS_TO_KEEP: &[&str] =
462476
&["clickhouse", "cockroachdb", "xde", "console-assets", "downloads"];
463-
remove_all_except(artifact_dir, ARTIFACTS_TO_KEEP)?;
477+
remove_all_except(artifact_dir, ARTIFACTS_TO_KEEP, log)?;
464478

465-
println!(
479+
info!(
480+
log,
466481
"Removing installed objects in: {}",
467482
install_dir.to_string_lossy()
468483
);
469484
const INSTALLED_OBJECTS_TO_KEEP: &[&str] = &["opte"];
470-
remove_all_except(install_dir, INSTALLED_OBJECTS_TO_KEEP)?;
485+
remove_all_except(install_dir, INSTALLED_OBJECTS_TO_KEEP, log)?;
486+
487+
cleanup_networking_resources(log).await?;
488+
471489
Ok(())
472490
}
473491

@@ -545,6 +563,11 @@ async fn main() -> Result<()> {
545563
let args = Args::try_parse()?;
546564
let config = parse::<_, Config>(&args.manifest)?;
547565

566+
let decorator = slog_term::TermDecorator::new().build();
567+
let drain = slog_term::CompactFormat::new(decorator).build().fuse();
568+
let drain = slog_async::Async::new(drain).build().fuse();
569+
let log = slog::Logger::root(drain, o!());
570+
548571
// Use a CWD that is the root of the Omicron repository.
549572
if let Ok(manifest) = env::var("CARGO_MANIFEST_DIR") {
550573
let manifest_dir = PathBuf::from(manifest);
@@ -561,13 +584,13 @@ async fn main() -> Result<()> {
561584
artifact_dir,
562585
install_dir,
563586
}) => {
564-
do_install(&config, &artifact_dir, &install_dir)?;
587+
do_install(&config, &artifact_dir, &install_dir, &log)?;
565588
}
566589
SubCommand::Deploy(DeployCommand::Uninstall {
567590
artifact_dir,
568591
install_dir,
569592
}) => {
570-
do_uninstall(&config, &artifact_dir, &install_dir)?;
593+
do_uninstall(&config, &artifact_dir, &install_dir, &log).await?;
571594
}
572595
}
573596

sled-agent/src/illumos/dladm.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
77
use crate::common::vlan::VlanID;
88
use crate::illumos::vnic::VnicKind;
9+
use crate::illumos::zone::IPADM;
910
use crate::illumos::{execute, ExecutionError, PFEXEC};
1011
use omicron_common::api::external::MacAddr;
1112
use serde::{Deserialize, Serialize};
@@ -164,6 +165,50 @@ impl Dladm {
164165
Ok(EtherstubVnic(ETHERSTUB_VNIC_NAME.to_string()))
165166
}
166167

168+
// Return the name of the IP interface over the etherstub VNIC, if it
169+
// exists.
170+
fn get_etherstub_vnic_interface() -> Result<String, ExecutionError> {
171+
let mut cmd = std::process::Command::new(PFEXEC);
172+
let cmd = cmd.args(&[
173+
IPADM,
174+
"show-if",
175+
"-p",
176+
"-o",
177+
"IFNAME",
178+
ETHERSTUB_VNIC_NAME,
179+
]);
180+
execute(cmd)?;
181+
Ok(ETHERSTUB_VNIC_NAME.to_string())
182+
}
183+
184+
/// Delete the VNIC over the inter-zone comms etherstub.
185+
pub(crate) fn delete_etherstub_vnic() -> Result<(), ExecutionError> {
186+
// It's not clear why, but this requires deleting the _interface_ that's
187+
// over the VNIC first. Other VNICs don't require this for some reason.
188+
if Self::get_etherstub_vnic_interface().is_ok() {
189+
let mut cmd = std::process::Command::new(PFEXEC);
190+
let cmd = cmd.args(&[IPADM, "delete-if", ETHERSTUB_VNIC_NAME]);
191+
execute(cmd)?;
192+
}
193+
194+
if Self::get_etherstub_vnic().is_ok() {
195+
let mut cmd = std::process::Command::new(PFEXEC);
196+
let cmd = cmd.args(&[DLADM, "delete-vnic", ETHERSTUB_VNIC_NAME]);
197+
execute(cmd)?;
198+
}
199+
Ok(())
200+
}
201+
202+
/// Delete the inter-zone comms etherstub.
203+
pub(crate) fn delete_etherstub() -> Result<(), ExecutionError> {
204+
if Self::get_etherstub().is_ok() {
205+
let mut cmd = std::process::Command::new(PFEXEC);
206+
let cmd = cmd.args(&[DLADM, "delete-etherstub", ETHERSTUB_NAME]);
207+
execute(cmd)?;
208+
}
209+
Ok(())
210+
}
211+
167212
/// Returns the name of the first observed physical data link.
168213
pub fn find_physical() -> Result<PhysicalLink, FindPhysicalLinkError> {
169214
let mut command = std::process::Command::new(PFEXEC);

sled-agent/src/illumos/zone.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::illumos::{execute, PFEXEC};
1616
use omicron_common::address::SLED_PREFIX;
1717

1818
const DLADM: &str = "/usr/sbin/dladm";
19-
const IPADM: &str = "/usr/sbin/ipadm";
19+
pub const IPADM: &str = "/usr/sbin/ipadm";
2020
pub const SVCADM: &str = "/usr/sbin/svcadm";
2121
pub const SVCCFG: &str = "/usr/sbin/svccfg";
2222
pub const ZLOGIN: &str = "/usr/sbin/zlogin";

sled-agent/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ mod storage_manager;
3838
mod updates;
3939

4040
pub use illumos::zone;
41+
pub use sled_agent::cleanup_networking_resources;
4142

4243
#[cfg(test)]
4344
mod mocks;

0 commit comments

Comments
 (0)