From 5be60652868b8c683b3bc423f553676cb28082ad Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Thu, 20 Jan 2022 12:26:47 -0500 Subject: [PATCH] WIP: Adapt to and use new ostree-ext tar-split branch Depends https://github.com/ostreedev/ostree-rs-ext/pull/123 --- Cargo.lock | 3 +- Cargo.toml | 3 + rust/src/container.rs | 237 +++++++++++++++++++++++++++++ rust/src/lib.rs | 15 +- rust/src/main.rs | 3 + rust/src/sysroot_upgrade.rs | 16 +- src/lib/rpmostree-package-priv.h | 4 + src/libpriv/rpmostree-refts.cxx | 58 +++++++ src/libpriv/rpmostree-refts.h | 28 ++++ src/libpriv/rpmostree-rpm-util.cxx | 17 +++ src/libpriv/rpmostree-rpm-util.h | 2 + 11 files changed, 370 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e8d42bc3b..d436ebb06b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1625,8 +1625,7 @@ dependencies = [ [[package]] name = "ostree-ext" version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e6d186573b7713d0794d2aa34c32ea5fa45a4f7fe77127c89815029ccc0bf3d" +source = "git+https://github.com/cgwalters/ostree-rs-ext?branch=tar-split#2d2dadfff9e4ac201730ba15a193272c56c9ce43" dependencies = [ "anyhow", "async-compression", diff --git a/Cargo.toml b/Cargo.toml index c3df576696..0af7cffc68 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -109,3 +109,6 @@ bin-unit-tests = [] sanitizers = [] default = [] + +[patch.crates-io] +ostree-ext = { git = "https://github.com/cgwalters/ostree-rs-ext", branch = "tar-split" } diff --git a/rust/src/container.rs b/rust/src/container.rs index ea18e5ffda..fe80262496 100644 --- a/rust/src/container.rs +++ b/rust/src/container.rs @@ -2,7 +2,22 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT +use std::collections::{BTreeMap, HashMap}; +use std::convert::TryInto; + use anyhow::Result; +use camino::Utf8PathBuf; +use chrono::prelude::*; +use ostree::glib; +use ostree_ext::chunking::Chunking; +use ostree_ext::objectsource::{ + ContentID, ObjectMeta, ObjectMetaMap, ObjectMetaSet, 
ObjectSourceMeta, +}; +use ostree_ext::prelude::*; +use ostree_ext::{gio, ostree}; +use structopt::StructOpt; + +use crate::cxxrsutil::FFIGObjectReWrap; /// Main entrypoint for container pub async fn entrypoint(args: &[&str]) -> Result { @@ -14,3 +29,225 @@ pub async fn entrypoint(args: &[&str]) -> Result { ostree_ext::cli::run_from_iter(args).await?; Ok(0) } + +#[derive(Debug, StructOpt)] +struct ContentMappingOpts { + #[structopt(long)] + repo: String, + + #[structopt(long = "ref")] + ostree_ref: String, +} + +#[derive(Debug, Default)] +struct MappingBuilder { + /// Maps from package ID to metadata + packagemeta: ObjectMetaSet, + /// Mapping from content object sha256 to the ID of the package that owns it + content: ObjectMetaMap, + /// Mapping from content object sha256 to the package IDs of later occurrences of the same object + duplicates: BTreeMap>, + multi_provider: Vec, + + /// Size according to RPM database + rpmsize: u64, +} + +impl MappingBuilder { + /// For now, we stick everything that isn't a package inside a single "unpackaged" state. + /// In the future though if we support e.g. containers in /usr/share/containers or the + /// like, this will need to change. + const UNPACKAGED_ID: &'static str = "rpmostree-unpackaged-content"; +} + +impl From for ObjectMeta { + fn from(b: MappingBuilder) -> ObjectMeta { + ObjectMeta { + map: b.content, + set: b.packagemeta, + } + } +} + +/// Walk over the whole filesystem, and generate mappings from content object checksums +/// to the package that owns them. +/// +/// In the future, we could compute this much more efficiently by walking the RPM database +/// instead. But this design is currently oriented towards accepting a single ostree +/// commit as input.
+fn build_mapping_recurse( + path: &mut Utf8PathBuf, + dir: gio::File, + ts: &crate::ffi::RpmTs, + state: &mut MappingBuilder, +) -> Result<()> { + use std::collections::btree_map::Entry; + let cancellable = gio::NONE_CANCELLABLE; + let e = dir.enumerate_children( + "standard::name,standard::type", + gio::FileQueryInfoFlags::NOFOLLOW_SYMLINKS, + cancellable, + )?; + for child in e { + let childi = child?; + let name: Utf8PathBuf = childi.name().try_into()?; + let child = dir.child(&name); + path.push(&name); + match childi.file_type() { + gio::FileType::Regular | gio::FileType::SymbolicLink => { + let child = child.downcast::().unwrap(); + + let mut pkgs = ts.packages_providing_file(path.as_str())?; + // Let's be deterministic (but _unstable because we don't care about behavior of equal strings) + pkgs.sort_unstable(); + // For now, we pick the alphabetically first package providing a file + let mut pkgs = pkgs.into_iter(); + let pkgid = pkgs + .next() + .unwrap_or_else(|| MappingBuilder::UNPACKAGED_ID.to_string()); + // Track cases of duplicate owners + match pkgs.len() { + 0 => {} + _ => { + state.multi_provider.push(path.clone()); + } + } + + let checksum = child.checksum().unwrap().to_string(); + match state.content.entry(checksum.clone()) { + Entry::Vacant(v) => { + v.insert(pkgid); + } + Entry::Occupied(_) => { + let v = state.duplicates.entry(checksum).or_default(); + v.push(pkgid); + } + } + } + gio::FileType::Directory => { + build_mapping_recurse(path, child, ts, state)?; + } + o => anyhow::bail!("Unhandled file type: {}", o), + } + path.pop(); + } + Ok(()) +} + +/// Print out information about how we would generate "chunks" (i.e. OCI layers) for this ostree commit. 
+pub fn content_mapping(args: &[&str]) -> Result<()> { + let args = args.iter().skip(1); + let opt = ContentMappingOpts::from_iter(args); + let repo = ostree::Repo::new_for_path(opt.repo.as_str()); + repo.open(gio::NONE_CANCELLABLE)?; + let (root, rev) = repo.read_commit(opt.ostree_ref.as_str(), gio::NONE_CANCELLABLE)?; + let pkglist = { + let repo = repo.gobj_rewrap(); + let cancellable = gio::Cancellable::new(); + unsafe { + let r = crate::ffi::package_variant_list_for_commit( + repo, + rev.as_str(), + cancellable.gobj_rewrap(), + )?; + let r: glib::Variant = glib::translate::from_glib_full(r as *mut _); + r + } + }; + + // Open the RPM database for this commit. + let q = crate::ffi::rpmts_for_commit(repo.gobj_rewrap(), rev.as_str())?; + + let mut state = MappingBuilder::default(); + + let mut lowest_change_time = None; + let mut package_meta = HashMap::new(); + for pkg in pkglist.iter() { + let name = pkg.child_value(0); + let pkgid = String::from(name.str().unwrap()); + let pkgmeta = q.package_meta(&pkgid)?; + let buildtime = pkgmeta.buildtime(); + if let Some((lowid, lowtime)) = lowest_change_time.as_mut() { + if *lowtime > buildtime { + *lowid = pkgid.clone(); + *lowtime = buildtime; + } + } else { + lowest_change_time = Some((pkgid.clone(), pkgmeta.buildtime())) + } + state.rpmsize += pkgmeta.size(); + package_meta.insert(pkgid, pkgmeta); + } + + // SAFETY: There must be at least one package. + let (lowest_change_name, lowest_change_time) = + lowest_change_time.expect("Failed to find any packages"); + // Walk over the packages, and generate the `packagemeta` mapping, which is basically a subset of + // package metadata abstracted for ostree. Note that right now, the package metadata includes + // both a "unique identifier" and a "human readable name", but for rpm-ostree we're just making + // those the same thing.
+ for (name, pkgmeta) in package_meta.iter() { + let buildtime = pkgmeta.buildtime(); + let change_time_offset_secs: u32 = buildtime + .checked_sub(lowest_change_time) + .unwrap() + .try_into() + .unwrap(); + // Convert to hours, because there's no strong use for caring about the relative difference of builds in terms + // of minutes or seconds. + let change_time_offset = change_time_offset_secs / (60 * 60); + state.packagemeta.insert( + name.clone(), + ObjectSourceMeta { + name: name.to_string(), + change_time_offset, + }, + ); + } + + // Insert metadata for unpackaged content. TODO: Better split up e.g. the initramfs from the RPM database + state.packagemeta.insert( + MappingBuilder::UNPACKAGED_ID.to_string(), + ObjectSourceMeta { + name: MappingBuilder::UNPACKAGED_ID.to_string(), + // Assume that content in here changes frequently. + change_time_offset: u32::MAX, + }, + ); + + // Walk the filesystem + build_mapping_recurse(&mut Utf8PathBuf::from("/"), root, &q, &mut state)?; + + // Print out information about what we found + println!( + "{} objects in {} packages", + state.content.len(), + state.packagemeta.len() + ); + println!("rpm size: {}", state.rpmsize); + println!( + "Earliest changed package: {} at {}", + lowest_change_name, + Utc.timestamp_opt(lowest_change_time.try_into().unwrap(), 0) + .unwrap() + ); + println!("{} duplicates", state.duplicates.len()); + if !state.multi_provider.is_empty() { + println!("Multiple owners:"); + for v in state.multi_provider.iter() { + println!(" {}", v); + } + } + + // Convert our build state into the state that ostree consumes, discarding + // transient data such as the cases of files owned by multiple packages. + let meta: ObjectMeta = state.into(); + // Ask ostree to convert this data into chunks + let mut chunking = Chunking::new(&repo, rev.as_str())?; + chunking.process_mapping(&repo, &meta)?; + + // Just print it for now.
+ chunking.print(); + + Ok(()) +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 4b754d7809..3a8f6541f8 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -675,14 +675,27 @@ pub mod ffi { fn output_message(msg: &str); } - // rpmostree-rpm-util.h unsafe extern "C++" { include!("rpmostree-rpm-util.h"); + #[allow(missing_debug_implementations)] + type RpmTs; + #[allow(missing_debug_implementations)] + type PackageMeta; + // Currently only used in unit tests #[allow(dead_code)] fn nevra_to_cache_branch(nevra: &CxxString) -> Result; fn get_repodata_chksum_repr(pkg: &mut DnfPackage) -> Result; + fn rpmts_for_commit(repo: Pin<&mut OstreeRepo>, rev: &str) -> Result>; + + // Methods on RpmTs + fn packages_providing_file(self: &RpmTs, path: &str) -> Result>; + fn package_meta(self: &RpmTs, name: &str) -> Result>; + + // Methods on PackageMeta + fn size(self: &PackageMeta) -> u64; + fn buildtime(self: &PackageMeta) -> u64; } // rpmostree-package-variants.h diff --git a/rust/src/main.rs b/rust/src/main.rs index bdc2b389f0..4e2520619e 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -58,6 +58,9 @@ async fn inner_async_main(args: Vec) -> Result { "cliwrap" => rpmostree_rust::cliwrap::entrypoint(args).map(|_| 0), // The `unlock` is a hidden alias for "ostree CLI compatibility" "usroverlay" | "unlock" => usroverlay(args).map(|_| 0), + "ex-dump-content-mapping" => { + rpmostree_rust::container::content_mapping(&args_borrowed).map(|_| 0) + } // C++ main _ => Ok(rpmostree_rust::ffi::rpmostree_main(args)?), } diff --git a/rust/src/sysroot_upgrade.rs b/rust/src/sysroot_upgrade.rs index 649cfd2c82..1828e3dfb9 100644 --- a/rust/src/sysroot_upgrade.rs +++ b/rust/src/sysroot_upgrade.rs @@ -6,7 +6,7 @@ use crate::cxxrsutil::*; use crate::ffi::{output_message, ContainerImageState}; use anyhow::Result; use ostree::glib; -use ostree_container::store::LayeredImageImporter; +use ostree_container::store::ImageImporter; use ostree_container::store::PrepareResult; use 
ostree_container::OstreeImageReference; use ostree_ext::container as ostree_container; @@ -31,25 +31,15 @@ async fn pull_container_async( imgref: &OstreeImageReference, ) -> Result { output_message(&format!("Pulling manifest: {}", &imgref)); - let mut imp = LayeredImageImporter::new(repo, imgref, Default::default()).await?; + let mut imp = ImageImporter::new(repo, imgref, Default::default()).await?; let prep = match imp.prepare().await? { PrepareResult::AlreadyPresent(r) => return Ok(r.into()), PrepareResult::Ready(r) => r, }; let digest = prep.manifest_digest.clone(); output_message(&format!("Importing: {} (digest: {})", &imgref, &digest)); - if prep.base_layer.commit.is_none() { - let size = glib::format_size(prep.base_layer.size()); - output_message(&format!( - "Downloading base layer: {} ({})", - prep.base_layer.digest(), - size - )); - } else { - output_message(&format!("Using base: {}", prep.base_layer.digest())); - } // TODO add nice download progress - for layer in prep.layers.iter() { + for layer in prep.all_layers() { if layer.commit.is_some() { output_message(&format!("Using layer: {}", layer.digest())); } else { diff --git a/src/lib/rpmostree-package-priv.h b/src/lib/rpmostree-package-priv.h index ed537462d6..21323ee9cc 100644 --- a/src/lib/rpmostree-package-priv.h +++ b/src/lib/rpmostree-package-priv.h @@ -23,6 +23,9 @@ #include #include "rpmostree-package.h" +#ifdef __cplusplus +#include "rust/cxx.h" +#endif G_BEGIN_DECLS @@ -43,6 +46,7 @@ _rpm_ostree_package_list_for_commit (OstreeRepo *repo, GPtrArray **out_pkglist, GCancellable *cancellable, GError **error); + gboolean _rpm_ostree_diff_package_lists (GPtrArray *a, GPtrArray *b, diff --git a/src/libpriv/rpmostree-refts.cxx b/src/libpriv/rpmostree-refts.cxx index 26288ae748..b8fdfd7b4a 100644 --- a/src/libpriv/rpmostree-refts.cxx +++ b/src/libpriv/rpmostree-refts.cxx @@ -61,3 +61,61 @@ rpmostree_refts_unref (RpmOstreeRefTs *rts) (void)glnx_tmpdir_delete (&rts->tmpdir, NULL, NULL); g_free (rts); } + 
+namespace rpmostreecxx { + +RpmTs::RpmTs(RpmOstreeRefTs *ts) { _ts = ts; } + +RpmTs::~RpmTs() { + rpmostree_refts_unref (_ts); +} + +rust::Vec +RpmTs::packages_providing_file(const rust::Str path) const { + auto path_c = std::string(path); + g_auto(rpmdbMatchIterator) mi = rpmtsInitIterator(_ts->ts, RPMDBI_INSTFILENAMES, path_c.c_str(), 0); + if (mi == NULL) + mi = rpmtsInitIterator(_ts->ts, RPMDBI_PROVIDENAME, path_c.c_str(), 0); + rust::Vec ret; + if (mi != NULL) + { + Header h; + while ((h = rpmdbNextIterator(mi)) != NULL) + { + const char *name = headerGetString(h, RPMTAG_NAME); + ret.push_back(rust::String(name)); + } + } + return ret; +} + +std::unique_ptr +RpmTs::package_meta(const rust::Str name) const { + auto name_c = std::string(name); + g_auto(rpmdbMatchIterator) mi = rpmtsInitIterator(_ts->ts, RPMDBI_NAME, name_c.c_str(), 0); + if (mi == NULL) + { + g_autofree char *err = g_strdup_printf ("Package not found: %s", name_c.c_str()); + throw std::runtime_error(err); + } + Header h; + const char *previous = NULL; + auto retval = std::make_unique(); + while ((h = rpmdbNextIterator(mi)) != NULL) + { + const char *vers = headerGetString(h, RPMTAG_VERSION); + if (previous != NULL) + { + g_autofree char *buf = g_strdup_printf ("Multiple installed '%s' (%s, %s)", name_c.c_str(), previous, vers); + throw std::runtime_error(buf); + } + previous = vers; + retval->_size = headerGetNumber(h, RPMTAG_LONGARCHIVESIZE); + retval->_buildtime = headerGetNumber(h, RPMTAG_BUILDTIME); + } + if (!previous) + g_assert_not_reached (); + return retval; +} + +} diff --git a/src/libpriv/rpmostree-refts.h b/src/libpriv/rpmostree-refts.h index f28d36fd8b..92152eb924 100644 --- a/src/libpriv/rpmostree-refts.h +++ b/src/libpriv/rpmostree-refts.h @@ -24,6 +24,9 @@ #include #include #include "libglnx.h" +#include "rpmostree-util.h" +#include +#include "rust/cxx.h" G_BEGIN_DECLS @@ -44,4 +47,29 @@ rpmostree_refts_unref (RpmOstreeRefTs *rts); G_DEFINE_AUTOPTR_CLEANUP_FUNC(RpmOstreeRefTs, 
rpmostree_refts_unref); + +namespace rpmostreecxx { + +struct PackageMeta { + uint64_t _size; + uint64_t _buildtime; + + uint64_t size() const { return _size; }; + uint64_t buildtime() const { return _buildtime; }; +}; + +// A simple C++ wrapper for a librpm C type, so we can expose it to Rust via cxx.rs. +class RpmTs { +public: + RpmTs(::RpmOstreeRefTs *ts); + ~RpmTs(); + rpmts get_ts() const; + rust::Vec packages_providing_file(const rust::Str path) const; + std::unique_ptr package_meta(const rust::Str package) const; +private: + ::RpmOstreeRefTs *_ts; +}; + +} + G_END_DECLS diff --git a/src/libpriv/rpmostree-rpm-util.cxx b/src/libpriv/rpmostree-rpm-util.cxx index 5ee74a9bbd..6eaaa46146 100644 --- a/src/libpriv/rpmostree-rpm-util.cxx +++ b/src/libpriv/rpmostree-rpm-util.cxx @@ -991,6 +991,8 @@ rpmostree_get_refts_for_commit (OstreeRepo *repo, GCancellable *cancellable, GError **error) { + CXX_TRY(core_libdnf_process_global_init(), error); + g_auto(GLnxTmpDir) tmpdir = { 0, }; if (!glnx_mkdtemp ("rpmostree-dbquery-XXXXXX", 0700, &tmpdir, error)) return FALSE; @@ -1645,3 +1647,18 @@ rpmostree_advisories_variant (DnfSack *sack, g_variant_builder_add_value (&builder, advisory_variant_new (advisory, pkgs)); return g_variant_ref_sink (g_variant_builder_end (&builder)); } + +namespace rpmostreecxx { + +std::unique_ptr +rpmts_for_commit(OstreeRepo &repo, rust::Str rev) { + g_autoptr(GError) local_error = NULL; + RpmOstreeRefTs *refts = NULL; + auto rev_c = std::string(rev); + if (!rpmostree_get_refts_for_commit (&repo, rev_c.c_str(), &refts, NULL, &local_error)) + util::throw_gerror(local_error); + + return std::make_unique(refts); +} + +} \ No newline at end of file diff --git a/src/libpriv/rpmostree-rpm-util.h b/src/libpriv/rpmostree-rpm-util.h index e48d4a021e..d7118fd858 100644 --- a/src/libpriv/rpmostree-rpm-util.h +++ b/src/libpriv/rpmostree-rpm-util.h @@ -37,6 +37,8 @@ namespace rpmostreecxx { rust::String nevra_to_cache_branch(const std::string &nevra); 
rust::String get_repodata_chksum_repr(DnfPackage &pkg); + + std::unique_ptr rpmts_for_commit(OstreeRepo &repo, rust::Str rev); } // C code follows