| 
 | 1 | +//! Gets metadata about a workspace from Cargo  | 
 | 2 | +
  | 
 | 3 | +use std::collections::BTreeMap;  | 
 | 4 | +use std::ffi::OsStr;  | 
 | 5 | +use std::path::{Path, PathBuf};  | 
 | 6 | + | 
 | 7 | +/// Describes how this module can fail  | 
 | 8 | +#[derive(Debug, thiserror::Error)]  | 
 | 9 | +pub enum Error {  | 
 | 10 | +    #[error("I/O Error: {0:?}")]  | 
 | 11 | +    Io(#[from] std::io::Error),  | 
 | 12 | +    #[error("Failed get output from cargo-metadata: {0:?}")]  | 
 | 13 | +    GettingMetadata(#[from] cargo_metadata::Error),  | 
 | 14 | +    #[error("Failed to run cargo vendor: {0:?}")]  | 
 | 15 | +    LaunchingVendor(std::io::Error),  | 
 | 16 | +    #[error("Failed to complete cargo vendor")]  | 
 | 17 | +    RunningVendor,  | 
 | 18 | +    #[error("Bad path {0:?} whilst scraping files")]  | 
 | 19 | +    Scraping(PathBuf),  | 
 | 20 | +}  | 
 | 21 | + | 
 | 22 | +/// Uniquely describes a package on crates.io  | 
 | 23 | +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]  | 
 | 24 | +pub struct Package {  | 
 | 25 | +    /// The name of the package  | 
 | 26 | +    pub name: String,  | 
 | 27 | +    /// The version number  | 
 | 28 | +    pub version: String,  | 
 | 29 | +}  | 
 | 30 | + | 
 | 31 | +/// Extra data about a package  | 
 | 32 | +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]  | 
 | 33 | +pub struct PackageMetadata {  | 
 | 34 | +    /// The license it is under  | 
 | 35 | +    pub license: String,  | 
 | 36 | +    /// The list of authors from the package metadata  | 
 | 37 | +    pub authors: Vec<String>,  | 
 | 38 | +    /// A list of important files from the package, with their contents.  | 
 | 39 | +    ///  | 
 | 40 | +    /// This includes *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, case-insensitive.  | 
 | 41 | +    pub notices: BTreeMap<String, String>,  | 
 | 42 | +    /// If this is true, this dep is in the Rust Standard Library  | 
 | 43 | +    pub is_in_libstd: Option<bool>,  | 
 | 44 | +}  | 
 | 45 | + | 
 | 46 | +/// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data.  | 
 | 47 | +///  | 
 | 48 | +/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can  | 
 | 49 | +/// grab the license files.  | 
 | 50 | +///  | 
 | 51 | +/// Any dependency with a path beginning with `root_path` is ignored, as we  | 
 | 52 | +/// assume `reuse` has covered it already.  | 
 | 53 | +pub fn get_metadata_and_notices(  | 
 | 54 | +    cargo: &Path,  | 
 | 55 | +    dest: &Path,  | 
 | 56 | +    root_path: &Path,  | 
 | 57 | +    manifest_paths: &[&Path],  | 
 | 58 | +) -> Result<BTreeMap<Package, PackageMetadata>, Error> {  | 
 | 59 | +    let mut output = get_metadata(cargo, root_path, manifest_paths)?;  | 
 | 60 | + | 
 | 61 | +    // Now do a cargo-vendor and grab everything  | 
 | 62 | +    let vendor_path = dest.join("vendor");  | 
 | 63 | +    println!("Vendoring deps into {}...", vendor_path.display());  | 
 | 64 | +    run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;  | 
 | 65 | + | 
 | 66 | +    // Now for each dependency we found, go and grab any important looking files  | 
 | 67 | +    for (package, metadata) in output.iter_mut() {  | 
 | 68 | +        load_important_files(package, metadata, &vendor_path)?;  | 
 | 69 | +    }  | 
 | 70 | + | 
 | 71 | +    Ok(output)  | 
 | 72 | +}  | 
 | 73 | + | 
 | 74 | +/// Use `cargo metadata` to get a list of dependencies and their license data.  | 
 | 75 | +///  | 
 | 76 | +/// Any dependency with a path beginning with `root_path` is ignored, as we  | 
 | 77 | +/// assume `reuse` has covered it already.  | 
 | 78 | +pub fn get_metadata(  | 
 | 79 | +    cargo: &Path,  | 
 | 80 | +    root_path: &Path,  | 
 | 81 | +    manifest_paths: &[&Path],  | 
 | 82 | +) -> Result<BTreeMap<Package, PackageMetadata>, Error> {  | 
 | 83 | +    let mut output = BTreeMap::new();  | 
 | 84 | +    // Look at the metadata for each manifest  | 
 | 85 | +    for manifest_path in manifest_paths {  | 
 | 86 | +        if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {  | 
 | 87 | +            panic!("cargo_manifest::get requires a path to a Cargo.toml file");  | 
 | 88 | +        }  | 
 | 89 | +        let metadata = cargo_metadata::MetadataCommand::new()  | 
 | 90 | +            .cargo_path(cargo)  | 
 | 91 | +            .env("RUSTC_BOOTSTRAP", "1")  | 
 | 92 | +            .manifest_path(manifest_path)  | 
 | 93 | +            .exec()?;  | 
 | 94 | +        for package in metadata.packages {  | 
 | 95 | +            let manifest_path = package.manifest_path.as_path();  | 
 | 96 | +            if manifest_path.starts_with(root_path) {  | 
 | 97 | +                // it's an in-tree dependency and reuse covers it  | 
 | 98 | +                continue;  | 
 | 99 | +            }  | 
 | 100 | +            // otherwise it's an out-of-tree dependency  | 
 | 101 | +            let package_id = Package { name: package.name, version: package.version.to_string() };  | 
 | 102 | +            output.insert(  | 
 | 103 | +                package_id,  | 
 | 104 | +                PackageMetadata {  | 
 | 105 | +                    license: package.license.unwrap_or_else(|| String::from("Unspecified")),  | 
 | 106 | +                    authors: package.authors,  | 
 | 107 | +                    notices: BTreeMap::new(),  | 
 | 108 | +                    is_in_libstd: None,  | 
 | 109 | +                },  | 
 | 110 | +            );  | 
 | 111 | +        }  | 
 | 112 | +    }  | 
 | 113 | + | 
 | 114 | +    Ok(output)  | 
 | 115 | +}  | 
 | 116 | + | 
 | 117 | +/// Run cargo-vendor, fetching into the given dir  | 
 | 118 | +fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> {  | 
 | 119 | +    let mut vendor_command = std::process::Command::new(cargo);  | 
 | 120 | +    vendor_command.env("RUSTC_BOOTSTRAP", "1");  | 
 | 121 | +    vendor_command.arg("vendor");  | 
 | 122 | +    vendor_command.arg("--quiet");  | 
 | 123 | +    vendor_command.arg("--versioned-dirs");  | 
 | 124 | +    for manifest_path in manifest_paths {  | 
 | 125 | +        vendor_command.arg("-s");  | 
 | 126 | +        vendor_command.arg(manifest_path);  | 
 | 127 | +    }  | 
 | 128 | +    vendor_command.arg(dest);  | 
 | 129 | + | 
 | 130 | +    let vendor_status = vendor_command.status().map_err(Error::LaunchingVendor)?;  | 
 | 131 | + | 
 | 132 | +    if !vendor_status.success() {  | 
 | 133 | +        return Err(Error::RunningVendor);  | 
 | 134 | +    }  | 
 | 135 | + | 
 | 136 | +    Ok(())  | 
 | 137 | +}  | 
 | 138 | + | 
 | 139 | +/// Add important files off disk into this dependency.  | 
 | 140 | +///  | 
 | 141 | +/// Maybe one-day Cargo.toml will contain enough information that we don't need  | 
 | 142 | +/// to do this manual scraping.  | 
 | 143 | +fn load_important_files(  | 
 | 144 | +    package: &Package,  | 
 | 145 | +    dep: &mut PackageMetadata,  | 
 | 146 | +    vendor_root: &Path,  | 
 | 147 | +) -> Result<(), Error> {  | 
 | 148 | +    let name_version = format!("{}-{}", package.name, package.version);  | 
 | 149 | +    println!("Scraping notices for {}...", name_version);  | 
 | 150 | +    let dep_vendor_path = vendor_root.join(name_version);  | 
 | 151 | +    for entry in std::fs::read_dir(dep_vendor_path)? {  | 
 | 152 | +        let entry = entry?;  | 
 | 153 | +        let metadata = entry.metadata()?;  | 
 | 154 | +        let path = entry.path();  | 
 | 155 | +        let Some(filename) = path.file_name() else {  | 
 | 156 | +            return Err(Error::Scraping(path));  | 
 | 157 | +        };  | 
 | 158 | +        let lc_filename = filename.to_ascii_lowercase();  | 
 | 159 | +        let lc_filename_str = lc_filename.to_string_lossy();  | 
 | 160 | +        let mut keep = false;  | 
 | 161 | +        for m in ["copyright", "licence", "license", "author", "notice"] {  | 
 | 162 | +            if lc_filename_str.contains(m) {  | 
 | 163 | +                keep = true;  | 
 | 164 | +                break;  | 
 | 165 | +            }  | 
 | 166 | +        }  | 
 | 167 | +        if keep {  | 
 | 168 | +            if metadata.is_dir() {  | 
 | 169 | +                for inner_entry in std::fs::read_dir(entry.path())? {  | 
 | 170 | +                    let inner_entry = inner_entry?;  | 
 | 171 | +                    if inner_entry.metadata()?.is_file() {  | 
 | 172 | +                        let inner_filename = inner_entry.file_name();  | 
 | 173 | +                        let inner_filename_str = inner_filename.to_string_lossy();  | 
 | 174 | +                        let qualified_filename =  | 
 | 175 | +                            format!("{}/{}", lc_filename_str, inner_filename_str);  | 
 | 176 | +                        println!("Scraping {}", qualified_filename);  | 
 | 177 | +                        dep.notices.insert(  | 
 | 178 | +                            qualified_filename.to_string(),  | 
 | 179 | +                            std::fs::read_to_string(inner_entry.path())?,  | 
 | 180 | +                        );  | 
 | 181 | +                    }  | 
 | 182 | +                }  | 
 | 183 | +            } else if metadata.is_file() {  | 
 | 184 | +                let filename = filename.to_string_lossy();  | 
 | 185 | +                println!("Scraping {}", filename);  | 
 | 186 | +                dep.notices.insert(filename.to_string(), std::fs::read_to_string(path)?);  | 
 | 187 | +            }  | 
 | 188 | +        }  | 
 | 189 | +    }  | 
 | 190 | +    Ok(())  | 
 | 191 | +}  | 
0 commit comments