From a17e3b6bc4a11c1270ceb444b009f085050ffbbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fe=CC=81lix=20Saparelli?= Date: Tue, 23 Jan 2024 17:48:40 +1300 Subject: [PATCH] feat(upload): Attempt delegated tokens --- Cargo.lock | 244 ++++++--------------------------- Cargo.toml | 6 +- src/actions/context.rs | 2 +- src/actions/upload.rs | 29 ++-- src/actions/upload/cancel.rs | 87 ------------ src/actions/upload/confirm.rs | 92 ------------- src/actions/upload/delegate.rs | 151 ++++++++++++++++++++ src/actions/upload/file.rs | 170 ----------------------- src/actions/upload/files.rs | 144 +++++++++++++++++++ src/actions/upload/list.rs | 4 +- src/actions/upload/preauth.rs | 240 -------------------------------- src/actions/upload/status.rs | 101 -------------- src/actions/upload/token.rs | 119 ---------------- src/aws.rs | 97 ++++++++----- src/aws/s3.rs | 100 +------------- src/aws/token.rs | 98 +++++++++++++ 16 files changed, 519 insertions(+), 1165 deletions(-) delete mode 100644 src/actions/upload/cancel.rs delete mode 100644 src/actions/upload/confirm.rs create mode 100644 src/actions/upload/delegate.rs delete mode 100644 src/actions/upload/file.rs create mode 100644 src/actions/upload/files.rs delete mode 100644 src/actions/upload/preauth.rs delete mode 100644 src/actions/upload/status.rs delete mode 100644 src/actions/upload/token.rs create mode 100644 src/aws/token.rs diff --git a/Cargo.lock b/Cargo.lock index 6e23d82..368b55f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,19 +89,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "async-compression" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a116f46a969224200a0a97f29cfd4c50e7534e4b4826bd23ea2c3c533039c82c" -dependencies = [ - "flate2", - "futures-core", - "memchr", - "pin-project-lite", - "tokio", -] - [[package]] name = "autocfg" version = "1.1.0" @@ -548,18 +535,17 @@ dependencies = [ "aws-config", "aws-credential-types", "aws-sdk-s3", + "aws-sdk-sts", "base64ct", "boxcar", "build-data", "bytes", - "bytesize", "chrono", "clap", "clap_complete", "clap_complete_nushell", "dirs", "duct", - "flate2", "glob", "humantime", "indicatif", @@ -568,14 +554,13 @@ dependencies = [ "leon-macros", "miette", "mimalloc", - "minicbor", "node-semver", - "reqwest", "serde", "serde_json", "tokio", "tracing", "tracing-subscriber", + "walkdir", "windows_exe_info", ] @@ -645,12 +630,6 @@ dependencies = [ "either", ] -[[package]] -name = "bytesize" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" - [[package]] name = "cc" version = "1.0.83" @@ -733,7 +712,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.48", + "syn", ] [[package]] @@ -954,7 +933,7 @@ dependencies = [ "rustc_version", "toml", "vswhom", - "winreg 0.51.0", + "winreg", ] [[package]] @@ -963,15 +942,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" -[[package]] -name = "encoding_rs" -version = "0.8.33" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1" -dependencies = [ - "cfg-if", -] - [[package]] name = "equivalent" version = "1.0.1" @@ -1004,16 +974,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "flate2" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - [[package]] name = "fnv" version = "1.0.7" @@ -1321,12 +1281,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "ipnet" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" - [[package]] name = "is-terminal" version = "0.4.10" @@ -1395,7 +1349,7 @@ dependencies = [ "leon", "proc-macro2", "quote", - "syn 2.0.48", + "syn", ] [[package]] @@ -1501,7 +1455,7 @@ checksum = "49e7bc1560b95a3c4a25d03de42fe76ca718ab92d1a22a55b9b4cf67b3ae635c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn", ] [[package]] @@ -1513,32 +1467,6 @@ dependencies = [ "libmimalloc-sys", ] -[[package]] -name = "mime" -version = "0.3.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" - -[[package]] -name = "minicbor" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d15f4203d71fdf90903c2696e55426ac97a363c67b218488a73b534ce7aca10" -dependencies = [ - "minicbor-derive", -] - -[[package]] -name = "minicbor-derive" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1154809406efdb7982841adb6311b3d095b46f78342dd646736122fe6b19e267" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1759,7 +1687,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.48", + "syn", ] [[package]] @@ -1904,48 +1832,6 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" -[[package]] -name = "reqwest" -version = "0.11.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41" -dependencies = [ - "async-compression", - "base64", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2", - "http 0.2.11", - "http-body", - "hyper", - "hyper-rustls", - "ipnet", - "js-sys", - "log", - "mime", - "once_cell", - "percent-encoding", - "pin-project-lite", - "rustls", - "rustls-native-certs", - "rustls-pemfile", - "serde", - "serde_json", - "serde_urlencoded", - "system-configuration", - "tokio", - "tokio-rustls", - "tokio-util", - "tower-service", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "winreg 0.50.0", -] - [[package]] name = "rfc6979" version = "0.3.1" @@ -2101,6 +1987,15 @@ dependencies = [ "safe-regex-compiler", ] +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.23" @@ -2186,7 +2081,7 @@ checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn", ] [[package]] @@ -2209,18 +2104,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - 
"serde", -] - [[package]] name = "sha1" version = "0.10.6" @@ -2368,17 +2251,6 @@ dependencies = [ "is-terminal", ] -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.48" @@ -2390,27 +2262,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "system-configuration" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" -dependencies = [ - "bitflags 1.3.2", - "core-foundation", - "system-configuration-sys", -] - -[[package]] -name = "system-configuration-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "terminal_size" version = "0.1.17" @@ -2459,7 +2310,7 @@ checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn", ] [[package]] @@ -2542,7 +2393,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn", ] [[package]] @@ -2628,7 +2479,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn", ] [[package]] @@ -2816,6 +2667,16 @@ dependencies = [ "libc", ] +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -2852,22 +2713,10 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.48", + "syn", "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.40" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bde2032aeb86bdfaecc8b261eef3cba735cc426c1f3a3416d1e0791be95fc461" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "wasm-bindgen-macro" version = "0.2.90" @@ -2886,7 +2735,7 @@ checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2897,16 +2746,6 @@ version = "0.2.90" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" -[[package]] -name = "web-sys" -version = "0.3.67" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "winapi" version = "0.3.9" @@ -2923,6 +2762,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" 
+dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -3088,16 +2936,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.50.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" -dependencies = [ - "cfg-if", - "windows-sys 0.48.0", -] - [[package]] name = "winreg" version = "0.51.0" diff --git a/Cargo.toml b/Cargo.toml index bb3d3bc..5c19a63 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,17 +15,16 @@ repository = "https://github.com/beyondessential/ops" aws-config = "1.1.2" aws-credential-types = { version = "1.1.2", features = ["hardcoded-credentials"] } aws-sdk-s3 = "1.12.0" +aws-sdk-sts = "1.11.0" base64ct = { version = "1.6.0", features = ["std"] } boxcar = "0.2.4" bytes = "1.5.0" -bytesize = "1.3.0" chrono = "0.4.31" clap = { version = "4.4.18", features = ["derive", "cargo", "wrap_help", "env", "unicode", "string"] } clap_complete = "4.4.8" clap_complete_nushell = "4.4.2" dirs = "5.0.1" duct = "0.13.7" -flate2 = "1.0.28" glob = "0.3.1" humantime = "2.1.0" indicatif = { version = "0.17.7", features = ["tokio"] } @@ -34,14 +33,13 @@ leon = "2.0.1" leon-macros = "1.0.0" miette = { version = "5.10.0", features = ["fancy"] } mimalloc = "0.1.39" -minicbor = { version = "0.20.0", features = ["derive", "std"] } node-semver = "2.1.0" -reqwest = { version = "0.11.23", default-features = false, features = ["rustls-tls-native-roots", "gzip", "json"] } serde = { version = "1.0.195", features = ["derive"] } serde_json = "1.0.111" tokio = { version = "1.35.1", features = ["full"] } tracing = "0.1.40" tracing-subscriber = { version = "0.3.18", features = ["env-filter", "json"] } +walkdir = "2.4.0" [build-dependencies] build-data = "0.1.5" diff --git a/src/actions/context.rs b/src/actions/context.rs index c5b7753..19c2978 100644 --- a/src/actions/context.rs +++ b/src/actions/context.rs @@ -9,7 +9,7 @@ use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use tracing::Metadata; use tracing_subscriber::fmt::MakeWriter; -use super::upload::token::UploadId; +use super::upload::UploadId; #[derive(Clone, Debug)] pub struct Context { diff --git a/src/actions/upload.rs b/src/actions/upload.rs index 91a18dd..3f76646 100644 --- a/src/actions/upload.rs +++ b/src/actions/upload.rs @@ -3,13 +3,9 @@ use miette::Result; use super::Context; -pub mod cancel; -pub mod confirm; -pub mod file; +pub mod delegate; +pub mod files; pub mod list; -pub mod preauth; -pub mod status; -pub mod token; /// Upload files to S3. 
#[derive(Debug, Clone, Parser)] @@ -21,21 +17,22 @@ pub struct UploadArgs { #[derive(Debug, Clone, Subcommand)] pub enum UploadAction { - Cancel(cancel::CancelArgs), - Confirm(confirm::ConfirmArgs), - File(file::FileArgs), + Files(files::FilesArgs), List(list::ListArgs), - Preauth(preauth::PreauthArgs), - Status(status::StatusArgs), + Delegate(delegate::DelegateArgs), } pub async fn run(ctx: Context) -> Result<()> { match ctx.args_top.action.clone() { - UploadAction::Cancel(subargs) => cancel::run(ctx.with_sub(subargs)).await, - UploadAction::Confirm(subargs) => confirm::run(ctx.with_sub(subargs)).await, - UploadAction::File(subargs) => file::run(ctx.with_sub(subargs)).await, + UploadAction::Files(subargs) => files::run(ctx.with_sub(subargs)).await, UploadAction::List(subargs) => list::run(ctx.with_sub(subargs)).await, - UploadAction::Preauth(subargs) => preauth::run(ctx.with_sub(subargs)).await, - UploadAction::Status(subargs) => status::run(ctx.with_sub(subargs)).await, + UploadAction::Delegate(subargs) => delegate::run(ctx.with_sub(subargs)).await, } } + +#[derive(Debug, Clone)] +pub struct UploadId { + pub bucket: String, + pub key: String, + pub id: String, +} diff --git a/src/actions/upload/cancel.rs b/src/actions/upload/cancel.rs deleted file mode 100644 index dcddf05..0000000 --- a/src/actions/upload/cancel.rs +++ /dev/null @@ -1,87 +0,0 @@ -use std::path::PathBuf; - -use aws_sdk_s3::Client as S3Client; -use clap::Parser; -use miette::{miette, IntoDiagnostic, Result}; -use tokio::{fs::File, io::AsyncReadExt}; -use tracing::{info, instrument}; - -use crate::{ - actions::{upload::token::decode_token, Context}, - aws::{self, AwsArgsFragment}, -}; - -use super::UploadArgs; - -/// Cancel a pre-auth'ed upload. -/// -/// Given a pre-auth token or Upload ID (as generated by `bestool upload preauth`), cancel the -/// upload it references. If the upload is ongoing, this will cause it to immediately fail. -/// -/// This MUST be run from your local, trusted computer, using WriteAccess or AdminAccess to the -/// account that contains the destination bucket. -#[derive(Debug, Clone, Parser)] -pub struct CancelArgs { - /// File which contains the token to cancel. - #[arg( - long, - value_name = "FILENAME", - default_value = "token.txt", - required_unless_present_any = &["token", "upload_id"], - )] - pub token_file: PathBuf, - - /// Token value. - /// - /// This is the token to cancel. If not specified here, it will be taken from the file specified - /// in `--token-file`. Prefer to use `--token-file` instead of this option, as tokens are - /// generally larger than can be passed on the command line. - #[arg(long, value_name = "TOKEN")] - pub token: Option, - - /// Upload ID. - /// - /// This is the Upload ID to cancel. If not specified here, it will be taken from `--token` or - /// `--token-file`. - #[arg(long, value_name = "UPLOAD_ID", conflicts_with_all = &["token", "token_file"])] - pub upload_id: Option, - - #[command(flatten)] - pub aws: AwsArgsFragment, -} - -#[instrument(skip(ctx))] -pub async fn run(ctx: Context) -> Result<()> { - let id = if let Some(upload_id) = ctx.args_sub.upload_id.as_deref() { - upload_id.parse().map_err(|err| miette!("{}", err))? 
- } else { - let token = if let Some(token) = ctx.args_sub.token.clone() { - token - } else { - let mut file = File::open(&ctx.args_sub.token_file) - .await - .into_diagnostic()?; - let mut token = String::new(); - file.read_to_string(&mut token).await.into_diagnostic()?; - token - }; - - decode_token(&token)?.id - }; - - let aws = aws::init(&ctx.args_sub.aws).await; - let client = S3Client::new(&aws); - - info!(?id, "Cancelling multipart upload"); - client - .abort_multipart_upload() - .bucket(id.bucket) - .key(id.key) - .upload_id(id.id) - .send() - .await - .into_diagnostic()?; - info!("Upload cancelled"); - - Ok(()) -} diff --git a/src/actions/upload/confirm.rs b/src/actions/upload/confirm.rs deleted file mode 100644 index 0465b2b..0000000 --- a/src/actions/upload/confirm.rs +++ /dev/null @@ -1,92 +0,0 @@ -use std::path::PathBuf; - -use aws_sdk_s3::Client as S3Client; -use clap::Parser; -use miette::{miette, IntoDiagnostic, Result}; -use tokio::{fs::File, io::AsyncReadExt}; -use tracing::{info, instrument}; - -use crate::{ - actions::{upload::token::decode_token, Context}, - aws::{self, AwsArgsFragment}, -}; - -use super::UploadArgs; - -/// Confirm an upload. -/// -/// Given a pre-auth token or Upload ID (as generated by `bestool upload preauth`), confirm it. This -/// will finalise the upload and write the file to the bucket. Until this is called, the upload is -/// not visible to the bucket and will eventually get deleted. -/// -/// This MUST be run from your local, trusted computer, using WriteAccess or AdminAccess to the -/// account that contains the destination bucket. Make sure the upload is complete before calling -/// this, as it may fail or write a partial file otherwise. -#[derive(Debug, Clone, Parser)] -pub struct ConfirmArgs { - /// File which contains the token to confirm. - #[arg( - long, - value_name = "FILENAME", - default_value = "token.txt", - required_unless_present_any = &["token", "upload_id"], - )] - pub token_file: PathBuf, - - /// Token value. - /// - /// This is the token to confirm. If not specified here, it will be taken from the file specified - /// in `--token-file`. Prefer to use `--token-file` instead of this option, as tokens are - /// generally larger than can be passed on the command line. - #[arg(long, value_name = "TOKEN")] - pub token: Option, - - /// Upload ID. - /// - /// This is the Upload ID to confirm. If not specified here, it will be taken from `--token` or - /// `--token-file`. - #[arg( - long, - value_name = "UPLOAD_ID", - conflicts_with_all = &["token", "token_file"], - )] - pub upload_id: Option, - - #[command(flatten)] - pub aws: AwsArgsFragment, -} - -#[instrument(skip(ctx))] -pub async fn run(ctx: Context) -> Result<()> { - let id = if let Some(upload_id) = ctx.args_sub.upload_id.as_deref() { - upload_id.parse().map_err(|err| miette!("{}", err))? 
-	} else {
-		let token = if let Some(token) = ctx.args_sub.token.clone() {
-			token
-		} else {
-			let mut file = File::open(&ctx.args_sub.token_file)
-				.await
-				.into_diagnostic()?;
-			let mut token = String::new();
-			file.read_to_string(&mut token).await.into_diagnostic()?;
-			token
-		};
-
-		decode_token(&token)?.id
-	};
-
-	let aws = aws::init(&ctx.args_sub.aws).await;
-	let _client = S3Client::new(&aws);
-
-	info!(?id, "Finalising multipart upload");
-	// client
-	// 	.abort_multipart_upload()
-	// 	.bucket(id.bucket)
-	// 	.key(id.key)
-	// 	.upload_id(id.id)
-	// 	.send()
-	// 	.await
-	// 	.into_diagnostic()?;
-
-	Ok(())
-}
diff --git a/src/actions/upload/delegate.rs b/src/actions/upload/delegate.rs
new file mode 100644
index 0000000..25d5085
--- /dev/null
+++ b/src/actions/upload/delegate.rs
@@ -0,0 +1,151 @@
+use aws_sdk_s3::{types::BucketVersioningStatus, Client as S3Client};
+use clap::Parser;
+use miette::{bail, IntoDiagnostic, Result};
+use serde_json::json;
+use tracing::{info, instrument};
+
+use crate::{
+	actions::Context,
+	aws::{self, s3::parse_bucket_and_key, token::DelegatedToken, AwsArgs},
+};
+
+use super::UploadArgs;
+
+/// Generate a delegated token to upload a file.
+///
+/// This MUST be run from your local, trusted computer, using WriteAccess or AdminAccess to the
+/// account that contains the desired destination bucket. It will generate a delegated token that
+/// allows anyone to upload a file to the specified bucket and key, without them needing any AWS
+/// credentials.
+///
+/// Tokens have an expiry date. By default, tokens expire after 12 HOURS. You can specify a longer
+/// or shorter expiry time with `--expiry`. Be careful with short expiry times! The token must
+/// remain valid for the entire duration of the upload, and if the upload takes longer than the
+/// token is valid, it will fail and will need to be retried from scratch, using a new token.
+#[derive(Debug, Clone, Parser)]
+pub struct DelegateArgs {
+	/// AWS S3 bucket to upload to.
+	///
+	/// This may also contain the key, if given in s3://bucket/key format. See the `--key` option
+	/// for semantics of the key portion.
+	#[arg(long, value_name = "BUCKET", required = true)]
+	pub bucket: String,
+
+	/// Pathname in the bucket to upload to.
+	///
+	/// Files can only be uploaded to exactly the key specified here, unless wildcards are used.
+	///
+	/// If this contains one or more wildcards (`*`), files can be uploaded to any key that matches
+	/// the wildcard pattern (be very careful with this!). When using wildcards, always enclose the
+	/// value in quotes, to prevent the shell from expanding the wildcard. You also need to specify
+	/// `--allow-wildcards`, to prevent mishaps.
+	///
+	/// You can also give the key via the `--bucket` option, if provided in s3://bucket/key format.
+	#[arg(long, value_name = "KEY")]
+	pub key: Option<String>,
+
+	/// Expiry duration of the token.
+	///
+	/// This is the duration for which the token will be valid. It can be specified in seconds, or
+	/// with a suffix like `m` for minutes, `h` for hours, or `d` for days. The default is 12 hours.
+	/// Minimum is 15 minutes and maximum is 36 hours, unless you're logged in with the root user,
+	/// in which case the maximum (and default) is 1 hour. However, you should avoid using the root
+	/// user for anything other than account management, which this tool is not.
+	///
+	/// Be careful with short expiry times! The token must remain valid for the entire duration of
+	/// the upload, and if the upload takes longer than the token is valid, it will fail and will
+	/// need to be retried from scratch, using a new token.
+	///
+	/// Longer expiries are more convenient, but also more dangerous, as they give more time for an
+	/// attacker to use the token to upload files to your bucket. On the whole, though, this is a
+	/// pretty hard token to misuse: the worst that can happen is someone uploading a huge file to
+	/// your bucket, incurring transfer and storage costs.
+	#[arg(long, value_name = "DURATION", default_value = "12h")]
+	pub expiry: humantime::Duration,
+
+	/// Allow non-versioned buckets.
+	///
+	/// By default, this command will refuse to generate a token for a non-versioned bucket, because
+	/// it's too easy to accidentally overwrite files. If you really want to, provide this option.
+	#[arg(long)]
+	pub allow_non_versioned: bool,
+
+	/// Allow wildcard keys.
+	///
+	/// By default, this command will refuse to generate a token for wildcard keys, to prevent
+	/// accidents. If that's what you intended to do, provide this option. Remember to enclose the
+	/// key in quotes and be careful about providing too-wide access!
+	#[arg(long)]
+	pub allow_wildcards: bool,
+
+	#[command(flatten)]
+	pub aws: AwsArgs,
+}
+
+#[instrument(skip(ctx))]
+pub async fn run(ctx: Context) -> Result<()> {
+	let (bucket, key) = parse_bucket_and_key(&ctx.args_sub.bucket, ctx.args_sub.key.as_deref())?;
+
+	let aws = aws::init(&ctx.args_sub.aws).await;
+
+	if !ctx.args_sub.allow_non_versioned {
+		info!("Checking bucket is versioned");
+		let client = S3Client::new(&aws);
+		match client
+			.get_bucket_versioning()
+			.bucket(bucket)
+			.send()
+			.await
+			.into_diagnostic()
+			.map(|r| r.status)
+		{
+			Ok(Some(BucketVersioningStatus::Enabled)) => (),
+			// A None status means versioning has never been enabled; treat it like Suspended.
+			Ok(_) => bail!("Bucket is not versioned; allowing delegated upload is dangerous. Use --allow-non-versioned to bypass."),
+			Err(err) => bail!("Unable to check if bucket is versioned. Allowing delegated upload may be dangerous. Use --allow-non-versioned to bypass this check.\n{err:?}"),
+		}
+	}
+
+	if !ctx.args_sub.allow_wildcards {
+		info!("Checking key is not a wildcard");
+		if key.contains('*') {
+			bail!(
+				"Key contains a wildcard; this can be dangerous. Use --allow-wildcards to bypass."
+			);
+		}
+	}
+
+	if key.ends_with('/') {
+		bail!("Key ends with a slash, so it can't be used to upload a file. 
Specify the full path, or use a wildcard."); + } + + info!( + "Generating federated credentials to upload to s3://{}/{}", + bucket, key + ); + let token = DelegatedToken::new( + &aws, + ctx.args_sub.expiry.into(), + &json!({ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:CreateMultipartUpload", + "s3:CompleteMultipartUpload", + "s3:AbortMultipartUpload", + "s3:UploadPart", + ], + "Resource": [ + format!("arn:aws:s3:::{}/{}", bucket, key), + ], + }, + ], + }), + ) + .await?; + + println!("{token}"); + Ok(()) +} diff --git a/src/actions/upload/file.rs b/src/actions/upload/file.rs deleted file mode 100644 index a89a945..0000000 --- a/src/actions/upload/file.rs +++ /dev/null @@ -1,170 +0,0 @@ -use std::path::PathBuf; - -use aws_sdk_s3::Client as S3Client; -use clap::{Parser, ValueHint}; -use miette::{bail, IntoDiagnostic, Result}; -use tokio::{fs::File, io::AsyncReadExt}; -use tracing::{error, instrument, warn}; - -use crate::{ - actions::Context, - aws::{ - self, - s3::{multipart_upload, parse_bucket_and_key, singlepart_upload, token_upload}, - AwsArgsFragment, - }, -}; - -use super::{token::decode_token, UploadArgs}; - -/// Upload a file to AWS S3. -/// -/// There's two ways to upload a file: using AWS credentials, or using a pre-auth token. If you have -/// AWS credentials, you can use the `--bucket` and `--key` options to specify the bucket and key to -/// upload to. If you don't have AWS credentials, you can use the `--pre-auth` option to specify a -/// pre-auth token generated by `bestool upload preauth`, either by you on a trusted computer, or by -/// someone else who has AWS credentials with write access to a bucket. -/// -/// If you use the `--pre-auth` option, the token generator specifies the number of parts the file -/// can be split into. In general, that will be enough to upload the file in small chunks and retry -/// any failed chunks, which is more reliable than uploading the file in one go, especially for very -/// big files and flaky connections. If you use AWS credentials and have the required permissions, -/// the tool will adaptively split the file into chunks and retry failed chunks, without the limits -/// inherent from the pre-auth token. That often leads to faster uploads than with a pre-auth token. -/// -/// If you need to upload an entire folder, either archive it first (e.g. with Zip or tar), or use -/// `bestool upload folder` instead, which archives the folder itself and otherwise behaves as here. -/// -/// Uploading multiple files to the same bucket and key is possible (specify multiple PATHS here), -/// but only with AWS credentials, not with a pre-auth token. -#[derive(Debug, Clone, Parser)] -pub struct FileArgs { - /// File(s) to upload. - /// - /// You can specify multiple files here, and they will all be uploaded to the same bucket and - /// key, which must in this case end with a slash. Uploading multiple files is not possible with - /// a pre-auth token. - #[arg( - value_hint = ValueHint::FilePath, - value_name = "PATH", - required = true, - )] - pub files: Vec, - - /// AWS S3 bucket to upload to. - /// - /// This may also contain the key, if given in s3://bucket/key format. See the `--key` option - /// for semantics of the key portion. - #[arg( - long, - value_name = "BUCKET", - required_unless_present_any = &["token", "token_file"], - )] - pub bucket: Option, - - /// Pathname in the bucket to upload to. - /// - /// If not specified, the file will be uploaded to the root of the bucket. 
- /// - /// If this ends with a slash, the file will be uploaded to a directory, and the filename will - /// be the same as the local filename. If this does not end with a slash, the file will be given - /// the exact name as specified here. - /// - /// You can also give the key via the `--bucket` option, if provided in s3://bucket/key format. - #[arg(long, value_name = "KEY")] - pub key: Option, - - /// File which contains the pre-auth token to use. - /// - /// This is a pre-auth token generated by `bestool upload preauth`. Setting this is exclusive to - /// setting `--bucket` and `--key`, as the pre-auth token is generated for a particular bucket - /// and filename within that bucket. - /// - /// Using this option, you can upload a file without AWS credentials. Note that pre-auth tokens - /// are time-limited: if you try to use an expired token, the upload will fail; more critically, - /// if the upload takes longer than the token's lifetime, the upload will also fail. - #[arg( - long, - value_name = "FILENAME", - required_unless_present_any = &["token", "bucket", "key"], - conflicts_with_all = &["bucket", "key", "aws_access_key_id", "aws_secret_access_key", "aws_region"], - )] - pub token_file: Option, - - /// Pre-auth token to use as a string. - /// - /// Same as `--token-file` but passing the token as a string on the command-line. Generally you - /// should prefer using `--token-file`, as tokens are generally too long to pass as arguments. - #[arg( - long, - value_name = "TOKEN", - required_unless_present_any = &["token_file", "bucket", "key"], - conflicts_with_all = &["bucket", "key", "aws_access_key_id", "aws_secret_access_key", "aws_region"], - )] - pub token: Option, - - #[command(flatten)] - pub aws: AwsArgsFragment, -} - -#[instrument(skip(ctx))] -pub async fn run(mut ctx: Context) -> Result<()> { - let token = if let Some(token) = ctx.args_sub.token.clone() { - Some(decode_token(&token)?) - } else if let Some(token_file) = ctx.args_sub.token_file.as_deref() { - let mut file = File::open(token_file).await.into_diagnostic()?; - let mut token = String::new(); - file.read_to_string(&mut token).await.into_diagnostic()?; - Some(decode_token(&token)?) 
-	} else {
-		None
-	};
-
-	if let Some(token) = token {
-		if ctx.args_sub.files.len() > 1 {
-			bail!("Cannot upload multiple files with a pre-auth token");
-		}
-
-		let Some(file) = ctx.args_sub.files.pop() else {
-			bail!("No file to upload");
-		};
-
-		token_upload(ctx.erased(), token, &file).await
-	} else if let Some(bucket) = ctx.args_sub.bucket.as_deref() {
-		let (bucket, key) = parse_bucket_and_key(bucket, ctx.args_sub.key.as_deref())?;
-		with_aws(ctx.clone(), bucket, key).await
-	} else {
-		bail!("No bucket or pre-auth token specified");
-	}
-}
-
-pub async fn with_aws(ctx: Context, bucket: &str, key: &str) -> Result<()> {
-	let aws = aws::init(&ctx.args_sub.aws).await;
-	let client = S3Client::new(&aws);
-
-	let (first, files) = {
-		let (left, right) = ctx.args_sub.files.split_at(1);
-		// UNWRAP: length is checked to >= 1 in run()
-		(left.get(0).unwrap(), right)
-	};
-
-	let use_multipart =
-		if let Err(err) = multipart_upload(ctx.erased(), bucket, key, first, &client).await {
-			error!(?err, "Upload failed with multipart");
-			warn!("Attempting single-part upload(s) instead");
-			singlepart_upload(ctx.erased(), bucket, key, first, &client).await?;
-			false
-		} else {
-			true
-		};
-
-	for file in files {
-		if use_multipart {
-			multipart_upload(ctx.erased(), bucket, key, file, &client).await?;
-		} else {
-			singlepart_upload(ctx.erased(), bucket, key, file, &client).await?;
-		}
-	}
-
-	Ok(())
-}
diff --git a/src/actions/upload/files.rs b/src/actions/upload/files.rs
new file mode 100644
index 0000000..f41c729
--- /dev/null
+++ b/src/actions/upload/files.rs
@@ -0,0 +1,144 @@
+use std::{mem::take, path::PathBuf};
+
+use aws_sdk_s3::Client as S3Client;
+use clap::{Parser, ValueHint};
+use miette::{bail, Result};
+use tracing::{error, instrument, warn};
+
+use crate::{
+	actions::Context,
+	aws::{
+		self,
+		s3::{multipart_upload, parse_bucket_and_key, singlepart_upload},
+		AwsArgs,
+	},
+};
+
+use super::UploadArgs;
+
+/// Upload files to AWS S3.
+///
+/// There are two ways to upload a file: using AWS credentials, or using a delegated token. If you
+/// don't have AWS credentials on the machine holding the files, someone with the necessary access
+/// can generate a delegated token with `bestool upload delegate`, which you can then pass via the
+/// `--aws-delegated` option.
+///
+/// This tool uploads files in small chunks and retries any failed chunks, which is more reliable
+/// than uploading the file in one go, especially for very big files and flaky connections. It will
+/// adaptively reduce the chunking size as it retries failed chunks, to work around broken networks.
+///
+/// If you need to upload an entire folder, use `bestool archive` to generate an archive file,
+/// optionally with compression, and upload that instead. Alternatively, you can upload multiple
+/// files to the same bucket and key (specify multiple PATH arguments), and even use `--recursive`.
+#[derive(Debug, Clone, Parser)]
+pub struct FilesArgs {
+	/// File(s) to upload.
+	///
+	/// You can specify multiple files here, and they will all be uploaded to the same bucket and
+	/// key, which must in this case end with a slash. Also see `--recursive` for the behaviour when
+	/// uploading folders.
+	#[arg(
+		value_hint = ValueHint::FilePath,
+		value_name = "PATH",
+		required = true,
+		num_args = 1..,
+	)]
+	pub files: Vec<PathBuf>,
+
+	/// AWS S3 bucket to upload to.
+	///
+	/// This may also contain the key, if given in s3://bucket/key format. See the `--key` option
+	/// for semantics of the key portion.
+	///
+	/// If using a delegated token, this bucket must match the bucket the token was generated for.
+	#[arg(long, value_name = "BUCKET")]
+	pub bucket: String,
+
+	/// Pathname in the bucket to upload to.
+	///
+	/// If not specified, the file will be uploaded to the root of the bucket.
+	///
+	/// If this ends with a slash, the file will be uploaded to a directory, and the filename will
+	/// be the same as the local filename. If this does not end with a slash, the file will be given
+	/// the exact name as specified here.
+	///
+	/// You can also give the key via the `--bucket` option, if provided in s3://bucket/key format.
+	///
+	/// If using a delegated token, this key must match the key the token was generated for.
+	#[arg(long, value_name = "KEY")]
+	pub key: Option<String>,
+
+	/// Recurse into folders.
+	///
+	/// Any folders given in the PATH arguments will be listed, all files found will be uploaded,
+	/// and any subfolders found will be recursed into.
+	///
+	/// If this is not specified, any folders given in the PATH arguments will error.
+	#[arg(long)]
+	pub recursive: bool,
+
+	#[command(flatten)]
+	pub aws: AwsArgs,
+}
+
+#[instrument(skip(ctx))]
+pub async fn run(mut ctx: Context) -> Result<()> {
+	let (bucket, key) = parse_bucket_and_key(&ctx.args_sub.bucket, ctx.args_sub.key.as_deref())?;
+	let aws = aws::init(&ctx.args_sub.aws).await;
+	let client = S3Client::new(&aws);
+
+	let files = take(&mut ctx.args_sub.files);
+	let files = if ctx.args_sub.recursive {
+		let mut filtered = Vec::new();
+		for file in files {
+			if file.is_dir() {
+				filtered.extend(
+					walkdir::WalkDir::new(file)
+						.into_iter()
+						.filter_map(|entry| entry.ok())
+						.filter(|entry| entry.file_type().is_file())
+						.map(|entry| entry.into_path()),
+				);
+			} else {
+				filtered.push(file);
+			}
+		}
+		filtered
+	} else {
+		for file in &files {
+			if file.is_dir() {
+				bail!("Cannot upload a directory without --recursive");
+			}
+		}
+		files
+	};
+
+	// split_first avoids the panic split_at(1) would cause when recursive
+	// filtering finds no files at all.
+	let Some((first, files)) = files.split_first() else {
+		bail!("No files to upload");
+	};
+
+	let use_multipart =
+		if let Err(err) = multipart_upload(ctx.erased(), bucket, key, first, &client).await {
+			error!(?err, "Upload failed with multipart");
+			warn!("Attempting single-part upload(s) instead");
+			singlepart_upload(ctx.erased(), bucket, key, first, &client).await?;
+			false
+		} else {
+			true
+		};
+
+	for file in files {
+		if use_multipart {
+			multipart_upload(ctx.erased(), bucket, key, file, &client).await?;
+		} else {
+			singlepart_upload(ctx.erased(), bucket, key, file, &client).await?;
+		}
+	}
+
+	Ok(())
+}
diff --git a/src/actions/upload/list.rs b/src/actions/upload/list.rs
index ed0b185..a977d3a 100644
--- a/src/actions/upload/list.rs
+++ b/src/actions/upload/list.rs
@@ -5,7 +5,7 @@ use tracing::{info, instrument};
 
 use crate::{
 	actions::Context,
-	aws::{self, AwsArgsFragment},
+	aws::{self, AwsArgs},
 };
 
 use super::UploadArgs;
@@ -23,7 +23,7 @@ pub struct ListArgs {
 	pub bucket: String,
 
 	#[command(flatten)]
-	pub aws: AwsArgsFragment,
+	pub aws: AwsArgs,
 }
 
 #[instrument(skip(ctx))]
diff --git a/src/actions/upload/preauth.rs b/src/actions/upload/preauth.rs
deleted file mode 100644
index 24c120d..0000000
--- a/src/actions/upload/preauth.rs
+++ /dev/null
@@ -1,240 +0,0 @@
-use std::path::PathBuf;
-
-use aws_sdk_s3::{presigning::PresigningConfig, types::ChecksumAlgorithm, Client as S3Client};
-use clap::Parser;
-use miette::{bail, IntoDiagnostic, Result};
-use tokio::{fs::File, io::AsyncWriteExt};
-use 
tracing::{debug, info, instrument}; - -use crate::{ - actions::{ - upload::token::{encode_token, UploadId}, - Context, - }, - aws::{self, s3::parse_bucket_and_key, AwsArgsFragment, MINIMUM_MULTIPART_PART_SIZE}, - file_chunker::DEFAULT_CHUNK_SIZE, -}; - -use super::UploadArgs; - -/// Generate a pre-auth token to upload a file. -/// -/// This MUST be run from your local, trusted computer, using WriteAccess or AdminAccess to the -/// account that contains the desired destination bucket. It will generate a pre-auth token that -/// allows anyone to upload a file to the specified bucket and key, without them needing any AWS -/// credentials. -/// -/// As a token can be quite large, it will be written to a file, by default `token.txt`. Override -/// this with `--token-file`. The contents of the file are text, so can be copy-pasted rather than -/// requiring to transmit the file verbatim. -/// -/// Once all parts are uploaded, the upload must be confirmed using `bestool upload confirm`. Take -/// note of the "Upload ID" printed by this command, as it will be needed to confirm the upload, to -/// see its status with `bestool upload status`, or to cancel it with `bestool upload cancel`. -/// -/// When creating a token, you specify the maximum number of parts the file can be split into, -/// either explicitly with `--max-parts` or by giving `--approximate-size`, which will use internal -/// logic to produce an appropriate number for most situations. This will be used to upload the file -/// in small chunks and retry any failed chunks, which is more reliable than uploading the file in -/// one go, especially for very large files and flaky connections. -/// -/// If the uploader runs out of parts to use before the file is fully uploaded, it will fail and a -/// new token will have to be generated with a higher number of parts. -/// -/// Tokens also have an expiry date. By default, tokens expire after 2 HOURS. You can specify a -/// longer or shorter expiry time with `--expiry`. Be careful with short expiry times! The token -/// must remain valid for the entire duration of the upload, and if the upload takes longer than the -/// token is valid, it will fail and will need to be retried from scratch, using a new token. -/// -/// A maximum of 1000 tokens can be active at any one time for a particular bucket. -#[derive(Debug, Clone, Parser)] -pub struct PreauthArgs { - /// AWS S3 bucket to upload to. - /// - /// This may also contain the key, if given in s3://bucket/key format. See the `--key` option - /// for semantics of the key portion. - #[arg(long, value_name = "BUCKET", required = true)] - pub bucket: String, - - /// Pathname in the bucket to upload to. - /// - /// If not specified, the file will be uploaded to the root of the bucket. - /// - /// If this ends with a slash, the file will be uploaded to a directory, and the filename will - /// be the same as the local filename. If this does not end with a slash, the file will be given - /// the exact name as specified here. - /// - /// You can also give the key via the `--bucket` option, if provided in s3://bucket/key format. - #[arg(long, value_name = "KEY")] - pub key: Option, - - /// Expiry duration of the token. - /// - /// This is the duration for which the token will be valid. It can be specified in seconds, or - /// with a suffix like `m` for minutes, `h` for hours, or `d` for days. The default is 2 hours. - /// - /// Be careful with short expiry times! 
The token must remain valid for the entire duration of - /// the upload, and if the upload takes longer than the token is valid, it will fail and will - /// need to be retried from scratch, using a new token. You can't issue a token valid for longer - /// than your own credentials are; this is mostly an issue when using temporary (eg SSO) creds. - /// - /// Longer expiries are more convenient, but also more dangerous, as they give more time for an - /// attacker to use the token to upload files to your bucket. On the whole, though, this is a - /// pretty hard token to misuse: the worst that can happen is someone uploading a huge file to - /// your bucket, incurring you transfer and storage costs. - #[arg(long, value_name = "DURATION", default_value = "2h")] - pub expiry: humantime::Duration, - - /// Maximum number of parts the file can be split into. - /// - /// This is the maximum number of parts the file can be split into. If not specified, it will be - /// calculated from `--approximate-size`, using internal logic to produce an appropriate number. - /// - /// If you specify both, `--max-parts` takes precedence. - /// - /// If the uploader runs out of parts to use before the file is fully uploaded, it will fail and - /// a new token will have to be generated with a higher number of parts; for this reason you - /// should include some extra parts in the number you specify here if calculating it from a size - /// yourself; 10% extra is a good rule of thumb. - /// - /// The absolute maximum is 10000. - #[arg( - long, - value_name = "PARTS", - value_parser = clap::value_parser!(u64).range(1..10000), - required_unless_present = "approximate_size", - )] - pub max_parts: Option, - - /// Approximate size of the entire file. - /// - /// Provide the approximate size of the entire file, in bytes. You can use suffixes like `K`, - /// `M`, or `G`. It will be used to calculate the maximum number of parts the file can be split - /// into, including some extra parts for retries. - #[arg(long, value_name = "SIZE", required_unless_present = "max_parts")] - pub approximate_size: Option, - - /// File to save the token to. 
- #[arg(long, value_name = "FILENAME", default_value = "token.txt")] - pub token_file: PathBuf, - - #[command(flatten)] - pub aws: AwsArgsFragment, -} - -#[instrument(skip(ctx))] -pub async fn run(ctx: Context) -> Result<()> { - let (bucket, key) = parse_bucket_and_key(&ctx.args_sub.bucket, ctx.args_sub.key.as_deref())?; - - let parts = i32::try_from(if let Some(max_parts) = ctx.args_sub.max_parts { - max_parts - } else if let Some(approximate_size) = ctx.args_sub.approximate_size { - let approximate_size = approximate_size.as_u64(); - if approximate_size == 0 { - bail!("--approximate-size cannot be zero"); - } - - let small_chunks = plus_margin(approximate_size / MINIMUM_MULTIPART_PART_SIZE.get()); - let medium_chunks = plus_margin(approximate_size / DEFAULT_CHUNK_SIZE.get()); - let large_chunks = plus_margin(approximate_size / DEFAULT_CHUNK_SIZE.get()); - - if small_chunks <= 1000 { - small_chunks - } else if medium_chunks <= 1000 { - medium_chunks - } else if large_chunks <= 1000 { - large_chunks - } else { - 1000 - } - } else { - unreachable!("clap should enforce one of max_parts or approximate_size"); - }) - .into_diagnostic()?; - - if ctx.args_sub.token_file.exists() { - bail!( - "Token file {:?} already exists, not overwriting", - ctx.args_sub.token_file - ); - } - - info!( - "Generating pre-auth token for s3://{}/{} ({} parts)", - bucket, key, parts - ); - let aws = aws::init(&ctx.args_sub.aws).await; - let client = S3Client::new(&aws); - - let progress = ctx.bar((parts as u64) + 3); - progress.tick(); - - debug!("Creating multipart upload"); - progress.set_message("create multipart"); - let checksum = ChecksumAlgorithm::Sha256; - let mp = client - .create_multipart_upload() - .bucket(bucket) - .key(&*key) - .checksum_algorithm(checksum.clone()) - .metadata("Preauther", crate::APP_NAME) - .send() - .await - .into_diagnostic()?; - progress.inc(1); - - let Some(upload_id) = mp.upload_id else { - bail!("No upload ID returned from S3"); - }; - let upload_id = UploadId { - bucket: bucket.into(), - key: key.into(), - id: upload_id, - parts, - }; - - ctx.progress.suspend(|| eprintln!("Upload ID: {upload_id}")); - - let presigning = PresigningConfig::expires_in(ctx.args_sub.expiry.into()).into_diagnostic()?; - info!( - "Pre-auth token valid from {:?}, expires in {:?}", - presigning.start_time(), - presigning.expires() - ); - - progress.set_message("presign multipart"); - let mut presigned_parts = Vec::with_capacity(parts as usize); - for part_no in 1..=parts { - progress.inc(1); - presigned_parts.push( - client - .upload_part() - .bucket(bucket) - .key(key) - .checksum_algorithm(checksum.clone()) - .part_number(part_no) - .upload_id(&upload_id.id) - .presigned(presigning.clone()) - .await - .into_diagnostic()?, - ); - } - - progress.set_message("generate token"); - let token = encode_token(&upload_id, &presigned_parts)?; - progress.inc(1); - - progress.set_message(format!("write to {}", ctx.args_sub.token_file.display())); - let mut file = File::create(&ctx.args_sub.token_file) - .await - .into_diagnostic()?; - file.write_all(token.as_bytes()).await.into_diagnostic()?; - progress.inc(1); - - progress.abandon(); - Ok(()) -} - -fn plus_margin(n: u64) -> u64 { - n.max(1) + (n / 10).max(9) -} diff --git a/src/actions/upload/status.rs b/src/actions/upload/status.rs deleted file mode 100644 index c5d076e..0000000 --- a/src/actions/upload/status.rs +++ /dev/null @@ -1,101 +0,0 @@ -use std::path::PathBuf; - -use aws_sdk_s3::Client as S3Client; -use clap::Parser; -use miette::{miette, 
IntoDiagnostic, Result}; -use tokio::{fs::File, io::AsyncReadExt}; -use tracing::{info, instrument}; - -use crate::{ - actions::{upload::token::decode_token, Context}, - aws::{self, AwsArgsFragment}, -}; - -use super::UploadArgs; - -/// Query the status of an upload. -/// -/// Given a pre-auth token or Upload ID (as generated by `bestool upload preauth`), show when it -/// expires, and how many parts have been uploaded so far, along with the amount of data uploaded. -/// -/// This MUST be run from your local, trusted computer, using WriteAccess or AdminAccess to the -/// account that contains the destination bucket. -#[derive(Debug, Clone, Parser)] -pub struct StatusArgs { - /// File which contains the token to query. - #[arg( - long, - value_name = "FILENAME", - default_value = "token.txt", - required_unless_present_any = &["token", "upload_id"], - )] - pub token_file: PathBuf, - - /// Token value. - /// - /// This is the token to query. If not specified here, it will be taken from the file specified - /// in `--token-file`. Prefer to use `--token-file` instead of this option, as tokens are - /// generally larger than can be passed on the command line. - #[arg(long, value_name = "TOKEN")] - pub token: Option, - - /// Upload ID. - /// - /// This is the Upload ID to query. If not specified here, it will be taken from `--token` or - /// `--token-file`. - #[arg( - long, - value_name = "UPLOAD_ID", - conflicts_with_all = &["token", "token_file"], - )] - pub upload_id: Option, - - #[command(flatten)] - pub aws: AwsArgsFragment, -} - -#[instrument(skip(ctx))] -pub async fn run(ctx: Context) -> Result<()> { - let id = if let Some(upload_id) = ctx.args_sub.upload_id.as_deref() { - upload_id.parse().map_err(|err| miette!("{}", err))? - } else { - let token = if let Some(token) = ctx.args_sub.token.clone() { - token - } else { - let mut file = File::open(&ctx.args_sub.token_file) - .await - .into_diagnostic()?; - let mut token = String::new(); - file.read_to_string(&mut token).await.into_diagnostic()?; - token - }; - - decode_token(&token)?.id - }; - - let aws = aws::init(&ctx.args_sub.aws).await; - let client = S3Client::new(&aws); - - info!(?id, "Querying multipart upload status"); - let mut parts = client - .list_parts() - .bucket(id.bucket) - .key(id.key) - .upload_id(id.id) - .into_paginator() - .items() - .send(); - - let mut parts_remaining = id.parts; - while let Some(part) = parts.next().await { - parts_remaining -= 1; - let part = part.into_diagnostic()?; - eprintln!("{part:?}"); - } - - if parts_remaining > 0 { - eprintln!("{} parts remaining", parts_remaining); - } - - Ok(()) -} diff --git a/src/actions/upload/token.rs b/src/actions/upload/token.rs deleted file mode 100644 index df97c20..0000000 --- a/src/actions/upload/token.rs +++ /dev/null @@ -1,119 +0,0 @@ -use std::{ - collections::BTreeMap, - fmt, - io::{Read, Write}, - str::FromStr, -}; - -use aws_sdk_s3::presigning::PresignedRequest; -use base64ct::{Base64Unpadded, Encoding}; -use flate2::{bufread::ZlibDecoder, write::ZlibEncoder, Compression}; -use miette::{IntoDiagnostic, Result}; -use minicbor::{Decode, Encode}; -use tracing::instrument; - -#[derive(Debug, Clone, Encode, Decode)] -pub struct UploadId { - #[n(1)] - pub bucket: String, - #[n(2)] - pub key: String, - #[n(3)] - pub id: String, - #[n(4)] - pub parts: i32, -} - -impl FromStr for UploadId { - type Err = String; - - fn from_str(s: &str) -> Result { - let [bucket, key, id, parts] = - <[&str; 4]>::try_from(s.splitn(4, '|').collect::>()).map_err(|parts| { - format!( - 
"not enough |-separated parts: expected 4, got {}", - parts.len() - ) - })?; - - if bucket.is_empty() || key.is_empty() || id.is_empty() || parts.is_empty() { - return Err("Invalid upload ID (empty parts)".to_string()); - } - - Ok(UploadId { - bucket: bucket.to_string(), - key: key.to_string(), - id: id.to_string(), - parts: i32::from_str(parts).map_err(|err| err.to_string())?, - }) - } -} - -impl fmt::Display for UploadId { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}|{}|{}|{}", self.bucket, self.key, self.id, self.parts) - } -} - -#[derive(Debug, Clone, Encode, Decode)] -#[cbor(map)] -pub struct Token { - #[n(0)] - pub version: u8, - #[n(1)] - pub id: UploadId, - #[n(2)] - pub parts: Vec, - #[n(3)] - pub preauther: String, -} - -#[derive(Debug, Clone, Encode, Decode)] -#[cbor(map)] -pub struct TokenPart { - #[n(1)] - pub number: i32, - #[n(2)] - pub method: String, - #[n(3)] - pub uri: String, - #[n(4)] - pub headers: BTreeMap, -} - -#[instrument(skip(parts), level = "debug")] -pub fn encode_token(upload_id: &UploadId, parts: &[PresignedRequest]) -> Result { - let token = Token { - version: 0, - id: upload_id.clone(), - parts: parts - .iter() - .enumerate() - .map(|(number, part)| TokenPart { - number: number as i32 + 1, - method: part.method().to_string(), - uri: part.uri().to_string(), - headers: part - .headers() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(), - }) - .collect(), - preauther: crate::APP_NAME.to_string(), - }; - - let encoded = minicbor::to_vec(&token).into_diagnostic()?; - let mut e = ZlibEncoder::new(Vec::new(), Compression::best()); - e.write_all(&encoded).into_diagnostic()?; - let compressed = e.finish().into_diagnostic()?; - Ok(Base64Unpadded::encode_string(&compressed)) -} - -#[instrument(skip(token), level = "debug")] -pub fn decode_token(token: &str) -> Result { - let compressed = Base64Unpadded::decode_vec(&token).into_diagnostic()?; - let mut d = ZlibDecoder::new(&compressed[..]); - let mut encoded = Vec::new(); - d.read_to_end(&mut encoded).into_diagnostic()?; - minicbor::decode(&encoded).into_diagnostic() -} diff --git a/src/aws.rs b/src/aws.rs index 6fdf739..1097f69 100644 --- a/src/aws.rs +++ b/src/aws.rs @@ -1,4 +1,4 @@ -use std::{borrow::Cow, num::NonZeroU64}; +use std::num::NonZeroU64; use aws_config::{ default_provider::credentials::Builder, AppName, BehaviorVersion, ConfigLoader, Region, @@ -8,6 +8,7 @@ use aws_credential_types::Credentials; use clap::Parser; pub mod s3; +pub mod token; /// The minimum size of a part in a multipart upload (excluding the last part). /// @@ -16,21 +17,14 @@ pub mod s3; /// /// Also see: . /// -/// In practice "5 MiB" is not even enough, it must be a little more than that. +/// In practice "5 MiB" is not even enough, it must be a little more than that; we use 6 MiB. // SAFETY: hardcoded pub const MINIMUM_MULTIPART_PART_SIZE: NonZeroU64 = unsafe { NonZeroU64::new_unchecked(6 * 1024 * 1024) }; -/// Implement this trait on an Args struct to be able to use it as an AWS credential source. -pub trait AwsArgs { - /// Get the AWS Access Key ID. - fn aws_access_key_id(&self) -> Option>; - fn aws_secret_access_key(&self) -> Option>; - fn aws_region(&self) -> Option>; -} - +/// Include this struct as `#[command(flatten)]` in an Args struct so it can host AWS credentials. #[derive(Debug, Clone, Parser)] -pub struct AwsArgsFragment { +pub struct AwsArgs { /// AWS Access Key ID. /// /// This is the AWS Access Key ID to use for authentication. 
If not specified here, it will be
@@ -54,37 +48,72 @@
 	/// file (usually `~/.aws/credentials`), or from ambient credentials (eg EC2 instance profile).
 	#[arg(long, value_name = "REGION")]
 	pub aws_region: Option<String>,
+
+	/// AWS Session Token.
+	///
+	/// This is the AWS Session Token to use for authentication using temporary credentials. If not
+	/// specified here, it will be taken from the environment variable `AWS_SESSION_TOKEN` if it exists.
+	#[arg(long, value_name = "SESSION_TOKEN")]
+	pub aws_session_token: Option<String>,
+
+	/// AWS Delegated Identity Token.
+	///
+	/// This is a Base64-encoded JSON structure containing an access key id, secret key, session
+	/// token, and expiry time. It can be generated using `delegate` subcommands or other tooling.
+	/// It is used as a more convenient way to pass AWS credentials to `bestool` when using
+	/// temporary credentials.
+	#[arg(long, value_name = "TOKEN")]
+	pub aws_delegated: Option<token::DelegatedToken>,
 }
 
-standard_aws_args!(AwsArgsFragment);
+impl AwsArgs {
+	fn aws_access_key_id(&self) -> Option<::std::borrow::Cow<'_, str>> {
+		self.aws_access_key_id
+			.as_deref()
+			.map(::std::borrow::Cow::Borrowed)
+			.or_else(|| {
+				self.aws_delegated
+					.as_ref()
+					.map(|t| ::std::borrow::Cow::Owned(t.access_key_id.clone()))
+			})
+	}
 
-macro_rules! standard_aws_args {
-	($args:ident) => {
-		impl crate::aws::AwsArgs for $args {
-			fn aws_access_key_id(&self) -> Option<::std::borrow::Cow<'_, str>> {
-				self.aws_access_key_id
+	fn aws_secret_access_key(&self) -> Option<::std::borrow::Cow<'_, str>> {
+		self.aws_secret_access_key
+			.as_deref()
+			.map(::std::borrow::Cow::Borrowed)
+			.or_else(|| {
+				self.aws_delegated
 					.as_ref()
-					.map(|s| ::std::borrow::Cow::Borrowed(s.as_str()))
-			}
+					.map(|t| ::std::borrow::Cow::Owned(t.secret_access_key.clone()))
+			})
+	}
 
-			fn aws_secret_access_key(&self) -> Option<::std::borrow::Cow<'_, str>> {
-				self.aws_secret_access_key
+	fn aws_region(&self) -> Option<::std::borrow::Cow<'_, str>> {
+		self.aws_region
+			.as_deref()
+			.or_else(|| {
+				self.aws_delegated
 					.as_ref()
-					.map(|s| ::std::borrow::Cow::Borrowed(s.as_str()))
-			}
+					.and_then(|t: &token::DelegatedToken| t.region.as_deref())
+			})
+			.map(::std::borrow::Cow::Borrowed)
+	}
 
-			fn aws_region(&self) -> Option<::std::borrow::Cow<'_, str>> {
-				self.aws_region
+	fn aws_session_token(&self) -> Option<::std::borrow::Cow<'_, str>> {
+		self.aws_session_token
+			.as_deref()
+			.or_else(|| {
+				self.aws_delegated
 					.as_ref()
-					.map(|s| ::std::borrow::Cow::Borrowed(s.as_str()))
-			}
-		}
-	};
+					.and_then(|t| t.session_token.as_deref())
+			})
+			.map(::std::borrow::Cow::Borrowed)
+	}
 }
-pub(crate) use standard_aws_args;
 
 /// Get AWS config from the environment, or credentials files, or ambient, etc.
-pub async fn init(args: &dyn AwsArgs) -> SdkConfig {
+pub async fn init(args: &AwsArgs) -> SdkConfig {
 	let mut config = ConfigLoader::default()
 		.behavior_version(BehaviorVersion::v2023_11_09())
 		.app_name(AppName::new(crate::APP_NAME).unwrap());
@@ -93,8 +122,10 @@
 	// instead of having only the keys as credentials provider, we set up a full provider chain
 	// and add these credentials to it, so that we can still use ambient credentials, regions,
 	// sessions, etc. 
- let mut chain = Builder::default() - .with_custom_credential_source("args", Credentials::from_keys(key_id, secret, None)); + let mut chain = Builder::default().with_custom_credential_source( + "args", + Credentials::from_keys(key_id, secret, args.aws_session_token().map(Into::into)), + ); if let Some(region) = args.aws_region() { chain = chain.region(Region::new(region.into_owned())); } diff --git a/src/aws/s3.rs b/src/aws/s3.rs index fd2763d..f752c8c 100644 --- a/src/aws/s3.rs +++ b/src/aws/s3.rs @@ -1,10 +1,9 @@ use std::{ borrow::Cow, - mem::take, num::NonZeroU64, path::Path, sync::{ - atomic::{AtomicU32, AtomicUsize, Ordering}, + atomic::{AtomicU32, Ordering}, Arc, }, }; @@ -15,16 +14,11 @@ use aws_sdk_s3::{ Client as S3Client, }; use miette::{bail, IntoDiagnostic, Result}; -use reqwest::Method; use tokio::fs::metadata; use tracing::{debug, info, instrument}; use crate::{ - actions::{ - context::Cleanup, - upload::token::{Token, UploadId}, - Context, - }, + actions::{context::Cleanup, upload::UploadId, Context}, file_chunker::{FileChunker, DEFAULT_CHUNK_SIZE}, }; @@ -66,7 +60,6 @@ pub async fn multipart_upload( bucket: bucket.to_string(), key: key.to_string(), id: upload_id, - parts: chunker.chunks() as i32, }; ctx.add_cleanup(Cleanup::MultiPartUpload(upload_id.clone())); @@ -176,93 +169,6 @@ pub async fn multipart_upload( Ok(()) } -#[instrument(skip(ctx, token))] -pub async fn token_upload(ctx: Context, mut token: Token, file: &Path) -> Result<()> { - let client = reqwest::Client::new(); - - debug!("Loading file {}", file.display()); - let mut chunker = FileChunker::new(file).await?; - // UNWRAP: DEFAULT_CHUNK_SIZE is non-zero - let token_parts = token.id.parts as u64; - chunker.chunk_size = NonZeroU64::new( - (chunker.len() / (token_parts - (token_parts / 10))).max(DEFAULT_CHUNK_SIZE.get()), - ) - .unwrap(); - chunker.min_chunk_size = MINIMUM_MULTIPART_PART_SIZE; - - info!( - chunk_size=%chunker.chunk_size, - upload_id=?token.id, - "Uploading {} ({} bytes) to s3://{}/{}", - file.display(), - chunker.len(), - token.id.bucket, - token.id.key - ); - let progress = ctx.data_bar(chunker.len()); - progress.set_message(file.display().to_string()); - progress.tick(); - - let parts = Arc::<[_]>::from(take(&mut token.parts).into_boxed_slice()); - let part_i = Arc::new(AtomicUsize::new(0)); - - while let Some((bytes, _)) = - match chunker - .with_next_chunk(&{ - let client = client.clone(); - let parts = parts.clone(); - let part_i = part_i.clone(); - - move |bytes| { - let client = client.clone(); - let parts = parts.clone(); - let part_i = part_i.load(Ordering::SeqCst); - - async move { - let Some(part) = parts.get(part_i) else { - bail!("Used all of the parts in the token, but still have chunks to upload!"); - }; - - debug!(bytes = bytes.len(), "uploading a chunk"); - let mut request = client.request( - match part.method.to_ascii_uppercase().as_str() { - "GET" => Method::GET, - "PATCH" => Method::PATCH, - "POST" => Method::POST, - "PUT" => Method::PUT, - _ => bail!("Invalid/unknown HTTP method in token: {}", part.method), - }, - &part.uri, - ); - for (key, value) in &part.headers { - request = request.header(key, value); - } - request.body(bytes).send().await.into_diagnostic()?; - - Ok(()) - } - } - }) - .await - { - Ok(res) => res, - Err(err) => { - debug!(?err, "error sending chunk, stopping upload"); - return Err(err); - } - } { - if part_i.fetch_add(1, Ordering::SeqCst) as u64 >= token_parts { - bail!("Used all of the parts in the token, but still have chunks to upload!"); - } - 
progress.inc(bytes);
-	}
-
-	progress.tick();
-	progress.abandon(); // finish, leaving the completed bar in place
-
-	Ok(())
-}
-
 pub async fn singlepart_upload(
 	ctx: Context,
 	bucket: &str,
@@ -320,7 +226,7 @@ pub fn parse_bucket_and_key<'a>(
 		if let Some((bucket, key)) = bucket[5..].split_once('/') {
 			(bucket, key)
 		} else {
-			(bucket, "/")
+			(bucket, "")
 		}
 	} else if let Some(key) = key {
 		(bucket, key)
diff --git a/src/aws/token.rs b/src/aws/token.rs
new file mode 100644
index 0000000..8a6b609
--- /dev/null
+++ b/src/aws/token.rs
@@ -0,0 +1,98 @@
+use std::{fmt, str::FromStr, time::Duration};
+
+use aws_config::SdkConfig;
+use aws_sdk_sts::Client as STSClient;
+use base64ct::{Base64, Encoding};
+use miette::{IntoDiagnostic, Result};
+use serde::{Deserialize, Serialize};
+use tracing::info;
+
+pub const DELEGATED_TOKEN_VERSION: u8 = 1;
+
+/// AWS Delegated Identity Token.
+///
+/// This is a Base64-encoded JSON structure containing an access key id, secret key, session token,
+/// and expiry time.
+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct DelegatedToken {
+	pub version: u8,
+	pub access_key_id: String,
+	pub secret_access_key: String,
+	pub region: Option<String>,
+	pub session_token: Option<String>,
+	pub expiry: Option<String>,
+}
+
+impl FromStr for DelegatedToken {
+	type Err = std::io::Error;
+
+	fn from_str(s: &str) -> Result<Self, Self::Err> {
+		let json = Base64::decode_vec(s)?;
+		let token: DelegatedToken = serde_json::from_slice(&json)?;
+		if token.version != DELEGATED_TOKEN_VERSION {
+			return Err(std::io::Error::other("Invalid token version"));
+		}
+		Ok(token)
+	}
+}
+
+impl fmt::Display for DelegatedToken {
+	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+		let json = serde_json::to_string(self).map_err(|_| fmt::Error)?;
+		let base = Base64::encode_string(json.as_bytes());
+		f.write_str(&base)
+	}
+}
+
+impl DelegatedToken {
+	pub async fn new(
+		aws: &SdkConfig,
+		expiry: Duration,
+		policy: &serde_json::Value,
+	) -> Result<Self> {
+		let client = STSClient::new(aws);
+		let token = client
+			.get_federation_token()
+			.name(crate::APP_NAME)
+			.duration_seconds(expiry.as_secs() as i32)
+			.policy(policy.to_string())
+			.send()
+			.await
+			.into_diagnostic()?;
+
+		info!(
+			"Created temporary federated user: {:?}",
+			token.federated_user.as_ref().unwrap()
+		);
+		info!(
+			"Token expires at: {}",
+			token.credentials.as_ref().unwrap().expiration
+		);
+
+		Ok(Self {
+			version: DELEGATED_TOKEN_VERSION,
+			access_key_id: token
+				.credentials
+				.as_ref()
+				.unwrap()
+				.access_key_id
+				.to_string(),
+			secret_access_key: token
+				.credentials
+				.as_ref()
+				.unwrap()
+				.secret_access_key
+				.to_string(),
+			session_token: Some(
+				token
+					.credentials
+					.as_ref()
+					.unwrap()
+					.session_token
+					.to_string(),
+			),
+			region: aws.region().map(|s| s.to_string()),
+			expiry: Some(token.credentials.unwrap().expiration.to_string()),
+		})
+	}
+}
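
For review context, the intended end-to-end flow looks like this (bucket, key, and file names are invented): on a trusted machine with write access, run `bestool upload delegate --bucket s3://my-bucket --key 'backups/*' --allow-wildcards --expiry 12h > token.b64`, which prints the Base64 token to stdout; then, on the machine holding the files and without any AWS credentials, run `bestool upload files --bucket s3://my-bucket --key backups/ --aws-delegated "$(cat token.b64)" db.dump`, which picks the temporary federated credentials out of the token.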
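
To poke at the token wire format without touching AWS, here is a minimal standalone sketch of the round-trip that the `Display` and `FromStr` impls above perform (field values are invented; it needs only the serde, serde_json, and base64ct crates):

    // Round-trip the delegated token wire format: JSON, then padded Base64.
    use base64ct::{Base64, Encoding};
    use serde::{Deserialize, Serialize};

    const DELEGATED_TOKEN_VERSION: u8 = 1;

    #[derive(Debug, Deserialize, Serialize)]
    struct DelegatedToken {
        version: u8,
        access_key_id: String,
        secret_access_key: String,
        region: Option<String>,
        session_token: Option<String>,
        expiry: Option<String>,
    }

    fn main() {
        let token = DelegatedToken {
            version: DELEGATED_TOKEN_VERSION,
            access_key_id: "AKIAEXAMPLE".into(),
            secret_access_key: "not-a-real-secret".into(),
            region: Some("ap-southeast-2".into()),
            session_token: Some("example-session-token".into()),
            expiry: Some("2024-01-24T05:48:40Z".into()),
        };

        // Encode: serialise to JSON, then Base64 (same as the Display impl).
        let encoded = Base64::encode_string(serde_json::to_string(&token).unwrap().as_bytes());

        // Decode: Base64 to bytes, then JSON, then check the version (as FromStr does).
        let bytes = Base64::decode_vec(&encoded).unwrap();
        let decoded: DelegatedToken = serde_json::from_slice(&bytes).unwrap();
        assert_eq!(decoded.version, DELEGATED_TOKEN_VERSION);

        println!("{encoded}");
    }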