From 1d4a1aa58c8bad54d1cf94f6fe95bebe5f0be686 Mon Sep 17 00:00:00 2001 From: Tim Van Wassenhove Date: Sat, 30 Mar 2024 17:55:37 +0100 Subject: [PATCH] feat: add support for running on gcs --- Usage.md | 4 ++++ src/main.rs | 16 +++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/Usage.md b/Usage.md index 9f82522..35e132f 100644 --- a/Usage.md +++ b/Usage.md @@ -14,6 +14,10 @@ qv ./datasets/tpc-h-parquet/1/customer -s ## View data on GCS. +### Configuration + +QV expects the environment variable 'GOOGLE_APPLICATION_CREDENTIALS' to be set and to point to a file that contains Google credentials. + ```bash qv gs://datafusion-delta-testing/data/delta/COVID-19_NYT ``` diff --git a/src/main.rs b/src/main.rs index 49941bb..c40be3a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -77,11 +77,21 @@ async fn main() -> Result<()> { .map_err(|e| DataFusionError::Execution(format!("Failed to parse url, {e}")))?; let gcs = build_gcs(&gcs_url).await?; let gcs_arc = Arc::new(gcs); - ctx.runtime_env().register_object_store(&gcs_url, gcs_arc); + ctx.runtime_env().register_object_store(&gcs_url, gcs_arc.clone()); deltalake::gcp::register_handlers(None); - data_path + // add trailing slash to folder + if !data_path.ends_with('/') { + let path = Path::parse(gcs_url.path())?; + if gcs_arc.head(&path).await.is_err() { + format!("{data_path}/") + } else { + data_path + } + } else { + data_path + } } else { data_path }; @@ -396,7 +406,7 @@ async fn build_s3(url: &Url, sdk_config: &SdkConfig) -> Result { async fn build_gcs(gcs_url: &Url) -> Result { let google_application_credentials = env::var("GOOGLE_APPLICATION_CREDENTIALS") - .map_err(|e| DataFusionError::External(Box::new(e)))?; + .map_err(|_| DataFusionError::Execution(format!("Could not find GOOGLE_APPLICATION_CREDENTIALS environment variable")))?; let bucket_name = gcs_url.host_str().unwrap();