Skip to content

Commit

Permalink
feat: add support for running on gcs
Browse files Browse the repository at this point in the history
  • Loading branch information
timvw committed Mar 30, 2024
1 parent f1f789c commit 1d4a1aa
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
4 changes: 4 additions & 0 deletions Usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ qv ./datasets/tpc-h-parquet/1/customer -s

## View data on GCS.

### Configuration

QV expects the environment variable `GOOGLE_APPLICATION_CREDENTIALS` to be set and to point to a file that contains Google credentials.

```bash
qv gs://datafusion-delta-testing/data/delta/COVID-19_NYT
```
Expand Down
16 changes: 13 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,21 @@ async fn main() -> Result<()> {
.map_err(|e| DataFusionError::Execution(format!("Failed to parse url, {e}")))?;

Check warning on line 77 in src/main.rs

View workflow job for this annotation

GitHub Actions / cargo fmt

Diff in /home/runner/work/qv/qv/src/main.rs
let gcs = build_gcs(&gcs_url).await?;
let gcs_arc = Arc::new(gcs);
ctx.runtime_env().register_object_store(&gcs_url, gcs_arc);
ctx.runtime_env().register_object_store(&gcs_url, gcs_arc.clone());

deltalake::gcp::register_handlers(None);

data_path
// add trailing slash to folder
if !data_path.ends_with('/') {
let path = Path::parse(gcs_url.path())?;
if gcs_arc.head(&path).await.is_err() {
format!("{data_path}/")
} else {
data_path
}
} else {
data_path
}
} else {
data_path
};
Expand Down Expand Up @@ -396,7 +406,7 @@ async fn build_s3(url: &Url, sdk_config: &SdkConfig) -> Result<AmazonS3> {

async fn build_gcs(gcs_url: &Url) -> Result<GoogleCloudStorage> {
let google_application_credentials = env::var("GOOGLE_APPLICATION_CREDENTIALS")
.map_err(|e| DataFusionError::External(Box::new(e)))?;
.map_err(|_| DataFusionError::Execution(format!("Could not find GOOGLE_APPLICATION_CREDENTIALS environment variable")))?;

Check failure on line 409 in src/main.rs

View workflow job for this annotation

GitHub Actions / cargo clippy

useless use of `format!`

let bucket_name = gcs_url.host_str().unwrap();

Expand Down

0 comments on commit 1d4a1aa

Please sign in to comment.