Skip to content

Commit

Permalink
Add HTTP object store example (apache#7602)
Browse files Browse the repository at this point in the history
Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
  • Loading branch information
2 people authored and Ted-Jiang committed Oct 7, 2023
1 parent ae86332 commit 5c57afb
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 1 deletion.
2 changes: 1 addition & 1 deletion datafusion-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ futures = "0.3"
log = "0.4"
mimalloc = { version = "0.1", default-features = false }
num_cpus = "1.13.0"
object_store = { version = "0.7.0", features = ["aws"] }
object_store = { version = "0.7.0", features = ["aws", "http"] }
prost = { version = "0.12", default-features = false }
prost-derive = { version = "0.11", default-features = false }
serde = { version = "1.0.136", features = ["derive"] }
Expand Down
1 change: 1 addition & 0 deletions datafusion-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ cargo run --example csv_sql
- [`parquet_sql.rs`](examples/parquet_sql.rs): Build and run a query plan from a SQL statement against a local Parquet file
- [`parquet_sql_multiple_files.rs`](examples/parquet_sql_multiple_files.rs): Build and run a query plan from a SQL statement against multiple local Parquet files
- [`query-aws-s3.rs`](examples/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3
- [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files via HTTP
- [`rewrite_expr.rs`](examples/rewrite_expr.rs): Define and invoke a custom Query Optimizer pass
- [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF)
- [`simple_udf.rs`](examples/simple_udf.rs): Define and invoke a User Defined (scalar) Function (UDF)
Expand Down
57 changes: 57 additions & 0 deletions datafusion-examples/examples/query-http-csv.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use datafusion::error::Result;
use datafusion::prelude::*;
use object_store::http::HttpBuilder;
use std::sync::Arc;
use url::Url;

/// This example demonstrates configuring an HTTP `object_store` and executing a
/// simple SQL query against a CSV file that is addressed by an `https://` URL.
///
/// Steps:
/// 1. Build an HTTP object store rooted at `https://github.com` and register it
///    with the session's runtime environment under that same base URL.
/// 2. Register a remote CSV file as the table `aggregate_test_100`.
/// 3. Run a `SELECT ... LIMIT 5` query against the table and print the result.
///
/// # Errors
///
/// Returns an error if the HTTP object store cannot be built, if the CSV file
/// cannot be registered, or if planning or executing the query fails.
///
/// # Panics
///
/// Panics only if the hard-coded base URL fails to parse, which would be a bug
/// in this example rather than a runtime condition.
#[tokio::main]
async fn main() -> Result<()> {
    // create local execution context
    let ctx = SessionContext::new();

    // setup http object store
    let base_url =
        Url::parse("https://github.com").expect("hard-coded base URL is a valid URL");
    // Propagate builder failures through `main`'s `Result` instead of panicking.
    let http_store = HttpBuilder::new().with_url(base_url.clone()).build()?;
    // The store is registered under the base URL; the CSV path below starts
    // with that same URL.
    ctx.runtime_env()
        .register_object_store(&base_url, Arc::new(http_store));

    // register csv file with the execution context
    ctx.register_csv(
        "aggregate_test_100",
        "https://github.com/apache/arrow-testing/raw/master/data/csv/aggregate_test_100.csv",
        CsvReadOptions::new(),
    )
    .await?;

    // execute the query
    let df = ctx
        .sql("SELECT c1,c2,c3 FROM aggregate_test_100 LIMIT 5")
        .await?;

    // print the results
    df.show().await?;

    Ok(())
}

0 comments on commit 5c57afb

Please sign in to comment.