diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index d928f0177dc36..e5146c7fd94e3 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -46,7 +46,7 @@ futures = "0.3"
 log = "0.4"
 mimalloc = { version = "0.1", default-features = false }
 num_cpus = "1.13.0"
-object_store = { version = "0.7.0", features = ["aws"] }
+object_store = { version = "0.7.0", features = ["aws", "http"] }
 prost = { version = "0.12", default-features = false }
 prost-derive = { version = "0.11", default-features = false }
 serde = { version = "1.0.136", features = ["derive"] }
diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md
index 02dd9c4173251..bfed3976c9469 100644
--- a/datafusion-examples/README.md
+++ b/datafusion-examples/README.md
@@ -54,6 +54,7 @@ cargo run --example csv_sql
 - [`parquet_sql.rs`](examples/parquet_sql.rs): Build and run a query plan from a SQL statement against a local Parquet file
 - [`parquet_sql_multiple_files.rs`](examples/parquet_sql_multiple_files.rs): Build and run a query plan from a SQL statement against multiple local Parquet files
 - [`query-aws-s3.rs`](examples/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3
+- [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files via HTTP
 - [`rewrite_expr.rs`](examples/rewrite_expr.rs): Define and invoke a custom Query Optimizer pass
 - [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF)
 - [`simple_udf.rs`](examples/simple_udf.rs): Define and invoke a User Defined (scalar) Function (UDF)
diff --git a/datafusion-examples/examples/query-http-csv.rs b/datafusion-examples/examples/query-http-csv.rs
new file mode 100644
index 0000000000000..928d702711591
--- /dev/null
+++ b/datafusion-examples/examples/query-http-csv.rs
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more
contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::error::Result;
+use datafusion::prelude::*;
+use object_store::http::HttpBuilder;
+use std::sync::Arc;
+use url::Url;
+
+/// This example demonstrates executing a query against a CSV file fetched over
+/// HTTP via a registered `object_store` HTTP store, and printing the results
+#[tokio::main]
+async fn main() -> Result<()> {
+    // create a local execution context with default session state
+    let ctx = SessionContext::new();
+
+    // build an HTTP object store rooted at https://github.com and register it with the runtime
+    let base_url = Url::parse("https://github.com").unwrap();
+    let http_store = HttpBuilder::new()
+        .with_url(base_url.clone())
+        .build()
+        .unwrap();
+    ctx.runtime_env()
+        .register_object_store(&base_url, Arc::new(http_store));
+
+    // register the remote CSV as a table; its https:// URL falls under the store registered above
+    ctx.register_csv(
+        "aggregate_test_100",
+        "https://github.com/apache/arrow-testing/raw/master/data/csv/aggregate_test_100.csv",
+        CsvReadOptions::new(),
+    )
+    .await?;
+
+    // execute a projection + limit query against the registered table
+    let df = ctx
+        .sql("SELECT c1,c2,c3 FROM aggregate_test_100 LIMIT 5")
+        .await?;
+
+    // print the results to stdout
+    df.show().await?;
+
+    Ok(())
+}