Skip to content

Commit 68ad990

Browse files
authored
add csv mode to datafusion cli (#281)
1 parent eeb69af commit 68ad990

File tree

8 files changed

+260
-32
lines changed

8 files changed

+260
-32
lines changed

.github/workflows/rust.yml

Lines changed: 72 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ on:
2323
pull_request:
2424

2525
jobs:
26-
2726
# build the library, a compilation step used by multiple steps below
2827
linux-build-lib:
2928
name: Build Libraries on AMD64 Rust ${{ matrix.rust }}
@@ -61,17 +60,19 @@ jobs:
6160
rustup component add rustfmt
6261
- name: Build Workspace
6362
run: |
64-
export CARGO_HOME="/github/home/.cargo"
65-
export CARGO_TARGET_DIR="/github/home/target"
6663
cargo build
64+
env:
65+
CARGO_HOME: "/github/home/.cargo"
66+
CARGO_TARGET_DIR: "/github/home/target"
6767
# Ballista is currently not part of the main workspace so requires a separate build step
6868
- name: Build Ballista
6969
run: |
70-
export CARGO_HOME="/github/home/.cargo"
71-
export CARGO_TARGET_DIR="/github/home/target"
7270
cd ballista/rust
7371
# snmalloc requires cmake so build without default features
7472
cargo build --no-default-features
73+
env:
74+
CARGO_HOME: "/github/home/.cargo"
75+
CARGO_TARGET_DIR: "/github/home/target"
7576

7677
# test the crate
7778
linux-test:
@@ -111,8 +112,6 @@ jobs:
111112
rustup component add rustfmt
112113
- name: Run tests
113114
run: |
114-
export CARGO_HOME="/github/home/.cargo"
115-
export CARGO_TARGET_DIR="/github/home/target"
116115
export ARROW_TEST_DATA=$(pwd)/testing/data
117116
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
118117
# run tests on all workspace members with default feature list
@@ -122,16 +121,69 @@ jobs:
122121
cargo test --no-default-features
123122
cargo run --example csv_sql
124123
cargo run --example parquet_sql
124+
env:
125+
CARGO_HOME: "/github/home/.cargo"
126+
CARGO_TARGET_DIR: "/github/home/target"
125127
# Ballista is currently not part of the main workspace so requires a separate test step
126128
- name: Run Ballista tests
127129
run: |
128-
export CARGO_HOME="/github/home/.cargo"
129-
export CARGO_TARGET_DIR="/github/home/target"
130130
export ARROW_TEST_DATA=$(pwd)/testing/data
131131
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
132132
cd ballista/rust
133133
# snmalloc requires cmake so build without default features
134134
cargo test --no-default-features
135+
env:
136+
CARGO_HOME: "/github/home/.cargo"
137+
CARGO_TARGET_DIR: "/github/home/target"
138+
139+
integration-test:
140+
name: "Integration Test"
141+
needs: [linux-build-lib]
142+
runs-on: ubuntu-latest
143+
services:
144+
postgres:
145+
image: postgres:13
146+
env:
147+
POSTGRES_PASSWORD: postgres
148+
POSTGRES_DB: db_test
149+
ports:
150+
- 5432/tcp
151+
options: >-
152+
--health-cmd pg_isready
153+
--health-interval 10s
154+
--health-timeout 5s
155+
--health-retries 5
156+
steps:
157+
- uses: actions/checkout@v2
158+
- uses: actions/setup-python@v2
159+
with:
160+
python-version: "3.8"
161+
- name: Install Python dependencies
162+
run: |
163+
python -m pip install --upgrade pip setuptools wheel
164+
python -m pip install --upgrade numpy==1.20.3 pandas==1.2.4
165+
- name: Allow access of psql
166+
run: |
167+
# make sure psql can access the server
168+
echo "$POSTGRES_HOST:$POSTGRES_PORT:$POSTGRES_DB:$POSTGRES_USER:$POSTGRES_PASSWORD" | tee ~/.pgpass
169+
chmod 0600 ~/.pgpass
170+
psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c 'select now() as now'
171+
env:
172+
POSTGRES_HOST: localhost
173+
POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
174+
POSTGRES_DB: db_test
175+
POSTGRES_USER: postgres
176+
POSTGRES_PASSWORD: postgres
177+
- name: Build datafusion-cli
178+
run: cargo build --bin datafusion-cli
179+
- name: Test Psql Parity
180+
run: python -m unittest -v integration-tests/test_psql_parity.py
181+
env:
182+
POSTGRES_HOST: localhost
183+
POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
184+
POSTGRES_DB: db_test
185+
POSTGRES_USER: postgres
186+
POSTGRES_PASSWORD: postgres
135187

136188
windows-and-macos:
137189
name: Test on ${{ matrix.os }} Rust ${{ matrix.rust }}
@@ -156,9 +208,10 @@ jobs:
156208
run: |
157209
export ARROW_TEST_DATA=$(pwd)/testing/data
158210
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
159-
# do not produce debug symbols to keep memory usage down
160-
export RUSTFLAGS="-C debuginfo=0"
161211
cargo test
212+
env:
213+
# do not produce debug symbols to keep memory usage down
214+
RUSTFLAGS: "-C debuginfo=0"
162215

163216
lint:
164217
name: Lint
@@ -212,9 +265,10 @@ jobs:
212265
rustup component add rustfmt clippy
213266
- name: Run clippy
214267
run: |
215-
export CARGO_HOME="/github/home/.cargo"
216-
export CARGO_TARGET_DIR="/github/home/target"
217268
cargo clippy --all-targets --workspace -- -D warnings
269+
env:
270+
CARGO_HOME: "/github/home/.cargo"
271+
CARGO_TARGET_DIR: "/github/home/target"
218272

219273
miri-checks:
220274
name: MIRI
@@ -242,9 +296,9 @@ jobs:
242296
- name: Run Miri Checks
243297
env:
244298
RUST_BACKTRACE: full
245-
RUST_LOG: 'trace'
299+
RUST_LOG: "trace"
300+
MIRIFLAGS: "-Zmiri-disable-isolation"
246301
run: |
247-
export MIRIFLAGS="-Zmiri-disable-isolation"
248302
cargo miri setup
249303
cargo clean
250304
# Ignore MIRI errors until we can get a clean run
@@ -275,16 +329,16 @@ jobs:
275329
key: ${{ runner.os }}-${{ matrix.arch }}-target-coverage-cache-${{ matrix.rust }}-
276330
- name: Run coverage
277331
run: |
278-
export CARGO_HOME="/home/runner/.cargo"
279-
export CARGO_TARGET_DIR="/home/runner/target"
280-
281332
export ARROW_TEST_DATA=$(pwd)/testing/data
282333
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
283334
284335
# 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0
285336
# see https://github.com/xd009642/tarpaulin/issues/618
286337
cargo install --version 0.16.0 cargo-tarpaulin
287338
cargo tarpaulin --out Xml
339+
env:
340+
CARGO_HOME: "/home/runner/.cargo"
341+
CARGO_TARGET_DIR: "/home/runner/target"
288342
- name: Report coverage
289343
continue-on-error: true
290344
run: bash <(curl -s https://codecov.io/bash)

datafusion-cli/src/main.rs

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020
use clap::{crate_version, App, Arg};
2121
use datafusion::error::Result;
2222
use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
23-
use datafusion_cli::{print_format::PrintFormat, PrintOptions};
23+
use datafusion_cli::{
24+
print_format::{all_print_formats, PrintFormat},
25+
PrintOptions,
26+
};
2427
use rustyline::Editor;
2528
use std::env;
2629
use std::fs::File;
@@ -63,14 +66,22 @@ pub async fn main() {
6366
)
6467
.arg(
6568
Arg::with_name("format")
66-
.help("Output format (possible values: table, csv, tsv, json)")
69+
.help("Output format")
6770
.long("format")
6871
.default_value("table")
69-
.validator(is_valid_format)
72+
.possible_values(
73+
&all_print_formats()
74+
.iter()
75+
.map(|format| format.to_string())
76+
.collect::<Vec<_>>()
77+
.iter()
78+
.map(|i| i.as_str())
79+
.collect::<Vec<_>>(),
80+
)
7081
.takes_value(true),
7182
)
7283
.arg(
73-
Arg::with_name("quite")
84+
Arg::with_name("quiet")
7485
.help("Reduce printing other than the results and work quietly")
7586
.short("q")
7687
.long("quiet")
@@ -189,14 +200,6 @@ async fn exec_from_repl(execution_config: ExecutionConfig, print_options: PrintO
189200
rl.save_history(".history").ok();
190201
}
191202

192-
fn is_valid_format(format: String) -> std::result::Result<(), String> {
193-
if format.parse::<PrintFormat>().is_ok() {
194-
Ok(())
195-
} else {
196-
Err(format!("Format '{}' not supported", format))
197-
}
198-
}
199-
200203
fn is_valid_file(dir: String) -> std::result::Result<(), String> {
201204
if Path::new(&dir).is_file() {
202205
Ok(())

datafusion-cli/src/print_format.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use arrow::json::ArrayWriter;
2121
use datafusion::arrow::record_batch::RecordBatch;
2222
use datafusion::arrow::util::pretty;
2323
use datafusion::error::{DataFusionError, Result};
24+
use std::fmt;
2425
use std::str::FromStr;
2526

2627
/// Allow records to be printed in different formats
@@ -32,6 +33,16 @@ pub enum PrintFormat {
3233
Json,
3334
}
3435

36+
/// returns all print formats
37+
pub fn all_print_formats() -> Vec<PrintFormat> {
38+
vec![
39+
PrintFormat::Csv,
40+
PrintFormat::Tsv,
41+
PrintFormat::Table,
42+
PrintFormat::Json,
43+
]
44+
}
45+
3546
impl FromStr for PrintFormat {
3647
type Err = ();
3748
fn from_str(s: &str) -> std::result::Result<Self, ()> {
@@ -45,6 +56,17 @@ impl FromStr for PrintFormat {
4556
}
4657
}
4758

59+
impl fmt::Display for PrintFormat {
60+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61+
match *self {
62+
Self::Csv => write!(f, "csv"),
63+
Self::Tsv => write!(f, "tsv"),
64+
Self::Table => write!(f, "table"),
65+
Self::Json => write!(f, "json"),
66+
}
67+
}
68+
}
69+
4870
fn print_batches_to_json(batches: &[RecordBatch]) -> Result<String> {
4971
let mut bytes = vec![];
5072
{
@@ -108,6 +130,14 @@ mod tests {
108130
assert_eq!(PrintFormat::Table, format);
109131
}
110132

133+
#[test]
134+
fn test_to_str() {
135+
assert_eq!("csv", PrintFormat::Csv.to_string());
136+
assert_eq!("table", PrintFormat::Table.to_string());
137+
assert_eq!("tsv", PrintFormat::Tsv.to_string());
138+
assert_eq!("json", PrintFormat::Json.to_string());
139+
}
140+
111141
#[test]
112142
fn test_from_str_failure() {
113143
assert_eq!(true, "pretty".parse::<PrintFormat>().is_err());

datafusion/docs/cli.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ The DataFusion CLI is a command-line interactive SQL utility that allows queries
2525

2626
Use the following commands to clone this repository and run the CLI. This will require the Rust toolchain to be installed. Rust can be installed from [https://rustup.rs/](https://rustup.rs/).
2727

28-
```sh
28+
```bash
2929
git clone https://github.com/apache/arrow-datafusion
3030
cd arrow-datafusion/datafusion-cli
3131
cargo run --release
@@ -35,7 +35,7 @@ cargo run --release
3535

3636
Use the following commands to clone this repository and build a Docker image containing the CLI tool. Note that there is a `.dockerignore` file in the root of the repository that may need to be deleted in order for this to work.
3737

38-
```sh
38+
```bash
3939
git clone https://github.com/apache/arrow-datafusion
4040
cd arrow-datafusion
4141
docker build -f datafusion-cli/Dockerfile . --tag datafusion-cli

integration-tests/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
11+
-- Unless required by applicable law or agreed to in writing, software
12+
-- distributed under the License is distributed on an "AS IS" BASIS,
13+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
-- See the License for the specific language governing permissions and
15+
-- limitations under the License.
16+
17+
SELECT
18+
abs(-1.1) as abs,
19+
exp(2.0) as exp,
20+
sin(3.0) as sin,
21+
cos(4.0) as cos,
22+
tan(5.0) as tan;
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
-- Licensed to the Apache Software Foundation (ASF) under one
2+
-- or more contributor license agreements. See the NOTICE file
3+
-- distributed with this work for additional information
4+
-- regarding copyright ownership. The ASF licenses this file
5+
-- to you under the Apache License, Version 2.0 (the
6+
-- "License"); you may not use this file except in compliance
7+
-- with the License. You may obtain a copy of the License at
8+
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
11+
-- Unless required by applicable law or agreed to in writing, software
12+
-- distributed under the License is distributed on an "AS IS" BASIS,
13+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
-- See the License for the specific language governing permissions and
15+
-- limitations under the License.
16+
17+
SELECT 1 as num;

0 commit comments

Comments
 (0)