Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into hash_join_batch_size
Browse files Browse the repository at this point in the history
  • Loading branch information
korowa committed Jan 4, 2024
2 parents bc26d47 + 819d357 commit e0bd40b
Show file tree
Hide file tree
Showing 438 changed files with 55,191 additions and 23,991 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Audit licenses
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev_pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
github.event_name == 'pull_request_target' &&
(github.event.action == 'opened' ||
github.event.action == 'synchronize')
uses: actions/labeler@v4.3.0
uses: actions/labeler@v5.0.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
configuration-path: .github/workflows/dev_pr/labeler.yml
Expand Down
34 changes: 18 additions & 16 deletions .github/workflows/dev_pr/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,35 +16,37 @@
# under the License.

development-process:
- dev/**.*
- .github/**.*
- ci/**.*
- .asf.yaml
- changed-files:
- any-glob-to-any-file: ['dev/**.*', '.github/**.*', 'ci/**.*', '.asf.yaml']

documentation:
- docs/**.*
- README.md
- ./**/README.md
- DEVELOPERS.md
- datafusion/docs/**.*
- changed-files:
- any-glob-to-any-file: ['docs/**.*', 'README.md', './**/README.md', 'DEVELOPERS.md', 'datafusion/docs/**.*']

sql:
- datafusion/sql/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/sql/**/*']

logical-expr:
- datafusion/expr/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/expr/**/*']

physical-expr:
- datafusion/physical-expr/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/physical-expr/**/*']

optimizer:
- datafusion/optimizer/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/optimizer/**/*']

core:
- datafusion/core/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/core/**/*']

substrait:
- datafusion/substrait/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/substrait/**/*']

sqllogictest:
- datafusion/sqllogictest/**/*
- changed-files:
- any-glob-to-any-file: ['datafusion/sqllogictest/**/*']
2 changes: 1 addition & 1 deletion .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
path: asf-site

- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: "3.10"

Expand Down
29 changes: 25 additions & 4 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ jobs:
- name: Check workspace without default features
run: cargo check --no-default-features -p datafusion

- name: Check datafusion-common without default features
run: cargo check --tests --no-default-features -p datafusion-common

- name: Check workspace in debug mode
run: cargo check

Expand Down Expand Up @@ -96,6 +99,14 @@ jobs:
rust-version: stable
- name: Run tests (excluding doctests)
run: cargo test --lib --tests --bins --features avro,json,backtrace
env:
# do not produce debug symbols to keep memory usage down
# hardcode the other profile parameters to avoid picking up profile override values
# More on Cargo profiles https://doc.rust-lang.org/cargo/reference/profiles.html?profile-settings#profile-settings
RUSTFLAGS: "-C debuginfo=0 -C opt-level=0 -C incremental=false -C codegen-units=256"
RUST_BACKTRACE: "1"
# avoid rust stack overflows on tpc-ds tests
RUST_MINSTACK: "3000000"
- name: Verify Working Directory Clean
run: git diff --exit-code

Expand Down Expand Up @@ -287,6 +298,7 @@ jobs:
# with a OS-dependent path.
- name: Setup Rust toolchain
run: |
rustup update stable
rustup toolchain install stable
rustup default stable
rustup component add rustfmt
Expand All @@ -299,9 +311,13 @@ jobs:
cargo test --lib --tests --bins --all-features
env:
# do not produce debug symbols to keep memory usage down
RUSTFLAGS: "-C debuginfo=0"
# use higher optimization level to overcome Windows rust slowness for tpc-ds
# and speed builds: https://github.com/apache/arrow-datafusion/issues/8696
# Cargo profile docs https://doc.rust-lang.org/cargo/reference/profiles.html?profile-settings#profile-settings
RUSTFLAGS: "-C debuginfo=0 -C opt-level=1 -C target-feature=+crt-static -C incremental=false -C codegen-units=256"
RUST_BACKTRACE: "1"

# avoid rust stack overflows on tpc-ds tests
RUST_MINSTACK: "3000000"
macos:
name: cargo test (mac)
runs-on: macos-latest
Expand All @@ -324,6 +340,7 @@ jobs:
# with a OS-dependent path.
- name: Setup Rust toolchain
run: |
rustup update stable
rustup toolchain install stable
rustup default stable
rustup component add rustfmt
Expand All @@ -335,8 +352,12 @@ jobs:
cargo test --lib --tests --bins --all-features
env:
# do not produce debug symbols to keep memory usage down
RUSTFLAGS: "-C debuginfo=0"
# hardcode the other profile parameters to avoid picking up profile override values
# More on Cargo profiles https://doc.rust-lang.org/cargo/reference/profiles.html?profile-settings#profile-settings
RUSTFLAGS: "-C debuginfo=0 -C opt-level=0 -C incremental=false -C codegen-units=256"
RUST_BACKTRACE: "1"
# avoid rust stack overflows on tpc-ds tests
RUST_MINSTACK: "3000000"

test-datafusion-pyarrow:
name: cargo test pyarrow (amd64)
Expand All @@ -348,7 +369,7 @@ jobs:
- uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
python-version: "3.8"
- name: Install PyArrow
Expand Down
67 changes: 25 additions & 42 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,7 @@

[workspace]
exclude = ["datafusion-cli"]
members = [
"datafusion/common",
"datafusion/core",
"datafusion/expr",
"datafusion/execution",
"datafusion/optimizer",
"datafusion/physical-expr",
"datafusion/physical-plan",
"datafusion/proto",
"datafusion/proto/gen",
"datafusion/sql",
"datafusion/sqllogictest",
"datafusion/substrait",
"datafusion/wasmtest",
"datafusion-examples",
"docs",
"test-utils",
"benchmarks",
members = ["datafusion/common", "datafusion/core", "datafusion/expr", "datafusion/execution", "datafusion/optimizer", "datafusion/physical-expr", "datafusion/physical-plan", "datafusion/proto", "datafusion/proto/gen", "datafusion/sql", "datafusion/sqllogictest", "datafusion/substrait", "datafusion/wasmtest", "datafusion-examples", "docs", "test-utils", "benchmarks",
]
resolver = "2"

Expand All @@ -46,49 +29,50 @@ license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/arrow-datafusion"
rust-version = "1.70"
version = "33.0.0"
version = "34.0.0"

[workspace.dependencies]
arrow = { version = "48.0.0", features = ["prettyprint"] }
arrow-array = { version = "48.0.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "48.0.0", default-features = false }
arrow-flight = { version = "48.0.0", features = ["flight-sql-experimental"] }
arrow-ord = { version = "48.0.0", default-features = false }
arrow-schema = { version = "48.0.0", default-features = false }
arrow = { version = "49.0.0", features = ["prettyprint"] }
arrow-array = { version = "49.0.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "49.0.0", default-features = false }
arrow-flight = { version = "49.0.0", features = ["flight-sql-experimental"] }
arrow-ipc = { version = "49.0.0", default-features = false, features = ["lz4"] }
arrow-ord = { version = "49.0.0", default-features = false }
arrow-schema = { version = "49.0.0", default-features = false }
async-trait = "0.1.73"
bigdecimal = "0.4.1"
bytes = "1.4"
chrono = { version = "0.4.31", default-features = false }
ctor = "0.2.0"
datafusion = { path = "datafusion/core" }
datafusion-common = { path = "datafusion/common" }
datafusion-expr = { path = "datafusion/expr" }
datafusion-sql = { path = "datafusion/sql" }
datafusion-optimizer = { path = "datafusion/optimizer" }
datafusion-physical-expr = { path = "datafusion/physical-expr" }
datafusion-physical-plan = { path = "datafusion/physical-plan" }
datafusion-execution = { path = "datafusion/execution" }
datafusion-proto = { path = "datafusion/proto" }
datafusion-sqllogictest = { path = "datafusion/sqllogictest" }
datafusion-substrait = { path = "datafusion/substrait" }
dashmap = "5.4.0"
datafusion = { path = "datafusion/core", version = "34.0.0" }
datafusion-common = { path = "datafusion/common", version = "34.0.0" }
datafusion-execution = { path = "datafusion/execution", version = "34.0.0" }
datafusion-expr = { path = "datafusion/expr", version = "34.0.0" }
datafusion-optimizer = { path = "datafusion/optimizer", version = "34.0.0" }
datafusion-physical-expr = { path = "datafusion/physical-expr", version = "34.0.0" }
datafusion-physical-plan = { path = "datafusion/physical-plan", version = "34.0.0" }
datafusion-proto = { path = "datafusion/proto", version = "34.0.0" }
datafusion-sql = { path = "datafusion/sql", version = "34.0.0" }
datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "34.0.0" }
datafusion-substrait = { path = "datafusion/substrait", version = "34.0.0" }
doc-comment = "0.3"
env_logger = "0.10"
futures = "0.3"
half = "2.2.1"
indexmap = "2.0.0"
itertools = "0.11"
itertools = "0.12"
log = "^0.4"
num_cpus = "1.13.0"
object_store = "0.7.0"
object_store = { version = "0.8.0", default-features = false }
parking_lot = "0.12"
parquet = { version = "48.0.0", features = ["arrow", "async", "object_store"] }
parquet = { version = "49.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
rand = "0.8"
rstest = "0.18.0"
serde_json = "1"
sqlparser = { version = "0.39.0", features = ["visitor"] }
sqlparser = { version = "0.41.0", features = ["visitor"] }
tempfile = "3"
thiserror = "1.0.44"
chrono = { version = "0.4.31", default-features = false }
url = "2.2"

[profile.release]
Expand All @@ -108,4 +92,3 @@ opt-level = 3
overflow-checks = false
panic = 'unwind'
rpath = false

18 changes: 12 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ in-memory format. [Python Bindings](https://github.com/apache/arrow-datafusion-p
Here are links to some important information

- [Project Site](https://arrow.apache.org/datafusion)
- [Installation](https://arrow.apache.org/datafusion/user-guide/cli.html#installation)
- [Rust Getting Started](https://arrow.apache.org/datafusion/user-guide/example-usage.html)
- [Rust DataFrame API](https://arrow.apache.org/datafusion/user-guide/dataframe.html)
- [Rust API docs](https://docs.rs/datafusion/latest/datafusion)
Expand All @@ -40,8 +41,19 @@ Here are links to some important information
DataFusion is great for building projects such as domain-specific query engines, new database platforms and data pipelines, query languages and more.
It lets you start quickly from a fully working engine, and then customize those features specific to your use. [Click Here](https://arrow.apache.org/datafusion/user-guide/introduction.html#known-users) to see a list of known users.

## Contributing to DataFusion

Please see the [developer’s guide] for contributing and [communication] for getting in touch with us.

[developer’s guide]: https://arrow.apache.org/datafusion/contributor-guide/index.html#developer-s-guide
[communication]: https://arrow.apache.org/datafusion/contributor-guide/communication.html

## Crate features

This crate has several [features] which can be specified in your `Cargo.toml`.

[features]: https://doc.rust-lang.org/cargo/reference/features.html

Default features:

- `compression`: reading files compressed with `xz2`, `bzip2`, `flate2`, and `zstd`
Expand All @@ -65,9 +77,3 @@ Optional features:
## Rust Version Compatibility

This crate is tested with the latest stable version of Rust. We do not currently test against other, older versions of the Rust compiler.

## Contributing to DataFusion

The [developer’s guide] contains information on how to contribute.

[developer’s guide]: https://arrow.apache.org/datafusion/contributor-guide/index.html#developer-s-guide
10 changes: 5 additions & 5 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
[package]
name = "datafusion-benchmarks"
description = "DataFusion Benchmarks"
version = "33.0.0"
version = "34.0.0"
edition = { workspace = true }
authors = ["Apache Arrow <dev@arrow.apache.org>"]
homepage = "https://github.com/apache/arrow-datafusion"
Expand All @@ -34,14 +34,14 @@ snmalloc = ["snmalloc-rs"]

[dependencies]
arrow = { workspace = true }
datafusion = { path = "../datafusion/core", version = "33.0.0" }
datafusion-common = { path = "../datafusion/common", version = "33.0.0" }
datafusion = { path = "../datafusion/core", version = "34.0.0" }
datafusion-common = { path = "../datafusion/common", version = "34.0.0" }
env_logger = { workspace = true }
futures = { workspace = true }
log = { workspace = true }
mimalloc = { version = "0.1", optional = true, default-features = false }
num_cpus = { workspace = true }
parquet = { workspace = true }
parquet = { workspace = true, default-features = true }
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { workspace = true }
snmalloc-rs = { version = "0.3", optional = true }
Expand All @@ -50,4 +50,4 @@ test-utils = { path = "../test-utils/", version = "0.1.0" }
tokio = { version = "^1.0", features = ["macros", "rt", "rt-multi-thread", "parking_lot"] }

[dev-dependencies]
datafusion-proto = { path = "../datafusion/proto", version = "33.0.0" }
datafusion-proto = { path = "../datafusion/proto", version = "34.0.0" }
Loading

0 comments on commit e0bd40b

Please sign in to comment.