diff --git a/common/src/main/java/org/apache/arrow/c/CometBufferImportTypeVisitor.java b/common/src/main/java/org/apache/arrow/c/CometBufferImportTypeVisitor.java index 76141729c7..e09f98b55b 100644 --- a/common/src/main/java/org/apache/arrow/c/CometBufferImportTypeVisitor.java +++ b/common/src/main/java/org/apache/arrow/c/CometBufferImportTypeVisitor.java @@ -260,7 +260,12 @@ public List visit(ArrowType.LargeUtf8 type) { type, start, end); - final long len = end - start; + // HACK: For the issue https://github.com/apache/datafusion-comet/issues/540 + // As Arrow Java doesn't support `offset` in C Data interface, we cannot correctly import + // a slice of string from arrow-rs to Java Arrow and then export it to arrow-rs again. + // So we add this hack to always take full length of data buffer by assuming the first offset + // is always 0 which is true for Arrow Java and arrow-rs. + final long len = end; offsets.getReferenceManager().retain(); return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); } @@ -277,7 +282,12 @@ public List visit(ArrowType.Binary type) { type, start, end); - final int len = end - start; + // HACK: For the issue https://github.com/apache/datafusion-comet/issues/540 + // As Arrow Java doesn't support `offset` in C Data interface, we cannot correctly import + // a slice of string from arrow-rs to Java Arrow and then export it to arrow-rs again. + // So we add this hack to always take full length of data buffer by assuming the first offset + // is always 0 which is true for Arrow Java and arrow-rs. 
+ final int len = end; offsets.getReferenceManager().retain(); return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); } @@ -296,7 +306,12 @@ public List visit(ArrowType.LargeBinary type) { type, start, end); - final long len = end - start; + // HACK: For the issue https://github.com/apache/datafusion-comet/issues/540 + // As Arrow Java doesn't support `offset` in C Data interface, we cannot correctly import + // a slice of string from arrow-rs to Java Arrow and then export it to arrow-rs again. + // So we add this hack to always take full length of data buffer by assuming the first offset + // is always 0 which is true for Arrow Java and arrow-rs. + final long len = end; offsets.getReferenceManager().retain(); return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); } diff --git a/core/Cargo.lock b/core/Cargo.lock index 8e6c7fd234..3af5659a85 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -115,7 +115,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "arrow-arith", "arrow-array", @@ -135,7 +135,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "arrow-array", "arrow-buffer", @@ -149,7 +149,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = 
"git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "ahash", "arrow-buffer", @@ -165,7 +165,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "bytes", "half", @@ -175,7 +175,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "arrow-array", "arrow-buffer", @@ -195,7 +195,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "arrow-array", "arrow-buffer", @@ -213,7 +213,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "arrow-buffer", "arrow-schema", @@ -224,7 +224,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "arrow-array", "arrow-buffer", @@ -238,7 +238,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "52.0.0" -source = 
"git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "arrow-array", "arrow-buffer", @@ -257,7 +257,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "arrow-array", "arrow-buffer", @@ -271,7 +271,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "ahash", "arrow-array", @@ -285,7 +285,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "bitflags 2.5.0", ] @@ -293,7 +293,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "ahash", "arrow-array", @@ -306,7 +306,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "arrow-array", "arrow-buffer", @@ 
-791,7 +791,7 @@ dependencies = [ [[package]] name = "datafusion" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "ahash", "arrow", @@ -898,7 +898,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "ahash", "arrow", @@ -918,7 +918,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "tokio", ] @@ -926,7 +926,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "arrow", "chrono", @@ -946,7 +946,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "ahash", "arrow", @@ -964,7 +964,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "39.0.0" -source = 
"git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "arrow", "base64", @@ -990,7 +990,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "ahash", "arrow", @@ -1007,7 +1007,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "arrow", "async-trait", @@ -1025,7 +1025,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "ahash", "arrow", @@ -1055,7 +1055,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "arrow", "datafusion-common", @@ -1066,7 +1066,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = 
"git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "ahash", "arrow", @@ -1099,7 +1099,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "39.0.0" -source = "git+https://github.com/viirya/arrow-datafusion.git?rev=dd5f0e1#dd5f0e19b4b7d070d46d5a8ccb3d98d4a08d25a4" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=690a642#690a642e66588dd45a870ed75a14b9b3a36ac906" dependencies = [ "arrow", "arrow-array", @@ -2038,7 +2038,7 @@ dependencies = [ [[package]] name = "parquet" version = "52.0.0" -source = "git+https://github.com/viirya/arrow-rs.git?rev=564f3bd#564f3bded7b0edd8f2a014f5773446562e8406be" +source = "git+https://github.com/viirya/arrow-rs.git?rev=5978a90#5978a9095f73313fc5e11e5c78ad8918f48d008d" dependencies = [ "ahash", "bytes", diff --git a/core/Cargo.toml b/core/Cargo.toml index 571793cf43..cc96b7a322 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -33,13 +33,13 @@ include = [ [dependencies] parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate -arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd", features = ["prettyprint", "ffi", "chrono-tz"] } -arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd" } -arrow-buffer = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd" } -arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd" } -arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd" } -arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd" } -parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "564f3bd", default-features = false, features = ["experimental"] } +arrow = { git = "https://github.com/viirya/arrow-rs.git", rev = "5978a90", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow-array = { git = "https://github.com/viirya/arrow-rs.git", rev = "5978a90" } +arrow-buffer 
= { git = "https://github.com/viirya/arrow-rs.git", rev = "5978a90" } +arrow-data = { git = "https://github.com/viirya/arrow-rs.git", rev = "5978a90" } +arrow-schema = { git = "https://github.com/viirya/arrow-rs.git", rev = "5978a90" } +arrow-string = { git = "https://github.com/viirya/arrow-rs.git", rev = "5978a90" } +parquet = { git = "https://github.com/viirya/arrow-rs.git", rev = "5978a90", default-features = false, features = ["experimental"] } half = { version = "2.4.1", default-features = false } futures = "0.3.28" mimalloc = { version = "*", default-features = false, optional = true } @@ -71,12 +71,12 @@ itertools = "0.11.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } paste = "1.0.14" -datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1" } -datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1", features = ["crypto_expressions"] } -datafusion-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1", default-features = false } -datafusion-physical-expr-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1", default-features = false } -datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "dd5f0e1", default-features = false } +datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "690a642" } +datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "690a642", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "690a642", features = ["crypto_expressions"] } +datafusion-expr = { git = 
"https://github.com/viirya/arrow-datafusion.git", rev = "690a642", default-features = false } +datafusion-physical-expr-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "690a642", default-features = false } +datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "690a642", default-features = false } unicode-segmentation = "^1.10.1" once_cell = "1.18.0" regex = "1.9.6"