From 27288a08d8f0c5e23ae1dc3c572f15517daf6cec Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 29 Jun 2024 09:27:21 -0700 Subject: [PATCH] fix: Fix range out of index error with a temporary workaround (#584) * fix: Fix range out of index error by using custom arrow-rs repo * Add custom Java Arrow classes * Add a hack * Update * Update * Update to use https://github.com/apache/arrow-rs/pull/5958 * Use tustvold's branch * Use official arrow-rs repo --- .../org/apache/arrow/c/ArrowImporter.java | 3 +- .../apache/arrow/c/CometArrayImporter.java | 152 +++++++ .../arrow/c/CometBufferImportTypeVisitor.java | 398 ++++++++++++++++++ core/Cargo.lock | 81 ++-- core/Cargo.toml | 26 +- 5 files changed, 592 insertions(+), 68 deletions(-) create mode 100644 common/src/main/java/org/apache/arrow/c/CometArrayImporter.java create mode 100644 common/src/main/java/org/apache/arrow/c/CometBufferImportTypeVisitor.java diff --git a/common/src/main/java/org/apache/arrow/c/ArrowImporter.java b/common/src/main/java/org/apache/arrow/c/ArrowImporter.java index 90398cb72..1f0cbd412 100644 --- a/common/src/main/java/org/apache/arrow/c/ArrowImporter.java +++ b/common/src/main/java/org/apache/arrow/c/ArrowImporter.java @@ -55,7 +55,8 @@ public FieldVector importVector( ArrowArray array, ArrowSchema schema, CDataDictionaryProvider provider) { Field field = importField(schema, provider); FieldVector vector = field.createVector(allocator); - Data.importIntoVector(allocator, array, vector, provider); + CometArrayImporter importer = new CometArrayImporter(allocator, vector, provider); + importer.importArray(array); return vector; } } diff --git a/common/src/main/java/org/apache/arrow/c/CometArrayImporter.java b/common/src/main/java/org/apache/arrow/c/CometArrayImporter.java new file mode 100644 index 000000000..119055b5f --- /dev/null +++ b/common/src/main/java/org/apache/arrow/c/CometArrayImporter.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * 
or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.arrow.c; + +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.dictionary.Dictionary; +import org.apache.arrow.vector.dictionary.DictionaryProvider; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.pojo.DictionaryEncoding; + +import static org.apache.arrow.c.NativeUtil.NULL; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +import static org.apache.arrow.util.Preconditions.checkNotNull; +import static org.apache.arrow.util.Preconditions.checkState; + +/** + * Importer for {@link ArrowArray}. We copy it from Arrow `ArrayImporter` because we need to use + * `CometBufferImportTypeVisitor` instead of Arrow `BufferImportTypeVisitor`. 
+ */ +final class CometArrayImporter { + private static final int MAX_IMPORT_RECURSION_LEVEL = 64; // nesting limit for child imports + + private final BufferAllocator allocator; + private final FieldVector vector; + private final DictionaryProvider dictionaryProvider; // may be null; checked before dictionary lookup + + private ReferenceCountedArrowArray underlyingAllocation; + private int recursionLevel; + + CometArrayImporter( + BufferAllocator allocator, FieldVector vector, DictionaryProvider dictionaryProvider) { + this.allocator = Preconditions.checkNotNull(allocator); + this.vector = Preconditions.checkNotNull(vector); + this.dictionaryProvider = dictionaryProvider; + } + + void importArray(ArrowArray src) { // top-level entry: moves src, then loads it into the vector + ArrowArray.Snapshot snapshot = src.snapshot(); + checkState(snapshot.release != NULL, "Cannot import released ArrowArray"); + + // Move imported array + ArrowArray ownedArray = ArrowArray.allocateNew(allocator); + ownedArray.save(snapshot); + src.markReleased(); + src.close(); + + recursionLevel = 0; + + // This keeps the array alive as long as there are any buffers that need it + underlyingAllocation = new ReferenceCountedArrowArray(ownedArray); + try { + doImport(snapshot); + } finally { + underlyingAllocation.release(); + } + } + + private void importChild(CometArrayImporter parent, ArrowArray src) { + ArrowArray.Snapshot snapshot = src.snapshot(); + checkState(snapshot.release != NULL, "Cannot import released ArrowArray"); + recursionLevel = parent.recursionLevel + 1; + checkState( + recursionLevel <= MAX_IMPORT_RECURSION_LEVEL, + "Recursion level in ArrowArray struct exceeded"); + // Child buffers will keep the entire parent import alive. 
+ underlyingAllocation = parent.underlyingAllocation; + doImport(snapshot); + } + + private void doImport(ArrowArray.Snapshot snapshot) { + // First import children (required for reconstituting parent array data) + long[] children = + NativeUtil.toJavaArray(snapshot.children, checkedCastToInt(snapshot.n_children)); + if (children != null && children.length > 0) { + List<FieldVector> childVectors = vector.getChildrenFromFields(); + checkState( + children.length == childVectors.size(), + "ArrowArray struct has %s children (expected %s)", + children.length, + childVectors.size()); + for (int i = 0; i < children.length; i++) { + checkState(children[i] != NULL, "ArrowArray struct has NULL child at position %s", i); + CometArrayImporter childImporter = + new CometArrayImporter(allocator, childVectors.get(i), dictionaryProvider); + childImporter.importChild(this, ArrowArray.wrap(children[i])); + } + } + + // Handle import of a dictionary encoded vector + if (snapshot.dictionary != NULL) { + DictionaryEncoding encoding = vector.getField().getDictionary(); + checkNotNull(encoding, "Missing encoding on import of ArrowArray with dictionary"); + checkNotNull(dictionaryProvider, "Missing dictionary provider on import of ArrowArray"); + Dictionary dictionary = dictionaryProvider.lookup(encoding.getId()); + checkNotNull(dictionary, "Dictionary lookup failed on import of ArrowArray with dictionary"); + + // reset the dictionary vector to the initial state + dictionary.getVector().clear(); + + CometArrayImporter dictionaryImporter = + new CometArrayImporter(allocator, dictionary.getVector(), dictionaryProvider); + dictionaryImporter.importChild(this, ArrowArray.wrap(snapshot.dictionary)); + } + + // Import main data + ArrowFieldNode fieldNode = new ArrowFieldNode(snapshot.length, snapshot.null_count); + long[] bufferPointers = + NativeUtil.toJavaArray(snapshot.buffers, checkedCastToInt(snapshot.n_buffers)); + + try (final CometBufferImportTypeVisitor visitor = + new CometBufferImportTypeVisitor( + allocator, underlyingAllocation, fieldNode, snapshot, bufferPointers)) { + final List<ArrowBuf> 
buffers; + if (bufferPointers == null || bufferPointers.length == 0) { + buffers = Collections.emptyList(); // nothing exported: load the vector with no buffers + } else { + buffers = vector.getField().getType().accept(visitor); + } + vector.loadFieldBuffers(fieldNode, buffers); + } catch (Exception e) { + throw new IllegalArgumentException( + "Could not load buffers for field " + + vector.getField() + + ". error message: " + + e.getMessage(), + e); + } + } +} diff --git a/common/src/main/java/org/apache/arrow/c/CometBufferImportTypeVisitor.java b/common/src/main/java/org/apache/arrow/c/CometBufferImportTypeVisitor.java new file mode 100644 index 000000000..b80e6b7f2 --- /dev/null +++ b/common/src/main/java/org/apache/arrow/c/CometBufferImportTypeVisitor.java @@ -0,0 +1,398 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.arrow.c; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.util.AutoCloseables; +import org.apache.arrow.util.VisibleForTesting; +import org.apache.arrow.vector.DateDayVector; +import org.apache.arrow.vector.DateMilliVector; +import org.apache.arrow.vector.DurationVector; +import org.apache.arrow.vector.Float4Vector; +import org.apache.arrow.vector.Float8Vector; +import org.apache.arrow.vector.IntervalDayVector; +import org.apache.arrow.vector.IntervalMonthDayNanoVector; +import org.apache.arrow.vector.IntervalYearVector; +import org.apache.arrow.vector.LargeVarBinaryVector; +import org.apache.arrow.vector.LargeVarCharVector; +import org.apache.arrow.vector.TimeMicroVector; +import org.apache.arrow.vector.TimeMilliVector; +import org.apache.arrow.vector.TimeNanoVector; +import org.apache.arrow.vector.TimeSecVector; +import org.apache.arrow.vector.TimeStampVector; +import org.apache.arrow.vector.VarBinaryVector; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.complex.DenseUnionVector; +import org.apache.arrow.vector.complex.LargeListVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.MapVector; +import org.apache.arrow.vector.complex.UnionVector; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.util.DataSizeRoundingUtil; + +import static org.apache.arrow.c.NativeUtil.NULL; +import static org.apache.arrow.util.Preconditions.checkState; + +/** + * Import buffers from a C Data Interface struct. We copy it from Arrow `BufferImportTypeVisitor` + * and fix the issue: https://github.com/apache/arrow/issues/42156. 
+ */ +class CometBufferImportTypeVisitor + implements ArrowType.ArrowTypeVisitor<List<ArrowBuf>>, AutoCloseable { + private final BufferAllocator allocator; + private final ReferenceCountedArrowArray underlyingAllocation; + private final ArrowFieldNode fieldNode; + private final ArrowArray.Snapshot snapshot; + private final long[] buffers; + private final List<ArrowBuf> imported; // every buffer handed out so far; closed by close() + + CometBufferImportTypeVisitor( + BufferAllocator allocator, + ReferenceCountedArrowArray underlyingAllocation, + ArrowFieldNode fieldNode, + ArrowArray.Snapshot snapshot, + long[] buffers) { + this.allocator = allocator; + this.underlyingAllocation = underlyingAllocation; + this.fieldNode = fieldNode; + this.snapshot = snapshot; + this.buffers = buffers; + this.imported = new ArrayList<>(); + } + + @Override + public void close() throws Exception { + AutoCloseables.close(imported); + } + + @VisibleForTesting + ArrowBuf importBuffer(ArrowType type, int index, long capacity) { + return importBuffer(type, index, 0, capacity); + } + + @VisibleForTesting + ArrowBuf importBuffer(ArrowType type, int index, long offset, long capacity) { + checkState( + buffers.length > index, + "Expected at least %s buffers for type %s, but found %s", + index + 1, + type, + buffers.length); + long bufferPtr = buffers[index]; + // Test the raw pointer first: a NULL base plus a non-zero offset is still not a valid buffer. + if (bufferPtr == NULL) { + // C array may be NULL but only accept that if expected capacity is zero too + if (capacity != 0) { + throw new IllegalStateException( + String.format("Buffer %s for type %s cannot be null", index, type)); + } else { + // no data in the C array, return an empty buffer + return allocator.getEmpty(); + } + } + final long address = bufferPtr + offset; + ArrowBuf buf = underlyingAllocation.unsafeAssociateAllocation(allocator, capacity, address); + imported.add(buf); + return buf; + } + + private ArrowBuf importFixedBits(ArrowType type, int index, long bitsPerSlot) { + final long capacity = DataSizeRoundingUtil.divideBy8Ceil(bitsPerSlot * fieldNode.getLength()); + return importBuffer(type, index, capacity); + } + + 
private ArrowBuf importFixedBytes(ArrowType type, int index, long bytesPerSlot) { + final long capacity = bytesPerSlot * fieldNode.getLength(); + return importBuffer(type, index, capacity); + } + + private ArrowBuf importOffsets(ArrowType type, long bytesPerSlot) { + final long capacity = bytesPerSlot * (fieldNode.getLength() + 1); + final long offset = snapshot.offset * bytesPerSlot; // byte displacement for the array offset + return importBuffer(type, 1, offset, capacity); + } + + private ArrowBuf importData(ArrowType type, long capacity) { + return importBuffer(type, 2, capacity); + } + + private ArrowBuf maybeImportBitmap(ArrowType type) { + checkState( + buffers.length > 0, + "Expected at least %s buffers for type %s, but found %s", + 1, + type, + buffers.length); + if (buffers[0] == NULL) { + return null; // no validity buffer was exported + } + return importFixedBits(type, 0, /* bitsPerSlot= */ 1); + } + + @Override + public List<ArrowBuf> visit(ArrowType.Null type) { + checkState( + buffers.length == 0, + "Expected %s buffers for type %s, but found %s", + 0, + type, + buffers.length); + return Collections.emptyList(); + } + + @Override + public List<ArrowBuf> visit(ArrowType.Struct type) { + return Collections.singletonList(maybeImportBitmap(type)); + } + + @Override + public List<ArrowBuf> visit(ArrowType.List type) { + return Arrays.asList(maybeImportBitmap(type), importOffsets(type, ListVector.OFFSET_WIDTH)); + } + + @Override + public List<ArrowBuf> visit(ArrowType.LargeList type) { + return Arrays.asList( + maybeImportBitmap(type), importOffsets(type, LargeListVector.OFFSET_WIDTH)); + } + + @Override + public List<ArrowBuf> visit(ArrowType.FixedSizeList type) { + return Collections.singletonList(maybeImportBitmap(type)); + } + + @Override + public List<ArrowBuf> visit(ArrowType.Union type) { + switch (type.getMode()) { + case Sparse: + return Collections.singletonList(importFixedBytes(type, 0, UnionVector.TYPE_WIDTH)); + case Dense: + return Arrays.asList( + importFixedBytes(type, 0, DenseUnionVector.TYPE_WIDTH), + importFixedBytes(type, 1, DenseUnionVector.OFFSET_WIDTH)); + default: + 
throw new UnsupportedOperationException("Importing buffers for union type: " + type); + } + } + + @Override + public List<ArrowBuf> visit(ArrowType.Map type) { + return Arrays.asList(maybeImportBitmap(type), importOffsets(type, MapVector.OFFSET_WIDTH)); + } + + @Override + public List<ArrowBuf> visit(ArrowType.Int type) { + return Arrays.asList(maybeImportBitmap(type), importFixedBits(type, 1, type.getBitWidth())); + } + + @Override + public List<ArrowBuf> visit(ArrowType.FloatingPoint type) { + switch (type.getPrecision()) { + case HALF: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, /* bytesPerSlot= */ 2)); + case SINGLE: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, Float4Vector.TYPE_WIDTH)); + case DOUBLE: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, Float8Vector.TYPE_WIDTH)); + default: + throw new UnsupportedOperationException("Importing buffers for type: " + type); + } + } + + @Override + public List<ArrowBuf> visit(ArrowType.Utf8 type) { + try (ArrowBuf offsets = importOffsets(type, VarCharVector.OFFSET_WIDTH)) { + final int start = offsets.getInt(0); + final int end = offsets.getInt(fieldNode.getLength() * (long) VarCharVector.OFFSET_WIDTH); + checkState( + end >= start, + "Offset buffer for type %s is malformed: start: %s, end: %s", + type, + start, + end); + // HACK: For the issue https://github.com/apache/datafusion-comet/issues/540 + // As Arrow Java doesn't support `offset` in C Data interface, we cannot correctly import + // a slice of string from arrow-rs to Java Arrow and then export it to arrow-rs again. + // So we add this hack to always take full length of data buffer by assuming the first offset + // is always 0 which is true for Arrow Java and arrow-rs. 
+ final int len = end; + offsets.getReferenceManager().retain(); + return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); + } + } + + @Override + public List<ArrowBuf> visit(ArrowType.LargeUtf8 type) { + try (ArrowBuf offsets = importOffsets(type, LargeVarCharVector.OFFSET_WIDTH)) { + final long start = offsets.getLong(0); + final long end = + offsets.getLong(fieldNode.getLength() * (long) LargeVarCharVector.OFFSET_WIDTH); + checkState( + end >= start, + "Offset buffer for type %s is malformed: start: %s, end: %s", + type, + start, + end); + // HACK: For the issue https://github.com/apache/datafusion-comet/issues/540 + // As Arrow Java doesn't support `offset` in C Data interface, we cannot correctly import + // a slice of string from arrow-rs to Java Arrow and then export it to arrow-rs again. + // So we add this hack to always take full length of data buffer by assuming the first offset + // is always 0 which is true for Arrow Java and arrow-rs. + final long len = end; + offsets.getReferenceManager().retain(); + return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); + } + } + + @Override + public List<ArrowBuf> visit(ArrowType.Binary type) { + try (ArrowBuf offsets = importOffsets(type, VarBinaryVector.OFFSET_WIDTH)) { + final int start = offsets.getInt(0); + final int end = offsets.getInt(fieldNode.getLength() * (long) VarBinaryVector.OFFSET_WIDTH); + checkState( + end >= start, + "Offset buffer for type %s is malformed: start: %s, end: %s", + type, + start, + end); + // HACK: For the issue https://github.com/apache/datafusion-comet/issues/540 + // As Arrow Java doesn't support `offset` in C Data interface, we cannot correctly import + // a slice of string from arrow-rs to Java Arrow and then export it to arrow-rs again. + // So we add this hack to always take full length of data buffer by assuming the first offset + // is always 0 which is true for Arrow Java and arrow-rs. 
+ final int len = end; + offsets.getReferenceManager().retain(); + return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); + } + } + + @Override + public List<ArrowBuf> visit(ArrowType.LargeBinary type) { + try (ArrowBuf offsets = importOffsets(type, LargeVarBinaryVector.OFFSET_WIDTH)) { + final long start = offsets.getLong(0); + // TODO: need better tests to cover the failure when I forget to multiply by offset width + final long end = + offsets.getLong(fieldNode.getLength() * (long) LargeVarBinaryVector.OFFSET_WIDTH); + checkState( + end >= start, + "Offset buffer for type %s is malformed: start: %s, end: %s", + type, + start, + end); + // HACK: For the issue https://github.com/apache/datafusion-comet/issues/540 + // As Arrow Java doesn't support `offset` in C Data interface, we cannot correctly import + // a slice of string from arrow-rs to Java Arrow and then export it to arrow-rs again. + // So we add this hack to always take full length of data buffer by assuming the first offset + // is always 0 which is true for Arrow Java and arrow-rs. 
+ final long len = end; + offsets.getReferenceManager().retain(); + return Arrays.asList(maybeImportBitmap(type), offsets, importData(type, len)); + } + } + + @Override + public List<ArrowBuf> visit(ArrowType.FixedSizeBinary type) { + return Arrays.asList(maybeImportBitmap(type), importFixedBytes(type, 1, type.getByteWidth())); + } + + @Override + public List<ArrowBuf> visit(ArrowType.Bool type) { + return Arrays.asList(maybeImportBitmap(type), importFixedBits(type, 1, /* bitsPerSlot= */ 1)); + } + + @Override + public List<ArrowBuf> visit(ArrowType.Decimal type) { + return Arrays.asList(maybeImportBitmap(type), importFixedBits(type, 1, type.getBitWidth())); + } + + @Override + public List<ArrowBuf> visit(ArrowType.Date type) { + switch (type.getUnit()) { + case DAY: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, DateDayVector.TYPE_WIDTH)); + case MILLISECOND: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, DateMilliVector.TYPE_WIDTH)); + default: + throw new UnsupportedOperationException("Importing buffers for type: " + type); + } + } + + @Override + public List<ArrowBuf> visit(ArrowType.Time type) { + switch (type.getUnit()) { + case SECOND: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, TimeSecVector.TYPE_WIDTH)); + case MILLISECOND: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, TimeMilliVector.TYPE_WIDTH)); + case MICROSECOND: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, TimeMicroVector.TYPE_WIDTH)); + case NANOSECOND: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, TimeNanoVector.TYPE_WIDTH)); + default: + throw new UnsupportedOperationException("Importing buffers for type: " + type); + } + } + + @Override + public List<ArrowBuf> visit(ArrowType.Timestamp type) { + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, TimeStampVector.TYPE_WIDTH)); + } + + @Override + public List<ArrowBuf> visit(ArrowType.Interval type) { + switch 
(type.getUnit()) { + case YEAR_MONTH: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, IntervalYearVector.TYPE_WIDTH)); + case DAY_TIME: + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, IntervalDayVector.TYPE_WIDTH)); + case MONTH_DAY_NANO: + return Arrays.asList( + maybeImportBitmap(type), + importFixedBytes(type, 1, IntervalMonthDayNanoVector.TYPE_WIDTH)); + default: + throw new UnsupportedOperationException("Importing buffers for type: " + type); + } + } + + @Override + public List<ArrowBuf> visit(ArrowType.Duration type) { + return Arrays.asList( + maybeImportBitmap(type), importFixedBytes(type, 1, DurationVector.TYPE_WIDTH)); + } +} diff --git a/core/Cargo.lock b/core/Cargo.lock index eca8b97d4..71fe6eb2e 100644 --- a/core/Cargo.lock +++ b/core/Cargo.lock @@ -115,8 +115,7 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arrow" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ae9728f104939be6d8d9b368a354b4929b0569160ea1641f0721b55a861ce38" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "arrow-arith", "arrow-array", @@ -136,8 +135,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7029a5b3efbeafbf4a12d12dc16b8f9e9bff20a410b8c25c5d28acc089e1043" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "arrow-array", "arrow-buffer", @@ -151,8 +149,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d33238427c60271710695f17742f45b1a5dc5bcfc5c15331c25ddfe7abf70d97" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" 
dependencies = [ "ahash", "arrow-buffer", @@ -168,8 +165,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe9b95e825ae838efaf77e366c00d3fc8cca78134c9db497d6bda425f2e7b7c1" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "bytes", "half", @@ -179,8 +175,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87cf8385a9d5b5fcde771661dd07652b79b9139fea66193eda6a88664400ccab" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "arrow-array", "arrow-buffer", @@ -200,8 +195,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea5068bef430a86690059665e40034625ec323ffa4dd21972048eebb0127adc" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "arrow-array", "arrow-buffer", @@ -219,8 +213,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb29be98f987bcf217b070512bb7afba2f65180858bca462edf4a39d84a23e10" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "arrow-buffer", "arrow-schema", @@ -231,8 +224,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffc68f6523970aa6f7ce1dc9a33a7d9284cfb9af77d4ad3e617dbe5d79cc6ec8" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "arrow-array", "arrow-buffer", @@ -246,8 +238,7 @@ 
dependencies = [ [[package]] name = "arrow-json" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2041380f94bd6437ab648e6c2085a045e45a0c44f91a1b9a4fe3fed3d379bfb1" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "arrow-array", "arrow-buffer", @@ -266,8 +257,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb56ed1547004e12203652f12fe12e824161ff9d1e5cf2a7dc4ff02ba94f413" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "arrow-array", "arrow-buffer", @@ -281,8 +271,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "575b42f1fc588f2da6977b94a5ca565459f5ab07b60545e17243fb9a7ed6d43e" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "ahash", "arrow-array", @@ -296,8 +285,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32aae6a60458a2389c0da89c9de0b7932427776127da1a738e2efc21d32f3393" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "bitflags 2.5.0", ] @@ -305,8 +293,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de36abaef8767b4220d7b4a8c2fe5ffc78b47db81b03d77e2136091c3ba39102" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "ahash", "arrow-array", @@ -319,8 +306,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "52.0.0" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e435ada8409bcafc910bc3e0077f532a4daa20e99060a496685c0e3e53cc2597" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "arrow-array", "arrow-buffer", @@ -805,8 +791,7 @@ dependencies = [ [[package]] name = "datafusion" version = "39.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f92d2d7a9cba4580900b32b009848d9eb35f1028ac84cdd6ddcf97612cd0068" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "ahash", "arrow", @@ -913,8 +898,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "39.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effed030d2c1667eb1e11df5372d4981eaf5d11a521be32220b3985ae5ba6971" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "ahash", "arrow", @@ -934,8 +918,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "39.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0091318129dad1359f08e4c6c71f855163c35bba05d1dbf983196f727857894" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "tokio", ] @@ -943,8 +926,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "39.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8385aba84fc4a06d3ebccfbcbf9b4f985e80c762fac634b49079f7cc14933fb1" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "arrow", "chrono", @@ -964,8 +946,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "39.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebb192f0055d2ce64e38ac100abc18e4e6ae9734d3c28eee522bbbd6a32108a3" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "ahash", "arrow", @@ -983,8 +964,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "39.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c081ae5b7edd712b92767fb8ed5c0e32755682f8075707666cd70835807c0b" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "arrow", "base64", @@ -1010,8 +990,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "39.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "feb28a4ea52c28a26990646986a27c4052829a2a2572386258679e19263f8b78" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "ahash", "arrow", @@ -1028,8 +1007,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "39.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12172f2a6c9eb4992a51e62d709eeba5dedaa3b5369cce37ff6c2260e100ba76" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "arrow", "async-trait", @@ -1047,8 +1025,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "39.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a3fce531b623e94180f6cd33d620ef01530405751b6ddd2fd96250cdbd78e2e" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "ahash", "arrow", @@ -1078,8 +1055,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "39.0.0" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "046400b6a2cc3ed57a7c576f5ae6aecc77804ac8e0186926b278b189305b2a77" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "arrow", "datafusion-common", @@ -1090,8 +1066,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "39.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4aed47f5a2ad8766260befb375b201592e86a08b260256e168ae4311426a2bff" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "ahash", "arrow", @@ -1124,8 +1099,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "39.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fa92bb1fd15e46ce5fb6f1c85f3ac054592560f294429a28e392b5f9cd4255e" +source = "git+https://github.com/viirya/arrow-datafusion.git?rev=17446b1#17446b1886d2872be482efa4225d2b35e5d96569" dependencies = [ "arrow", "arrow-array", @@ -2064,8 +2038,7 @@ dependencies = [ [[package]] name = "parquet" version = "52.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c3b5322cc1bbf67f11c079c42be41a55949099b78732f7dba9e15edde40eab" +source = "git+https://github.com/apache/arrow-rs.git?rev=0a4d8a1#0a4d8a14b58e45ef92e31541f0b51a5b25de5f10" dependencies = [ "ahash", "bytes", diff --git a/core/Cargo.toml b/core/Cargo.toml index 7c22876dc..c3e924a44 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -33,13 +33,13 @@ include = [ [dependencies] parquet-format = "4.0.0" # This must be kept in sync with that from parquet crate -arrow = { version = "52.0.0", features = ["prettyprint", "ffi", "chrono-tz"] } -arrow-array = { version = "52.0.0" } -arrow-buffer = { version = "52.0.0" } -arrow-data = { version = "52.0.0" } -arrow-schema = { version = "52.0.0" } -arrow-string = { version = 
"52.0.0" } -parquet = { version = "52.0.0", default-features = false, features = ["experimental"] } +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "0a4d8a1", features = ["prettyprint", "ffi", "chrono-tz"] } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "0a4d8a1" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "0a4d8a1" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "0a4d8a1" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "0a4d8a1" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "0a4d8a1" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "0a4d8a1", default-features = false, features = ["experimental"] } half = { version = "2.4.1", default-features = false } futures = "0.3.28" mimalloc = { version = "*", default-features = false, optional = true } @@ -71,12 +71,12 @@ itertools = "0.11.0" chrono = { version = "0.4", default-features = false, features = ["clock"] } chrono-tz = { version = "0.8" } paste = "1.0.14" -datafusion-common = { version = "39.0.0" } -datafusion = { default-features = false, version = "39.0.0", features = ["unicode_expressions", "crypto_expressions"] } -datafusion-functions = { version = "39.0.0", features = ["crypto_expressions"] } -datafusion-expr = { version = "39.0.0", default-features = false } -datafusion-physical-expr-common = { version = "39.0.0", default-features = false } -datafusion-physical-expr = { version = "39.0.0", default-features = false } +datafusion-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "17446b1" } +datafusion = { default-features = false, git = "https://github.com/viirya/arrow-datafusion.git", rev = "17446b1", features = ["unicode_expressions", "crypto_expressions"] } +datafusion-functions = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "17446b1", features = ["crypto_expressions"] } +datafusion-expr = { git = 
"https://github.com/viirya/arrow-datafusion.git", rev = "17446b1", default-features = false } +datafusion-physical-expr-common = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "17446b1", default-features = false } +datafusion-physical-expr = { git = "https://github.com/viirya/arrow-datafusion.git", rev = "17446b1", default-features = false } unicode-segmentation = "^1.10.1" once_cell = "1.18.0" regex = "1.9.6"