Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion c_glib/test/test-array.rb
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def test_diff
def test_different_type
array = build_string_array(["Start", "Shutdown", "Reboot"])
other_array = build_int8_array([2, 3, 6, 10])
assert_equal("# Array types differed: string vs int8",
assert_equal("# Array types differed: string vs int8\n",
array.diff_unified(other_array))
end
end
Expand Down
1 change: 1 addition & 0 deletions ci/conda_env_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.

# don't add pandas here, because it is not a mandatory test dependency
cffi
cython
cloudpickle
hypothesis
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ set(ARROW_SRCS
tensor.cc
type.cc
visitor.cc
c/bridge.cc
io/buffered.cc
io/compressed.cc
io/file.cc
Expand Down Expand Up @@ -247,6 +248,7 @@ add_subdirectory(testing)
#

add_subdirectory(array)
add_subdirectory(c)
add_subdirectory(io)
add_subdirectory(util)
add_subdirectory(vendored)
Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,12 @@ Result<std::shared_ptr<StructArray>> StructArray::Make(
if (offset > length) {
return Status::IndexError("Offset greater than length of child arrays");
}
if (null_bitmap == nullptr) {
if (null_count > 0) {
return Status::Invalid("null_count = ", null_count, " but no null bitmap given");
}
null_count = 0;
}
return std::make_shared<StructArray>(struct_(fields), length - offset, children,
null_bitmap, null_count, offset);
}
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/array/builder_primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,15 +83,15 @@ class NumericBuilder : public ArrayBuilder {
/// uninitialized memory access
Status AppendNulls(int64_t length) final {
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend(length, static_cast<value_type>(0));
data_builder_.UnsafeAppend(length, value_type{}); // zero
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer the explicit static cast to the comment.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, but it doesn't work with struct DayMilliseconds, which is why it was changed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it'd be helpful to remove the // zeros in favor of that explanation in a comment?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How interesting

UnsafeSetNull(length);
return Status::OK();
}

/// \brief Append a single null element
Status AppendNull() final {
ARROW_RETURN_NOT_OK(Reserve(1));
data_builder_.UnsafeAppend(static_cast<value_type>(0));
data_builder_.UnsafeAppend(value_type{}); // zero
UnsafeAppendToBitmap(false);
return Status::OK();
}
Expand Down Expand Up @@ -243,7 +243,7 @@ class NumericBuilder : public ArrayBuilder {

void UnsafeAppendNull() {
ArrayBuilder::UnsafeAppendToBitmap(false);
data_builder_.UnsafeAppend(0);
data_builder_.UnsafeAppend(value_type{}); // zero
}

std::shared_ptr<DataType> type() const override { return type_; }
Expand Down
41 changes: 6 additions & 35 deletions cpp/src/arrow/array/builder_time.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,52 +21,23 @@

#include <memory>

#include "arrow/array.h"
#include "arrow/array/builder_base.h"
#include "arrow/array/builder_binary.h"
#include "arrow/array/builder_primitive.h"
#include "arrow/buffer_builder.h"
#include "arrow/status.h"
#include "arrow/type_traits.h"
#include "arrow/util/macros.h"

namespace arrow {

class ARROW_EXPORT DayTimeIntervalBuilder : public ArrayBuilder {
// TODO this class is untested

class ARROW_EXPORT DayTimeIntervalBuilder : public NumericBuilder<DayTimeIntervalType> {
public:
using TypeClass = DayTimeIntervalType;
using DayMilliseconds = DayTimeIntervalType::DayMilliseconds;

explicit DayTimeIntervalBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
: DayTimeIntervalBuilder(day_time_interval(), pool) {}

DayTimeIntervalBuilder(std::shared_ptr<DataType> type,
MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
: ArrayBuilder(pool), builder_(fixed_size_binary(sizeof(DayMilliseconds)), pool) {}

void Reset() override { builder_.Reset(); }
Status Resize(int64_t capacity) override { return builder_.Resize(capacity); }
Status Append(DayMilliseconds day_millis) {
return builder_.Append(reinterpret_cast<uint8_t*>(&day_millis));
}
void UnsafeAppend(DayMilliseconds day_millis) {
builder_.UnsafeAppend(reinterpret_cast<uint8_t*>(&day_millis));
}
using ArrayBuilder::UnsafeAppendNull;
Status AppendNull() override { return builder_.AppendNull(); }
Status AppendNulls(int64_t length) override { return builder_.AppendNulls(length); }
Status FinishInternal(std::shared_ptr<ArrayData>* out) override {
auto result = builder_.FinishInternal(out);
if (*out != NULLPTR) {
(*out)->type = type();
}
return result;
}

std::shared_ptr<DataType> type() const override { return day_time_interval(); }

private:
FixedSizeBinaryBuilder builder_;
explicit DayTimeIntervalBuilder(std::shared_ptr<DataType> type,
MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
: NumericBuilder<DayTimeIntervalType>(type, pool) {}
};

} // namespace arrow
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/diff_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ TEST_F(DiffTest, Errors) {
ASSERT_RAISES(TypeError, Diff(*base_, *target_, default_memory_pool()));

ASSERT_FALSE(base_->Equals(*target_, EqualOptions().diff_sink(&formatted)));
ASSERT_EQ(formatted.str(), R"(# Array types differed: int32 vs string)");
ASSERT_EQ(formatted.str(), "# Array types differed: int32 vs string\n");
}

template <typename ArrowType>
Expand Down
69 changes: 51 additions & 18 deletions cpp/src/arrow/array/validate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "arrow/array/validate.h"

#include "arrow/array.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/logging.h"
#include "arrow/visitor_inline.h"

Expand All @@ -40,11 +41,13 @@ struct ValidateArrayVisitor {
ARROW_RETURN_IF(array.data()->buffers.size() != 2,
Status::Invalid("number of buffers is != 2"));

if (array.length() > 0 && array.data()->buffers[1] == nullptr) {
return Status::Invalid("values buffer is null");
}
if (array.length() > 0 && array.values() == nullptr) {
return Status::Invalid("values is null");
if (array.length() > 0) {
if (array.data()->buffers[1] == nullptr) {
return Status::Invalid("values buffer is null");
}
if (array.values() == nullptr) {
return Status::Invalid("values is null");
}
}
return Status::OK();
}
Expand Down Expand Up @@ -227,21 +230,20 @@ struct ValidateArrayVisitor {
Status ValidateOffsets(const ArrayType& array) {
using offset_type = typename ArrayType::offset_type;

auto value_offsets = array.value_offsets();
if (value_offsets == nullptr) {
if (array.length() != 0) {
if (array.length() > 0) {
// For length 0, an empty offsets array seems accepted as a special case (ARROW-544)
auto value_offsets = array.value_offsets();
if (value_offsets == nullptr) {
return Status::Invalid("non-empty array but value_offsets_ is null");
}
return Status::OK();
}
if (value_offsets->size() / static_cast<int>(sizeof(offset_type)) < array.length()) {
return Status::Invalid("offset buffer size (bytes): ", value_offsets->size(),
" isn't large enough for length: ", array.length());
}

auto first_offset = array.value_offset(0);
if (array.offset() == 0 && first_offset != 0) {
return Status::Invalid("The first offset isn't zero");
if (value_offsets->size() / static_cast<int>(sizeof(offset_type)) <
array.length() + 1) {
return Status::Invalid("offset buffer size (bytes): ", value_offsets->size(),
" isn't large enough for length: ", array.length());
}
if (array.offset() == 0 && array.value_offset(0) != 0) {
return Status::Invalid("The first offset isn't zero");
}
}
return Status::OK();
}
Expand Down Expand Up @@ -270,6 +272,37 @@ Status ValidateArray(const Array& array) {
"of type ",
type.ToString(), ", got ", data.buffers.size());
}
// Validate length of fixed-width buffers
if (array.length() > 0) {
for (size_t i = 0; i < data.buffers.size(); ++i) {
const auto bit_width = layout.bit_widths[i];
if (bit_width > 0) {
const auto& buffer = data.buffers[i];
if (buffer == nullptr) {
if (i == 0) {
// Null bitmap may be absent
continue;
} else {
return Status::Invalid("Buffer #", i,
" is null in non-empty array "
"of type ",
type.ToString());
}
}
const auto min_size =
BitUtil::BytesForBits(bit_width * (array.length() + array.offset()));
if (buffer->size() < min_size) {
return Status::Invalid("Buffer #", i,
" is too small in array "
"of type ",
type.ToString(), " with length ", array.length(),
" and offset ", array.offset(), " (got ", buffer->size(),
", expected at least ", min_size, ")");
}
}
}
}

if (type.id() != Type::EXTENSION) {
if (data.child_data.size() != static_cast<size_t>(type.num_children())) {
return Status::Invalid("Expected ", type.num_children(),
Expand Down
20 changes: 20 additions & 0 deletions cpp/src/arrow/c/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Register the bridge_test test through the project's add_arrow_test helper,
# passing "arrow-c" as its PREFIX argument.
# NOTE(review): presumably PREFIX is used to name/label the resulting test
# target -- confirm against the add_arrow_test definition.
add_arrow_test(bridge_test PREFIX "arrow-c")

# NOTE(review): presumably installs the public headers found under arrow/c --
# confirm against the arrow_install_all_headers definition.
arrow_install_all_headers("arrow/c")
55 changes: 55 additions & 0 deletions cpp/src/arrow/c/abi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

// Flag constants. Each value is a distinct power of two, so they can be
// combined with bitwise OR and tested with bitwise AND.
// NOTE(review): presumably intended for the `flags` member of struct
// ArrowArray declared in this header -- confirm with the producer/consumer code.
#define ARROW_FLAG_DICTIONARY_ORDERED 1
#define ARROW_FLAG_NULLABLE 2
#define ARROW_FLAG_MAP_KEYS_SORTED 4

// Plain C struct (declared with C linkage when compiled as C++) that groups
// a type description, a data description, a release callback and an opaque
// producer-owned pointer for one array.
// NOTE(review): member order and types determine the binary layout seen by
// both sides of the interface -- presumably must not be reordered or resized;
// confirm before changing anything here.
struct ArrowArray {
// Type description
// NOTE(review): the encoding of these three strings is not defined in this
// header -- confirm against the interface specification.
const char* format;
const char* name;
const char* metadata;
// NOTE(review): presumably a bitwise OR of the ARROW_FLAG_* constants -- confirm.
int64_t flags;

// Data description
// NOTE(review): length, null_count and offset are presumably expressed in
// array elements (not bytes) -- confirm.
int64_t length;
int64_t null_count;
int64_t offset;
// NOTE(review): presumably the number of entries in `buffers` and `children`
// respectively -- confirm.
int64_t n_buffers;
int64_t n_children;
// Array of untyped, read-only buffer pointers.
const void** buffers;
// Array of pointers to child ArrowArray structs.
struct ArrowArray** children;
// Pointer to another ArrowArray.
// NOTE(review): presumably null when the array is not dictionary-encoded -- confirm.
struct ArrowArray* dictionary;

// Release callback
// Receives a pointer to an ArrowArray.
// NOTE(review): presumably called by the consumer with this same struct once
// it is done with the data -- confirm the exact ownership contract.
void (*release)(struct ArrowArray*);
// Opaque producer-specific data
void* private_data;
};

#ifdef __cplusplus
}
#endif
Loading