Skip to content

Commit 6ff0d94

Browse files
committed
Merge branch 'master' into gandiva-cython
2 parents 6ea0062 + 3ab4a0f commit 6ff0d94

File tree

30 files changed

+999
-144
lines changed

30 files changed

+999
-144
lines changed

ci/appveyor-cpp-build.bat

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ if "%JOB%" == "Static_Crt_Build" (
3030
pushd cpp\build-debug
3131

3232
cmake -G "%GENERATOR%" ^
33+
-DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^
3334
-DARROW_USE_STATIC_CRT=ON ^
3435
-DARROW_BOOST_USE_SHARED=OFF ^
3536
-DARROW_BUILD_SHARED=OFF ^
@@ -45,6 +46,7 @@ if "%JOB%" == "Static_Crt_Build" (
4546
pushd cpp\build-release
4647

4748
cmake -G "%GENERATOR%" ^
49+
-DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^
4850
-DARROW_USE_STATIC_CRT=ON ^
4951
-DARROW_BOOST_USE_SHARED=OFF ^
5052
-DARROW_BUILD_SHARED=OFF ^
@@ -70,6 +72,7 @@ if "%JOB%" == "Build_Debug" (
7072
pushd cpp\build-debug
7173

7274
cmake -G "%GENERATOR%" ^
75+
-DARROW_VERBOSE_THIRDPARTY_BUILD=OFF ^
7376
-DARROW_BOOST_USE_SHARED=OFF ^
7477
-DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
7578
-DARROW_BUILD_STATIC=OFF ^

ci/cpp-msvc-build-main.bat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
@rem (i.e. for usual configurations)
2020

2121
set ARROW_HOME=%CONDA_PREFIX%\Library
22-
set CMAKE_ARGS=
22+
set CMAKE_ARGS=-DARROW_VERBOSE_THIRDPARTY_BUILD=OFF
2323

2424
if "%JOB%" == "Toolchain" (
2525
set CMAKE_ARGS=%CMAKE_ARGS% -DARROW_WITH_BZ2=ON

ci/travis_script_rust.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ cargo rustc -- -D warnings
3535

3636
cargo build
3737
cargo test
38+
cargo bench
3839
cargo run --example dynamic_types
3940

4041
popd

cpp/cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -580,7 +580,7 @@ if(ARROW_BUILD_TESTS OR ARROW_BUILD_BENCHMARKS)
580580
-DBUILD_STATIC_LIBS=ON
581581
-DBUILD_PACKAGING=OFF
582582
-DBUILD_TESTING=OFF
583-
-BUILD_CONFIG_TESTS=OFF
583+
-DBUILD_CONFIG_TESTS=OFF
584584
-DINSTALL_HEADERS=ON
585585
-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS}
586586
-DCMAKE_C_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_C_FLAGS}
@@ -1053,38 +1053,38 @@ if (ARROW_WITH_ZSTD)
10531053
# ZSTD
10541054

10551055
if("${ZSTD_HOME}" STREQUAL "")
1056-
set(ZSTD_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-prefix/src/zstd_ep")
1057-
set(ZSTD_INCLUDE_DIR "${ZSTD_BUILD_DIR}/lib")
1056+
set(ZSTD_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zstd_ep-install")
1057+
set(ZSTD_INCLUDE_DIR "${ZSTD_PREFIX}/include")
1058+
1059+
set(ZSTD_CMAKE_ARGS
1060+
"-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
1061+
"-DCMAKE_INSTALL_PREFIX=${ZSTD_PREFIX}"
1062+
"-DZSTD_BUILD_PROGRAMS=off"
1063+
"-DZSTD_BUILD_SHARED=off"
1064+
"-DZSTD_BUILD_STATIC=on"
1065+
"-DZSTD_MULTITHREAD_SUPPORT=off")
10581066

10591067
if (MSVC)
1068+
set(ZSTD_STATIC_LIB "${ZSTD_PREFIX}/lib/zstd_static.lib")
10601069
if (ARROW_USE_STATIC_CRT)
1061-
if (${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG")
1062-
set(ZSTD_RUNTIME_LIBRARY_LINKAGE "/p:RuntimeLibrary=MultiThreadedDebug")
1063-
else()
1064-
set(ZSTD_RUNTIME_LIBRARY_LINKAGE "/p:RuntimeLibrary=MultiThreaded")
1065-
endif()
1070+
set(ZSTD_CMAKE_ARGS ${ZSTD_CMAKE_ARGS} "-DZSTD_USE_STATIC_RUNTIME=on")
10661071
endif()
1067-
set(ZSTD_STATIC_LIB "${ZSTD_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/libzstd_static.lib")
1068-
set(ZSTD_BUILD_COMMAND BUILD_COMMAND msbuild ${ZSTD_BUILD_DIR}/build/VS2010/zstd.sln /t:Build /v:minimal /p:Configuration=${CMAKE_BUILD_TYPE}
1069-
${ZSTD_RUNTIME_LIBRARY_LINKAGE} /p:Platform=x64 /p:PlatformToolset=v140
1070-
/p:OutDir=${ZSTD_BUILD_DIR}/build/VS2010/bin/x64_${CMAKE_BUILD_TYPE}/ /p:SolutionDir=${ZSTD_BUILD_DIR}/build/VS2010/ )
1071-
set(ZSTD_PATCH_COMMAND PATCH_COMMAND git --git-dir=. apply --verbose --whitespace=fix ${CMAKE_SOURCE_DIR}/build-support/zstd_msbuild_gl_runtimelibrary_params.patch)
10721072
else()
1073-
set(ZSTD_STATIC_LIB "${ZSTD_BUILD_DIR}/lib/libzstd.a")
1074-
set(ZSTD_BUILD_COMMAND BUILD_COMMAND ${CMAKE_SOURCE_DIR}/build-support/build-zstd-lib.sh)
1073+
set(ZSTD_STATIC_LIB "${ZSTD_PREFIX}/lib/libzstd.a")
1074+
# Only pass our C flags on Unix as on MSVC it leads to a
1075+
# "incompatible command-line options" error
1076+
set(ZSTD_CMAKE_ARGS ${ZSTD_CMAKE_ARGS}
1077+
"-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}"
1078+
"-DCMAKE_C_FLAGS=${EP_C_FLAGS}")
10751079
endif()
10761080

10771081
ExternalProject_Add(zstd_ep
1078-
URL ${ZSTD_SOURCE_URL}
1079-
${EP_LOG_OPTIONS}
1080-
UPDATE_COMMAND ""
1081-
${ZSTD_PATCH_COMMAND}
1082-
CONFIGURE_COMMAND ""
1083-
INSTALL_COMMAND ""
1084-
BINARY_DIR ${ZSTD_BUILD_DIR}
1085-
BUILD_BYPRODUCTS ${ZSTD_STATIC_LIB}
1086-
${ZSTD_BUILD_COMMAND}
1087-
)
1082+
${EP_LOG_OPTIONS}
1083+
CMAKE_ARGS ${ZSTD_CMAKE_ARGS}
1084+
SOURCE_SUBDIR "build/cmake"
1085+
INSTALL_DIR ${ZSTD_PREFIX}
1086+
URL ${ZSTD_SOURCE_URL}
1087+
BUILD_BYPRODUCTS "${ZSTD_STATIC_LIB}")
10881088

10891089
set(ZSTD_VENDORED 1)
10901090
else()

cpp/src/arrow/buffer-test.cc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
#include <algorithm>
1819
#include <cstdint>
1920
#include <cstring>
2021
#include <limits>
@@ -272,7 +273,10 @@ TYPED_TEST(TypedTestBuffer, ResizeOOM) {
272273
TypeParam buf;
273274
ASSERT_OK(AllocateResizableBuffer(0, &buf));
274275
ASSERT_OK(buf->Resize(100));
275-
int64_t to_alloc = std::numeric_limits<int64_t>::max();
276+
int64_t to_alloc = std::min<uint64_t>(std::numeric_limits<int64_t>::max(),
277+
std::numeric_limits<size_t>::max());
278+
// subtract 63 to prevent overflow after the size is aligned
279+
to_alloc -= 63;
276280
ASSERT_RAISES(OutOfMemory, buf->Resize(to_alloc));
277281
#endif
278282
}

cpp/src/arrow/builder.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -736,7 +736,7 @@ Status BooleanBuilder::AppendValues(const std::vector<bool>& values,
736736

737737
int64_t i = 0;
738738
internal::GenerateBitsUnrolled(raw_data_, length_, length,
739-
[values, &i]() -> bool { return values[i++]; });
739+
[&values, &i]() -> bool { return values[i++]; });
740740

741741
// this updates length_
742742
ArrayBuilder::UnsafeAppendToBitmap(is_valid);
@@ -749,7 +749,7 @@ Status BooleanBuilder::AppendValues(const std::vector<bool>& values) {
749749

750750
int64_t i = 0;
751751
internal::GenerateBitsUnrolled(raw_data_, length_, length,
752-
[values, &i]() -> bool { return values[i++]; });
752+
[&values, &i]() -> bool { return values[i++]; });
753753

754754
// this updates length_
755755
ArrayBuilder::UnsafeSetNotNull(length);

cpp/src/arrow/memory_pool-test.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
#include <algorithm>
1819
#include <cstdint>
1920
#include <limits>
2021

@@ -53,7 +54,10 @@ class TestMemoryPoolBase : public ::testing::Test {
5354
auto pool = memory_pool();
5455

5556
uint8_t* data;
56-
int64_t to_alloc = std::numeric_limits<int64_t>::max();
57+
int64_t to_alloc = std::min<uint64_t>(std::numeric_limits<int64_t>::max(),
58+
std::numeric_limits<size_t>::max());
59+
// subtract 63 to prevent overflow after the size is aligned
60+
to_alloc -= 63;
5761
ASSERT_RAISES(OutOfMemory, pool->Allocate(to_alloc, &data));
5862
}
5963

cpp/src/arrow/memory_pool.cc

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <cstdlib>
2424
#include <cstring>
2525
#include <iostream>
26+
#include <limits>
2627
#include <memory>
2728
#include <sstream> // IWYU pragma: keep
2829

@@ -44,7 +45,13 @@ namespace {
4445
// Allocate memory according to the alignment requirements for Arrow
4546
// (as of May 2016 64 bytes)
4647
Status AllocateAligned(int64_t size, uint8_t** out) {
47-
// TODO(emkornfield) find something compatible with windows
48+
// TODO(emkornfield) find something compatible with windows
49+
if (size < 0) {
50+
return Status::Invalid("negative malloc size");
51+
}
52+
if (static_cast<uint64_t>(size) >= std::numeric_limits<size_t>::max()) {
53+
return Status::CapacityError("malloc size overflows size_t");
54+
}
4855
#ifdef _WIN32
4956
// Special code path for Windows
5057
*out =
@@ -104,7 +111,14 @@ class DefaultMemoryPool : public MemoryPool {
104111
Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override {
105112
#ifdef ARROW_JEMALLOC
106113
uint8_t* previous_ptr = *ptr;
107-
*ptr = reinterpret_cast<uint8_t*>(rallocx(*ptr, new_size, MALLOCX_ALIGN(kAlignment)));
114+
if (new_size < 0) {
115+
return Status::Invalid("negative realloc size");
116+
}
117+
if (static_cast<uint64_t>(new_size) >= std::numeric_limits<size_t>::max()) {
118+
return Status::CapacityError("realloc overflows size_t");
119+
}
120+
*ptr = reinterpret_cast<uint8_t*>(
121+
rallocx(*ptr, static_cast<size_t>(new_size), MALLOCX_ALIGN(kAlignment)));
108122
if (*ptr == NULL) {
109123
std::stringstream ss;
110124
ss << "realloc of size " << new_size << " failed";

cpp/src/arrow/util/bit-util.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ static inline int CountLeadingZeros(uint32_t value) {
170170
static inline int CountLeadingZeros(uint64_t value) {
171171
#if defined(__clang__) || defined(__GNUC__)
172172
if (value == 0) return 64;
173-
return static_cast<int>(__builtin_clzl(value));
173+
return static_cast<int>(__builtin_clzll(value));
174174
#elif defined(_MSC_VER)
175175
unsigned long index; // NOLINT
176176
if (_BitScanReverse64(&index, value)) { // NOLINT

cpp/src/arrow/util/compression_zstd.cc

Lines changed: 31 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <zstd.h>
2525

2626
#include "arrow/status.h"
27+
#include "arrow/util/logging.h"
2728
#include "arrow/util/macros.h"
2829

2930
using std::size_t;
@@ -34,6 +35,12 @@ namespace util {
3435
// XXX level = 1 probably doesn't compress very much
3536
constexpr int kZSTDDefaultCompressionLevel = 1;
3637

38+
static Status ZSTDError(size_t ret, const char* prefix_msg) {
39+
std::stringstream ss;
40+
ss << prefix_msg << ZSTD_getErrorName(ret);
41+
return Status::IOError(ss.str());
42+
}
43+
3744
// ----------------------------------------------------------------------
3845
// ZSTD decompressor implementation
3946

@@ -47,7 +54,7 @@ class ZSTDDecompressor : public Decompressor {
4754
finished_ = false;
4855
size_t ret = ZSTD_initDStream(stream_);
4956
if (ZSTD_isError(ret)) {
50-
return ZSTDError(ret, "zstd init failed: ");
57+
return ZSTDError(ret, "ZSTD init failed: ");
5158
} else {
5259
return Status::OK();
5360
}
@@ -69,7 +76,7 @@ class ZSTDDecompressor : public Decompressor {
6976
size_t ret;
7077
ret = ZSTD_decompressStream(stream_, &out_buf, &in_buf);
7178
if (ZSTD_isError(ret)) {
72-
return ZSTDError(ret, "zstd decompress failed: ");
79+
return ZSTDError(ret, "ZSTD decompress failed: ");
7380
}
7481
*bytes_read = static_cast<int64_t>(in_buf.pos);
7582
*bytes_written = static_cast<int64_t>(out_buf.pos);
@@ -81,12 +88,6 @@ class ZSTDDecompressor : public Decompressor {
8188
bool IsFinished() override { return finished_; }
8289

8390
protected:
84-
Status ZSTDError(size_t ret, const char* prefix_msg) {
85-
std::stringstream ss;
86-
ss << prefix_msg << ZSTD_getErrorName(ret);
87-
return Status::IOError(ss.str());
88-
}
89-
9091
ZSTD_DStream* stream_;
9192
bool finished_;
9293
};
@@ -103,7 +104,7 @@ class ZSTDCompressor : public Compressor {
103104
Status Init() {
104105
size_t ret = ZSTD_initCStream(stream_, kZSTDDefaultCompressionLevel);
105106
if (ZSTD_isError(ret)) {
106-
return ZSTDError(ret, "zstd init failed: ");
107+
return ZSTDError(ret, "ZSTD init failed: ");
107108
} else {
108109
return Status::OK();
109110
}
@@ -119,12 +120,6 @@ class ZSTDCompressor : public Compressor {
119120
bool* should_retry) override;
120121

121122
protected:
122-
Status ZSTDError(size_t ret, const char* prefix_msg) {
123-
std::stringstream ss;
124-
ss << prefix_msg << ZSTD_getErrorName(ret);
125-
return Status::IOError(ss.str());
126-
}
127-
128123
ZSTD_CStream* stream_;
129124
};
130125

@@ -144,7 +139,7 @@ Status ZSTDCompressor::Compress(int64_t input_len, const uint8_t* input,
144139
size_t ret;
145140
ret = ZSTD_compressStream(stream_, &out_buf, &in_buf);
146141
if (ZSTD_isError(ret)) {
147-
return ZSTDError(ret, "zstd compress failed: ");
142+
return ZSTDError(ret, "ZSTD compress failed: ");
148143
}
149144
*bytes_read = static_cast<int64_t>(in_buf.pos);
150145
*bytes_written = static_cast<int64_t>(out_buf.pos);
@@ -162,7 +157,7 @@ Status ZSTDCompressor::Flush(int64_t output_len, uint8_t* output, int64_t* bytes
162157
size_t ret;
163158
ret = ZSTD_flushStream(stream_, &out_buf);
164159
if (ZSTD_isError(ret)) {
165-
return ZSTDError(ret, "zstd flush failed: ");
160+
return ZSTDError(ret, "ZSTD flush failed: ");
166161
}
167162
*bytes_written = static_cast<int64_t>(out_buf.pos);
168163
*should_retry = ret > 0;
@@ -180,7 +175,7 @@ Status ZSTDCompressor::End(int64_t output_len, uint8_t* output, int64_t* bytes_w
180175
size_t ret;
181176
ret = ZSTD_endStream(stream_, &out_buf);
182177
if (ZSTD_isError(ret)) {
183-
return ZSTDError(ret, "zstd end failed: ");
178+
return ZSTDError(ret, "ZSTD end failed: ");
184179
}
185180
*bytes_written = static_cast<int64_t>(out_buf.pos);
186181
*should_retry = ret > 0;
@@ -206,10 +201,20 @@ Status ZSTDCodec::MakeDecompressor(std::shared_ptr<Decompressor>* out) {
206201

207202
Status ZSTDCodec::Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
208203
uint8_t* output_buffer) {
209-
int64_t decompressed_size =
210-
ZSTD_decompress(output_buffer, static_cast<size_t>(output_len), input,
211-
static_cast<size_t>(input_len));
212-
if (decompressed_size != output_len) {
204+
if (output_buffer == nullptr) {
205+
// We may pass a NULL 0-byte output buffer but some zstd versions demand
206+
// a valid pointer: https://github.com/facebook/zstd/issues/1385
207+
static uint8_t empty_buffer[1];
208+
DCHECK_EQ(output_len, 0);
209+
output_buffer = empty_buffer;
210+
}
211+
212+
size_t ret = ZSTD_decompress(output_buffer, static_cast<size_t>(output_len), input,
213+
static_cast<size_t>(input_len));
214+
if (ZSTD_isError(ret)) {
215+
return ZSTDError(ret, "ZSTD decompression failed: ");
216+
}
217+
if (static_cast<int64_t>(ret) != output_len) {
213218
return Status::IOError("Corrupt ZSTD compressed data.");
214219
}
215220
return Status::OK();
@@ -223,12 +228,13 @@ int64_t ZSTDCodec::MaxCompressedLen(int64_t input_len,
223228
Status ZSTDCodec::Compress(int64_t input_len, const uint8_t* input,
224229
int64_t output_buffer_len, uint8_t* output_buffer,
225230
int64_t* output_length) {
226-
*output_length =
231+
size_t ret =
227232
ZSTD_compress(output_buffer, static_cast<size_t>(output_buffer_len), input,
228233
static_cast<size_t>(input_len), kZSTDDefaultCompressionLevel);
229-
if (ZSTD_isError(*output_length)) {
230-
return Status::IOError("ZSTD compression failure.");
234+
if (ZSTD_isError(ret)) {
235+
return ZSTDError(ret, "ZSTD compression failed: ");
231236
}
237+
*output_length = static_cast<int64_t>(ret);
232238
return Status::OK();
233239
}
234240

0 commit comments

Comments
 (0)