Merged

37 commits
74e828a
fix typings issues (ARROW-1903)
trxcllnt Jan 11, 2018
6ff18e9
ship es2015 commonJS in main package to avoid confusion
trxcllnt Jan 11, 2018
62db338
update dependencies and add es6+ umd targets to jest transform ignore…
trxcllnt Jan 11, 2018
61dc699
WIP -- refactor types to closer match arrow-cpp
trxcllnt Jan 11, 2018
d18d915
fix struct and map rows
trxcllnt Jan 11, 2018
9be080f
ARROW-1991: [Website] Fix Docker documentation build
wesm Jan 17, 2018
9e2fc04
ARROW-2002: [Python] check write_queue is not full and writer_thread …
kmiku7 Jan 17, 2018
1ffce26
ARROW-1856: [Python] Auto-detect Parquet ABI version when using PARQU…
xhochy Jan 17, 2018
fa7c17a
passing all tests except es5 umd mangler ones
trxcllnt Jan 18, 2018
58a24c5
ARROW-2004: [C++] Add shrink_to_fit parameter to BufferBuilder::Resiz…
wesm Jan 18, 2018
bc9f9e5
ARROW-1966: [C++] Accommodate JAVA_HOME on Linux that includes the jr…
wesm Jan 18, 2018
e3f629d
fix rest of the mangling issues
trxcllnt Jan 18, 2018
f3f3b86
rename table.ts to recordbatch.ts in preparation for merging latest
trxcllnt Jan 19, 2018
d2b18d5
Merge remote-tracking branch 'ccri/table-scan-perf' into js-cpp-refac…
trxcllnt Jan 19, 2018
6c91ed4
Merge branch 'master' of github.com:apache/arrow into js-cpp-refactor…
trxcllnt Jan 19, 2018
e859e13
fix package.json bin entry
trxcllnt Jan 20, 2018
700a47c
export visitors
trxcllnt Jan 20, 2018
e81082f
export vector views, allow cloning data as another type
trxcllnt Jan 22, 2018
b7f5bfb
rename numRows to length, add table.getColumn()
trxcllnt Jan 22, 2018
87334a5
Merge branch 'table-scan-perf' of github.com:ccri/arrow into js-cpp-r…
trxcllnt Jan 22, 2018
614b688
add asEpochMs to date and timestamp vectors
trxcllnt Jan 23, 2018
5bb63af
Don't read OFFSET vector for FixedSizeList
Jan 23, 2018
e33c068
Merge pull request #2 from ccri/fixed-size-list
trxcllnt Jan 23, 2018
c0fd2f9
use the dictionary of the last chunked vector list for chunked dictio…
trxcllnt Jan 23, 2018
e537789
make it easier to run all integration tests from local data
trxcllnt Jan 24, 2018
fe31ee0
slice the flat data values before returning an iterator of them
trxcllnt Jan 24, 2018
40b3638
run integration tests with local data for coverage stats
trxcllnt Jan 24, 2018
54d4f5b
lazily allocate table and recordbatch columns, support NestedView's g…
trxcllnt Jan 24, 2018
a00415e
Fix perf
Jan 24, 2018
c8cd286
Add Table.fromStruct
Jan 24, 2018
a5f200f
Merge pull request #3 from ccri/table-from-struct
trxcllnt Jan 25, 2018
7e43b78
add test:integration npm script
trxcllnt Jan 25, 2018
18807c6
rename ChunkData's fields so it's more clear they're not semantically…
trxcllnt Jan 25, 2018
f1dead0
compute chunked nested childData list correctly
trxcllnt Jan 25, 2018
8ddce0a
check bounds in getChildAt(i) to avoid NPEs
trxcllnt Jan 25, 2018
7bc7363
Fix exception for empty Table
Jan 25, 2018
272d293
Merge pull request #4 from ccri/empty-table
trxcllnt Jan 25, 2018
7 changes: 6 additions & 1 deletion c_glib/arrow-glib/Makefile.am
@@ -16,6 +16,7 @@
# under the License.

CLEANFILES =
DISTCLEANFILES =

EXTRA_DIST = \
meson.build
@@ -169,6 +170,10 @@ BUILT_SOURCES = \
stamp-enums.c \
stamp-enums.h

DISTCLEANFILES += \
stamp-enums.c \
stamp-enums.h

EXTRA_DIST += \
enums.c.template \
enums.h.template
@@ -214,7 +219,7 @@ INTROSPECTION_SCANNER_ARGS =
INTROSPECTION_SCANNER_ENV =
if USE_ARROW_BUILD_DIR
INTROSPECTION_SCANNER_ENV += \
LD_LIBRARY_PATH=$(ARROW_LIB_DIR):$${PKG_CONFIG_PATH}
LD_LIBRARY_PATH=$(ARROW_LIB_DIR):$${LD_LIBRARY_PATH}
endif
if OS_MACOS
INTROSPECTION_SCANNER_ENV += \
4 changes: 4 additions & 0 deletions cpp/apidoc/HDFS.md
@@ -50,6 +50,10 @@ export CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath --glob`
* `ARROW_LIBHDFS_DIR` (optional): explicit location of `libhdfs.so` if it is
installed somewhere other than `$HADOOP_HOME/lib/native`.

To accommodate distribution-specific nuances, the `JAVA_HOME` variable may be
set to the root path for the Java SDK, the JRE path itself, or to the directory
containing the `libjvm` library.

### Mac Specifics

The installed location of Java on OS X can vary, however the following snippet
18 changes: 16 additions & 2 deletions cpp/cmake_modules/FindParquet.cmake
@@ -60,8 +60,22 @@ if(PARQUET_HOME)
PATHS ${PARQUET_HOME} NO_DEFAULT_PATH
PATH_SUFFIXES "lib")
get_filename_component(PARQUET_LIBS ${PARQUET_LIBRARIES} PATH )
set(PARQUET_ABI_VERSION "1.0.0")
set(PARQUET_SO_VERSION "1")

# Try to autodiscover the Parquet ABI version
get_filename_component(PARQUET_LIB_REALPATH ${PARQUET_LIBRARIES} REALPATH)
get_filename_component(PARQUET_EXT_REALPATH ${PARQUET_LIB_REALPATH} EXT)
string(REGEX MATCH ".([0-9]+.[0-9]+.[0-9]+)" HAS_ABI_VERSION ${PARQUET_EXT_REALPATH})
if (HAS_ABI_VERSION)
if (APPLE)
string(REGEX REPLACE ".([0-9]+.[0-9]+.[0-9]+).dylib" "\\1" PARQUET_ABI_VERSION ${PARQUET_EXT_REALPATH})
else()
string(REGEX REPLACE ".so.([0-9]+.[0-9]+.[0-9]+)" "\\1" PARQUET_ABI_VERSION ${PARQUET_EXT_REALPATH})
endif()
string(REGEX REPLACE "([0-9]+).[0-9]+.[0-9]+" "\\1" PARQUET_SO_VERSION ${PARQUET_ABI_VERSION})
else()
set(PARQUET_ABI_VERSION "1.0.0")
set(PARQUET_SO_VERSION "1")
endif()
else()
pkg_check_modules(PARQUET parquet)
if (PARQUET_FOUND)
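The FindParquet.cmake change derives PARQUET_ABI_VERSION and PARQUET_SO_VERSION from the resolved library file name and only falls back to the hard-coded 1.0.0 / 1 defaults when no version suffix is present. A small standalone C++ sketch of the same filename-parsing idea (the input path, regex, and program structure here are illustrative, not part of the build system):

#include <iostream>
#include <regex>
#include <string>

int main() {
  // Resolved library path, e.g. libparquet.so.1.0.0 on Linux or
  // libparquet.1.0.0.dylib on macOS.
  const std::string lib_realpath = "/usr/lib/libparquet.so.1.0.0";  // example input

  std::string abi_version = "1.0.0";  // defaults mirror the CMake fallback
  std::string so_version = "1";

  const std::regex version_re(R"((\d+)\.(\d+)\.(\d+))");
  std::smatch match;
  if (std::regex_search(lib_realpath, match, version_re)) {
    abi_version = match[0].str();  // full x.y.z match, e.g. "1.0.0"
    so_version = match[1].str();   // major component only, e.g. "1"
  }

  std::cout << "ABI " << abi_version << ", SO " << so_version << std::endl;
  return 0;
}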
25 changes: 25 additions & 0 deletions cpp/src/arrow/buffer-test.cc
@@ -194,4 +194,29 @@ TEST(TestBuffer, SliceMutableBuffer) {
ASSERT_TRUE(slice->Equals(expected));
}

TEST(TestBufferBuilder, ResizeReserve) {
const std::string data = "some data";
auto data_ptr = data.c_str();

BufferBuilder builder;

ASSERT_OK(builder.Append(data_ptr, 9));
ASSERT_EQ(9, builder.length());

ASSERT_OK(builder.Resize(128));
ASSERT_EQ(128, builder.capacity());

// Do not shrink to fit
ASSERT_OK(builder.Resize(64, false));
ASSERT_EQ(128, builder.capacity());

// Shrink to fit
ASSERT_OK(builder.Resize(64));
ASSERT_EQ(64, builder.capacity());

// Reserve elements
ASSERT_OK(builder.Reserve(60));
ASSERT_EQ(128, builder.capacity());
}

} // namespace arrow
41 changes: 30 additions & 11 deletions cpp/src/arrow/buffer.h
@@ -25,20 +25,20 @@
#include <string>
#include <type_traits>

#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/bit-util.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"

namespace arrow {

class MemoryPool;

// ----------------------------------------------------------------------
// Buffer classes

/// Immutable API for a chunk of bytes which may or may not be owned by the
/// class instance.
/// \class Buffer
/// \brief Object containing a pointer to a piece of contiguous memory with a
/// particular size. Base class does not own its memory
///
/// Buffers have two related notions of length: size and capacity. Size is
/// the number of bytes that might have valid data. Capacity is the number
@@ -133,7 +133,8 @@ ARROW_EXPORT
std::shared_ptr<Buffer> SliceMutableBuffer(const std::shared_ptr<Buffer>& buffer,
const int64_t offset, const int64_t length);

/// A Buffer whose contents can be mutated. May or may not own its data.
/// \class MutableBuffer
/// \brief A Buffer whose contents can be mutated. May or may not own its data.
class ARROW_EXPORT MutableBuffer : public Buffer {
public:
MutableBuffer(uint8_t* data, const int64_t size) : Buffer(data, size) {
@@ -148,6 +149,8 @@ class ARROW_EXPORT MutableBuffer : public Buffer {
MutableBuffer() : Buffer(NULLPTR, 0) {}
};

/// \class ResizableBuffer
/// \brief A mutable buffer that can be resized
class ARROW_EXPORT ResizableBuffer : public MutableBuffer {
public:
/// Change buffer reported size to indicated size, allocating memory if
@@ -190,13 +193,22 @@ class ARROW_EXPORT PoolBuffer : public ResizableBuffer {
MemoryPool* pool_;
};

/// \class BufferBuilder
/// \brief A class for incrementally building a contiguous chunk of in-memory data
class ARROW_EXPORT BufferBuilder {
public:
explicit BufferBuilder(MemoryPool* pool)
explicit BufferBuilder(MemoryPool* pool ARROW_MEMORY_POOL_DEFAULT)
: pool_(pool), data_(NULLPTR), capacity_(0), size_(0) {}

/// Resizes the buffer to the nearest multiple of 64 bytes per Layout.md
Status Resize(const int64_t elements) {
/// \brief Resizes the buffer to the nearest multiple of 64 bytes
///
/// \param elements the new capacity of the builder. Will be rounded
/// up to a multiple of 64 bytes for padding
/// \param shrink_to_fit if the new capacity is smaller than the existing size,
/// reallocate the internal buffer. Set to false to avoid reallocations when
/// shrinking the builder
/// \return Status
Status Resize(const int64_t elements, bool shrink_to_fit = true) {
// Resize(0) is a no-op
if (elements == 0) {
return Status::OK();
@@ -205,7 +217,7 @@ class ARROW_EXPORT BufferBuilder {
buffer_ = std::make_shared<PoolBuffer>(pool_);
}
int64_t old_capacity = capacity_;
RETURN_NOT_OK(buffer_->Resize(elements));
RETURN_NOT_OK(buffer_->Resize(elements, shrink_to_fit));
capacity_ = buffer_->capacity();
data_ = buffer_->mutable_data();
if (capacity_ > old_capacity) {
@@ -214,7 +226,14 @@
return Status::OK();
}

Status Append(const uint8_t* data, int64_t length) {
/// \brief Ensure that the builder can accommodate the additional number of bytes
/// without the need to perform allocations
///
/// \param size number of additional bytes to make space for
/// \return Status
Status Reserve(const int64_t size) { return Resize(size_ + size, false); }

Status Append(const void* data, int64_t length) {
if (capacity_ < length + size_) {
int64_t new_capacity = BitUtil::NextPower2(length + size_);
RETURN_NOT_OK(Resize(new_capacity));
@@ -248,7 +267,7 @@
}

// Unsafe methods don't check existing size
void UnsafeAppend(const uint8_t* data, int64_t length) {
void UnsafeAppend(const void* data, int64_t length) {
memcpy(data_ + size_, data, static_cast<size_t>(length));
size_ += length;
}
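The BufferBuilder changes above add a shrink_to_fit flag to Resize(), a new Reserve() helper, and void* overloads of Append()/UnsafeAppend(). A minimal usage sketch of the updated API, mirroring the new TestBufferBuilder.ResizeReserve test earlier in this diff; Finish() and the RETURN_NOT_OK macro are assumed from the existing arrow headers, and the function name is illustrative:

#include <memory>
#include <string>

#include "arrow/buffer.h"  // BufferBuilder, Buffer
#include "arrow/status.h"  // Status, RETURN_NOT_OK

namespace example {

arrow::Status BuildSomeBytes(std::shared_ptr<arrow::Buffer>* out) {
  arrow::BufferBuilder builder;  // default memory pool via ARROW_MEMORY_POOL_DEFAULT

  const std::string payload = "some data";
  RETURN_NOT_OK(builder.Append(payload.data(), static_cast<int64_t>(payload.size())));

  // Capacity is rounded up to a multiple of 64 bytes.
  RETURN_NOT_OK(builder.Resize(128));

  // Shrinking with shrink_to_fit=false keeps the existing 128-byte allocation.
  RETURN_NOT_OK(builder.Resize(64, /*shrink_to_fit=*/false));

  // Reserve() requests additional room on top of the current size; it never shrinks.
  RETURN_NOT_OK(builder.Reserve(60));

  return builder.Finish(out);
}

}  // namespace example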
4 changes: 2 additions & 2 deletions cpp/src/arrow/io/hdfs-internal.cc
@@ -147,7 +147,7 @@ static std::vector<fs::path> get_potential_libjvm_paths() {
file_name = "jvm.dll";
#elif __APPLE__
search_prefixes = {""};
search_suffixes = {"", "/jre/lib/server"};
search_suffixes = {"", "/jre/lib/server", "/lib/server"};
file_name = "libjvm.dylib";

// SFrame uses /usr/libexec/java_home to find JAVA_HOME; for now we are
@@ -175,7 +175,7 @@
"/usr/lib/jvm/default", // alt centos
"/usr/java/latest", // alt centos
};
search_suffixes = {"/jre/lib/amd64/server"};
search_suffixes = {"", "/jre/lib/amd64/server", "/lib/amd64/server"};
file_name = "libjvm.so";
#endif
// From direct environment variable
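The hdfs-internal.cc change widens the libjvm search so that a JAVA_HOME pointing at the JDK root, the JRE, or the directory containing libjvm itself all resolve, matching the HDFS.md note above. A simplified standalone C++ sketch of that prefix-by-suffix candidate enumeration; the use of std::filesystem and the helper name are illustrative, not the actual implementation:

#include <cstdlib>
#include <filesystem>
#include <string>
#include <vector>

namespace fs = std::filesystem;

// Combine candidate prefixes (including $JAVA_HOME when set) with the
// platform suffixes, so "", "/jre/lib/amd64/server" and "/lib/amd64/server"
// are all tried under each prefix on Linux.
std::vector<fs::path> CandidateLibjvmPaths() {
  std::vector<std::string> prefixes = {"/usr/lib/jvm/default-java", "/usr/java/latest"};
  const std::vector<std::string> suffixes = {"", "/jre/lib/amd64/server", "/lib/amd64/server"};
  const std::string file_name = "libjvm.so";

  if (const char* java_home = std::getenv("JAVA_HOME")) {
    // JAVA_HOME may be the JDK root, the JRE, or the libjvm directory itself.
    prefixes.insert(prefixes.begin(), java_home);
  }

  std::vector<fs::path> candidates;
  for (const auto& prefix : prefixes) {
    for (const auto& suffix : suffixes) {
      candidates.push_back(fs::path(prefix + suffix) / file_name);
    }
  }
  return candidates;
}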
4 changes: 2 additions & 2 deletions dev/docker-compose.yml
@@ -17,7 +17,7 @@
version: '3'
services:
gen_apidocs:
build:
build:
context: gen_apidocs
volumes:
- ../..:/apache-arrow
@@ -29,7 +29,7 @@ services:
volumes:
- ../..:/apache-arrow
dask_integration:
build:
build:
context: dask_integration
volumes:
- ../..:/apache-arrow
20 changes: 13 additions & 7 deletions dev/gen_apidocs/Dockerfile
@@ -14,19 +14,24 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
FROM ubuntu:14.04
# Prerequsites for apt-add-repository
RUN apt-get update && apt-get install -y \
software-properties-common python-software-properties
FROM ubuntu:16.04

# Basic OS dependencies
RUN apt-add-repository -y ppa:ubuntu-toolchain-r/test && \
apt-get update && apt-get install -y \
RUN apt-get update && apt-get install -y \
wget \
rsync \
git \
gcc-4.9 \
g++-4.9 \
build-essential
build-essential \
software-properties-common

# Java build fails with default JDK8
RUN add-apt-repository ppa:openjdk-r/ppa &&\
apt-get update &&\
apt-get install -y openjdk-7-jdk &&\
update-java-alternatives -s java-1.7.0-openjdk-amd64

# This will install conda in /home/ubuntu/miniconda
RUN wget -O /tmp/miniconda.sh \
https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
@@ -73,6 +78,7 @@ RUN /home/ubuntu/miniconda/bin/conda create -y -q -n pyarrow-dev \
doxygen \
maven \
-c conda-forge

ADD . /apache-arrow
WORKDIR /apache-arrow
CMD arrow/dev/gen_apidocs/create_documents.sh
73 changes: 48 additions & 25 deletions dev/gen_apidocs/create_documents.sh
@@ -27,6 +27,7 @@ export ARROW_HOME=$(pwd)/dist
export PARQUET_HOME=$(pwd)/dist
CONDA_BASE=/home/ubuntu/miniconda
export LD_LIBRARY_PATH=$(pwd)/dist/lib:${CONDA_BASE}/lib:${LD_LIBRARY_PATH}
export PKG_CONFIG_PATH=$(pwd)/dist/lib/pkgconfig:${PKG_CONFIG_PATH}
export PATH=${CONDA_BASE}/bin:${PATH}

# Prepare the asf-site before copying api docs
@@ -38,16 +39,38 @@ git clone --branch=asf-site \
https://git-wip-us.apache.org/repos/asf/arrow-site.git asf-site
popd

# Make Java documentation
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
wget http://mirrors.gigenet.com/apache/maven/maven-3/3.5.2/binaries/apache-maven-3.5.2-bin.tar.gz
tar xvf apache-maven-3.5.2-bin.tar.gz
export PATH=$(pwd)/apache-maven-3.5.2/bin:$PATH

pushd arrow/java
rm -rf target/site/apidocs/*
mvn -Drat.skip=true install
mvn -Drat.skip=true site
mkdir -p ../site/asf-site/docs/java/
rsync -r target/site/apidocs/ ../site/asf-site/docs/java/
popd

# Make Python documentation (Depends on C++ )
# Build Arrow C++
source activate pyarrow-dev

export ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX
export BOOST_ROOT=$CONDA_PREFIX
export PARQUET_BUILD_TOOLCHAIN=$CONDA_PREFIX
export LD_LIBRARY_PATH=$CONDA_PREFIX/lib:${LD_LIBRARY_PATH}
export PKG_CONFIG_PATH=$CONDA_PREFIX/lib/pkgconfig:${PKG_CONFIG_PATH}

export CC=gcc-4.9
export CXX=g++-4.9

rm -rf arrow/cpp/build_docs
mkdir arrow/cpp/build_docs
pushd arrow/cpp/build_docs
CPP_BUILD_DIR=$(pwd)/arrow/cpp/build_docs

rm -rf $CPP_BUILD_DIR
mkdir $CPP_BUILD_DIR
pushd $CPP_BUILD_DIR
cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \
-DCMAKE_INSTALL_PREFIX=$ARROW_HOME \
-DARROW_PYTHON=on \
@@ -58,6 +81,28 @@ make -j4
make install
popd

# Build c_glib documentation
pushd arrow/c_glib
if [ -f Makefile ]; then
# Touch generated files so configure does not re-run automatically
touch configure **/Makefile
make distclean
# Work around 'make distclean' removing doc/reference/xml/
git checkout doc/reference/xml
fi
./autogen.sh
rm -rf build_docs
mkdir build_docs
pushd build_docs
../configure \
--prefix=${ARROW_HOME} \
--enable-gtk-doc
make -j4 GTK_DOC_V_XREF=": "
mkdir -p ../../site/asf-site/docs/c_glib
rsync -r doc/reference/html/ ../../site/asf-site/docs/c_glib
popd
popd

# Build Parquet C++
rm -rf parquet-cpp/build_docs
mkdir parquet-cpp/build_docs
@@ -83,32 +128,10 @@ mkdir -p ../site/asf-site/docs/python
rsync -r doc/_build/html/ ../site/asf-site/docs/python
popd

# Build c_glib documentation
pushd arrow/c_glib
rm -rf doc/reference/html/*
./autogen.sh
./configure \
--with-arrow-cpp-build-dir=$(pwd)/../cpp/build \
--with-arrow-cpp-build-type=$ARROW_BUILD_TYPE \
--enable-gtk-doc
LD_LIBRARY_PATH=$(pwd)/../cpp/build/$ARROW_BUILD_TYPE make GTK_DOC_V_XREF=": "
mkdir -p ../site/asf-site/docs/c_glib
rsync -r doc/reference/html/ ../site/asf-site/docs/c_glib
popd

# Make C++ documentation
pushd arrow/cpp/apidoc
rm -rf html/*
doxygen Doxyfile
mkdir -p ../../site/asf-site/docs/cpp
rsync -r html/ ../../site/asf-site/docs/cpp
popd

# Make Java documentation
pushd arrow/java
rm -rf target/site/apidocs/*
mvn -Drat.skip=true install
mvn -Drat.skip=true site
mkdir -p ../site/asf-site/docs/java/
rsync -r target/site/apidocs/ ../site/asf-site/docs/java/
popd