Skip to content

Commit

Permalink
Merge branch 'main' into level-decoding-benchmark
Browse files: browse the repository at this point in the history
  • Loading branch information
mapleFU committed Jan 26, 2024
2 parents 4377930 + 667e917 commit c9e2007
Show file tree
Hide file tree
Showing 207 changed files with 1,498 additions and 1,250 deletions.
11 changes: 3 additions & 8 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1005,13 +1005,8 @@ if("${MAKE}" STREQUAL "")
endif()
endif()

# Args for external projects using make
if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.28")
# Prevent 'bad file descriptor' error see #39517 #39628
set(MAKE_BUILD_ARGS "-j1")
else()
set(MAKE_BUILD_ARGS "-j${NPROC}")
endif()
# Args for external projects using make.
set(MAKE_BUILD_ARGS "-j${NPROC}")

include(FetchContent)
set(FC_DECLARE_COMMON_OPTIONS)
Expand Down Expand Up @@ -2639,7 +2634,7 @@ macro(build_bzip2)
BUILD_IN_SOURCE 1
BUILD_COMMAND ${MAKE} libbz2.a ${MAKE_BUILD_ARGS}
${BZIP2_EXTRA_ARGS}
INSTALL_COMMAND ${MAKE} install -j1 PREFIX=${BZIP2_PREFIX}
INSTALL_COMMAND ${MAKE} install PREFIX=${BZIP2_PREFIX}
${BZIP2_EXTRA_ARGS}
INSTALL_DIR ${BZIP2_PREFIX}
URL ${ARROW_BZIP2_SOURCE_URL}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1510,7 +1510,7 @@ struct ISOCalendar {
for (int i = 0; i < 3; i++) {
field_builders.push_back(
checked_cast<BuilderType*>(struct_builder->field_builder(i)));
RETURN_NOT_OK(field_builders[i]->Reserve(1));
RETURN_NOT_OK(field_builders[i]->Reserve(in.length));
}
auto visit_null = [&]() { return struct_builder->AppendNull(); };
std::function<Status(typename InType::c_type arg)> visit_value;
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/csv/writer_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ void BenchmarkWriteCsv(benchmark::State& state, const WriteOptions& options,
const RecordBatch& batch) {
int64_t total_size = 0;

while (state.KeepRunning()) {
for (auto _ : state) {
auto out = io::BufferOutputStream::Create().ValueOrDie();
ABORT_NOT_OK(WriteCSV(batch, options, out.get()));
auto buffer = out->Finish().ValueOrDie();
Expand All @@ -106,6 +106,7 @@ void BenchmarkWriteCsv(benchmark::State& state, const WriteOptions& options,

// byte size of the generated csv dataset
state.SetBytesProcessed(total_size);
state.SetItemsProcessed(state.iterations() * batch.num_columns() * batch.num_rows());
state.counters["null_percent"] = static_cast<double>(state.range(0));
}

Expand Down
27 changes: 21 additions & 6 deletions cpp/src/arrow/dataset/file_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@
namespace arrow {
namespace dataset {

static std::shared_ptr<Dataset> GetDataset() {
struct SampleDataset {
std::shared_ptr<Dataset> dataset;
int64_t num_fragments;
};

static SampleDataset GetDataset() {
std::vector<fs::FileInfo> files;
std::vector<std::string> paths;
for (int a = 0; a < 100; a++) {
Expand All @@ -50,25 +55,35 @@ static std::shared_ptr<Dataset> GetDataset() {
FinishOptions finish_options;
finish_options.inspect_options.fragments = 0;
EXPECT_OK_AND_ASSIGN(auto dataset, factory->Finish(finish_options));
return dataset;
return {dataset, static_cast<int64_t>(paths.size())};
}

// A benchmark of filtering fragments in a dataset.
static void GetAllFragments(benchmark::State& state) {
auto dataset = GetDataset();
for (auto _ : state) {
ASSERT_OK_AND_ASSIGN(auto fragments, dataset->GetFragments());
ASSERT_OK_AND_ASSIGN(auto fragments, dataset.dataset->GetFragments());
ABORT_NOT_OK(fragments.Visit([](std::shared_ptr<Fragment>) { return Status::OK(); }));
}
state.SetItemsProcessed(state.iterations() * dataset.num_fragments);
state.counters["num_fragments"] = static_cast<double>(dataset.num_fragments);
}

static void GetFilteredFragments(benchmark::State& state, compute::Expression filter) {
auto dataset = GetDataset();
ASSERT_OK_AND_ASSIGN(filter, filter.Bind(*dataset->schema()));
ASSERT_OK_AND_ASSIGN(filter, filter.Bind(*dataset.dataset->schema()));
int64_t num_filtered_fragments = 0;
for (auto _ : state) {
ASSERT_OK_AND_ASSIGN(auto fragments, dataset->GetFragments(filter));
ABORT_NOT_OK(fragments.Visit([](std::shared_ptr<Fragment>) { return Status::OK(); }));
num_filtered_fragments = 0;
ASSERT_OK_AND_ASSIGN(auto fragments, dataset.dataset->GetFragments(filter));
ABORT_NOT_OK(fragments.Visit([&](std::shared_ptr<Fragment>) {
++num_filtered_fragments;
return Status::OK();
}));
}
state.SetItemsProcessed(state.iterations() * dataset.num_fragments);
state.counters["num_fragments"] = static_cast<double>(dataset.num_fragments);
state.counters["num_filtered_fragments"] = static_cast<double>(num_filtered_fragments);
}

using compute::field_ref;
Expand Down
6 changes: 5 additions & 1 deletion cpp/src/arrow/dataset/scanner_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,11 @@ void ScanOnly(
acero::DeclarationToTable(std::move(scan)));

ASSERT_GT(collected->num_rows(), 0);
ASSERT_EQ(collected->num_columns(), 2);
if (factory_name == "scan") {
ASSERT_EQ(collected->num_columns(), 6);
} else if (factory_name == "scan2") {
ASSERT_EQ(collected->num_columns(), 2);
}
}

static constexpr int kScanIdx = 0;
Expand Down
Loading

0 comments on commit c9e2007

Please sign in to comment.