Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2218,3 +2218,25 @@ https://github.com/pypa/packaging/

which is made available under both the Apache license v2.0 and the
BSD 2-clause license.

--------------------------------------------------------------------------------

The files in cpp/src/arrow/vendored/pcg contain code from

https://github.com/imneme/pcg-cpp

and have the following copyright notice:

Copyright 2014-2019 Melissa O'Neill <oneill@pcg-random.org>,
and the PCG Project contributors.

SPDX-License-Identifier: (Apache-2.0 OR MIT)

Licensed under the Apache License, Version 2.0 (provided in
LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
or under the MIT license (provided in LICENSE-MIT.txt and at
http://opensource.org/licenses/MIT), at your option. This file may not
be copied, modified, or distributed except according to those terms.

Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
express or implied. See your chosen license for details.
11 changes: 8 additions & 3 deletions cpp/src/arrow/compute/kernels/aggregate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1303,12 +1303,17 @@ std::pair<double, double> WelfordVar(const ArrayType& array) {
// Typed fixture for the randomized variance/stddev tests. It adds nothing of
// its own; everything it offers comes from TestPrimitiveVarStdKernel.
template <typename ArrowType>
class TestVarStdKernelRandom : public TestPrimitiveVarStdKernel<ArrowType> {};

// Arrow types for which the TestVarStdKernelRandom suite is instantiated.
// Defined once, as an alias-declaration, instead of carrying both a legacy
// typedef and an equivalent `using` for the same name.
using VarStdRandomTypes =
    ::testing::Types<Int32Type, UInt32Type, Int64Type, UInt64Type, FloatType, DoubleType>;

TYPED_TEST_SUITE(TestVarStdKernelRandom, VarStdRandomTypes);

TYPED_TEST(TestVarStdKernelRandom, Basics) {
#if defined(__MINGW32__) && !defined(__MINGW64__)
if (TypeParam::type_id == Type::FLOAT) {
GTEST_SKIP() << "Precision issues on MinGW32 with float32";
}
#endif
// Cut array into small chunks
constexpr int array_size = 5000;
constexpr int chunk_size_max = 50;
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/arrow/testing/random.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "arrow/util/decimal.h"
#include "arrow/util/key_value_metadata.h"
#include "arrow/util/logging.h"
#include "arrow/util/pcg_random.h"
#include "arrow/util/value_parsing.h"

namespace arrow {
Expand Down Expand Up @@ -79,7 +80,7 @@ struct GenerateOptions {
GenerateTypedDataNoNan(data, n);
return;
}
std::default_random_engine rng(seed_++);
pcg32_fast rng(seed_++);
DistributionType dist(min_, max_);
std::bernoulli_distribution nan_dist(nan_probability_);
const ValueType nan_value = std::numeric_limits<ValueType>::quiet_NaN();
Expand All @@ -91,7 +92,7 @@ struct GenerateOptions {
}

void GenerateTypedDataNoNan(ValueType* data, size_t n) {
std::default_random_engine rng(seed_++);
pcg32_fast rng(seed_++);
DistributionType dist(min_, max_);

// A static cast is required due to the int16 -> int8 handling.
Expand All @@ -100,7 +101,7 @@ struct GenerateOptions {

void GenerateBitmap(uint8_t* buffer, size_t n, int64_t* null_count) {
int64_t count = 0;
std::default_random_engine rng(seed_++);
pcg32_fast rng(seed_++);
std::bernoulli_distribution dist(1.0 - probability_);

for (size_t i = 0; i < n; i++) {
Expand Down
11 changes: 7 additions & 4 deletions cpp/src/arrow/testing/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,32 +42,35 @@
#include "arrow/testing/random.h"
#include "arrow/util/io_util.h"
#include "arrow/util/logging.h"
#include "arrow/util/pcg_random.h"

namespace arrow {

using random::pcg32_fast;

// Produces a seed from the current reading of std::chrono::high_resolution_clock:
// the number of clock ticks elapsed since that clock's epoch.
uint64_t random_seed() {
  const auto now = std::chrono::high_resolution_clock::now();
  const auto ticks_since_epoch = now.time_since_epoch().count();
  return static_cast<uint64_t>(ticks_since_epoch);
}

void random_null_bytes(int64_t n, double pct_null, uint8_t* null_bytes) {
const int random_seed = 0;
std::default_random_engine gen(random_seed);
pcg32_fast gen(random_seed);
std::uniform_real_distribution<double> d(0.0, 1.0);
std::generate(null_bytes, null_bytes + n,
[&d, &gen, &pct_null] { return d(gen) > pct_null; });
}

void random_is_valid(int64_t n, double pct_null, std::vector<bool>* is_valid,
int random_seed) {
std::default_random_engine gen(random_seed);
pcg32_fast gen(random_seed);
std::uniform_real_distribution<double> d(0.0, 1.0);
is_valid->resize(n, false);
std::generate(is_valid->begin(), is_valid->end(),
[&d, &gen, &pct_null] { return d(gen) > pct_null; });
}

void random_bytes(int64_t n, uint32_t seed, uint8_t* out) {
std::default_random_engine gen(seed);
pcg32_fast gen(seed);
std::uniform_int_distribution<uint32_t> d(0, std::numeric_limits<uint8_t>::max());
std::generate(out, out + n, [&d, &gen] { return static_cast<uint8_t>(d(gen)); });
}
Expand All @@ -80,7 +83,7 @@ std::string random_string(int64_t n, uint32_t seed) {
}

void random_decimals(int64_t n, uint32_t seed, int32_t precision, uint8_t* out) {
std::default_random_engine gen(seed);
pcg32_fast gen(seed);
std::uniform_int_distribution<uint32_t> d(0, std::numeric_limits<uint8_t>::max());
const int32_t required_bytes = DecimalType::DecimalSize(precision);
constexpr int32_t byte_width = 16;
Expand Down
31 changes: 31 additions & 0 deletions cpp/src/arrow/util/pcg_random.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "arrow/vendored/pcg/pcg_random.hpp" // IWYU pragma: export

namespace arrow {
namespace random {

using pcg32 = ::arrow_vendored::pcg32;
using pcg64 = ::arrow_vendored::pcg64;
using pcg32_fast = ::arrow_vendored::pcg32_fast;
using pcg64_fast = ::arrow_vendored::pcg64_fast;

} // namespace random
} // namespace arrow
26 changes: 26 additions & 0 deletions cpp/src/arrow/vendored/pcg/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<!--
PCG Random Number Generation for C++

Copyright 2014-2019 Melissa O'Neill <oneill@pcg-random.org>,
and the PCG Project contributors.

SPDX-License-Identifier: (Apache-2.0 OR MIT)

Licensed under the Apache License, Version 2.0 (provided in
LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
or under the MIT license (provided in LICENSE-MIT.txt and at
http://opensource.org/licenses/MIT), at your option. This file may not
be copied, modified, or distributed except according to those terms.

Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
express or implied. See your chosen license for details.

For additional information about the PCG random number generation scheme,
visit http://www.pcg-random.org/.
-->

Sources are taken from git changeset ffd522e7188bef30a00c74dc7eb9de5faff90092
(https://github.com/imneme/pcg-cpp).

Changes:
- enclosed in `arrow_vendored` namespace
Loading