pitrou · bkietz · Nov 18, 2020 · Feb 17, 2021
diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc
@@ -30,10 +30,6 @@
 #error "ASSIGN_OR_RAISE should not be visible from Arrow public headers."
 #endif
 
-#ifdef ARROW_UTIL_PARALLEL_H
-#error "arrow/util/parallel.h is an internal header"
-#endif
-
 #include <gmock/gmock-matchers.h>
 #include <gtest/gtest.h>
 

diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt
@@ -43,6 +43,7 @@ add_arrow_test(utility-test
                align_util_test.cc
                bit_block_counter_test.cc
                bit_util_test.cc
+               cache_test.cc
                checked_cast_test.cc
                compression_test.cc
                decimal_test.cc
@@ -73,6 +74,7 @@ add_arrow_test(threading-utility-test
 
 add_arrow_benchmark(bit_block_counter_benchmark)
 add_arrow_benchmark(bit_util_benchmark)
+add_arrow_benchmark(cache_benchmark)
 add_arrow_benchmark(compression_benchmark)
 add_arrow_benchmark(decimal_benchmark)
 add_arrow_benchmark(hashing_benchmark)

diff --git a/cpp/src/arrow/util/cache_benchmark.cc b/cpp/src/arrow/util/cache_benchmark.cc
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+#include "arrow/array.h"
+#include "arrow/testing/random.h"
+#include "arrow/util/cache_internal.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace internal {
+
+// Number of keys and values generated per benchmark; also passed as the
+// capacity of the memoization caches below.
+static constexpr int32_t kCacheSize = 100;
+// Key string lengths used as the first benchmark argument (see SetCacheArgs).
+static constexpr int32_t kSmallKeyLength = 8;
+static constexpr int32_t kLargeKeyLength = 64;
+// Value string lengths used as the second benchmark argument (see SetCacheArgs).
+static constexpr int32_t kSmallValueLength = 16;
+static constexpr int32_t kLargeValueLength = 1024;
+
+// Build `nvalues` strings whose lengths lie between `min_length` and
+// `max_length`, using a RandomArrayGenerator with a fixed seed (42) so that
+// repeated calls with the same arguments use the same generator setup.
+static std::vector<std::string> MakeStrings(int64_t nvalues, int64_t min_length,
+                                            int64_t max_length) {
+  ::arrow::random::RandomArrayGenerator generator(42);
+  const auto shortest = static_cast<int32_t>(min_length);
+  const auto longest = static_cast<int32_t>(max_length);
+  auto strings =
+      checked_pointer_cast<StringArray>(generator.String(nvalues, shortest, longest));
+  // Copy each array slot into an owning std::string
+  std::vector<std::string> result(nvalues);
+  for (int64_t pos = 0; pos < nvalues; ++pos) {
+    result[pos] = strings->GetString(pos);
+  }
+  return result;
+}
+
+// Fixed-length convenience overload: every generated string is requested with
+// the same minimum and maximum length.
+static std::vector<std::string> MakeStrings(int64_t nvalues, int64_t length) {
+  const int64_t min_length = length;
+  const int64_t max_length = length;
+  return MakeStrings(nvalues, min_length, max_length);
+}
+
+// Benchmark successful lookups in a `Cache` sized to hold every key.
+//
+// The cache is filled once with (keys[i], values[i]) pairs; each benchmark
+// iteration then looks up every key and checks that all of them were found.
+template <typename Cache, typename Key, typename Value>
+static void BenchmarkCacheLookups(benchmark::State& state, const std::vector<Key>& keys,
+                                  const std::vector<Value>& values) {
+  const auto nitems = static_cast<int32_t>(keys.size());
+  Cache cache(nitems);
+  // Populate the cache before timing starts
+  for (int32_t idx = 0; idx < nitems; ++idx) {
+    cache.Replace(keys[idx], values[idx]);
+  }
+
+  for (auto _ : state) {
+    int64_t hits = 0;
+    for (const auto& key : keys) {
+      if (cache.Find(key) != nullptr) {
+        ++hits;
+      }
+    }
+    benchmark::DoNotOptimize(hits);
+    // Every key was inserted above and the cache is large enough to keep them all
+    ARROW_CHECK_EQ(hits, nitems);
+  }
+  state.SetItemsProcessed(state.iterations() * nitems);
+}
+
+// Lookup benchmark for LRUCache<std::string, std::string>.
+// state.range(0) is the key length, state.range(1) the value length.
+static void LRUCacheLookup(benchmark::State& state) {
+  using StringCache = LRUCache<std::string, std::string>;
+
+  const auto key_length = state.range(0);
+  const auto value_length = state.range(1);
+  const std::vector<std::string> keys = MakeStrings(kCacheSize, key_length);
+  const std::vector<std::string> values = MakeStrings(kCacheSize, value_length);
+  BenchmarkCacheLookups<StringCache>(state, keys, values);
+}
+
+// Register the four (key length, value length) combinations of small and
+// large sizes, in the order: small/small, small/large, large/small, large/large.
+static void SetCacheArgs(benchmark::internal::Benchmark* bench) {
+  bench->Args({kSmallKeyLength, kSmallValueLength})
+      ->Args({kSmallKeyLength, kLargeValueLength})
+      ->Args({kLargeKeyLength, kSmallValueLength})
+      ->Args({kLargeKeyLength, kLargeValueLength});
+}
+
+BENCHMARK(LRUCacheLookup)->Apply(SetCacheArgs);
+
+// Function object handing out the stored values in round-robin order,
+// regardless of the key it is called with.
+struct Callable {
+  explicit Callable(std::vector<std::string> values)
+      : index_(0), values_(std::move(values)) {}
+
+  // Advance to the next stored value (wrapping around to the first one after
+  // the last) and return a copy of it.  The key is deliberately ignored.
+  std::string operator()(const std::string& key) {
+    const int64_t next = index_ + 1;
+    const int64_t count = static_cast<int64_t>(values_.size());
+    index_ = (next < count) ? next : 0;
+    return values_[index_];
+  }
+
+ private:
+  // Position of the value returned by the most recent call
+  int64_t index_;
+  std::vector<std::string> values_;
+};
+
+// Benchmark calls to a memoized callable `mem` over `keys`.
+//
+// `mem` is first called once per key outside of the timed loop; each
+// benchmark iteration then calls it again for every key and accumulates the
+// length of the returned strings so the calls cannot be optimized away.
+template <typename Memoized>
+static void BenchmarkMemoize(benchmark::State& state, Memoized&& mem,
+                             const std::vector<std::string>& keys) {
+  // Warm up the memoization cache
+  for (const std::string& key : keys) {
+    mem(key);
+  }
+
+  const auto nkeys = static_cast<int64_t>(keys.size());
+  for (auto _ : state) {
+    int64_t total_length = 0;
+    for (const std::string& key : keys) {
+      total_length += static_cast<int64_t>(mem(key).length());
+    }
+    benchmark::DoNotOptimize(total_length);
+  }
+  state.SetItemsProcessed(state.iterations() * nkeys);
+}
+
+// Benchmark the thread-safe LRU memoizer returned by MemoizeLRU.
+// state.range(0) is the key length, state.range(1) the value length.
+static void MemoizeLRUCached(benchmark::State& state) {
+  const std::vector<std::string> keys = MakeStrings(kCacheSize, state.range(0));
+  const std::vector<std::string> values = MakeStrings(kCacheSize, state.range(1));
+  auto memoized = MemoizeLRU(Callable(values), kCacheSize);
+  BenchmarkMemoize(state, memoized, keys);
+}
+
+// Benchmark the non-thread-safe LRU memoizer returned by MemoizeLRUThreadUnsafe.
+// state.range(0) is the key length, state.range(1) the value length.
+static void MemoizeLRUCachedThreadUnsafe(benchmark::State& state) {
+  const auto keys = MakeStrings(kCacheSize, state.range(0));
+  const auto values = MakeStrings(kCacheSize, state.range(1));
+  // Emulate recommended usage of MemoizeLRUThreadUnsafe
+  // (the compiler is probably able to cache the TLS-looked up value, though)
+  using Memoized = decltype(MemoizeLRUThreadUnsafe(Callable(values), kCacheSize));
+  thread_local Memoized mem;
+  // The initializer of a thread_local variable runs only once per thread, while
+  // this function is invoked again for each argument combination.  Rebuild the
+  // memoizer on every invocation so that it wraps the current `values` and
+  // starts from an empty cache, instead of reusing the one from the first run.
+  mem = MemoizeLRUThreadUnsafe(Callable(values), kCacheSize);
+  BenchmarkMemoize(state, mem, keys);
+}
+
+// Run both memoization benchmarks over every key/value length combination
+// registered by SetCacheArgs.
+BENCHMARK(MemoizeLRUCached)->Apply(SetCacheArgs);
+BENCHMARK(MemoizeLRUCachedThreadUnsafe)->Apply(SetCacheArgs);
+
+}  // namespace internal
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/cache_internal.h b/cpp/src/arrow/util/cache_internal.h
@@ -0,0 +1,217 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cassert>
+#include <functional>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <type_traits>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "arrow/util/functional.h"
+#include "arrow/util/macros.h"
+
+namespace arrow {
+namespace internal {
+
+// A LRU (least recently used) replacement cache.
+//
+// Entries are kept in a linked list ordered from most to least recently used,
+// and an unordered_map indexes the list nodes by key.  Both Find() and
+// Replace() move the entry they touch to the front of the list.  The class
+// does no locking of its own.
+template <typename Key, typename Value>
+class LRUCache {
+ public:
+  // Create a cache holding at most `capacity` entries.
+  //
+  // A capacity below 1 is clamped to 1.  Replace() returns a pointer to the
+  // entry it just stored; with a capacity of 0 that very entry would be
+  // evicted before returning, leaving the caller with a dangling pointer.
+  explicit LRUCache(int32_t capacity) : capacity_(capacity > 0 ? capacity : 1) {
+    ReserveMap();
+  }
+
+  ARROW_DISALLOW_COPY_AND_ASSIGN(LRUCache);
+  ARROW_DEFAULT_MOVE_AND_ASSIGN(LRUCache);
+
+  // Remove all entries from the cache.
+  void Clear() {
+    items_.clear();
+    map_.clear();
+    // The C++ spec doesn't tell whether map_.clear() will shrink the map capacity
+    ReserveMap();
+  }
+
+  // Number of entries currently stored.
+  int32_t size() const {
+    assert(items_.size() == map_.size());
+    return static_cast<int32_t>(items_.size());
+  }
+
+  // Look up `key`.
+  //
+  // Returns a pointer to the cached value, or a null pointer if the key is
+  // absent.  A hit marks the entry as most recently used.
+  template <typename K>
+  Value* Find(K&& key) {
+    const auto it = map_.find(key);
+    if (it == map_.end()) {
+      return NULLPTR;
+    }
+    // Found => move item at front of the list
+    const ListIt list_it = it->second;
+    items_.splice(items_.begin(), items_, list_it);
+    return &list_it->value;
+  }
+
+  // Store `value` under `key`, overwriting any existing value.
+  //
+  // Returns a pair whose first member tells whether a new entry was inserted
+  // (false if an existing entry was updated) and whose second member points to
+  // the stored value.  The entry becomes the most recently used one; if the
+  // insertion makes the cache exceed its capacity, the least recently used
+  // entry is evicted.
+  template <typename K, typename V>
+  std::pair<bool, Value*> Replace(K&& key, V&& value) {
+    // Try to insert temporary iterator
+    const auto emplaced = map_.emplace(std::forward<K>(key), ListIt{});
+    const auto map_it = emplaced.first;
+    if (!emplaced.second) {
+      // Already exists => move item at front of the list, and update value
+      const ListIt list_it = map_it->second;
+      items_.splice(items_.begin(), items_, list_it);
+      list_it->value = std::forward<V>(value);
+      return {false, &list_it->value};
+    }
+    // Inserted => push item at front of the list, and update iterator
+    items_.push_front(Item{&map_it->first, std::forward<V>(value)});
+    map_it->second = items_.begin();
+    // Did we exceed the cache capacity?  If so, remove least recently used item.
+    // Since the capacity is at least 1, the list holds at least two items here,
+    // so the evicted (back) item is never the one that was just inserted.
+    if (static_cast<int32_t>(items_.size()) > capacity_) {
+      const auto num_erased = map_.erase(*items_.back().key);
+      assert(num_erased == 1);
+      ARROW_UNUSED(num_erased);
+      items_.pop_back();
+    }
+    return {true, &items_.front().value};
+  }
+
+ private:
+  struct Item {
+    // Pointer to the key inside the unordered_map
+    const Key* key;
+    Value value;
+  };
+  using List = std::list<Item>;
+  using ListIt = typename List::iterator;
+  using Map = std::unordered_map<Key, ListIt>;
+
+  // The map size can temporarily exceed the cache capacity, see Replace().
+  // The sum is computed in the map's size type so that it cannot overflow int32_t.
+  void ReserveMap() {
+    map_.reserve(static_cast<typename Map::size_type>(capacity_) + 1);
+  }
+
+  const int32_t capacity_;
+  // In most to least recently used order
+  List items_;
+  Map map_;
+};
+
+namespace detail {
+
+// Memoizer that serializes all accesses to its cache with a mutex.
+//
+// The wrapped function itself is called without holding the mutex, so it may
+// run concurrently (and more than once for the same key) when several threads
+// miss the cache at the same time.
+template <typename Key, typename Value, typename Cache, typename Func>
+struct ThreadSafeMemoizer {
+  using RetType = Value;
+
+  ThreadSafeMemoizer(Func func, int32_t cache_capacity)
+      : func_(std::move(func)), cache_(cache_capacity) {}
+
+  // The memoizer can't return a pointer to the cached value, because
+  // the cache entry may be evicted by another thread.  The value is
+  // therefore copied out while the mutex is still held.
+  Value operator()(const Key& key) {
+    {
+      std::lock_guard<std::mutex> guard(mutex_);
+      const Value* cached = cache_.Find(key);
+      if (ARROW_PREDICT_TRUE(cached != NULLPTR)) {
+        return *cached;
+      }
+    }
+    // Cache miss: compute the value with the mutex released
+    Value computed = func_(key);
+    // Then take the mutex again to store the result and copy it out
+    std::lock_guard<std::mutex> guard(mutex_);
+    return *cache_.Replace(key, std::move(computed)).second;
+  }
+
+ private:
+  std::mutex mutex_;
+  Func func_;
+  Cache cache_;
+};
+
+// Memoizer without any synchronization: callers must not use the same
+// instance from several threads at once.
+template <typename Key, typename Value, typename Cache, typename Func>
+struct ThreadUnsafeMemoizer {
+  using RetType = const Value&;
+
+  ThreadUnsafeMemoizer(Func func, int32_t cache_capacity)
+      : func_(std::move(func)), cache_(cache_capacity) {}
+
+  // Return a reference to the value stored in the cache for `key`, calling
+  // the wrapped function first if the key is not cached yet.  The reference
+  // points into the cache, so it is only usable while the cache keeps that entry.
+  const Value& operator()(const Key& key) {
+    const Value* const hit = cache_.Find(key);
+    if (ARROW_PREDICT_TRUE(hit != NULLPTR)) {
+      return *hit;
+    }
+    // Cache miss: compute the value and store it
+    const auto stored = cache_.Replace(key, func_(key));
+    return *stored.second;
+  }
+
+ private:
+  Func func_;
+  Cache cache_;
+};
+
+// Wrap `func` into a memoizing callable.
+//
+// Template parameters:
+// - Cache: cache class template, instantiated as Cache<Key, Value>
+// - MemoizerType: memoizer class template (ThreadSafeMemoizer by default)
+// - Key / Value: by default, the decayed types of Func's first argument and
+//   of its return value
+//
+// `cache_capacity` is forwarded to the memoizer's constructor.
+//
+// The memoizer is held through a shared_ptr, so all copies of the returned
+// std::function refer to the same memoizer instance.
+template <template <typename...> class Cache,
+          template <typename...> class MemoizerType = ThreadSafeMemoizer, typename Func,
+          typename Key = typename std::decay<call_traits::argument_type<0, Func>>::type,
+          typename Value = typename std::decay<call_traits::return_type<Func>>::type,
+          typename Memoizer = MemoizerType<Key, Value, Cache<Key, Value>, Func>,
+          typename RetType = typename Memoizer::RetType>
+static std::function<RetType(const Key&)> Memoize(Func func, int32_t cache_capacity) {
+  // std::function<> requires copy constructibility
+  // (hence the copyable wrapper around a shared_ptr to the memoizer)
+  struct {
+    RetType operator()(const Key& key) const { return (*memoized_)(key); }
+    std::shared_ptr<Memoizer> memoized_;
+  } shared_memoized = {std::make_shared<Memoizer>(std::move(func), cache_capacity)};
+
+  return shared_memoized;
+}
+
+// template <template <typename K, typename V> class Cache,
+//           template <typename K, typename V, typename C, typename F>
+//           class MemoizerType = ThreadSafeMemoizer,
+//           typename Func,
+//           typename Key = typename std::decay<call_traits::argument_type<0,
+//           Func>>::type, typename Value = typename
+//           std::decay<call_traits::return_type<Func>>::type, typename Memoizer =
+//           MemoizerType<Key, Value, Cache<Key, Value>, Func>, typename RetFunc =
+//           Memoizer>
+// static RetFunc Memoize(Func&& func, int32_t cache_capacity) {
+//   return Memoizer(std::forward<Func>(func), cache_capacity);
+// }
+
+}  // namespace detail
+
+// Apply a LRU memoization cache to a callable.
+//
+// Forwards to detail::Memoize with LRUCache as the cache and the default
+// memoizer type (detail::ThreadSafeMemoizer), whose call operator returns
+// cached values by copy.  `cache_capacity` is passed through unchanged.
+template <typename Func>
+static auto MemoizeLRU(Func&& func, int32_t cache_capacity)
+    -> decltype(detail::Memoize<LRUCache>(std::forward<Func>(func), cache_capacity)) {
+  return detail::Memoize<LRUCache>(std::forward<Func>(func), cache_capacity);
+}
+
+// Like MemoizeLRU, but not thread-safe.  This version allows for much faster
+// lookups (more than 2x faster), but you'll have to manage thread safety yourself.
+// A recommended usage is to declare per-thread caches using `thread_local`
+// (see cache_benchmark.cc).
+//
+// Forwards to detail::Memoize with LRUCache and detail::ThreadUnsafeMemoizer,
+// whose call operator returns a const reference to the value stored in the
+// cache rather than a copy.
+template <typename Func>
+static auto MemoizeLRUThreadUnsafe(Func&& func, int32_t cache_capacity)
+    -> decltype(detail::Memoize<LRUCache, detail::ThreadUnsafeMemoizer>(
+        std::forward<Func>(func), cache_capacity)) {
+  return detail::Memoize<LRUCache, detail::ThreadUnsafeMemoizer>(std::forward<Func>(func),
+                                                                 cache_capacity);
+}
+
+}  // namespace internal
+}  // namespace arrow