Skip to content

Commit

Permalink
[Courgette] PagedArray: Add Iterators and Parametrize Page Size as in…
Browse files Browse the repository at this point in the history
…t Template.

This is a refactoring CL to enable PagedArray usage by libdivsufsort.
In addition to overloading operator[], for more general usage we
need pointer-like accessors to PagedArray. To this end we implement
PagedArray_iterator (exposed as PagedArray::iterator and
PagedArray::const_iterator), which merely wraps a PagedArray pointer
along with an index. We also add various
operators needed by libdivsufsort. For optimization, the ordering
operators ('<', '<=', '>', '>=') omit the array pointer check in
Release builds.

By default PagedArray page size is 2**18 elements (1 MiB for int32_t).
To enable better testing, we made (log) page size a template parameter.

BUG=608885

Review-Url: https://codereview.chromium.org/2008553007
Cr-Commit-Position: refs/heads/master@{#397311}
  • Loading branch information
samuelhuang authored and Commit bot committed Jun 2, 2016
1 parent cd2c298 commit 804ed8a
Show file tree
Hide file tree
Showing 2 changed files with 386 additions and 26 deletions.
186 changes: 173 additions & 13 deletions courgette/third_party/bsdiff/paged_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,40 +11,198 @@
#ifndef COURGETTE_THIRD_PARTY_BSDIFF_PAGED_ARRAY_H_
#define COURGETTE_THIRD_PARTY_BSDIFF_PAGED_ARRAY_H_

#include <stddef.h>
#include <cstddef>
#include <iterator>
#include <type_traits>

#include "base/logging.h"
#include "base/macros.h"
#include "base/process/memory.h"

namespace courgette {

// Page size of 2^18 * sizeof(T) is 1MB for T = int32_t.
constexpr int kPagedArrayDefaultPageLogSize = 18;

template <typename T, int LOG_PAGE_SIZE = kPagedArrayDefaultPageLogSize>
class PagedArray;

// A random access iterator with pointer-like semantics, for PagedArray.
//
// The iterator is a (container pointer, element index) pair. Element access is
// forwarded to the container's operator[], so |ContainerType| only needs to
// provide an operator[] whose result can bind to |T&|. Use a const-qualified
// |ContainerType| and |T| to obtain a read-only iterator.
//
// Like a raw pointer, the iterator can be constructed from, assigned from and
// compared against nullptr. The null iterator has a null container pointer and
// index 0.
template <typename ContainerType, typename T>
class PagedArray_iterator {
 public:
  using ThisType = PagedArray_iterator<ContainerType, T>;
  using difference_type = ptrdiff_t;
  using value_type = typename std::remove_const<T>::type;
  using reference = T&;
  using pointer = T*;
  using iterator_category = std::random_access_iterator_tag;

  // Creates a null iterator (no container, index 0).
  PagedArray_iterator() : array_(nullptr), index_(0U) {}

  // Creates an iterator referring to element |index| of |*array|.
  PagedArray_iterator(ContainerType* array, size_t index)
      : array_(array), index_(index) {}

  // Converting constructor, e.g. from a mutable iterator to a const iterator.
  // Only compiles when |ContainerType2*| converts to |ContainerType*|, so a
  // const iterator cannot be turned back into a mutable one.
  template <typename ContainerType2, typename T2>
  PagedArray_iterator(const PagedArray_iterator<ContainerType2, T2>& it)
      : array_(it.array_), index_(it.index_) {}

  // Intentionally implicit so that nullptr can be used wherever an iterator is
  // expected, mirroring raw pointers.
  PagedArray_iterator(std::nullptr_t) : array_(nullptr), index_(0) {}

  ~PagedArray_iterator() = default;

  // Element access; all three forward to the container's operator[].
  reference operator*() const { return (*array_)[index_]; }
  reference operator[](size_t idx) const { return (*array_)[index_ + idx]; }
  pointer operator->() const { return &(*array_)[index_]; }

  // Resets this iterator to the null iterator.
  ThisType& operator=(std::nullptr_t) {
    array_ = nullptr;
    index_ = 0;
    return *this;
  }

  ThisType& operator++() {
    ++index_;
    return *this;
  }
  ThisType& operator--() {
    --index_;
    return *this;
  }

  // Post-increment / post-decrement: return an iterator at the old position.
  ThisType operator++(int) { return ThisType(array_, index_++); }
  ThisType operator--(int) { return ThisType(array_, index_--); }

  // |delta| is signed while |index_| is unsigned; a negative |delta| moves the
  // iterator backwards through unsigned wrap-around arithmetic.
  ThisType& operator+=(difference_type delta) {
    index_ += delta;
    return *this;
  }
  ThisType& operator-=(difference_type delta) {
    index_ -= delta;
    return *this;
  }

  ThisType operator+(difference_type offset) const {
    return ThisType(array_, index_ + offset);
  }
  ThisType operator-(difference_type offset) const {
    return ThisType(array_, index_ - offset);
  }

  // Two iterators are equal iff they refer to the same index of the same
  // container.
  template <typename ContainerType2, typename T2>
  bool operator==(const PagedArray_iterator<ContainerType2, T2>& it) const {
    return index_ == it.index_ && array_ == it.array_;
  }
  // True only for the null iterator (null container AND index 0).
  bool operator==(std::nullptr_t) const {
    return index_ == 0 && array_ == nullptr;
  }
  template <typename ContainerType2, typename T2>
  bool operator!=(const PagedArray_iterator<ContainerType2, T2>& it) const {
    return !(*this == it);
  }
  // Counterpart of operator==(std::nullptr_t). The templated operator!= above
  // cannot deduce its parameters from nullptr, so without this overload
  // |it != nullptr| does not compile before C++20.
  bool operator!=(std::nullptr_t) const { return !(*this == nullptr); }

  // Ordering comparisons. When NDEBUG is not defined, iterators that refer to
  // different containers never compare as ordered (the result is false). When
  // NDEBUG is defined the container check is compiled out and only the indices
  // are compared.
  template <typename ContainerType2, typename T2>
  bool operator<(const PagedArray_iterator<ContainerType2, T2>& it) const {
#ifndef NDEBUG
    // For performance, skip the |array_| check in Release builds.
    if (array_ != it.array_)
      return false;
#endif
    return index_ < it.index_;
  }
  template <typename ContainerType2, typename T2>
  bool operator<=(const PagedArray_iterator<ContainerType2, T2>& it) const {
#ifndef NDEBUG
    // For performance, skip the |array_| check in Release builds.
    if (array_ != it.array_)
      return false;
#endif
    return index_ <= it.index_;
  }
  template <typename ContainerType2, typename T2>
  bool operator>(const PagedArray_iterator<ContainerType2, T2>& it) const {
#ifndef NDEBUG
    // For performance, skip the |array_| check in Release builds.
    if (array_ != it.array_)
      return false;
#endif
    return index_ > it.index_;
  }
  template <typename ContainerType2, typename T2>
  bool operator>=(const PagedArray_iterator<ContainerType2, T2>& it) const {
#ifndef NDEBUG
    // For performance, skip the |array_| check in Release builds.
    if (array_ != it.array_)
      return false;
#endif
    return index_ >= it.index_;
  }

  // Distance between two iterators, computed from the indices only; the
  // container pointers are not compared.
  template <typename ContainerType2, typename T2>
  difference_type operator-(
      const PagedArray_iterator<ContainerType2, T2>& it) const {
    return index_ - it.index_;
  }

 private:
  // Other specializations need access to |array_| and |index_| for the
  // converting constructor and the mixed const / non-const comparisons.
  template <typename, typename>
  friend class PagedArray_iterator;

  ContainerType* array_;
  size_t index_;
};

// PagedArray implements an array stored using many fixed-size pages.
template <typename T>
template <typename T, int LOG_PAGE_SIZE>
class PagedArray {
enum {
// Page size in elements. Page size of 2^18 * sizeof(T) is 1MB for T = int.
kLogPageSize = 18,
kPageSize = 1 << kLogPageSize
// Page size in elements.
kLogPageSize = LOG_PAGE_SIZE,
kPageSize = 1 << LOG_PAGE_SIZE
};

public:
PagedArray() : pages_(NULL), page_count_(0) {}
using ThisType = PagedArray<T, LOG_PAGE_SIZE>;
using const_iterator = PagedArray_iterator<const ThisType, const T>;
using iterator = PagedArray_iterator<ThisType, T>;

PagedArray() = default;
~PagedArray() { clear(); }

iterator begin() { return iterator(this, 0); }
iterator end() { return iterator(this, size_); }
const_iterator begin() const { return const_iterator(this, 0); }
const_iterator end() const { return const_iterator(this, size_); }

T& operator[](size_t i) {
size_t page = i >> kLogPageSize;
size_t offset = i & (kPageSize - 1);
// It is tempting to add a DCHECK(page < page_count_), but that makes
// bsdiff_create run 2x slower (even when compiled optimized.)
#ifndef NDEBUG
// Without the #ifndef, DCHECK() will significantly slow down bsdiff_create
// even in optimized Release build (about 1.4x).
DCHECK(page < page_count_);
#endif
return pages_[page][offset];
}

const T& operator[](size_t i) const {
// Duplicating code here for performance. If we use common code for this
// then bsdiff_create slows down by ~5% in optimized Release build.
size_t page = i >> kLogPageSize;
size_t offset = i & (kPageSize - 1);
#ifndef NDEBUG
// Without the #ifndef, DCHECK() will significantly slow down bsdiff_create
// even in optimized Release build (about 1.4x).
DCHECK(page < page_count_);
#endif
return pages_[page][offset];
}

// Allocates storage for |size| elements. Returns true on success and false if
// allocation fails.
bool Allocate(size_t size) {
clear();
size_t pages_needed = (size + kPageSize - 1) >> kLogPageSize;
size_ = size;
size_t pages_needed = (size_ + kPageSize - 1) >> kLogPageSize;
if (!base::UncheckedMalloc(sizeof(T*) * pages_needed,
reinterpret_cast<void**>(&pages_))) {
return false;
Expand All @@ -64,22 +222,24 @@ class PagedArray {

// Releases all storage. May be called more than once.
void clear() {
if (pages_ != NULL) {
if (pages_ != nullptr) {
while (page_count_ != 0) {
--page_count_;
free(pages_[page_count_]);
}
free(pages_);
pages_ = NULL;
pages_ = nullptr;
}
}

private:
T** pages_;
size_t page_count_;
T** pages_ = nullptr;
size_t size_ = 0U;
size_t page_count_ = 0U;

DISALLOW_COPY_AND_ASSIGN(PagedArray);
};

} // namespace courgette

#endif // COURGETTE_THIRD_PARTY_BSDIFF_PAGED_ARRAY_H_
Loading

0 comments on commit 804ed8a

Please sign in to comment.