From 804ed8a1e0f236f22e19abef7013e9641a3ad49d Mon Sep 17 00:00:00 2001 From: huangs Date: Wed, 1 Jun 2016 23:26:36 -0700 Subject: [PATCH] [Courgette] PagedArray: Add Iterators and Parametrize Page Size as int Template. This is a refactoring CL to enable PagedArray usage by libdivsufsort. In addition to overloading operator[], for more general usage we need need pointer-like accessors to PagedArray. To this end we implement PagedArray_const_iterator and PagedArray_const_iterator, which merely wraps a PagedArray pointer along with an index. We also add various operators needed by libdivsufsort. For optimization, '<' and '<=' operators omits pointer checks. By default PagedArray page size is 2**18 elements (1 MiB for int32_t). To enable better testing, we made (log) page size a tepmlate parameter. BUG=608885 Review-Url: https://codereview.chromium.org/2008553007 Cr-Commit-Position: refs/heads/master@{#397311} --- courgette/third_party/bsdiff/paged_array.h | 186 +++++++++++++- .../bsdiff/paged_array_unittest.cc | 226 +++++++++++++++++- 2 files changed, 386 insertions(+), 26 deletions(-) diff --git a/courgette/third_party/bsdiff/paged_array.h b/courgette/third_party/bsdiff/paged_array.h index 27c3c0b37db7fb..251ea973c6da22 100644 --- a/courgette/third_party/bsdiff/paged_array.h +++ b/courgette/third_party/bsdiff/paged_array.h @@ -11,32 +11,189 @@ #ifndef COURGETTE_THIRD_PARTY_BSDIFF_PAGED_ARRAY_H_ #define COURGETTE_THIRD_PARTY_BSDIFF_PAGED_ARRAY_H_ -#include +#include +#include +#include +#include "base/logging.h" #include "base/macros.h" #include "base/process/memory.h" namespace courgette { +// Page size of 2^18 * sizeof(T) is 1MB for T = int32_t. +constexpr int kPagedArrayDefaultPageLogSize = 18; + +template +class PagedArray; + +// A random access iterator with pointer-like semantics, for PagedArray. +template +class PagedArray_iterator { + public: + using ThisType = PagedArray_iterator; + using difference_type = ptrdiff_t; + using value_type = typename std::remove_const::type; + using reference = T&; + using pointer = T*; + using iterator_category = std::random_access_iterator_tag; + + PagedArray_iterator() : array_(nullptr), index_(0U) {} + PagedArray_iterator(ContainerType* array, size_t index) + : array_(array), index_(index) {} + + template + PagedArray_iterator(const PagedArray_iterator& it) + : array_(it.array_), index_(it.index_) {} + + PagedArray_iterator(std::nullptr_t) : array_(nullptr), index_(0) {} + + ~PagedArray_iterator() = default; + + reference operator*() const { return (*array_)[index_]; } + reference operator[](size_t idx) const { return (*array_)[index_ + idx]; } + pointer operator->() const { return &(*array_)[index_]; } + + ThisType& operator=(std::nullptr_t) { + array_ = nullptr; + index_ = 0; + return *this; + } + + ThisType& operator++() { + ++index_; + return *this; + } + ThisType& operator--() { + --index_; + return *this; + } + + ThisType operator++(int) { return ThisType(array_, index_++); } + ThisType operator--(int) { return ThisType(array_, index_--); } + + ThisType& operator+=(difference_type delta) { + index_ += delta; + return *this; + } + ThisType& operator-=(difference_type delta) { + index_ -= delta; + return *this; + } + + ThisType operator+(difference_type offset) const { + return ThisType(array_, index_ + offset); + } + ThisType operator-(difference_type offset) const { + return ThisType(array_, index_ - offset); + } + + template + bool operator==(const PagedArray_iterator& it) const { + return index_ == it.index_ && array_ == it.array_; + } + bool operator==(std::nullptr_t) const { + return index_ == 0 && array_ == nullptr; + } + template + bool operator!=(const PagedArray_iterator& it) const { + return !(*this == it); + } + + template + bool operator<(const PagedArray_iterator& it) const { +#ifndef NDEBUG + // For performance, skip the |array_| check in Release builds. + if (array_ != it.array_) + return false; +#endif + return index_ < it.index_; + } + template + bool operator<=(const PagedArray_iterator& it) const { +#ifndef NDEBUG + // For performance, skip the |array_| check in Release builds. + if (array_ != it.array_) + return false; +#endif + return index_ <= it.index_; + } + template + bool operator>(const PagedArray_iterator& it) const { +#ifndef NDEBUG + // For performance, skip the |array_| check in Release builds. + if (array_ != it.array_) + return false; +#endif + return index_ > it.index_; + } + template + bool operator>=(const PagedArray_iterator& it) const { +#ifndef NDEBUG + // For performance, skip the |array_| check in Release builds. + if (array_ != it.array_) + return false; +#endif + return index_ >= it.index_; + } + + template + difference_type operator-( + const PagedArray_iterator& it) const { + return index_ - it.index_; + } + + private: + template + friend class PagedArray_iterator; + + ContainerType* array_; + size_t index_; +}; + // PagedArray implements an array stored using many fixed-size pages. -template +template class PagedArray { enum { - // Page size in elements. Page size of 2^18 * sizeof(T) is 1MB for T = int. - kLogPageSize = 18, - kPageSize = 1 << kLogPageSize + // Page size in elements. + kLogPageSize = LOG_PAGE_SIZE, + kPageSize = 1 << LOG_PAGE_SIZE }; public: - PagedArray() : pages_(NULL), page_count_(0) {} + using ThisType = PagedArray; + using const_iterator = PagedArray_iterator; + using iterator = PagedArray_iterator; + PagedArray() = default; ~PagedArray() { clear(); } + iterator begin() { return iterator(this, 0); } + iterator end() { return iterator(this, size_); } + const_iterator begin() const { return const_iterator(this, 0); } + const_iterator end() const { return const_iterator(this, size_); } + T& operator[](size_t i) { size_t page = i >> kLogPageSize; size_t offset = i & (kPageSize - 1); - // It is tempting to add a DCHECK(page < page_count_), but that makes - // bsdiff_create run 2x slower (even when compiled optimized.) +#ifndef NDEBUG + // Without the #ifndef, DCHECK() will significaltly slow down bsdiff_create + // even in optimized Release build (about 1.4x). + DCHECK(page < page_count_); +#endif + return pages_[page][offset]; + } + + const T& operator[](size_t i) const { + // Duplicating code here for performance. If we use common code for this + // then bsdiff_create slows down by ~5% in optimized Release build. + size_t page = i >> kLogPageSize; + size_t offset = i & (kPageSize - 1); +#ifndef NDEBUG + // Without the #ifndef, DCHECK() will significaltly slow down bsdiff_create + // even in optimized Release build (about 1.4x). + DCHECK(page < page_count_); +#endif return pages_[page][offset]; } @@ -44,7 +201,8 @@ class PagedArray { // allocation fails. bool Allocate(size_t size) { clear(); - size_t pages_needed = (size + kPageSize - 1) >> kLogPageSize; + size_ = size; + size_t pages_needed = (size_ + kPageSize - 1) >> kLogPageSize; if (!base::UncheckedMalloc(sizeof(T*) * pages_needed, reinterpret_cast(&pages_))) { return false; @@ -64,22 +222,24 @@ class PagedArray { // Releases all storage. May be called more than once. void clear() { - if (pages_ != NULL) { + if (pages_ != nullptr) { while (page_count_ != 0) { --page_count_; free(pages_[page_count_]); } free(pages_); - pages_ = NULL; + pages_ = nullptr; } } private: - T** pages_; - size_t page_count_; + T** pages_ = nullptr; + size_t size_ = 0U; + size_t page_count_ = 0U; DISALLOW_COPY_AND_ASSIGN(PagedArray); }; } // namespace courgette + #endif // COURGETTE_THIRD_PARTY_BSDIFF_PAGED_ARRAY_H_ diff --git a/courgette/third_party/bsdiff/paged_array_unittest.cc b/courgette/third_party/bsdiff/paged_array_unittest.cc index 7ae4f1c7888088..17edde59dca92c 100644 --- a/courgette/third_party/bsdiff/paged_array_unittest.cc +++ b/courgette/third_party/bsdiff/paged_array_unittest.cc @@ -4,14 +4,132 @@ #include "courgette/third_party/bsdiff/paged_array.h" +#include + +#include +#include +#include +#include + #include "testing/gtest/include/gtest/gtest.h" +namespace { + +// Total allocation of 4GB will fail in 32 bit programs if allocations are +// leaked. +const int kIterations = 20; +const int kSizeBig = 200 * 1024 * 1024 / sizeof(int); // 200MB + +const size_t kLogBlockSizeSmall = 10; +const size_t kBlockSizeSmall = 1 << kLogBlockSizeSmall; +const size_t kSizeList[] = {1, + 16, + 123, + kBlockSizeSmall, + kBlockSizeSmall + 1, + 123 * kBlockSizeSmall + 567}; + +const size_t kIteratorTestDataSize = 7; +const int32_t kIteratorTestData[kIteratorTestDataSize] = {2, 3, 5, 7, + 11, 13, 17}; + +} // namespace + class PagedArrayTest : public testing::Test { public: - // Total allocation of 4GB will fail in 32 bit programs if allocations are - // leaked. - static const int kIterations = 20; - static const int kSize = 200 * 1024 * 1024 / sizeof(int); // 200MB + template + void TestIteratorBasic(Iterator begin, Iterator end) { + EXPECT_FALSE(begin == nullptr); + EXPECT_FALSE(end == nullptr); + + // Test TestPagedArray::iterator data read. + Iterator it = begin; + EXPECT_EQ(2, *it); + EXPECT_EQ(3, *(it + 1)); + EXPECT_EQ(5, it[2]); // Pseudo-pointer. + EXPECT_EQ(7, (it + 1)[2]); + EXPECT_EQ(3, *(++it)); + EXPECT_EQ(3, *it); + EXPECT_EQ(3, *(it++)); + EXPECT_EQ(5, *it); + EXPECT_EQ(11, *(it += 2)); + EXPECT_EQ(11, *it); + EXPECT_FALSE(it == begin); + EXPECT_TRUE(it != begin); + EXPECT_TRUE(it == (begin + 4)); + EXPECT_FALSE(it != (begin + 4)); + EXPECT_EQ(static_cast(kIteratorTestDataSize), end - begin); + it = begin + 5; + EXPECT_EQ(begin, it - 5); + EXPECT_EQ(13, *it); + EXPECT_EQ(11, *(--it)); + EXPECT_EQ(11, *it); + EXPECT_EQ(11, *(it--)); + EXPECT_EQ(7, *it); + EXPECT_EQ(3, *(it -= 2)); + EXPECT_EQ(3, *it); + + // Test binary operators for every pair of iterator. + EXPECT_TRUE(begin <= end); + EXPECT_TRUE(begin < end); + for (size_t i = 0; i < kIteratorTestDataSize; ++i) { + Iterator it1 = begin + i; + EXPECT_TRUE(it1 == it1); + EXPECT_FALSE(it1 != it1); + EXPECT_TRUE(begin <= it1); + EXPECT_FALSE(begin > it1); + EXPECT_TRUE(it1 >= begin); + EXPECT_FALSE(it1 < begin); + EXPECT_EQ(begin, it1 - i); + EXPECT_EQ(end, it1 + (kIteratorTestDataSize - i)); + EXPECT_EQ(kIteratorTestData[i], *it1); + EXPECT_EQ(kIteratorTestData[i], begin[i]); + for (size_t j = 0; i + j < kIteratorTestDataSize; ++j) { + Iterator it2 = it1 + j; + EXPECT_TRUE(it1 <= it2); + EXPECT_FALSE(it1 > it2); + EXPECT_TRUE(it2 >= it1); + EXPECT_FALSE(it2 < it1); + if (j > 0) { + EXPECT_TRUE(it1 < it2); + EXPECT_FALSE(it1 >= it2); + EXPECT_TRUE(it2 > it1); + EXPECT_FALSE(it2 <= it1); + EXPECT_FALSE(it1 == it2); + EXPECT_TRUE(it1 != it2); + } + EXPECT_EQ(kIteratorTestData[i + j], it1[j]); // Pseudo-pointer. + EXPECT_EQ(kIteratorTestData[i + j], *it2); + EXPECT_EQ(static_cast(j), it2 - it1); + EXPECT_EQ(it1, it2 - j); + } + EXPECT_TRUE(it1 < end); + EXPECT_FALSE(it1 >= end); + EXPECT_TRUE(it1 <= end); + EXPECT_FALSE(it1 > end); + EXPECT_TRUE(end > it1); + EXPECT_FALSE(end <= it1); + EXPECT_TRUE(end >= it1); + EXPECT_FALSE(end < it1); + EXPECT_TRUE(it1 != end); + EXPECT_FALSE(it1 == end); + } + + // Test initialize with null. + Iterator it_dummy; + it_dummy = nullptr; + EXPECT_TRUE(it_dummy == nullptr); + + // Test copy constructor. + Iterator begin_copy(begin); + EXPECT_TRUE(begin_copy == begin); + + // Test STL read-only usage. + std::vector::value_type> v(begin, + end); + EXPECT_TRUE(std::equal(begin, end, v.begin())); + EXPECT_TRUE(std::equal(v.begin(), v.end(), begin)); + } }; // AddressSanitizer on Windows adds additional memory overhead, which @@ -20,27 +138,109 @@ class PagedArrayTest : public testing::Test { TEST_F(PagedArrayTest, TestManyAllocationsDestructorFree) { for (int i = 0; i < kIterations; ++i) { courgette::PagedArray a; - EXPECT_TRUE(a.Allocate(kSize)); + EXPECT_TRUE(a.Allocate(kSizeBig)); } } TEST_F(PagedArrayTest, TestManyAllocationsManualFree) { courgette::PagedArray a; for (int i = 0; i < kIterations; ++i) { - EXPECT_TRUE(a.Allocate(kSize)); + EXPECT_TRUE(a.Allocate(kSizeBig)); a.clear(); } } #endif TEST_F(PagedArrayTest, TestAccess) { - const int kAccessSize = 3 * 1024 * 1024; - courgette::PagedArray a; - a.Allocate(kAccessSize); - for (int i = 0; i < kAccessSize; ++i) { - a[i] = i; + for (size_t size : kSizeList) { + courgette::PagedArray a; + EXPECT_TRUE(a.Allocate(size)); + for (size_t i = 0; i < size; ++i) + a[i] = i; + for (size_t i = 0; i < size; ++i) + EXPECT_EQ(static_cast(i), a[i]); } - for (int i = 0; i < kAccessSize; ++i) { - EXPECT_EQ(i, a[i]); +} + +TEST_F(PagedArrayTest, TestIterator) { + using TestPagedArray = courgette::PagedArray; // Page size = 2. + + TestPagedArray::const_iterator uninit_const_it; + EXPECT_TRUE(uninit_const_it == nullptr); + + TestPagedArray::iterator uninit_it; + EXPECT_TRUE(uninit_it == nullptr); + + TestPagedArray a; + EXPECT_TRUE(a.Allocate(kIteratorTestDataSize)); + std::copy(kIteratorTestData, kIteratorTestData + kIteratorTestDataSize, + a.begin()); + const TestPagedArray& a_const = a; + + // Test TestPagedArray::const_iterator. + TestIteratorBasic(a_const.begin(), a_const.end()); + + // Test TestPagedArray::iterator. + TestIteratorBasic(a.begin(), a.end()); + + // Test equality of const and non-const. + EXPECT_TRUE(a.begin() == a_const.begin()); + EXPECT_TRUE(a_const.begin() == a.begin()); + + // Test casting from non-const to const. + TestPagedArray::iterator non_const_it = a.begin(); + EXPECT_TRUE(non_const_it == a.begin()); + TestPagedArray::const_iterator const_it = non_const_it; + EXPECT_TRUE(const_it == non_const_it); + // The following should and will emit compile error: + // non_const_it = a_const.begin(); + + // Test copy constructor from non-const to const. + TestPagedArray::iterator const_it2(a.begin()); + EXPECT_TRUE(const_it2 == a.begin()); + + // Test pointer distance from non-const to const. + EXPECT_EQ(static_cast(kIteratorTestDataSize), + a.end() - a_const.begin()); + EXPECT_EQ(static_cast(kIteratorTestDataSize), + a_const.end() - a.begin()); + + // Test operator->(). + struct TestStruct { + int32_t value = 0; + }; + using TestStructPagedArray = courgette::PagedArray; + TestStructPagedArray b; + b.Allocate(3); + b[0].value = 100; + b[1].value = 200; + b[2].value = 300; + const TestStructPagedArray& b_const = b; + + EXPECT_EQ(100, b.begin()->value); + EXPECT_EQ(100, b_const.begin()->value); + EXPECT_EQ(200, (b.begin() + 1)->value); + EXPECT_EQ(200, (b_const.begin() + 1)->value); + (b.begin() + 2)->value *= -1; + EXPECT_EQ(-300, (b.begin() + 2)->value); + EXPECT_EQ(-300, (b_const.begin() + 2)->value); +} + +// Test generic read-write of itrators by sorting pseudo-random numbers. +TEST_F(PagedArrayTest, TestSort) { + courgette::PagedArray a; + std::minstd_rand pseudo_rand_gen; // Deterministic, using defaults. + for (size_t size : kSizeList) { + std::vector v(size); + courgette::PagedArray a; + EXPECT_TRUE(a.Allocate(size)); + for (size_t i = 0; i < size; ++i) { + v[i] = pseudo_rand_gen(); + a[i] = v[i]; + } + std::sort(v.begin(), v.end()); + std::sort(a.begin(), a.end()); + for (size_t i = 0; i < size; ++i) + EXPECT_EQ(v[i], a[i]); } }