Skip to content

Commit

Permalink
[Courgette] Initial Implementation of LabelManager
Browse files Browse the repository at this point in the history
This is part of the effort to reduce Courgette's peak memory. Main changes:
- Moving Label to image_utils.h, and change Label::count_ from int to int32.
- Adding utility class ConsecutiveRangeVisitor, with tests.
- Adding LabelManager, with tests.
The new code is not yet used in production.

Review URL: https://codereview.chromium.org/1491703003

Cr-Commit-Position: refs/heads/master@{#363688}
  • Loading branch information
samuelhuang authored and Commit bot committed Dec 8, 2015
1 parent 56c43a0 commit 7a2fea2
Show file tree
Hide file tree
Showing 9 changed files with 439 additions and 17 deletions.
5 changes: 5 additions & 0 deletions courgette/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ static_library("courgette_lib") {
"adjustment_method_2.cc",
"assembly_program.cc",
"assembly_program.h",
"consecutive_range_visitor.h",
"courgette.h",
"crc.cc",
"crc.h",
Expand All @@ -35,6 +36,8 @@ static_library("courgette_lib") {
"ensemble_apply.cc",
"ensemble_create.cc",
"image_utils.h",
"label_manager.cc",
"label_manager.h",
"memory_allocator.cc",
"memory_allocator.h",
"patch_generator_x86_32.h",
Expand Down Expand Up @@ -100,6 +103,7 @@ test("courgette_unittests") {
"base_test_unittest.cc",
"base_test_unittest.h",
"bsdiff_memory_unittest.cc",
"consecutive_range_visitor_unittest.cc",
"difference_estimator_unittest.cc",
"disassembler_elf_32_x86_unittest.cc",
"disassembler_win32_x64_unittest.cc",
Expand All @@ -108,6 +112,7 @@ test("courgette_unittests") {
"encoded_program_unittest.cc",
"ensemble_unittest.cc",
"image_utils_unittest.cc",
"label_manager_unittest.cc",
"memory_allocator_unittest.cc",
"rel32_finder_win32_x86_unittest.cc",
"streams_unittest.cc",
Expand Down
18 changes: 1 addition & 17 deletions courgette/assembly_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,13 @@
#include "base/memory/scoped_ptr.h"

#include "courgette/disassembler.h"
#include "courgette/image_utils.h"
#include "courgette/memory_allocator.h"

namespace courgette {

class EncodedProgram;

// A Label is a symbolic reference to an address. Unlike a conventional
// assembly language, we always know the address. The address will later be
// stored in a table and the Label will be replaced with the index into the
// table.
//
// TODO(sra): Make fields private and add setters and getters.
class Label {
public:
static const int kNoIndex = -1;
Label() : rva_(0), index_(kNoIndex), count_(0) {}
explicit Label(RVA rva) : rva_(rva), index_(kNoIndex), count_(0) {}

RVA rva_; // Address referred to by the label.
int index_; // Index of address in address table, kNoIndex until assigned.
int count_;
};

typedef std::map<RVA, Label*> RVAToLabel;

// Opcodes of simple assembly language
Expand Down
57 changes: 57 additions & 0 deletions courgette/consecutive_range_visitor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_
#define COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_

#include <iterator>

#include "base/macros.h"

namespace courgette {

// Usage note: First check whether std::unique() would suffice.
//
// ConsecutiveRangeVisitor is a visitor to read equal consecutive items
// ("ranges") between two iterators. The base value of InputIterator must
// implement the == operator.
//
// Example: "AAAAABZZZZOO" consists of ranges ["AAAAA", "B", "ZZZZ", "OO"]. The
// visitor provides accessors to iterate through the ranges, and to access each
// range's value and repeat, i.e., [('A', 5), ('B', 1), ('Z', 4), ('O', 2)].
template <class InputIterator>
class ConsecutiveRangeVisitor {
public:
ConsecutiveRangeVisitor(InputIterator begin, InputIterator end)
: head_(begin), end_(end) {
advance();
}

// Returns whether there are more ranges to traverse.
bool has_more() const { return tail_ != end_; }

// Returns an iterator to an element in the current range.
InputIterator cur() const { return tail_; }

// Returns the number of repeated elements in the current range.
size_t repeat() const { return std::distance(tail_, head_); }

// Advances to the next range.
void advance() {
tail_ = head_;
if (head_ != end_)
while (++head_ != end_ && *head_ == *tail_) {}
}

private:
InputIterator tail_; // The trailing pionter of a range (inclusive).
InputIterator head_; // The leading pointer of a range (exclusive).
InputIterator end_; // Store the end pointer so we know when to stop.

DISALLOW_COPY_AND_ASSIGN(ConsecutiveRangeVisitor);
};

} // namespace courgette

#endif // COURGETTE_CONSECUTIVE_RANGE_VISITOR_H_
70 changes: 70 additions & 0 deletions courgette/consecutive_range_visitor_unittest.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "courgette/consecutive_range_visitor.h"

#include <string>

#include "testing/gtest/include/gtest/gtest.h"

namespace courgette {

TEST(ConsecutiveRangeVisitorTest, Basic) {
std::string s = "AAAAABZZZZOO";
ConsecutiveRangeVisitor<std::string::iterator> vis(s.begin(), s.end());
EXPECT_TRUE(vis.has_more());
EXPECT_EQ('A', *vis.cur());
EXPECT_EQ(5U, vis.repeat());
vis.advance();

EXPECT_TRUE(vis.has_more());
EXPECT_EQ('B', *vis.cur());
EXPECT_EQ(1U, vis.repeat());
vis.advance();

EXPECT_TRUE(vis.has_more());
EXPECT_EQ('Z', *vis.cur());
EXPECT_EQ(4U, vis.repeat());
vis.advance();

EXPECT_TRUE(vis.has_more());
EXPECT_EQ('O', *vis.cur());
EXPECT_EQ(2U, vis.repeat());
vis.advance();

EXPECT_FALSE(vis.has_more());
}

TEST(ConsecutiveRangeVisitorTest, UnitRanges) {
// Unsorted, no consecutive characters.
const char s[] = "elephant elephant";
ConsecutiveRangeVisitor<const char*> vis(std::begin(s), std::end(s) - 1);
for (const char* scan = &s[0]; *scan; ++scan) {
EXPECT_TRUE(vis.has_more());
EXPECT_EQ(*scan, *vis.cur());
EXPECT_EQ(1U, vis.repeat());
vis.advance();
}
EXPECT_FALSE(vis.has_more());
}

TEST(ConsecutiveRangeVisitorTest, SingleRange) {
for (size_t len = 1U; len < 10U; ++len) {
std::vector<int> v(len, 137);
ConsecutiveRangeVisitor<std::vector<int>::iterator> vis(v.begin(), v.end());
EXPECT_TRUE(vis.has_more());
EXPECT_EQ(137, *vis.cur());
EXPECT_EQ(len, vis.repeat());
vis.advance();
EXPECT_FALSE(vis.has_more());
}
}

TEST(ConsecutiveRangeVisitorTest, Empty) {
std::string s;
ConsecutiveRangeVisitor<std::string::iterator> vis(s.begin(), s.end());
EXPECT_FALSE(vis.has_more());
}

} // namespace courgette
5 changes: 5 additions & 0 deletions courgette/courgette.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
'adjustment_method.h',
'assembly_program.cc',
'assembly_program.h',
'consecutive_range_visitor.h',
'courgette.h',
'crc.cc',
'crc.h',
Expand All @@ -35,6 +36,8 @@
'ensemble_apply.cc',
'ensemble_create.cc',
'image_utils.h',
'label_manager.cc',
'label_manager.h',
'memory_allocator.cc',
'memory_allocator.h',
'region.h',
Expand Down Expand Up @@ -102,6 +105,7 @@
'bsdiff_memory_unittest.cc',
'base_test_unittest.cc',
'base_test_unittest.h',
'consecutive_range_visitor_unittest.cc',
'difference_estimator_unittest.cc',
'disassembler_elf_32_x86_unittest.cc',
'disassembler_win32_x86_unittest.cc',
Expand All @@ -110,6 +114,7 @@
'encode_decode_unittest.cc',
'ensemble_unittest.cc',
'image_utils_unittest.cc',
'label_manager_unittest.cc',
'memory_allocator_unittest.cc',
'rel32_finder_win32_x86_unittest.cc',
'streams_unittest.cc',
Expand Down
20 changes: 20 additions & 0 deletions courgette/image_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,26 @@ namespace courgette {

typedef uint32 RVA;

// A Label is a symbolic reference to an address. Unlike a conventional
// assembly language, we always know the address. The address will later be
// stored in a table and the Label will be replaced with the index into the
// table.
// TODO(huangs): Make this a struct, and remove "_" from member names.
class Label {
public:
enum : int { kNoIndex = -1 };
explicit Label(RVA rva) : rva_(rva) {}

bool operator==(const Label& other) const {
return rva_ == other.rva_ && index_ == other.index_ &&
count_ == other.count_;
}

RVA rva_ = 0; // Address referred to by the label.
int index_ = kNoIndex; // Index of address in address table.
int32 count_ = 0;
};

// These helper functions avoid the need for casts in the main code.
inline uint16 ReadU16(const uint8* address, size_t offset) {
return *reinterpret_cast<const uint16*>(address + offset);
Expand Down
74 changes: 74 additions & 0 deletions courgette/label_manager.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "courgette/label_manager.h"

#include <algorithm>

#include "base/logging.h"
#include "base/numerics/safe_math.h"
#include "courgette/consecutive_range_visitor.h"

namespace courgette {

LabelManager::RvaVisitor::~RvaVisitor() {}

LabelManager::LabelManager() {}

LabelManager::~LabelManager() {}

// We wish to minimize peak memory usage here. Analysis: Let
// m = number of (RVA) elements in |rva_visitor|,
// n = number of distinct (RVA) elements in |rva_visitor|.
// The final storage is n * sizeof(Label) bytes. During computation we uniquify
// m RVAs, and count repeats. Taking sizeof(RVA) = 4, an implementation using
// std::map or std::unordered_map would consume additionally 32 * n bytes.
// Meanwhile, our std::vector implementation consumes additionally 4 * m bytes
// For our typical usage (i.e. Chrome) we see m = ~4n, so we use 16 * n bytes of
// extra contiguous memory during computation. Assuming memory fragmentation
// would not be an issue, this is much better than using std::map.
void LabelManager::Read(RvaVisitor* rva_visitor) {
// Write all values in |rva_visitor| to |rvas|.
size_t num_rva = rva_visitor->Remaining();
std::vector<RVA> rvas(num_rva);
for (size_t i = 0; i < num_rva; ++i, rva_visitor->Next())
rvas[i] = rva_visitor->Get();

// Sort |rvas|, then count the number of distinct values.
using CRV = ConsecutiveRangeVisitor<std::vector<RVA>::iterator>;
std::sort(rvas.begin(), rvas.end());
size_t num_distinct_rva = 0;
for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance())
++num_distinct_rva;

// Reserve space for |labels_|, populate with sorted RVA and repeats.
DCHECK(labels_.empty());
labels_.reserve(num_distinct_rva);
for (CRV it(rvas.begin(), rvas.end()); it.has_more(); it.advance()) {
labels_.push_back(Label(*it.cur()));
base::CheckedNumeric<uint32> count = it.repeat();
labels_.back().count_ = count.ValueOrDie();
}
}

void LabelManager::RemoveUnderusedLabels(int32 count_threshold) {
if (count_threshold <= 0)
return;
labels_.erase(std::remove_if(labels_.begin(), labels_.end(),
[count_threshold](const Label& label) {
return label.count_ < count_threshold;
}),
labels_.end());
// Not shrinking |labels_|, since this may cause reallocation.
}

// Uses binary search to find |rva|.
Label* LabelManager::Find(RVA rva) {
auto it = std::lower_bound(
labels_.begin(), labels_.end(), Label(rva),
[](const Label& l1, const Label& l2) { return l1.rva_ < l2.rva_; });
return it == labels_.end() || it->rva_ != rva ? nullptr : &(*it);
}

} // namespace courgette
64 changes: 64 additions & 0 deletions courgette/label_manager.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COURGETTE_LABEL_MANAGER_H_
#define COURGETTE_LABEL_MANAGER_H_

#include <vector>

#include "base/macros.h"
#include "courgette/image_utils.h"

namespace courgette {

// A container to store and manage Label instances. A key consideration is peak
// memory usage reduction. To this end we preallocate Label instances in bulk,
// and carefully control transient memory usage when initializing Labels.
class LabelManager {
public:
// An adaptor to sequentially traverse multiple RVAs. This is useful for RVA
// translation without extra storage. For example, we might have a stored list
// of RVA locations, but we want to traverse the matching RVA targets.
class RvaVisitor {
public:
virtual ~RvaVisitor();

// Returns the number of remaining RVAs to visit.
virtual size_t Remaining() const = 0;

// Returns the current RVA.
virtual RVA Get() const = 0;

// Advances to the next RVA.
virtual void Next() = 0;
};

LabelManager();
virtual ~LabelManager();

// Initializes |labels_| using RVAs from |rva_visitor|. Each distinct RVA from
// |rva_visitor| yields a Label with |rva_| assigned as the RVA, and |count_|
// assigned as the repeat.
void Read(RvaVisitor* rva_visitor);

// Removes |labels_| elements whose |count_| is less than |count_threshold|.
void RemoveUnderusedLabels(int32 count_threshold);

// Efficiently searches for a Label that targets |rva|. Returns the pointer to
// the stored Label instance if found, or null otherwise.
Label* Find(RVA rva);

// TODO(huangs): Move AssignRemainingIndexes() here.

protected:
// The main list of Label instances, sorted by the |rva_| member.
std::vector<Label> labels_;

private:
DISALLOW_COPY_AND_ASSIGN(LabelManager);
};

} // namespace courgette

#endif // COURGETTE_LABEL_MANAGER_H_
Loading

0 comments on commit 7a2fea2

Please sign in to comment.