Skip to content

Commit

Permalink
[Courgette] Simple AssemblyProgram and Disassembler cleanups.
Browse files Browse the repository at this point in the history
We perform mundane cleanups to prepare for the next step in
AssemblyProgram reduction:
- Delete dead code (static functions) in AssemblyProgram that was left
  over from LabelManager usage.
- Simplify image_base assignment for AssemblyProgram.
  - Source: Disassembler adds virtual function image_base() (PE file
    reads and stores it; ELF just returns 0).
  - Sink: AssemblyProgram takes it on construction (replaces mutator).
- Disassembler::Disassemble() overrides: Rename |target| to |program|.

BUG=660980

Review-Url: https://codereview.chromium.org/2583373002
Cr-Commit-Position: refs/heads/master@{#443609}
  • Loading branch information
samuelhuang authored and Commit bot committed Jan 13, 2017
1 parent 534dc4d commit 8563622
Show file tree
Hide file tree
Showing 10 changed files with 40 additions and 175 deletions.
5 changes: 2 additions & 3 deletions courgette/adjustment_method_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <vector>

#include "base/bind.h"
#include "base/strings/string_util.h"
#include "base/memory/ptr_util.h"
#include "courgette/assembly_program.h"
#include "courgette/courgette.h"
#include "courgette/encoded_program.h"
Expand All @@ -35,8 +35,7 @@ class AdjustmentMethodTest : public testing::Test {
// Returns one of two similar simple programs. These differ only in Label
// assignment, so it is possible to make them look identical.
std::unique_ptr<AssemblyProgram> MakeProgram(int kind) const {
std::unique_ptr<AssemblyProgram> prog(new AssemblyProgram(EXE_WIN_32_X86));
prog->set_image_base(0x00400000);
auto prog = base::MakeUnique<AssemblyProgram>(EXE_WIN_32_X86, 0x00400000);

RVA kRvaA = 0x00410000;
RVA kRvaB = 0x00410004;
Expand Down
124 changes: 3 additions & 121 deletions courgette/assembly_program.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,8 @@

#include "courgette/assembly_program.h"

#include <memory.h>
#include <stddef.h>
#include <stdint.h>

#include <memory>
#include <utility>
#include <vector>

#include "base/callback.h"
#include "base/logging.h"
#include "base/macros.h"
#include "courgette/courgette.h"
#include "courgette/encoded_program.h"

Expand Down Expand Up @@ -194,9 +186,8 @@ class InstructionStoreReceptor : public InstructionReceptor {

/******** AssemblyProgram ********/

// Constructs an AssemblyProgram for an executable of type |kind|. The image
// base is initialized to 0.
AssemblyProgram::AssemblyProgram(ExecutableType kind)
: kind_(kind), image_base_(0) {
}
// Constructs an AssemblyProgram for an executable of type |kind|, storing
// |image_base| as the image's base address. Both values are fixed at
// construction by the member initializer list.
AssemblyProgram::AssemblyProgram(ExecutableType kind, uint64_t image_base)
: kind_(kind), image_base_(image_base) {}

AssemblyProgram::~AssemblyProgram() {
for (size_t i = 0; i < instructions_.size(); ++i) {
Expand Down Expand Up @@ -341,115 +332,6 @@ CheckBool AssemblyProgram::EmitShared(Instruction* instruction) {
return instruction && instructions_.push_back(instruction);
}

// Resets every Label in |labels| to the "no index assigned" state by writing
// Label::kNoIndex into its index_ field.
void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
  for (auto& rva_and_label : *labels)
    rva_and_label.second->index_ = Label::kNoIndex;
}

// DefaultAssignIndexes walks |labels| in iteration order (increasing address)
// and hands out consecutive indexes starting from 0. Every label is expected
// to be unassigned on entry; an already-assigned label trips NOTREACHED() and
// is then overwritten regardless.
void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
  int next_index = 0;
  for (auto& rva_and_label : *labels) {
    Label* label = rva_and_label.second;
    if (label->index_ != Label::kNoIndex)
      NOTREACHED();
    label->index_ = next_index++;
  }
}

// AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
// yet assigned an index. Labels whose index_ is already set keep their value;
// each label still at Label::kNoIndex receives an index that no other label
// in |labels| holds. Assignment is done in three passes over |labels|: fill
// forward, fill backward, then infill of whatever is left.
void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
// An address table compresses best when each index is associated with an
// address that is slightly larger than the previous index.

// First see which indexes have not been used. The 'available' vector could
// grow even bigger, but the number of addresses is a better starting size
// than empty.
std::vector<bool> available(labels->size(), true);
int used = 0;

for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
int index = p->second->index_;
if (index != Label::kNoIndex) {
// A pre-assigned index may lie beyond the current size of |available|;
// grow the vector until the index is addressable.
while (static_cast<size_t>(index) >= available.size())
available.push_back(true);
available.at(index) = false;
++used;
}
}

VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";

// Are there any unused labels that happen to be adjacent following a used
// label? If so, try to give them the index right after the preceding label's
// index. When there is no preceding label, or it is itself unassigned, the
// candidate index is 0.
int fill_forward_count = 0;
Label* prev = 0;
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
Label* current = p->second;
if (current->index_ == Label::kNoIndex) {
int index = 0;
if (prev && prev->index_ != Label::kNoIndex)
index = prev->index_ + 1;
// Only take the candidate if it is in range and still free.
if (index < static_cast<int>(available.size()) && available.at(index)) {
current->index_ = index;
available.at(index) = false;
++fill_forward_count;
}
}
prev = current;
}

// Are there any unused labels that happen to be adjacent preceding a used
// label? This pass iterates in reverse, so |prev| is the label that follows
// |current| in forward order; the candidate is that label's index minus one.
int fill_backward_count = 0;
prev = 0;
for (RVAToLabel::reverse_iterator p = labels->rbegin();
p != labels->rend();
++p) {
Label* current = p->second;
if (current->index_ == Label::kNoIndex) {
int prev_index;
if (prev)
prev_index = prev->index_;
else
// No following label: use one past the end so the candidate becomes the
// last slot of |available|.
prev_index = static_cast<uint32_t>(available.size());
// Skip when the candidate would be negative, when the following label is
// itself still unassigned, or when the candidate slot is already taken.
if (prev_index != 0 &&
prev_index != Label::kNoIndex &&
available.at(prev_index - 1)) {
current->index_ = prev_index - 1;
available.at(current->index_) = false;
++fill_backward_count;
}
}
prev = current;
}

// Fill in any remaining indexes. |index| only ever advances, so each slot of
// |available| is examined at most once across the whole pass.
int fill_infill_count = 0;
int index = 0;
for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
Label* current = p->second;
if (current->index_ == Label::kNoIndex) {
// Advance to the lowest free slot at or after |index|.
while (!available.at(index)) {
++index;
}
current->index_ = index;
available.at(index) = false;
++index;
++fill_infill_count;
}
}

VLOG(1) << " fill forward " << fill_forward_count
<< " backward " << fill_backward_count
<< " infill " << fill_infill_count;
}

std::unique_ptr<EncodedProgram> AssemblyProgram::Encode() const {
std::unique_ptr<EncodedProgram> encoded(new EncodedProgram());

Expand Down
21 changes: 6 additions & 15 deletions courgette/assembly_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@

#include <map>
#include <memory>
#include <set>
#include <vector>

#include "base/bind.h"
#include "base/callback_forward.h"
#include "base/macros.h"
#include "base/memory/free_deleter.h"
#include "courgette/courgette.h"
Expand Down Expand Up @@ -132,13 +130,11 @@ class AssemblyProgram {
using InstructionGenerator =
base::Callback<CheckBool(AssemblyProgram*, InstructionReceptor*)>;

explicit AssemblyProgram(ExecutableType kind);
AssemblyProgram(ExecutableType kind, uint64_t image_base);
~AssemblyProgram();

ExecutableType kind() const { return kind_; }

void set_image_base(uint64_t image_base) { image_base_ = image_base; }

// Traverses RVAs in |abs32_visitor| and |rel32_visitor| to precompute Labels.
void PrecomputeLabels(RvaVisitor* abs32_visitor, RvaVisitor* rel32_visitor);

Expand Down Expand Up @@ -212,8 +208,6 @@ class AssemblyProgram {
using ScopedInstruction =
std::unique_ptr<Instruction, UncheckedDeleter<Instruction>>;

ExecutableType kind_;

CheckBool Emit(ScopedInstruction instruction) WARN_UNUSED_RESULT;
CheckBool EmitShared(Instruction* instruction) WARN_UNUSED_RESULT;

Expand All @@ -222,16 +216,13 @@ class AssemblyProgram {
// Looks up a label or creates a new one. Might return NULL.
Label* FindLabel(RVA rva, RVAToLabel* labels);

// Helper methods for the public versions.
static void UnassignIndexes(RVAToLabel* labels);
static void DefaultAssignIndexes(RVAToLabel* labels);
static void AssignRemainingIndexes(RVAToLabel* labels);

// Sharing instructions that emit a single byte saves a lot of space.
Instruction* GetByteInstruction(uint8_t byte);
std::unique_ptr<Instruction* [], base::FreeDeleter> byte_instruction_cache_;

uint64_t image_base_; // Desired or mandated base address of image.
const ExecutableType kind_;
const uint64_t image_base_; // Desired or mandated base address of image.

std::unique_ptr<Instruction* [], base::FreeDeleter> byte_instruction_cache_;

InstructionVector instructions_; // All the instructions in program.

Expand Down
11 changes: 7 additions & 4 deletions courgette/disassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#ifndef COURGETTE_DISASSEMBLER_H_
#define COURGETTE_DISASSEMBLER_H_

#include <stddef.h>
#include <stdint.h>

#include <vector>
Expand Down Expand Up @@ -55,14 +54,18 @@ class Disassembler : public AddressTranslator {
virtual ~Disassembler();

// AddressTranslator interfaces.
virtual RVA FileOffsetToRVA(FileOffset file_offset) const override = 0;
virtual FileOffset RVAToFileOffset(RVA rva) const override = 0;
RVA FileOffsetToRVA(FileOffset file_offset) const override = 0;
FileOffset RVAToFileOffset(RVA rva) const override = 0;
const uint8_t* FileOffsetToPointer(FileOffset file_offset) const override;
const uint8_t* RVAToPointer(RVA rva) const override;
RVA PointerToTargetRVA(const uint8_t* p) const = 0;
RVA PointerToTargetRVA(const uint8_t* p) const override = 0;

virtual ExecutableType kind() const = 0;

// Returns the preferred image base address. Using uint64_t to accommodate the
// general case of 64-bit architectures.
virtual uint64_t image_base() const = 0;

// Returns a caller-owned new RvaVisitor to iterate through abs32 target RVAs.
virtual RvaVisitor* CreateAbs32TargetRvaVisitor() = 0;

Expand Down
14 changes: 5 additions & 9 deletions courgette/disassembler_elf_32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

#include <algorithm>
#include <iterator>
#include <utility>

#include "base/bind.h"
#include "base/logging.h"
Expand Down Expand Up @@ -164,23 +163,20 @@ bool DisassemblerElf32::ParseHeader() {
return Good();
}

bool DisassemblerElf32::Disassemble(AssemblyProgram* target) {
bool DisassemblerElf32::Disassemble(AssemblyProgram* program) {
if (!ok())
return false;

// The Image Base is always 0 for ELF Executables
target->set_image_base(0);

if (!ParseAbs32Relocs())
return false;

if (!ParseRel32RelocsFromSections()) // Does not sort rel32 locations.
return false;

PrecomputeLabels(target);
RemoveUnusedRel32Locations(target);
PrecomputeLabels(program);
RemoveUnusedRel32Locations(program);

if (!target->GenerateInstructions(
if (!program->GenerateInstructions(
base::Bind(&DisassemblerElf32::ParseFile, base::Unretained(this)))) {
return false;
}
Expand All @@ -192,7 +188,7 @@ bool DisassemblerElf32::Disassemble(AssemblyProgram* target) {
DCHECK(rel32_locations_.empty() ||
rel32_locations_.back()->rva() != kUnassignedRVA);

target->DefaultAssignIndexes();
program->DefaultAssignIndexes();
return true;
}

Expand Down
3 changes: 2 additions & 1 deletion courgette/disassembler_elf_32.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ class DisassemblerElf32 : public Disassembler {
FileOffset RVAToFileOffset(RVA rva) const override;
RVA PointerToTargetRVA(const uint8_t* p) const override;
ExecutableType kind() const override = 0;
uint64_t image_base() const override { return 0; }
bool ParseHeader() override;
bool Disassemble(AssemblyProgram* target) override;
bool Disassemble(AssemblyProgram* program) override;

virtual e_machine_values ElfEM() const = 0;

Expand Down
8 changes: 4 additions & 4 deletions courgette/disassembler_elf_32_x86_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <string>
#include <vector>

#include "base/memory/ptr_util.h"
#include "courgette/assembly_program.h"
#include "courgette/base_test_unittest.h"
#include "courgette/image_utils.h"
Expand Down Expand Up @@ -69,9 +70,8 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name,
size_t expected_rel_count) const {
std::string file1 = FileContents(file_name);

std::unique_ptr<TestDisassemblerElf32X86> disassembler(
new TestDisassemblerElf32X86(
reinterpret_cast<const uint8_t*>(file1.c_str()), file1.length()));
auto disassembler = base::MakeUnique<TestDisassemblerElf32X86>(
reinterpret_cast<const uint8_t*>(file1.c_str()), file1.length());

bool can_parse_header = disassembler->ParseHeader();
EXPECT_TRUE(can_parse_header);
Expand All @@ -89,7 +89,7 @@ void DisassemblerElf32X86Test::TestExe(const char* file_name,
EXPECT_EQ('L', offset_p[2]);
EXPECT_EQ('F', offset_p[3]);

std::unique_ptr<AssemblyProgram> program(new AssemblyProgram(EXE_ELF_32_X86));
auto program = base::MakeUnique<AssemblyProgram>(EXE_ELF_32_X86, 0);

EXPECT_TRUE(disassembler->Disassemble(program.get()));

Expand Down
15 changes: 5 additions & 10 deletions courgette/disassembler_win32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@

#include "courgette/disassembler_win32.h"

#include <stddef.h>
#include <stdint.h>

#include <algorithm>

#include "base/bind.h"
Expand Down Expand Up @@ -224,26 +221,24 @@ bool DisassemblerWin32::ParseHeader() {
return Good();
}

bool DisassemblerWin32::Disassemble(AssemblyProgram* target) {
bool DisassemblerWin32::Disassemble(AssemblyProgram* program) {
if (!ok())
return false;

target->set_image_base(image_base());

if (!ParseAbs32Relocs())
return false;

ParseRel32RelocsFromSections();

PrecomputeLabels(target);
RemoveUnusedRel32Locations(target);
PrecomputeLabels(program);
RemoveUnusedRel32Locations(program);

if (!target->GenerateInstructions(
if (!program->GenerateInstructions(
base::Bind(&DisassemblerWin32::ParseFile, base::Unretained(this)))) {
return false;
}

target->DefaultAssignIndexes();
program->DefaultAssignIndexes();
return true;
}

Expand Down
Loading

0 comments on commit 8563622

Please sign in to comment.