Skip to content

Commit

Permalink
[Courgette] Store delta encoding as signed int32.
Browse files Browse the repository at this point in the history
Because AdjustmentMethod permutes Rel32Address items, the difference between
successive items may be negative. Previously Courgette stored delta
vectors as uint32, so each negative value could take 5 bytes.

This CL changes the storage of delta vectors to signed int32. An experiment
(goo.gl/vvVADx) shows a total diff file size reduction of ~3%, and a zipped
diff file size reduction of ~1.5%.

We apply the same change for Abs32Address items for consistency.

Review URL: https://codereview.chromium.org/1328703002

Cr-Commit-Position: refs/heads/master@{#348039}
  • Loading branch information
samuelhuang authored and Commit bot committed Sep 9, 2015
1 parent cf3b020 commit 8099724
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 55 deletions.
8 changes: 4 additions & 4 deletions courgette/encode_decode_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,20 +70,20 @@ void EncodeDecodeTest::TestAssembleToStreamDisassemble(

// Runs TestAssembleToStreamDisassemble on the contents of setup1.exe.
// NOTE(review): the second argument is presumably the expected size of the
// encoded output in bytes — confirm against the helper's definition.
TEST_F(EncodeDecodeTest, PE) {
std::string file = FileContents("setup1.exe");
TestAssembleToStreamDisassemble(file, 971851);
TestAssembleToStreamDisassemble(file, 972845);
}

// Runs TestAssembleToStreamDisassemble on the contents of chrome64_1.exe.
// NOTE(review): the second argument is presumably the expected size of the
// encoded output in bytes — confirm against the helper's definition.
TEST_F(EncodeDecodeTest, PE64) {
std::string file = FileContents("chrome64_1.exe");
TestAssembleToStreamDisassemble(file, 808845);
TestAssembleToStreamDisassemble(file, 809635);
}

// Runs TestAssembleToStreamDisassemble on the contents of elf-32-1.
// NOTE(review): the second argument is presumably the expected size of the
// encoded output in bytes — confirm against the helper's definition.
TEST_F(EncodeDecodeTest, Elf_Small) {
std::string file = FileContents("elf-32-1");
TestAssembleToStreamDisassemble(file, 135989);
TestAssembleToStreamDisassemble(file, 136218);
}

// Runs TestAssembleToStreamDisassemble on the contents of elf-32-high-bss.
// NOTE(review): the second argument is presumably the expected size of the
// encoded output in bytes — confirm against the helper's definition.
TEST_F(EncodeDecodeTest, Elf_HighBSS) {
std::string file = FileContents("elf-32-high-bss");
TestAssembleToStreamDisassemble(file, 7309);
TestAssembleToStreamDisassemble(file, 7312);
}
47 changes: 16 additions & 31 deletions courgette/encoded_program.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,6 @@

namespace courgette {

// Stream indexes.
// Each constant selects one stream of a stream set via stream(index), e.g.
// streams->stream(kStreamAbs32Addresses).
const int kStreamMisc = 0;
const int kStreamOps = 1;
const int kStreamBytes = 2;
const int kStreamAbs32Indexes = 3;
const int kStreamRel32Indexes = 4;
const int kStreamAbs32Addresses = 5;
const int kStreamRel32Addresses = 6;
const int kStreamCopyCounts = 7;
// Origin addresses share the misc stream rather than having their own index.
const int kStreamOriginAddresses = kStreamMisc;

// NOTE(review): only indexes 0-7 are named above; presumably this is the
// number of streams in a set, leaving index 8 unused — confirm.
const int kStreamLimit = 9;

// Constructor is here rather than in the header. Although the constructor
// appears to do nothing, it is in fact quite large because of the implicit
// calls to field constructors. Ditto for the destructor.
Expand Down Expand Up @@ -72,23 +59,23 @@ bool ReadVector(V* items, SourceStream* buffer) {
return ok;
}

// Serializes a vector, using delta coding followed by Varint32 coding.
// Serializes a vector, using delta coding followed by Varint32Signed coding.
// The element count is written first, then one delta per element; the first
// delta is taken relative to 0. Writing stops at the first failed write and
// the resulting status is returned.
template<typename V>
CheckBool WriteU32Delta(const V& set, SinkStream* buffer) {
CheckBool WriteSigned32Delta(const V& set, SinkStream* buffer) {
size_t count = set.size();
bool ok = buffer->WriteSizeVarint32(count);
uint32 prev = 0;
for (size_t i = 0; ok && i < count; ++i) {
for (size_t i = 0; ok && i < count; ++i) {
uint32 current = set[i];
uint32 delta = current - prev;
ok = buffer->WriteVarint32(delta);
// The subtraction is performed on uint32 operands and the result is then
// converted to int32, so a decreasing sequence produces a negative delta
// instead of a large unsigned value.
int32 delta = current - prev;
ok = buffer->WriteVarint32Signed(delta);
prev = current;
}
return ok;
}

template <typename V>
static CheckBool ReadU32Delta(V* set, SourceStream* buffer) {
static CheckBool ReadSigned32Delta(V* set, SourceStream* buffer) {
uint32 count;

if (!buffer->ReadVarint32(&count))
Expand All @@ -97,17 +84,15 @@ static CheckBool ReadU32Delta(V* set, SourceStream* buffer) {
set->clear();
bool ok = set->reserve(count);
uint32 prev = 0;

for (size_t i = 0; ok && i < count; ++i) {
uint32 delta;
ok = buffer->ReadVarint32(&delta);
for (size_t i = 0; ok && i < count; ++i) {
int32 delta;
ok = buffer->ReadVarint32Signed(&delta);
if (ok) {
uint32 current = prev + delta;
uint32 current = static_cast<uint32>(prev + delta);
ok = set->push_back(current);
prev = current;
}
}

return ok;
}

Expand Down Expand Up @@ -333,13 +318,13 @@ CheckBool EncodedProgram::WriteTo(SinkStreamSet* streams) {
bool success = true;

if (select & INCLUDE_ABS32_ADDRESSES) {
success &= WriteU32Delta(abs32_rva_,
streams->stream(kStreamAbs32Addresses));
success &= WriteSigned32Delta(abs32_rva_,
streams->stream(kStreamAbs32Addresses));
}

if (select & INCLUDE_REL32_ADDRESSES) {
success &= WriteU32Delta(rel32_rva_,
streams->stream(kStreamRel32Addresses));
success &= WriteSigned32Delta(rel32_rva_,
streams->stream(kStreamRel32Addresses));
}

if (select & INCLUDE_MISC)
Expand Down Expand Up @@ -376,9 +361,9 @@ bool EncodedProgram::ReadFrom(SourceStreamSet* streams) {
}
image_base_ = (static_cast<uint64>(high) << 32) | low;

if (!ReadU32Delta(&abs32_rva_, streams->stream(kStreamAbs32Addresses)))
if (!ReadSigned32Delta(&abs32_rva_, streams->stream(kStreamAbs32Addresses)))
return false;
if (!ReadU32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses)))
if (!ReadSigned32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses)))
return false;
if (!ReadVector(&origins_, streams->stream(kStreamOriginAddresses)))
return false;
Expand Down
13 changes: 13 additions & 0 deletions courgette/encoded_program.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,19 @@

namespace courgette {

// Stream indexes.
// Each constant selects one stream of a stream set via stream(index), e.g.
// streams->stream(kStreamAbs32Addresses).
const int kStreamMisc = 0;
const int kStreamOps = 1;
const int kStreamBytes = 2;
const int kStreamAbs32Indexes = 3;
const int kStreamRel32Indexes = 4;
const int kStreamAbs32Addresses = 5;
const int kStreamRel32Addresses = 6;
const int kStreamCopyCounts = 7;
// Origin addresses share the misc stream rather than having their own index.
const int kStreamOriginAddresses = kStreamMisc;

// NOTE(review): only indexes 0-7 are named above; presumably this is the
// number of streams in a set, leaving index 8 unused — confirm.
const int kStreamLimit = 9;

class SinkStream;
class SinkStreamSet;
class SourceStreamSet;
Expand Down
139 changes: 119 additions & 20 deletions courgette/encoded_program_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,78 @@

#include "courgette/encoded_program.h"

#include "base/macros.h"
#include "base/memory/scoped_ptr.h"
#include "courgette/disassembler.h"
#include "courgette/streams.h"
#include "testing/gtest/include/gtest/gtest.h"

TEST(EncodedProgramTest, Test) {
//
// Create a simple program with a few addresses and references and
// check that the bits produced are as expected.
//
courgette::EncodedProgram* program = new courgette::EncodedProgram();
namespace {

using courgette::EncodedProgram;

// One label definition for a test program. Written as {index, rva} in the
// aggregate initializers of the tests, so the field order matters.
struct AddressSpec {
// Label index passed to DefineAbs32Label() / DefineRel32Label().
int32 index;
// Address assigned to the label, as an offset from the image base.
courgette::RVA rva;
};

// Creates a simple new program with given addresses. The orders of elements
// in |abs32_specs| and |rel32_specs| are important.
// The image base is fixed at 0x00900000. All abs32 labels are defined first,
// then all rel32 labels, each in array order. After EndLabels() the program
// receives AddOrigin(0), then one AddAbs32() per abs32 spec and one AddRel32()
// per rel32 spec, again in array order. Ownership of the program is returned
// to the caller through the scoped_ptr.
scoped_ptr<EncodedProgram> CreateTestProgram(AddressSpec* abs32_specs,
size_t num_abs32_specs,
AddressSpec* rel32_specs,
size_t num_rel32_specs) {
scoped_ptr<EncodedProgram> program(new EncodedProgram());

uint32 base = 0x00900000;
program->set_image_base(base);

EXPECT_TRUE(program->DefineRel32Label(5, 0)); // REL32 index 5 == base + 0
EXPECT_TRUE(program->DefineAbs32Label(7, 4)); // ABS32 index 7 == base + 4
for (size_t i = 0; i < num_abs32_specs; ++i) {
EXPECT_TRUE(program->DefineAbs32Label(abs32_specs[i].index,
abs32_specs[i].rva));
}
for (size_t i = 0; i < num_rel32_specs; ++i) {
EXPECT_TRUE(program->DefineRel32Label(rel32_specs[i].index,
rel32_specs[i].rva));
}
program->EndLabels();

EXPECT_TRUE(program->AddOrigin(0)); // Start at base.
EXPECT_TRUE(program->AddAbs32(7));
EXPECT_TRUE(program->AddRel32(5));
// Emit the references in the same order the specs were given.
for (size_t i = 0; i < num_abs32_specs; ++i)
EXPECT_TRUE(program->AddAbs32(abs32_specs[i].index));
for (size_t i = 0; i < num_rel32_specs; ++i)
EXPECT_TRUE(program->AddRel32(rel32_specs[i].index));
return program;
}

// Returns true iff |ss| currently holds exactly |num_expected| bytes and those
// bytes are identical to the contents of |expected|.
bool CompareSink(const uint8 expected[],
                 size_t num_expected,
                 courgette::SinkStream* ss) {
  const size_t actual_length = ss->Length();
  // Only look at the sink's buffer once the lengths are known to agree.
  const bool same_length = (actual_length == num_expected);
  return same_length && memcmp(expected, ss->Buffer(), actual_length) == 0;
}

} // namespace

// Create a simple program with a few addresses and references and
// check that the bits produced are as expected.
TEST(EncodedProgramTest, Test) {
// ABS32 index 7 == base + 4.
AddressSpec abs32_specs[] = {{7, 4}};
// REL32 index 5 == base + 0.
AddressSpec rel32_specs[] = {{5, 0}};
scoped_ptr<EncodedProgram> program(
CreateTestProgram(abs32_specs, arraysize(abs32_specs),
rel32_specs, arraysize(rel32_specs)));

// Serialize and deserialize.

courgette::SinkStreamSet sinks;
EXPECT_TRUE(program->WriteTo(&sinks));
delete program;
program.reset();

courgette::SinkStream sink;
bool can_collect = sinks.CopyTo(&sink);
Expand All @@ -42,25 +88,78 @@ TEST(EncodedProgramTest, Test) {
bool can_get_source_streams = sources.Init(buffer, length);
EXPECT_TRUE(can_get_source_streams);

courgette::EncodedProgram *encoded2 = new courgette::EncodedProgram();
scoped_ptr<EncodedProgram> encoded2(new EncodedProgram());
bool can_read = encoded2->ReadFrom(&sources);
EXPECT_TRUE(can_read);

// Finally, try to assemble.
courgette::SinkStream assembled;
bool can_assemble = encoded2->AssembleTo(&assembled);
EXPECT_TRUE(can_assemble);
delete encoded2;

const void* assembled_buffer = assembled.Buffer();
size_t assembled_length = assembled.Length();
encoded2.reset();

EXPECT_EQ(8U, assembled_length);

static const uint8 golden[] = {
const uint8 golden[] = {
0x04, 0x00, 0x90, 0x00, // ABS32 to base + 4
0xF8, 0xFF, 0xFF, 0xFF // REL32 from next line to base + 2
};
EXPECT_TRUE(CompareSink(golden, arraysize(golden), &assembled));
}

// A larger test with multiple addresses. We encode the program and check the
// contents of the address streams.
// Every golden stream below begins with its element count, followed by the
// encoded elements.
TEST(EncodedProgramTest, TestWriteAddress) {
// Absolute addresses by index: [_, _, _, 2, _, 23, _, 11].
AddressSpec abs32_specs[] = {{7, 11}, {3, 2}, {5, 23}};
// Relative addresses by index: [16, 7, _, 32].
AddressSpec rel32_specs[] = {{0, 16}, {3, 32}, {1, 7}};
scoped_ptr<EncodedProgram> program(
CreateTestProgram(abs32_specs, arraysize(abs32_specs),
rel32_specs, arraysize(rel32_specs)));

courgette::SinkStreamSet sinks;
EXPECT_TRUE(program->WriteTo(&sinks));
program.reset();

EXPECT_EQ(0, memcmp(assembled_buffer, golden, 8));
// Check addresses in sinks.
const uint8 golden_abs32_indexes[] = {
0x03, 0x07, 0x03, 0x05 // 3 indexes: [7, 3, 5].
};
EXPECT_TRUE(CompareSink(golden_abs32_indexes,
arraysize(golden_abs32_indexes),
sinks.stream(courgette::kStreamAbs32Indexes)));

const uint8 golden_rel32_indexes[] = {
0x03, 0x00, 0x03, 0x01 // 3 indexes: [0, 3, 1].
};
EXPECT_TRUE(CompareSink(golden_rel32_indexes,
arraysize(golden_rel32_indexes),
sinks.stream(courgette::kStreamRel32Indexes)));

// How to read the derivation tables below:
//   "Padded" fills each undefined index with the preceding address (0 at the
//   start); "Delta" is each padded entry minus the previous one;
//   "Complement neg" replaces a negative delta d by ~d (shown in parentheses);
//   "Varint32 Signed" is 2 * d for d >= 0 and 2 * ~d + 1 for d < 0, so every
//   delta in this test fits in a single byte.
// Addresses: [_, _, _, 2, _, 23, _, 11].
// Padded: [0, 0, 0, 2, 2, 23, 23, 11].
// Delta: [0, 0, 0, 2, 0, 21, 0, -12].
// Hex: [0, 0, 0, 0x02, 0, 0x15, 0, 0xFFFFFFF4].
// Complement neg: [0, 0, 0, 0x02, 0, 0x15, 0, (0x0B)].
// Varint32 Signed: [0, 0, 0, 0x04, 0, 0x2A, 0, 0x17].
const uint8 golden_abs32_addresses[] = {
0x08, // 8 address deltas.
0x00, 0x00, 0x00, 0x04, 0x00, 0x2A, 0x00, 0x17,
};
EXPECT_TRUE(CompareSink(golden_abs32_addresses,
arraysize(golden_abs32_addresses),
sinks.stream(courgette::kStreamAbs32Addresses)));

// Same derivation as for the abs32 addresses above.
// Addresses: [16, 7, _, 32].
// Padded: [16, 7, 7, 32].
// Delta: [16, -9, 0, 25].
// Hex: [0x10, 0xFFFFFFF7, 0, 0x19].
// Complement Neg: [0x10, (0x08), 0, 0x19].
// Varint32 Signed: [0x20, 0x11, 0, 0x32].
const uint8 golden_rel32_addresses[] = {
0x04, // 4 address deltas.
0x20, 0x11, 0x00, 0x32,
};
EXPECT_TRUE(CompareSink(golden_rel32_addresses,
arraysize(golden_rel32_addresses),
sinks.stream(courgette::kStreamRel32Addresses)));
}

0 comments on commit 8099724

Please sign in to comment.