Skip to content

Commit

Permalink
Refactor base::FuzzedDataProvider and fix the calling sites. The main…
Browse files Browse the repository at this point in the history
… goals:

1) Avoid using std::string as a container for non-string data. The problem
   is that the underlying std::string buffer is bigger than the data we put
   inside (at least by 1 byte (null terminator), and might be even bigger).
   This may hide buffer overflow errors from ASan.

2) Make FuzzedDataProvider portable (remove //base dependency).

3) Make the types it returns more explicit (e.g. `int32_t` instead of `int`).

Bug: 907103, 906080
Change-Id: Ibe1cd5ef6cb72140459a8ba3ac301f8c2bef48b9
Reviewed-on: https://chromium-review.googlesource.com/c/1344993
Commit-Queue: Max Moroz <mmoroz@chromium.org>
Reviewed-by: Cait Phillips <caitkp@chromium.org>
Reviewed-by: Nico Weber <thakis@chromium.org>
Reviewed-by: Matt Menke <mmenke@chromium.org>
Reviewed-by: Oliver Chang <ochang@chromium.org>
Cr-Commit-Position: refs/heads/master@{#610236}
  • Loading branch information
Dor1s authored and Commit Bot committed Nov 21, 2018
1 parent 9cc9723 commit c416f80
Show file tree
Hide file tree
Showing 21 changed files with 256 additions and 175 deletions.
1 change: 0 additions & 1 deletion base/test/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ static_library("test_support") {
"android/url_utils.h",
"bind_test_util.h",
"copy_only_int.h",
"fuzzed_data_provider.cc",
"fuzzed_data_provider.h",
"gtest_util.cc",
"gtest_util.h",
Expand Down
98 changes: 0 additions & 98 deletions base/test/fuzzed_data_provider.cc

This file was deleted.

191 changes: 162 additions & 29 deletions base/test/fuzzed_data_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,74 +5,207 @@
#ifndef BASE_TEST_FUZZED_DATA_PROVIDER_H_
#define BASE_TEST_FUZZED_DATA_PROVIDER_H_

#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <limits>
#include <string>
#include <utility>
#include <vector>

#include "base/base_export.h"
#include "base/macros.h"
#include "base/strings/string_piece.h"
// A single-header library providing an utility class to break up an array of
// bytes (supposedly provided by a fuzzing engine) for multiple consumers.
// Whenever run on the same input, provides the same output, as long as its
// methods are called in the same order, with the same arguments.

// TODO(mmoroz): move this out of //base as it should be an independent library.
namespace base {

// Utility class to break up fuzzer input for multiple consumers. Whenever run
// on the same input, provides the same output, as long as its methods are
// called in the same order, with the same arguments.
class FuzzedDataProvider {
public:
typedef uint8_t data_type;

// |data| is an array of length |size| that the FuzzedDataProvider wraps to
// provide more granular access. |data| must outlive the FuzzedDataProvider.
FuzzedDataProvider(const uint8_t* data, size_t size);
~FuzzedDataProvider();
FuzzedDataProvider(const uint8_t* data, size_t size)
: data_ptr_(data), remaining_bytes_(size) {}
~FuzzedDataProvider() = default;

// Returns a std::vector containing |num_bytes| of input data. If fewer than
// |num_bytes| of data remain, returns a shorter std::vector containing all
// of the data that's left.
template <typename T = data_type>
std::vector<T> ConsumeBytes(size_t num_bytes) {
static_assert(sizeof(T) == sizeof(data_type), "Incompatible data type.");

num_bytes = std::min(num_bytes, remaining_bytes_);

// The point of using the size-based constructor below is to increase the
// odds of having a vector object with capacity being equal to the length.
// That part is always implementation specific, but at least both libc++ and
// libstdc++ allocate the requested number of bytes in that constructor,
// which seems to be a natual choice for other implementations as well.
// To increase the odds even more, we also call |shrink_to_fit| below.
std::vector<T> result(num_bytes);
std::memcpy(result.data(), data_ptr_, num_bytes);
Advance(num_bytes);

// Even though |shrink_to_fit| is also implementation specific, we expect it
// to provide an additional assurance in case vector's constructor allocated
// a buffer which is larger than the actual amount of data we put inside it.
result.shrink_to_fit();
return result;
}

// Prefer using |ConsumeBytes| unless you actually need a std::string object.
// Returns a std::string containing |num_bytes| of input data. If fewer than
// |num_bytes| of data remain, returns a shorter std::string containing all
// of the data that's left.
std::string ConsumeBytes(size_t num_bytes);
std::string ConsumeBytesAsString(size_t num_bytes) {
static_assert(sizeof(std::string::value_type) == sizeof(data_type),
"ConsumeBytesAsString cannot convert the data to a string.");

num_bytes = std::min(num_bytes, remaining_bytes_);
std::string result(
reinterpret_cast<const std::string::value_type*>(data_ptr_), num_bytes);
Advance(num_bytes);
return result;
}

// Returns a std::string containing all remaining bytes of the input data.
std::string ConsumeRemainingBytes();
// Returns a number in the range [min, max] by consuming bytes from the input
// data. The value might not be uniformly distributed in the given range. If
// there's no input data left, always returns |min|. |min| must be less than
// or equal to |max|.
template <typename T>
T ConsumeIntegralInRange(T min, T max) {
static_assert(std::is_integral<T>::value, "An integral type is required.");
static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");

if (min > max)
abort();

// Use the biggest type possible to hold the range and the result.
uint64_t range = max - min;
uint64_t result = 0;
size_t offset = 0;

while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
remaining_bytes_ != 0) {
// Pull bytes off the end of the seed data. Experimentally, this seems to
// allow the fuzzer to more easily explore the input space. This makes
// sense, since it works by modifying inputs that caused new code to run,
// and this data is often used to encode length of data read by
// |ConsumeBytes|. Separating out read lengths makes it easier modify the
// contents of the data that is actually read.
--remaining_bytes_;
result = (result << CHAR_BIT) | data_ptr_[remaining_bytes_];
offset += CHAR_BIT;
}

// Avoid division by 0, in the case |range + 1| results in overflow.
if (range != std::numeric_limits<decltype(range)>::max())
result = result % (range + 1);

return min + static_cast<T>(result);
}

// Returns a std::string of length from 0 to |max_length|. When it runs out of
// input data, returns what remains of the input. Designed to be more stable
// with respect to a fuzzer inserting characters than just picking a random
// length and then consuming that many bytes with ConsumeBytes().
std::string ConsumeRandomLengthString(size_t max_length);
// length and then consuming that many bytes with |ConsumeBytes|.
std::string ConsumeRandomLengthString(size_t max_length) {
// Reads bytes from the start of |data_ptr_|. Maps "\\" to "\", and maps "\"
// followed by anything else to the end of the string. As a result of this
// logic, a fuzzer can insert characters into the string, and the string
// will be lengthened to include those new characters, resulting in a more
// stable fuzzer than picking the length of a string independently from
// picking its contents.
std::string result;
for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
char next = static_cast<char>(data_ptr_[0]);
Advance(1);
if (next == '\\' && remaining_bytes_ != 0) {
next = static_cast<char>(data_ptr_[0]);
Advance(1);
if (next != '\\')
return result;
}
result += next;
}

result.shrink_to_fit();
return result;
}

// Returns a number in the range [min, max] by consuming bytes from the input
// data. The value might not be uniformly distributed in the given range. If
// there's no input data left, always returns |min|. |min| must be less than
// or equal to |max|.
uint32_t ConsumeUint32InRange(uint32_t min, uint32_t max);
int ConsumeInt32InRange(int min, int max);
// Returns a std::vector containing all remaining bytes of the input data.
template <typename T = data_type>
std::vector<T> ConsumeRemainingBytes() {
return ConsumeBytes<T>(remaining_bytes_);
}

// Prefer using |ConsumeRemainingBytes| unless you actually need a std::string
// object.
// Returns a std::vector containing all remaining bytes of the input data.
std::string ConsumeRemainingBytesAsString() {
return ConsumeBytesAsString(remaining_bytes_);
}

// TODO(mmoroz): consider deprecating these methods.
uint32_t ConsumeUint32InRange(uint32_t min, uint32_t max) {
return ConsumeIntegralInRange(min, max);
}

int32_t ConsumeInt32InRange(int32_t min, int32_t max) {
return ConsumeIntegralInRange(min, max);
}

// Returns a bool, or false when no data remains.
bool ConsumeBool();
int ConsumeIntInRange(int min, int max) {
return ConsumeIntegralInRange(min, max);
}

// Reads one byte and returns a bool, or false when no data remains.
bool ConsumeBool() { return 1 & ConsumeUint8(); }

// Returns a uint8_t from the input or 0 if nothing remains. This is
// equivalent to ConsumeUint32InRange(0, 0xFF).
uint8_t ConsumeUint8();
uint8_t ConsumeUint8() {
return ConsumeIntegralInRange(std::numeric_limits<uint8_t>::min(),
std::numeric_limits<uint8_t>::max());
}

// Returns a uint16_t from the input. If fewer than 2 bytes of data remain
// will fill the most significant bytes with 0. This is equivalent to
// ConsumeUint32InRange(0, 0xFFFF).
uint16_t ConsumeUint16();
uint16_t ConsumeUint16() {
return ConsumeIntegralInRange(std::numeric_limits<uint16_t>::min(),
std::numeric_limits<uint16_t>::max());
}

// Returns a value from |array|, consuming as many bytes as needed to do so.
// |array| must be a fixed-size array. Equivalent to
// array[ConsumeUint32InRange(sizeof(array)-1)];
// |array| must be a fixed-size array.
template <typename Type, size_t size>
Type PickValueInArray(Type (&array)[size]) {
return array[ConsumeUint32InRange(0, size - 1)];
return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
}

// Reports the remaining bytes available for fuzzed input.
size_t remaining_bytes() { return remaining_data_.length(); }
size_t remaining_bytes() { return remaining_bytes_; }

private:
StringPiece remaining_data_;
FuzzedDataProvider(const FuzzedDataProvider&) = delete;
FuzzedDataProvider& operator=(const FuzzedDataProvider&) = delete;

void Advance(size_t num_bytes) {
if (num_bytes > remaining_bytes_)
abort();

data_ptr_ += num_bytes;
remaining_bytes_ -= num_bytes;
}

DISALLOW_COPY_AND_ASSIGN(FuzzedDataProvider);
const data_type* data_ptr_;
size_t remaining_bytes_;
};

} // namespace base
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@

#include "components/subresource_filter/core/common/indexed_ruleset.h"

#include <stddef.h>
#include <stdint.h>

#include <string>
#include <vector>

#include "base/at_exit.h"
#include "base/i18n/icu_util.h"
Expand Down Expand Up @@ -37,7 +41,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
if (!url_to_check.is_valid())
return 0;

const std::string remaining_bytes = fuzzed_data.ConsumeRemainingBytes();
std::vector<uint8_t> remaining_bytes = fuzzed_data.ConsumeRemainingBytes();

// First, interpret the remaining fuzzed data as an unindexed ruleset.
google::protobuf::io::ArrayInputStream input_stream(remaining_bytes.data(),
Expand Down
7 changes: 5 additions & 2 deletions components/viz/host/hit_test/hit_test_query_fuzzer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <stdint.h>
#include <string.h>

#include <vector>

#include "base/command_line.h"
#include "base/test/fuzzed_data_provider.h"
#include "components/viz/host/hit_test/hit_test_query.h"
Expand Down Expand Up @@ -37,8 +39,9 @@ void AddHitTestRegion(base::FuzzedDataProvider* fuzz,
depth < kMaxDepthAllowed ? fuzz->ConsumeUint32InRange(0, 10) : 0;
gfx::Transform transform;
if (fuzz->ConsumeBool() && fuzz->remaining_bytes() >= sizeof(transform)) {
std::string matrix_bytes = fuzz->ConsumeBytes(sizeof(gfx::Transform));
memcpy(&transform, matrix_bytes.data(), sizeof(gfx::Transform));
std::vector<uint8_t> matrix_bytes =
fuzz->ConsumeBytes(sizeof(gfx::Transform));
memcpy(&transform, matrix_bytes.data(), matrix_bytes.size());
}
regions->emplace_back(frame_sink_id, flags, rect, transform, child_count,
reasons);
Expand Down
Loading

0 comments on commit c416f80

Please sign in to comment.