Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add libfuzzer compatible fuzz harness #7512

Merged
merged 11 commits into from
May 1, 2023
36 changes: 36 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
include(HalideTestHelpers)
include(CheckCXXCompilerFlag)

# Internal tests are a special case.
# HalideTestHelpers depends on this test being present.
Expand Down Expand Up @@ -61,3 +62,38 @@ else ()
endif ()

# FIXME: failing_with_issue is dead code :)

# Ensure that basic sanitizer flags are supported;
# - Address sanitizer is often used in conjunction with fuzzing as it will detect
# common high severity bugs. This sanitizer is used as a "default" for fuzzing
# when the sanitizer isn't otherwise specified.
# - Fuzzer sanitizer will link against libfuzzer and is currently only supported
# on clang/msvc and isn't supported with GCC. If you need to use these fuzzers
# with a GCC based project you should consider looking into the LIB_FUZZING_ENGINE
# env variable defined in `test/fuzz/CMakeLists.txt`.
# Probe for libFuzzer + AddressSanitizer support by compiling and linking a
# minimal fuzz target with the same flags the fuzz tests use. The result is
# stored in HAS_FUZZ_FLAGS.
#
# The CMAKE_REQUIRED_* variables are saved before the probe and restored
# afterwards so that the sanitizer flags do not leak into any later check_*
# call made in this scope.
include(CMakePushCheckState)
cmake_push_check_state()
set(CMAKE_REQUIRED_LINK_OPTIONS "-fsanitize=fuzzer,address")
set(CMAKE_REQUIRED_FLAGS "-fsanitize=fuzzer-no-link,address")
check_cxx_source_compiles([[
#include <cstddef>
#include <cstdint>
extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t *Data, std::size_t Size) {
    return 0;
}
]] HAS_FUZZ_FLAGS)
cmake_pop_check_state()

# Report the outcome of the HAS_FUZZ_FLAGS probe. These messages use the
# VERBOSE log level, so they are hidden at the default log level.
if (NOT HAS_FUZZ_FLAGS)
message(VERBOSE "Compiler does not support libfuzzer sanitizer.")
else ()
message(VERBOSE "Compiler supports libfuzzer sanitizer.")
endif ()

# WITH_TEST_FUZZ is a user-visible option (default ON) only while
# HAS_FUZZ_FLAGS is true; when the probe failed it is forced to OFF.
cmake_dependent_option(
WITH_TEST_FUZZ "Build fuzz tests" ON
HAS_FUZZ_FLAGS OFF
)

# Only descend into the fuzz subdirectory when fuzz tests are enabled, and
# always print which way the decision went.
if (WITH_TEST_FUZZ)
message(STATUS "Building fuzz tests enabled")
add_subdirectory(fuzz)
else ()
message(STATUS "Building fuzz tests disabled")
endif ()
1 change: 0 additions & 1 deletion test/correctness/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ tests(GROUPS correctness
fuzz_bounds.cpp
fuzz_cse.cpp
fuzz_float_stores.cpp
fuzz_simplify.cpp
gameoflife.cpp
gather.cpp
gpu_allocation_cache.cpp
Expand Down
26 changes: 26 additions & 0 deletions test/fuzz/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Register simplify.cpp in the "fuzz" test group via the project's tests()
# helper.
# NOTE(review): the fuzz_simplify target configured later in this file is
# presumably created by this call (group name + source name) -- confirm
# against the tests() helper definition.
tests(GROUPS fuzz
SOURCES
simplify.cpp
)


# By default we are going to use the libfuzzer engine. However if
# LIB_FUZZING_ENGINE is declared you can override the fuzzing engine to one of:
# - Centipede
# - Honggfuzz
# - AFL++
# - etc.
# Cache variable holding the link flags for an externally supplied fuzzing
# engine. Its initial value is taken from the LIB_FUZZING_ENGINE environment
# variable; because this is a non-FORCE cache entry, a value already present
# in the cache is not overwritten.
set(LIB_FUZZING_ENGINE "$ENV{LIB_FUZZING_ENGINE}"
CACHE STRING "Compiler flags necessary to link the fuzzing engine of choice e.g. libfuzzer, afl etc.")

# The fuzz target always links against Halide itself.
target_link_libraries(fuzz_simplify PRIVATE Halide::Halide)

# Allow OSS-Fuzz (or any other external driver) to manage flags directly:
# when LIB_FUZZING_ENGINE is non-empty it is passed to the link step as-is and
# no sanitizer flags are added here.
if (LIB_FUZZING_ENGINE)
target_link_libraries(fuzz_simplify PRIVATE "${LIB_FUZZING_ENGINE}")
else ()
# By default just build with AddressSanitizer + libFuzzer for local testing.
# Compilation uses fuzzer-no-link; the fuzzer runtime is added only at the
# link step via -fsanitize=fuzzer.
target_compile_options(fuzz_simplify PRIVATE -fsanitize=fuzzer-no-link,address)
target_link_options(fuzz_simplify PRIVATE -fsanitize=fuzzer,address)
endif ()

121 changes: 55 additions & 66 deletions test/correctness/fuzz_simplify.cpp → test/fuzz/simplify.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "Halide.h"
#include <array>
#include <fuzzer/FuzzedDataProvider.h>
#include <random>
#include <stdio.h>
#include <time.h>
Expand All @@ -14,76 +15,73 @@ using namespace Halide::Internal;

const int fuzz_var_count = 5;

// use std::mt19937 instead of rand() to ensure consistent behavior on all systems
std::mt19937 rng(0);

Type fuzz_types[] = {UInt(1), UInt(8), UInt(16), UInt(32), Int(8), Int(16), Int(32)};
const int fuzz_type_count = sizeof(fuzz_types) / sizeof(fuzz_types[0]);

std::string fuzz_var(int i) {
return std::string(1, 'a' + i);
}

Expr random_var() {
int fuzz_count = rng() % fuzz_var_count;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: change as per #7546

Expr random_var(FuzzedDataProvider &fdp) {
int fuzz_count = fdp.ConsumeIntegralInRange<int>(0, fuzz_var_count);
return Variable::make(Int(0), fuzz_var(fuzz_count));
}

Type random_type(int width) {
Type T = fuzz_types[rng() % fuzz_type_count];
Type random_type(FuzzedDataProvider &fdp, int width) {
Type T = fdp.PickValueInArray(fuzz_types);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hadn't noticed this call existed -- perhaps we should aggressively use it in other cases too where possible (e.g. line 47, line 102)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It won't work on line 47 as it's a vector, and even though it would have been trivial to implement, PickValueInArray doesn't work on vectors. But line 102 could definitely use that.


if (width > 1) {
T = T.with_lanes(width);
}
return T;
}

int get_random_divisor(Type t) {
int get_random_divisor(FuzzedDataProvider &fdp, Type t) {
std::vector<int> divisors = {t.lanes()};
for (int dd = 2; dd < t.lanes(); dd++) {
if (t.lanes() % dd == 0) {
divisors.push_back(dd);
}
}

return divisors[rng() % divisors.size()];
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks fine as per #7546

return divisors[fdp.ConsumeIntegralInRange<size_t>(0, divisors.size() - 1)];
}

Expr random_leaf(Type T, bool overflow_undef = false, bool imm_only = false) {
Expr random_leaf(FuzzedDataProvider &fdp, Type T, bool overflow_undef = false, bool imm_only = false) {
if (T.is_int() && T.bits() == 32) {
overflow_undef = true;
}
if (T.is_scalar()) {
int var = rng() % fuzz_var_count + 1;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be fdp.ConsumeIntegralInRange<int>(0, fuzz_var_count) as per #7546 NOTE: drop the + 1

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The semantics of fuzz_var_count aren't clear to me -- in various for loops we do for (int i = 0; i < fuzz_var_count; i++) which implies that we need to limit the range to max=fuzz_var_count-1?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah looking at this again the var variable seems to only get used once on line 56, so I could probably just get rid of it and replace the condition on line 56 with fdp.ConsumeBool.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing it was +1 in the beginning so that the condition on line 56 was sometimes true and sometimes false. In any case it seems like a really roundabout way of doing it. I'm just going to replace it with the ConsumeBool method.

int var = fdp.ConsumeIntegralInRange<int>(0, fuzz_var_count) + 1;
if (!imm_only && var < fuzz_var_count) {
auto v1 = random_var();
auto v1 = random_var(fdp);
return cast(T, v1);
} else {
if (overflow_undef) {
// For Int(32), we don't care about correctness during
// overflow, so just use numbers that are unlikely to
// overflow.
return cast(T, (int)(rng() % 256 - 128));
return cast(T, fdp.ConsumeIntegralInRange<int>(-128, 128));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be fdp.ConsumeIntegralInRange<int>(-128, 127) as per #7546

} else {
return cast(T, (int)(rng() - RAND_MAX / 2));
return cast(T, fdp.ConsumeIntegralInRange<int>(-RAND_MAX / 2, RAND_MAX / 2));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be fdp.ConsumeIntegralInRange<int>(-RAND_MAX / 2, RAND_MAX / 2 -1) as per #7546

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

...except, RAND_MAX applies to the rand() call in stdlib; does libfuzzer also make the same promise about range?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, the allowable range is going to be std::numeric_limits<int>::min() -> std::numeric_limits<int>::max(), which is what ConsumeIntegral<int> will do, but if you want to constrain it further you can use ConsumeIntegralInRange. So no, there aren't any promises other than the given range.

The FuzzedDataProvider class is just taking a stream of bytes from the fuzzer and turning them into other types, in this case an integral. The FuzzedDataProvider class just makes managing this easier, as it essentially moves a cursor along the byte stream so that you aren't consuming the same data twice.

}
}
} else {
int lanes = get_random_divisor(T);
if (rng() % 2 == 0) {
auto e1 = random_leaf(T.with_lanes(T.lanes() / lanes), overflow_undef);
auto e2 = random_leaf(T.with_lanes(T.lanes() / lanes), overflow_undef);
int lanes = get_random_divisor(fdp, T);
if (fdp.ConsumeBool()) {
auto e1 = random_leaf(fdp, T.with_lanes(T.lanes() / lanes), overflow_undef);
auto e2 = random_leaf(fdp, T.with_lanes(T.lanes() / lanes), overflow_undef);
return Ramp::make(e1, e2, lanes);
} else {
auto e1 = random_leaf(T.with_lanes(T.lanes() / lanes), overflow_undef);
auto e1 = random_leaf(fdp, T.with_lanes(T.lanes() / lanes), overflow_undef);
return Broadcast::make(e1, lanes);
}
}
}

Expr random_expr(Type T, int depth, bool overflow_undef = false);
Expr random_expr(FuzzedDataProvider &fdp, Type T, int depth, bool overflow_undef = false);

Expr random_condition(Type T, int depth, bool maybe_scalar) {
Expr random_condition(FuzzedDataProvider &fdp, Type T, int depth, bool maybe_scalar) {
typedef Expr (*make_bin_op_fn)(Expr, Expr);
static make_bin_op_fn make_bin_op[] = {
EQ::make,
Expand All @@ -95,13 +93,13 @@ Expr random_condition(Type T, int depth, bool maybe_scalar) {
};
const int op_count = sizeof(make_bin_op) / sizeof(make_bin_op[0]);

if (maybe_scalar && rng() % T.lanes() == 0) {
if (maybe_scalar && fdp.ConsumeBool()) {
T = T.element_of();
}

Expr a = random_expr(T, depth);
Expr b = random_expr(T, depth);
int op = rng() % op_count;
Expr a = random_expr(fdp, T, depth);
Expr b = random_expr(fdp, T, depth);
int op = fdp.ConsumeIntegralInRange<int>(0, op_count);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be fdp.ConsumeIntegralInRange<int>(0, op_count-1) as per #7546

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or better yet, use PickValueInArray()?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True that would be better

return make_bin_op[op](a, b);
}

Expand All @@ -111,7 +109,7 @@ Expr make_absd(Expr a, Expr b) {
return cast(a.type(), absd(a, b));
}

Expr random_expr(Type T, int depth, bool overflow_undef) {
Expr random_expr(FuzzedDataProvider &fdp, Type T, int depth, bool overflow_undef) {
typedef Expr (*make_bin_op_fn)(Expr, Expr);
static make_bin_op_fn make_bin_op[] = {
Add::make,
Expand All @@ -134,60 +132,60 @@ Expr random_expr(Type T, int depth, bool overflow_undef) {
}

if (depth-- <= 0) {
return random_leaf(T, overflow_undef);
return random_leaf(fdp, T, overflow_undef);
}

const int bin_op_count = sizeof(make_bin_op) / sizeof(make_bin_op[0]);
const int bool_bin_op_count = sizeof(make_bool_bin_op) / sizeof(make_bool_bin_op[0]);
const int op_count = bin_op_count + bool_bin_op_count + 5;

int op = rng() % op_count;
int op = fdp.ConsumeIntegralInRange<int>(0, op_count);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be fdp.ConsumeIntegralInRange<int>(0, op_count-1)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.. but looking at this now, the logic is a little fragile; the switch statement has implicit assumptions about the count and ordering of the two op arrays. What happens if someone inserts a new one? Would be nice to make this a bit more robust, e.g.:

  • Use custom enum values to associate the op functions with the switch statements?
  • Or maybe, rework make_bin_op and make_bool_bin_op arrays so that each entry is a pair<> with the existing op + a lambda function that contains the code from the relevant switch case, so that the switch goes away entirely?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I realize the fragility was pre-existing before your changes, but improving the resilience here seems easy and worthwhile while we are here)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, looking at this again a second time I'm struggling to understand what is going on here. Where did this 5 come from?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh nevermind I see what is going on here... that's a bit wack. I'll refactor as per your suggestions.

switch (op) {
case 0:
return random_leaf(T);
return random_leaf(fdp, T);
case 1: {
auto c = random_condition(T, depth, true);
auto e1 = random_expr(T, depth, overflow_undef);
auto e2 = random_expr(T, depth, overflow_undef);
auto c = random_condition(fdp, T, depth, true);
auto e1 = random_expr(fdp, T, depth, overflow_undef);
auto e2 = random_expr(fdp, T, depth, overflow_undef);
return Select::make(c, e1, e2);
}
case 2:
if (T.lanes() != 1) {
int lanes = get_random_divisor(T);
auto e1 = random_expr(T.with_lanes(T.lanes() / lanes), depth, overflow_undef);
int lanes = get_random_divisor(fdp, T);
auto e1 = random_expr(fdp, T.with_lanes(T.lanes() / lanes), depth, overflow_undef);
return Broadcast::make(e1, lanes);
}
break;
case 3:
if (T.lanes() != 1) {
int lanes = get_random_divisor(T);
auto e1 = random_expr(T.with_lanes(T.lanes() / lanes), depth, overflow_undef);
auto e2 = random_expr(T.with_lanes(T.lanes() / lanes), depth, overflow_undef);
int lanes = get_random_divisor(fdp, T);
auto e1 = random_expr(fdp, T.with_lanes(T.lanes() / lanes), depth, overflow_undef);
auto e2 = random_expr(fdp, T.with_lanes(T.lanes() / lanes), depth, overflow_undef);
return Ramp::make(e1, e2, lanes);
}
break;

case 4:
if (T.is_bool()) {
auto e1 = random_expr(T, depth);
auto e1 = random_expr(fdp, T, depth);
return Not::make(e1);
}
break;

case 5:
// When generating boolean expressions, maybe throw in a condition on non-bool types.
if (T.is_bool()) {
return random_condition(random_type(T.lanes()), depth, false);
return random_condition(fdp, random_type(fdp, T.lanes()), depth, false);
}
break;

case 6: {
// Get a random type that isn't T or int32 (int32 can overflow and we don't care about that).
Type subT;
do {
subT = random_type(T.lanes());
subT = random_type(fdp, T.lanes());
} while (subT == T || (subT.is_int() && subT.bits() == 32));
auto e1 = random_expr(subT, depth, overflow_undef);
auto e1 = random_expr(fdp, subT, depth, overflow_undef);
return Cast::make(T, e1);
}

Expand All @@ -198,12 +196,12 @@ Expr random_expr(Type T, int depth, bool overflow_undef) {
} else {
maker = make_bin_op[op % bin_op_count];
}
Expr a = random_expr(T, depth, overflow_undef);
Expr b = random_expr(T, depth, overflow_undef);
Expr a = random_expr(fdp, T, depth, overflow_undef);
Expr b = random_expr(fdp, T, depth, overflow_undef);
return maker(a, b);
}
// If we got here, try again.
return random_expr(T, depth, overflow_undef);
return random_expr(fdp, T, depth, overflow_undef);
}

bool test_simplification(Expr a, Expr b, Type T, const map<string, Expr> &vars) {
Expand Down Expand Up @@ -238,7 +236,7 @@ bool test_simplification(Expr a, Expr b, Type T, const map<string, Expr> &vars)
return true;
}

bool test_expression(Expr test, int samples) {
bool test_expression(FuzzedDataProvider &fdp, Expr test, int samples) {
Expr simplified = simplify(test);

map<string, Expr> vars;
Expand All @@ -248,10 +246,13 @@ bool test_expression(Expr test, int samples) {

for (int i = 0; i < samples; i++) {
for (std::map<string, Expr>::iterator v = vars.begin(); v != vars.end(); v++) {
size_t kMaxLeafIterations = 10000;
// Don't let the random leaf depend on v itself.
size_t iterations = 0;
do {
v->second = random_leaf(test.type().element_of(), true);
} while (expr_uses_var(v->second, v->first));
v->second = random_leaf(fdp, test.type().element_of(), true);
iterations++;
} while (expr_uses_var(v->second, v->first) && iterations < kMaxLeafIterations);
}

if (!test_simplification(test, simplified, test.type(), vars)) {
Expand Down Expand Up @@ -334,31 +335,19 @@ Expr e(Variable::make(Int(0), fuzz_var(4)));

} // namespace

int main(int argc, char **argv) {
// Number of random expressions to test.
const int count = 10000;
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
// Depth of the randomly generated expression trees.
const int depth = 5;
// Number of samples to test the generated expressions for.
const int samples = 3;

// We want different fuzz tests every time, to increase coverage.
// We also report the seed to enable reproducing failures.
int fuzz_seed = argc > 1 ? atoi(argv[1]) : time(nullptr);
rng.seed(fuzz_seed);
std::cout << "Simplify fuzz test seed: " << fuzz_seed << "\n";
FuzzedDataProvider fdp(data, size);

std::array<int, 6> vector_widths = {1, 2, 3, 4, 6, 8};
for (int n = 0; n < count; n++) {
int width = vector_widths[rng() % vector_widths.size()];
Type VT = random_type(width);
// Generate a random expr...
Expr test = random_expr(VT, depth);
if (!test_expression(test, samples)) {
return 1;
}
}

std::cout << "Success!\n";
int width = fdp.PickValueInArray(vector_widths);
Type VT = random_type(fdp, width);
// Generate a random expr...
Expr test = random_expr(fdp, VT, depth);
assert(test_expression(fdp, test, samples));
return 0;
}