Skip to content

Commit 9e8dfe6

Browse files
introspector: enable additional target selectors (#178)
The current oracle for finding interesting targets for harnesses is the `far-reach-low-coverage` API from Fuzz Introspector. However, the logic behind this API represents only a subset of the ways commonly used to find interesting targets. For example, Fuzz Introspector itself provides multiple oracles for finding targets, some of which are displayed at https://introspector.oss-fuzz.com/target_oracle (the page will load, but may take some seconds to do so). This PR extends the data prep logic to enable one more heuristic from Fuzz Introspector, which is based on finding functions with interesting keywords in their names (`serialize`, `parse`, `read`, and the like) in combination with low coverage and decent cyclomatic complexity. We can extend this so that we have X target oracles (there are a handful of oracles in FI already), and we can attach the oracle IDs to the benchmarks, which can be useful in large-scale experiments for understanding how various ways of finding interesting targets perform. FYI, this only works using a local deployment of FI -- a bump is needed on OSS-Fuzz to enable this using introspector.oss-fuzz.com. --------- Signed-off-by: David Korczynski <david@adalogics.com>
1 parent dc3e922 commit 9e8dfe6

File tree

1 file changed

+42
-11
lines changed

1 file changed

+42
-11
lines changed

data_prep/introspector.py

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@
4040
DEFAULT_INTROSPECTOR_ENDPOINT = 'https://introspector.oss-fuzz.com/api'
4141
INTROSPECTOR_ENDPOINT = ''
4242
INTROSPECTOR_CFG = ''
43-
INTROSPECTOR_FUNCTION = ''
43+
INTROSPECTOR_ORACLE_FAR_REACH = ''
44+
INTROSPECTOR_ORACLE_KEYWORD = ''
4445
INTROSPECTOR_SOURCE = ''
4546
INTROSPECTOR_XREF = ''
4647
INTROSPECTOR_TYPE = ''
@@ -49,15 +50,18 @@
4950

5051
def set_introspector_endpoints(endpoint):
5152
"""Sets URLs for Fuzz Introspector endpoints to local or remote endpoints."""
52-
global INTROSPECTOR_ENDPOINT, INTROSPECTOR_CFG, INTROSPECTOR_FUNCTION, \
53+
global INTROSPECTOR_ENDPOINT, INTROSPECTOR_CFG, INTROSPECTOR_FUNC_SIG, \
5354
INTROSPECTOR_SOURCE, INTROSPECTOR_XREF, INTROSPECTOR_TYPE, \
54-
INTROSPECTOR_FUNC_SIG
55+
INTROSPECTOR_ORACLE_FAR_REACH, INTROSPECTOR_ORACLE_KEYWORD
5556

5657
INTROSPECTOR_ENDPOINT = endpoint
5758
logging.info('Fuzz Introspector endpoint set to %s', INTROSPECTOR_ENDPOINT)
5859

5960
INTROSPECTOR_CFG = f'{INTROSPECTOR_ENDPOINT}/annotated-cfg'
60-
INTROSPECTOR_FUNCTION = f'{INTROSPECTOR_ENDPOINT}/far-reach-but-low-coverage'
61+
INTROSPECTOR_ORACLE_FAR_REACH = (
62+
f'{INTROSPECTOR_ENDPOINT}/far-reach-but-low-coverage')
63+
INTROSPECTOR_ORACLE_KEYWORD = (
64+
f'{INTROSPECTOR_ENDPOINT}/far-reach-low-cov-fuzz-keyword')
6165
INTROSPECTOR_SOURCE = f'{INTROSPECTOR_ENDPOINT}/function-source-code'
6266
INTROSPECTOR_XREF = f'{INTROSPECTOR_ENDPOINT}/all-cross-references'
6367
INTROSPECTOR_TYPE = f'{INTROSPECTOR_ENDPOINT}/type-info'
@@ -135,15 +139,36 @@ def _get_data(resp: Optional[requests.Response], key: str,
135139
return default_value
136140

137141

138-
def query_introspector_for_unreached_functions(project: str) -> list[dict]:
139-
"""Queries FuzzIntrospector API for unreached functions in |project|."""
140-
resp = _query_introspector(INTROSPECTOR_FUNCTION, {'project': project})
142+
def query_introspector_oracle(project: str, oracle_api: str) -> list[dict]:
  """Fetches the functions reported by one Fuzz Introspector oracle API.

  Args:
    project: Value sent as the 'project' query parameter.
    oracle_api: URL of the oracle endpoint to query.

  Returns:
    The non-empty value stored under 'functions' in the response.

  Exits the process with status 1 when no functions are returned.
  """
  response = _query_introspector(oracle_api, {'project': project})
  oracle_functions = _get_data(response, 'functions', [])
  # Guard clause: an empty or missing result is treated as fatal.
  if not oracle_functions:
    sys.exit(1)
  return oracle_functions
145149

146150

151+
def query_introspector_for_keyword_targets(project: str) -> list[dict]:
  """Returns targets for |project| from the fuzz-keyword oracle endpoint.

  Delegates to query_introspector_oracle() using the URL stored in
  INTROSPECTOR_ORACLE_KEYWORD.
  """
  keyword_oracle_api = INTROSPECTOR_ORACLE_KEYWORD
  return query_introspector_oracle(project, keyword_oracle_api)
154+
155+
156+
def query_introspector_for_targets(project: str,
                                   target_oracle: str) -> list[dict]:
  """Queries Fuzz Introspector for target functions using a named oracle.

  Args:
    project: Name of the project to query.
    target_oracle: Oracle identifier; one of 'far-reach-low-coverage' or
      'low-cov-with-fuzz-keyword'.

  Returns:
    The non-empty list of function entries produced by the selected oracle.

  Exits the process with status 1 when |target_oracle| is not a known oracle
  or when the selected oracle yields no functions.
  """
  # Maps the user-facing oracle name to the function implementing the query.
  oracle_dict = {
      'far-reach-low-coverage': get_unreached_functions,
      'low-cov-with-fuzz-keyword': query_introspector_for_keyword_targets,
  }
  query_func = oracle_dict.get(target_oracle)
  if query_func is None:
    logging.error('No such oracle "%s"', target_oracle)
    sys.exit(1)

  functions = query_func(project)
  if not functions:
    # Log before exiting so an empty oracle result can be told apart from the
    # other failures that also terminate with status 1.
    logging.error('Oracle "%s" returned no functions for project "%s"',
                  target_oracle, project)
    sys.exit(1)
  return functions
170+
171+
147172
def query_introspector_cfg(project: str) -> dict:
148173
"""Queries FuzzIntrospector API for CFG."""
149174
resp = _query_introspector(INTROSPECTOR_CFG, {'project': project})
@@ -198,7 +223,7 @@ def query_introspector_function_signature(project: str,
198223

199224

200225
def get_unreached_functions(project):
  """Returns far-reach oracle functions not marked as reached by fuzzers.

  Queries the INTROSPECTOR_ORACLE_FAR_REACH endpoint for |project| and keeps
  only the entries whose 'reached_by_fuzzers' value is falsy.
  """
  funcs = query_introspector_oracle(project, INTROSPECTOR_ORACLE_FAR_REACH)
  return [func for func in funcs if not func['reached_by_fuzzers']]
204229

@@ -311,9 +336,9 @@ def _group_function_params(param_types: list[str],
311336

312337

313338
def populate_benchmarks_using_introspector(project: str, language: str,
314-
limit: int):
339+
limit: int, target_oracle: str):
315340
"""Populates benchmark YAML files from the data from FuzzIntrospector."""
316-
functions = get_unreached_functions(project)
341+
functions = query_introspector_for_targets(project, target_oracle)
317342
if not functions:
318343
logging.error('No unreached functions found')
319344
return []
@@ -492,6 +517,11 @@ def _parse_arguments() -> argparse.Namespace:
492517
type=str,
493518
default=DEFAULT_INTROSPECTOR_ENDPOINT,
494519
help='Fuzz Introspecor API endpoint.')
520+
parser.add_argument('-t',
521+
'--target-oracle',
522+
type=str,
523+
default='far-reach-low-coverage',
524+
help='Oracle used to determine interesting targets.')
495525

496526
return parser.parse_args()
497527

@@ -517,7 +547,8 @@ def _parse_arguments() -> argparse.Namespace:
517547
cur_project_language = oss_fuzz_checkout.get_project_language(args.project)
518548
benchmarks = populate_benchmarks_using_introspector(args.project,
519549
cur_project_language,
520-
args.max_functions)
550+
args.max_functions,
551+
args.target_oracle)
521552
if benchmarks:
522553
benchmarklib.Benchmark.to_yaml(benchmarks, args.out)
523554
else:

0 commit comments

Comments
 (0)