Skip to content

Commit 481f921

Browse files
DonggeLiu and happy-qop
authored
[DO NOT MERGE] Experiment fixing stacktrace parsing (#198)
Related code fixes and improvements:
1. google/oss-fuzz-gen#187 (comment)
2. google/oss-fuzz-gen#187 (comment)
3. google/oss-fuzz-gen#187 (comment)
4. google/oss-fuzz-gen#187 (comment)
---------
Co-authored-by: happy-qop <106136863+happy-qop@users.noreply.github.com>
1 parent 3b98bb2 commit 481f921

File tree

12 files changed

+325
-87
lines changed

12 files changed

+325
-87
lines changed

experiment/builder_runner.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,9 @@ def build_and_run(self, generated_project: str, target_path: str,
139139

140140
run_result = RunResult()
141141

142-
self.run_target_local(generated_project, benchmark_target_name,
143-
self.work_dirs.run_logs_target(benchmark_target_name))
142+
self.run_target_local(
143+
generated_project, benchmark_target_name,
144+
self.work_dirs.run_logs_target(benchmark_target_name, iteration))
144145
run_result.coverage, run_result.coverage_summary = (self.get_coverage_local(
145146
generated_project, benchmark_target_name))
146147
return build_result, run_result
@@ -436,7 +437,8 @@ def build_and_run(self, generated_project: str, target_path: str,
436437
print(f'Cannot find cloud build log of {os.path.realpath(target_path)} '
437438
f':{build_log_name}')
438439

439-
with open(self.work_dirs.run_logs_target(generated_target_name), 'wb') as f:
440+
with open(self.work_dirs.run_logs_target(generated_target_name, iteration),
441+
'wb') as f:
440442
blob = bucket.blob(run_log_name)
441443
if blob.exists():
442444
build_result.succeeded = True

experiment/evaluator.py

Lines changed: 121 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626

2727
from experiment import builder_runner, oss_fuzz_checkout, textcov
2828
from experiment.benchmark import Benchmark
29+
from experiment.builder_runner import BuildResult, RunResult
30+
from experiment.fuzz_target_error import SemanticCheckResult
2931
from experiment.workdir import WorkDirs
3032
from llm_toolkit import code_fixer
3133

@@ -42,7 +44,8 @@
4244

4345
OSS_FUZZ_COVERAGE_BUCKET = 'oss-fuzz-coverage'
4446

45-
LLVM_SOURCE_PATH_PREFIX = '/src/llvm-project/compiler-rt'
47+
LIBFUZZER_LOG_STACK_FRAME_LLVM = '/src/llvm-project/compiler-rt'
48+
LIBFUZZER_LOG_STACK_FRAME_CPP = '/usr/local/bin/../include/c++'
4649

4750
EARLY_FUZZING_ROUND_THRESHOLD = 3
4851

@@ -56,9 +59,9 @@ class Result:
5659
line_coverage_diff: float = 0.0
5760
coverage_report_path: str = ''
5861
reproducer_path: str = ''
59-
# produces false positive or no cov increase at all
60-
is_driver_fuzz_err: bool = False
61-
driver_fuzz_err: str = ''
62+
# Grammatically correct but has false positive or no cov increase at all.
63+
is_semantic_error: bool = False
64+
semantic_error: str = ''
6265

6366
def dict(self):
6467
return dataclasses.asdict(self)
@@ -205,8 +208,10 @@ def check_target(self, ai_binary, target_path: str) -> Optional[Result]:
205208
traceback.print_exc()
206209
return None
207210

208-
def _parse_stacks_from_libfuzzer_logs(self, lines: list[str]) -> list[str]:
211+
def _parse_stacks_from_libfuzzer_logs(self,
212+
lines: list[str]) -> list[list[str]]:
209213
"""Parse stack traces from libFuzzer logs."""
214+
# TODO (dongge): Use stack parsing from ClusterFuzz.
210215
# There can be more than one thread stack in a log.
211216
stacks = []
212217

@@ -255,11 +260,13 @@ def _parse_fuzz_cov_info_from_libfuzzer_logs(
255260
return initcov, donecov, lastround
256261

257262
def _stack_func_is_of_testing_project(self, stack_frame: str) -> bool:
258-
return bool(CRASH_STACK_WITH_SOURCE_INFO.match(stack_frame)) and (
259-
LLVM_SOURCE_PATH_PREFIX not in stack_frame)
263+
return (bool(CRASH_STACK_WITH_SOURCE_INFO.match(stack_frame)) and
264+
LIBFUZZER_LOG_STACK_FRAME_LLVM not in stack_frame and
265+
LIBFUZZER_LOG_STACK_FRAME_CPP not in stack_frame)
260266

261267
def _parse_libfuzzer_logs(
262-
self, log_handle, logger: _Logger) -> tuple[int, int, bool, bool, str]:
268+
self, log_handle,
269+
logger: _Logger) -> tuple[int, int, bool, SemanticCheckResult]:
263270
"""Parses libFuzzer logs."""
264271
lines = None
265272
try:
@@ -268,13 +275,11 @@ def _parse_libfuzzer_logs(
268275
fuzzlog = fuzzlog.decode('utf-8', errors='ignore')
269276
lines = fuzzlog.split('\n')
270277
except MemoryError as e:
271-
# Some logs from abnormal drivers are too large to be parsed.
278+
# Some logs from abnormal fuzz targets are too large to be parsed.
272279
logger.log('%s is too large to parse: %s', log_handle.name, e)
273-
return 0, 0, False, True, 'LOG_MESS_UP'
280+
return 0, 0, False, SemanticCheckResult(SemanticCheckResult.LOG_MESS_UP)
274281

275-
cov_pcs = 0
276-
total_pcs = 0
277-
crashes = False
282+
cov_pcs, total_pcs, crashes = 0, 0, False
278283

279284
for line in lines:
280285
m = LIBFUZZER_MODULES_LOADED_REGEX.match(line)
@@ -295,30 +300,80 @@ def _parse_libfuzzer_logs(
295300
initcov, donecov, lastround = self._parse_fuzz_cov_info_from_libfuzzer_logs(
296301
lines)
297302

298-
# NOTE: Crashes from incorrect drivers will not be counted.
303+
# NOTE: Crashes from incorrect fuzz targets will not be counted in the final result.
299304

300305
if crashes:
301-
# FP case 1: driver crashes at init or first few rounds.
306+
symptom = SemanticCheckResult.extract_symptom(fuzzlog)
307+
crash_stacks = self._parse_stacks_from_libfuzzer_logs(lines)
308+
309+
# FP case 1: fuzz target crashes at init or first few rounds.
302310
if lastround is None or lastround <= EARLY_FUZZING_ROUND_THRESHOLD:
303311
# No cov line has been identified or only INITED round has been passed.
304312
# This is very likely a false positive case.
305-
return cov_pcs, total_pcs, True, True, 'FP_CRASH_NEAR_INIT'
306-
307-
# FP case 2: 1st func of the 1st thread stack is in driver.
308-
crash_stacks = self._parse_stacks_from_libfuzzer_logs(lines)
309-
for stack_frame in crash_stacks[:1]:
310-
if self._stack_func_is_of_testing_project(stack_frame):
311-
if 'LLVMFuzzerTestOneInput' in stack_frame:
312-
return cov_pcs, total_pcs, True, True, 'FP_CRASH_IN_DRIVER'
313-
break
313+
return cov_pcs, total_pcs, True, \
314+
SemanticCheckResult(SemanticCheckResult.FP_NEAR_INIT_CRASH,\
315+
symptom, crash_stacks)
316+
317+
# FP case 2: 1st func of the 1st thread stack is in fuzz target.
318+
if len(crash_stacks) > 0:
319+
first_stack = crash_stacks[0]
320+
# Check the first stack frame of the first stack only.
321+
for stack_frame in first_stack[:1]:
322+
if self._stack_func_is_of_testing_project(stack_frame):
323+
if 'LLVMFuzzerTestOneInput' in stack_frame:
324+
return cov_pcs, total_pcs, True, \
325+
SemanticCheckResult(SemanticCheckResult.FP_TARGET_CRASH,\
326+
symptom, crash_stacks)
327+
break
314328

315329
else:
316-
# Another error driver case: no cov increase.
330+
# Another error fuzz target case: no cov increase.
317331
if initcov is not None and donecov is not None:
318332
if initcov == donecov:
319-
return cov_pcs, total_pcs, True, True, 'NO_COV_INCREASE'
333+
return cov_pcs, total_pcs, True, SemanticCheckResult(
334+
SemanticCheckResult.NO_COV_INCREASE)
335+
336+
return cov_pcs, total_pcs, crashes, SemanticCheckResult(
337+
SemanticCheckResult.NO_SEMANTIC_ERR)
338+
339+
def _evaluate_generated_fuzz_target(
340+
self, generated_oss_fuzz_project: str, target_path: str,
341+
generated_target_name: str, iteration: int, logger: _Logger
342+
) -> tuple[BuildResult, Optional[RunResult], int, int, bool,
343+
SemanticCheckResult]:
344+
"""Evaluates the generated fuzz target."""
345+
build_result, run_result = self.builder_runner.build_and_run(
346+
generated_oss_fuzz_project, target_path, iteration)
320347

321-
return cov_pcs, total_pcs, crashes, False, ''
348+
if not build_result.succeeded:
349+
# Clear the variables for the case where a fuzzing error before the fix becomes a build error after the fix.
350+
return build_result, run_result, 0, 0, False, SemanticCheckResult(
351+
SemanticCheckResult.NOT_APPLICABLE)
352+
353+
# Parse libfuzzer logs to get fuzz target runtime details.
354+
with open(self.work_dirs.run_logs_target(generated_target_name, iteration),
355+
'rb') as f:
356+
cov_pcs, total_pcs, crashes, semantic_error = self._parse_libfuzzer_logs(
357+
f, logger)
358+
359+
return build_result, run_result, cov_pcs, total_pcs, crashes, semantic_error
360+
361+
def _fix_generated_fuzz_target(self, ai_binary: str,
362+
generated_oss_fuzz_project: str,
363+
target_path: str, iteration: int,
364+
build_result: BuildResult,
365+
semantic_error: SemanticCheckResult):
366+
"""Fixes the generated fuzz target."""
367+
if build_result.succeeded:
368+
error_desc, errors = semantic_error.get_error_info()
369+
else:
370+
error_desc, errors = None, build_result.errors
371+
code_fixer.llm_fix(ai_binary, target_path, self.benchmark, iteration,
372+
error_desc, errors, self.builder_runner.fixer_model_name)
373+
shutil.copyfile(
374+
target_path,
375+
os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR, 'projects',
376+
generated_oss_fuzz_project, os.path.basename(target_path)))
322377

323378
def do_check_target(self, ai_binary: str, target_path: str) -> Result:
324379
"""Builds and runs a target."""
@@ -334,62 +389,64 @@ def do_check_target(self, ai_binary: str, target_path: str) -> Result:
334389
logger = _Logger(status_path)
335390

336391
# Try building and running the new target.
337-
llm_fix_count = 0
338-
build_result, run_result = self.builder_runner.build_and_run(
339-
generated_oss_fuzz_project, target_path, llm_fix_count)
340-
if build_result.succeeded:
341-
logger.log(f'Successfully built {target_path} without LLM code fix.')
392+
342393
# TODO: Log build failure.
343394
# TODO: Log run success/failure.
344395

345-
# Loop to try and fix the compilation error using the LLM.
346-
while not build_result.succeeded and llm_fix_count < LLM_FIX_LIMIT:
396+
# Loop of evaluating and fixing fuzz target.
397+
llm_fix_count = 0
398+
while True:
399+
# 1. Evaluating generated driver.
400+
(build_result, run_result, cov_pcs, total_pcs, crashes,
401+
semantic_error) = self._evaluate_generated_fuzz_target(
402+
generated_oss_fuzz_project, target_path, generated_target_name,
403+
llm_fix_count, logger)
404+
405+
gen_succ = build_result.succeeded and not semantic_error.has_err
406+
if gen_succ:
407+
# Successfully generated the fuzz target.
408+
break
409+
410+
if llm_fix_count >= LLM_FIX_LIMIT:
411+
# Do not fix, since the fix limit has been reached.
412+
break
413+
414+
# 2. Fixing generated driver.
347415
llm_fix_count += 1
348416
logger.log(f'Fixing {target_path} with '
349417
f'{self.builder_runner.fixer_model_name}, '
350418
f'attempt {llm_fix_count}.')
351-
code_fixer.llm_fix(ai_binary, target_path, self.benchmark, llm_fix_count,
352-
build_result.errors,
353-
self.builder_runner.fixer_model_name)
354-
shutil.copyfile(
355-
target_path,
356-
os.path.join(oss_fuzz_checkout.OSS_FUZZ_DIR,
357-
'projects', generated_oss_fuzz_project,
358-
os.path.basename(target_path)))
359-
build_result, run_result = self.builder_runner.build_and_run(
360-
generated_oss_fuzz_project, target_path, llm_fix_count)
361-
if build_result.succeeded:
362-
logger.log(f'Successfully fixed {target_path} with '
363-
f'{self.builder_runner.fixer_model_name} in '
364-
f'{llm_fix_count} iterations.')
365-
break
419+
self._fix_generated_fuzz_target(ai_binary, generated_oss_fuzz_project,
420+
target_path, llm_fix_count, build_result,
421+
semantic_error)
366422

367-
if not build_result.succeeded:
423+
# Logs and returns the result.
424+
if gen_succ:
425+
logger.log(f'Successfully built {target_path} with '
426+
f'{self.builder_runner.fixer_model_name} in '
427+
f'{llm_fix_count} iterations.')
428+
else:
368429
logger.log(f'Failed to fix {target_path} with '
369430
f'{self.builder_runner.fixer_model_name} in '
370431
f'{llm_fix_count} iterations.')
371-
return logger.return_result(Result(False, False, 0.0, 0.0))
372-
373-
# Parse logs to get raw pc coverage and whether the target crashed.
374-
with open(self.work_dirs.run_logs_target(generated_target_name), 'rb') as f:
375-
cov_pcs, total_pcs, crashes, is_driver_fuzz_err,\
376-
driver_fuzz_err = self._parse_libfuzzer_logs(f, logger)
432+
return logger.return_result(
433+
Result(False, False, 0.0, 0.0, '', '', False, semantic_error.type))
377434

378435
if (not run_result or run_result.coverage_summary is None or
379436
run_result.coverage is None):
380437
logger.log(f'Warning: No run_result in {generated_oss_fuzz_project}.')
381438
return logger.return_result(
382-
Result(True, crashes, 0.0, 0.0, '', '', is_driver_fuzz_err,
383-
driver_fuzz_err))
439+
Result(True, crashes, 0.0, 0.0, '', '', False, semantic_error.type))
384440

385-
if is_driver_fuzz_err:
386-
logger.log(f'Warning: {driver_fuzz_err} in {generated_oss_fuzz_project}.')
441+
if semantic_error.has_err:
442+
logger.log(
443+
f'Warning: {semantic_error.type} in {generated_oss_fuzz_project}.')
387444
return logger.return_result(
388445
Result(True, crashes, 0.0, 0.0, run_result.coverage_report_path,
389-
run_result.reproducer_path, is_driver_fuzz_err,
390-
driver_fuzz_err))
446+
run_result.reproducer_path, semantic_error.has_err,
447+
semantic_error.type))
391448

392-
# Get line coverage (diff) details.
449+
# Gets line coverage (diff) details.
393450
coverage_summary = self._load_existing_coverage_summary()
394451
total_lines = _compute_total_lines_without_fuzz_targets(
395452
coverage_summary, generated_target_name)
@@ -415,7 +472,7 @@ def do_check_target(self, ai_binary: str, target_path: str) -> Result:
415472
return logger.return_result(
416473
Result(True, crashes, coverage_percent, coverage_diff,
417474
run_result.coverage_report_path, run_result.reproducer_path,
418-
is_driver_fuzz_err, driver_fuzz_err))
475+
semantic_error.has_err, semantic_error.type))
419476

420477
def _load_existing_coverage_summary(self) -> dict:
421478
"""Load existing summary.json."""

0 commit comments

Comments
 (0)