26
26
27
27
from experiment import builder_runner , oss_fuzz_checkout , textcov
28
28
from experiment .benchmark import Benchmark
29
+ from experiment .builder_runner import BuildResult , RunResult
30
+ from experiment .fuzz_target_error import SemanticCheckResult
29
31
from experiment .workdir import WorkDirs
30
32
from llm_toolkit import code_fixer
31
33
42
44
43
45
OSS_FUZZ_COVERAGE_BUCKET = 'oss-fuzz-coverage'
44
46
45
- LLVM_SOURCE_PATH_PREFIX = '/src/llvm-project/compiler-rt'
47
+ LIBFUZZER_LOG_STACK_FRAME_LLVM = '/src/llvm-project/compiler-rt'
48
+ LIBFUZZER_LOG_STACK_FRAME_CPP = '/usr/local/bin/../include/c++'
46
49
47
50
EARLY_FUZZING_ROUND_THRESHOLD = 3
48
51
@@ -56,9 +59,9 @@ class Result:
56
59
line_coverage_diff : float = 0.0
57
60
coverage_report_path : str = ''
58
61
reproducer_path : str = ''
59
- # produces false positive or no cov increase at all
60
- is_driver_fuzz_err : bool = False
61
- driver_fuzz_err : str = ''
62
+ # Grammatically correct, but has false positives or no cov increase at all.
63
+ is_semantic_error : bool = False
64
+ semantic_error : str = ''
62
65
63
66
def dict (self ):
64
67
return dataclasses .asdict (self )
@@ -205,8 +208,10 @@ def check_target(self, ai_binary, target_path: str) -> Optional[Result]:
205
208
traceback .print_exc ()
206
209
return None
207
210
208
- def _parse_stacks_from_libfuzzer_logs (self , lines : list [str ]) -> list [str ]:
211
+ def _parse_stacks_from_libfuzzer_logs (self ,
212
+ lines : list [str ]) -> list [list [str ]]:
209
213
"""Parse stack traces from libFuzzer logs."""
214
+ # TODO (dongge): Use stack parsing from ClusterFuzz.
210
215
# There can be more than one thread stack in a log.
211
216
stacks = []
212
217
@@ -255,11 +260,13 @@ def _parse_fuzz_cov_info_from_libfuzzer_logs(
255
260
return initcov , donecov , lastround
256
261
257
262
def _stack_func_is_of_testing_project (self , stack_frame : str ) -> bool :
258
- return bool (CRASH_STACK_WITH_SOURCE_INFO .match (stack_frame )) and (
259
- LLVM_SOURCE_PATH_PREFIX not in stack_frame )
263
+ return (bool (CRASH_STACK_WITH_SOURCE_INFO .match (stack_frame )) and
264
+ LIBFUZZER_LOG_STACK_FRAME_LLVM not in stack_frame and
265
+ LIBFUZZER_LOG_STACK_FRAME_CPP not in stack_frame )
260
266
261
267
def _parse_libfuzzer_logs (
262
- self , log_handle , logger : _Logger ) -> tuple [int , int , bool , bool , str ]:
268
+ self , log_handle ,
269
+ logger : _Logger ) -> tuple [int , int , bool , SemanticCheckResult ]:
263
270
"""Parses libFuzzer logs."""
264
271
lines = None
265
272
try :
@@ -268,13 +275,11 @@ def _parse_libfuzzer_logs(
268
275
fuzzlog = fuzzlog .decode ('utf-8' , errors = 'ignore' )
269
276
lines = fuzzlog .split ('\n ' )
270
277
except MemoryError as e :
271
- # Some logs from abnormal drivers are too large to be parsed.
278
+ # Some logs from abnormal fuzz targets are too large to be parsed.
272
279
logger .log ('%s is too large to parse: %s' , log_handle .name , e )
273
- return 0 , 0 , False , True , ' LOG_MESS_UP'
280
+ return 0 , 0 , False , SemanticCheckResult ( SemanticCheckResult . LOG_MESS_UP )
274
281
275
- cov_pcs = 0
276
- total_pcs = 0
277
- crashes = False
282
+ cov_pcs , total_pcs , crashes = 0 , 0 , False
278
283
279
284
for line in lines :
280
285
m = LIBFUZZER_MODULES_LOADED_REGEX .match (line )
@@ -295,30 +300,80 @@ def _parse_libfuzzer_logs(
295
300
initcov , donecov , lastround = self ._parse_fuzz_cov_info_from_libfuzzer_logs (
296
301
lines )
297
302
298
- # NOTE: Crashes from incorrect drivers will not be counted.
303
+ # NOTE: Crashes from incorrect fuzz targets will ultimately not be counted.
299
304
300
305
if crashes :
301
- # FP case 1: driver crashes at init or first few rounds.
306
+ symptom = SemanticCheckResult .extract_symptom (fuzzlog )
307
+ crash_stacks = self ._parse_stacks_from_libfuzzer_logs (lines )
308
+
309
+ # FP case 1: fuzz target crashes at init or first few rounds.
302
310
if lastround is None or lastround <= EARLY_FUZZING_ROUND_THRESHOLD :
303
311
# No cov line has been identified, or only the INITED round has passed.
304
312
# This is very likely a false positive case.
305
- return cov_pcs , total_pcs , True , True , 'FP_CRASH_NEAR_INIT'
306
-
307
- # FP case 2: 1st func of the 1st thread stack is in driver.
308
- crash_stacks = self ._parse_stacks_from_libfuzzer_logs (lines )
309
- for stack_frame in crash_stacks [:1 ]:
310
- if self ._stack_func_is_of_testing_project (stack_frame ):
311
- if 'LLVMFuzzerTestOneInput' in stack_frame :
312
- return cov_pcs , total_pcs , True , True , 'FP_CRASH_IN_DRIVER'
313
- break
313
+ return cov_pcs , total_pcs , True , \
314
+ SemanticCheckResult (SemanticCheckResult .FP_NEAR_INIT_CRASH ,\
315
+ symptom , crash_stacks )
316
+
317
+ # FP case 2: 1st func of the 1st thread stack is in fuzz target.
318
+ if len (crash_stacks ) > 0 :
319
+ first_stack = crash_stacks [0 ]
320
+ # Check the first stack frame of the first stack only.
321
+ for stack_frame in first_stack [:1 ]:
322
+ if self ._stack_func_is_of_testing_project (stack_frame ):
323
+ if 'LLVMFuzzerTestOneInput' in stack_frame :
324
+ return cov_pcs , total_pcs , True , \
325
+ SemanticCheckResult (SemanticCheckResult .FP_TARGET_CRASH ,\
326
+ symptom , crash_stacks )
327
+ break
314
328
315
329
else :
316
- # Another error driver case: no cov increase.
330
+ # Another erroneous fuzz target case: no cov increase.
317
331
if initcov is not None and donecov is not None :
318
332
if initcov == donecov :
319
- return cov_pcs , total_pcs , True , True , 'NO_COV_INCREASE'
333
+ return cov_pcs , total_pcs , True , SemanticCheckResult (
334
+ SemanticCheckResult .NO_COV_INCREASE )
335
+
336
+ return cov_pcs , total_pcs , crashes , SemanticCheckResult (
337
+ SemanticCheckResult .NO_SEMANTIC_ERR )
338
+
339
+ def _evaluate_generated_fuzz_target (
340
+ self , generated_oss_fuzz_project : str , target_path : str ,
341
+ generated_target_name : str , iteration : int , logger : _Logger
342
+ ) -> tuple [BuildResult , Optional [RunResult ], int , int , bool ,
343
+ SemanticCheckResult ]:
344
+ """Evaluates the generated fuzz target."""
345
+ build_result , run_result = self .builder_runner .build_and_run (
346
+ generated_oss_fuzz_project , target_path , iteration )
320
347
321
- return cov_pcs , total_pcs , crashes , False , ''
348
+ if not build_result .succeeded :
349
+ # Reset the runtime values for the case where the target had a fuzzing error
+ # before the fix but a build error after it.
350
+ return build_result , run_result , 0 , 0 , False , SemanticCheckResult (
351
+ SemanticCheckResult .NOT_APPLICABLE )
352
+
353
+ # Parse libfuzzer logs to get fuzz target runtime details.
354
+ with open (self .work_dirs .run_logs_target (generated_target_name , iteration ),
355
+ 'rb' ) as f :
356
+ cov_pcs , total_pcs , crashes , semantic_error = self ._parse_libfuzzer_logs (
357
+ f , logger )
358
+
359
+ return build_result , run_result , cov_pcs , total_pcs , crashes , semantic_error
360
+
361
+ def _fix_generated_fuzz_target (self , ai_binary : str ,
362
+ generated_oss_fuzz_project : str ,
363
+ target_path : str , iteration : int ,
364
+ build_result : BuildResult ,
365
+ semantic_error : SemanticCheckResult ):
366
+ """Fixes the generated fuzz target."""
367
+ if build_result .succeeded :
368
+ error_desc , errors = semantic_error .get_error_info ()
369
+ else :
370
+ error_desc , errors = None , build_result .errors
371
+ code_fixer .llm_fix (ai_binary , target_path , self .benchmark , iteration ,
372
+ error_desc , errors , self .builder_runner .fixer_model_name )
373
+ shutil .copyfile (
374
+ target_path ,
375
+ os .path .join (oss_fuzz_checkout .OSS_FUZZ_DIR , 'projects' ,
376
+ generated_oss_fuzz_project , os .path .basename (target_path )))
322
377
323
378
def do_check_target (self , ai_binary : str , target_path : str ) -> Result :
324
379
"""Builds and runs a target."""
@@ -334,62 +389,64 @@ def do_check_target(self, ai_binary: str, target_path: str) -> Result:
334
389
logger = _Logger (status_path )
335
390
336
391
# Try building and running the new target.
337
- llm_fix_count = 0
338
- build_result , run_result = self .builder_runner .build_and_run (
339
- generated_oss_fuzz_project , target_path , llm_fix_count )
340
- if build_result .succeeded :
341
- logger .log (f'Successfully built { target_path } without LLM code fix.' )
392
+
342
393
# TODO: Log build failure.
343
394
# TODO: Log run success/failure.
344
395
345
- # Loop to try and fix the compilation error using the LLM.
346
- while not build_result .succeeded and llm_fix_count < LLM_FIX_LIMIT :
396
+ # Loop: evaluate the fuzz target, then fix it, until it succeeds or the fix
+ # limit is reached.
397
+ llm_fix_count = 0
398
+ while True :
399
+ # 1. Evaluate the generated fuzz target.
400
+ (build_result , run_result , cov_pcs , total_pcs , crashes ,
401
+ semantic_error ) = self ._evaluate_generated_fuzz_target (
402
+ generated_oss_fuzz_project , target_path , generated_target_name ,
403
+ llm_fix_count , logger )
404
+
405
+ gen_succ = build_result .succeeded and not semantic_error .has_err
406
+ if gen_succ :
407
+ # Successfully generated the fuzz target.
408
+ break
409
+
410
+ if llm_fix_count >= LLM_FIX_LIMIT :
411
+ # Do not fix: the fix limit has been reached.
412
+ break
413
+
414
+ # 2. Fix the generated fuzz target.
347
415
llm_fix_count += 1
348
416
logger .log (f'Fixing { target_path } with '
349
417
f'{ self .builder_runner .fixer_model_name } , '
350
418
f'attempt { llm_fix_count } .' )
351
- code_fixer .llm_fix (ai_binary , target_path , self .benchmark , llm_fix_count ,
352
- build_result .errors ,
353
- self .builder_runner .fixer_model_name )
354
- shutil .copyfile (
355
- target_path ,
356
- os .path .join (oss_fuzz_checkout .OSS_FUZZ_DIR ,
357
- 'projects' , generated_oss_fuzz_project ,
358
- os .path .basename (target_path )))
359
- build_result , run_result = self .builder_runner .build_and_run (
360
- generated_oss_fuzz_project , target_path , llm_fix_count )
361
- if build_result .succeeded :
362
- logger .log (f'Successfully fixed { target_path } with '
363
- f'{ self .builder_runner .fixer_model_name } in '
364
- f'{ llm_fix_count } iterations.' )
365
- break
419
+ self ._fix_generated_fuzz_target (ai_binary , generated_oss_fuzz_project ,
420
+ target_path , llm_fix_count , build_result ,
421
+ semantic_error )
366
422
367
- if not build_result .succeeded :
423
+ # Logs and returns the result.
424
+ if gen_succ :
425
+ logger .log (f'Successfully built { target_path } with '
426
+ f'{ self .builder_runner .fixer_model_name } in '
427
+ f'{ llm_fix_count } iterations.' )
428
+ else :
368
429
logger .log (f'Failed to fix { target_path } with '
369
430
f'{ self .builder_runner .fixer_model_name } in '
370
431
f'{ llm_fix_count } iterations.' )
371
- return logger .return_result (Result (False , False , 0.0 , 0.0 ))
372
-
373
- # Parse logs to get raw pc coverage and whether the target crashed.
374
- with open (self .work_dirs .run_logs_target (generated_target_name ), 'rb' ) as f :
375
- cov_pcs , total_pcs , crashes , is_driver_fuzz_err ,\
376
- driver_fuzz_err = self ._parse_libfuzzer_logs (f , logger )
432
+ return logger .return_result (
433
+ Result (False , False , 0.0 , 0.0 , '' , '' , False , semantic_error .type ))
377
434
378
435
if (not run_result or run_result .coverage_summary is None or
379
436
run_result .coverage is None ):
380
437
logger .log (f'Warning: No run_result in { generated_oss_fuzz_project } .' )
381
438
return logger .return_result (
382
- Result (True , crashes , 0.0 , 0.0 , '' , '' , is_driver_fuzz_err ,
383
- driver_fuzz_err ))
439
+ Result (True , crashes , 0.0 , 0.0 , '' , '' , False , semantic_error .type ))
384
440
385
- if is_driver_fuzz_err :
386
- logger .log (f'Warning: { driver_fuzz_err } in { generated_oss_fuzz_project } .' )
441
+ if semantic_error .has_err :
442
+ logger .log (
443
+ f'Warning: { semantic_error .type } in { generated_oss_fuzz_project } .' )
387
444
return logger .return_result (
388
445
Result (True , crashes , 0.0 , 0.0 , run_result .coverage_report_path ,
389
- run_result .reproducer_path , is_driver_fuzz_err ,
390
- driver_fuzz_err ))
446
+ run_result .reproducer_path , semantic_error . has_err ,
447
+ semantic_error . type ))
391
448
392
- # Get line coverage (diff) details.
449
+ # Gets line coverage (diff) details.
393
450
coverage_summary = self ._load_existing_coverage_summary ()
394
451
total_lines = _compute_total_lines_without_fuzz_targets (
395
452
coverage_summary , generated_target_name )
@@ -415,7 +472,7 @@ def do_check_target(self, ai_binary: str, target_path: str) -> Result:
415
472
return logger .return_result (
416
473
Result (True , crashes , coverage_percent , coverage_diff ,
417
474
run_result .coverage_report_path , run_result .reproducer_path ,
418
- is_driver_fuzz_err , driver_fuzz_err ))
475
+ semantic_error . has_err , semantic_error . type ))
419
476
420
477
def _load_existing_coverage_summary (self ) -> dict :
421
478
"""Load existing summary.json."""
0 commit comments