Skip to content

Commit b71b015

Browse files
authored
Integrate Fuzz Introspector APIs and more logging (#112)
1 parent 662403a commit b71b015

File tree

2 files changed

+145
-79
lines changed

2 files changed

+145
-79
lines changed

data_prep/introspector.py

Lines changed: 139 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -33,28 +33,105 @@
3333
# Base URL of the Fuzz Introspector web API and the endpoints queried below.
INTROSPECTOR_ENDPOINT = 'https://introspector.oss-fuzz.com/api'
INTROSPECTOR_CFG = f'{INTROSPECTOR_ENDPOINT}/annotated-cfg'
INTROSPECTOR_FUNCTION = f'{INTROSPECTOR_ENDPOINT}/far-reach-but-low-coverage'
INTROSPECTOR_SOURCE = f'{INTROSPECTOR_ENDPOINT}/function-source-code'
INTROSPECTOR_XREF = f'{INTROSPECTOR_ENDPOINT}/all-cross-references'
INTROSPECTOR_TYPE = f'{INTROSPECTOR_ENDPOINT}/type-info'
INTROSPECTOR_FUNC_SIG = f'{INTROSPECTOR_ENDPOINT}/function-signature'
40+
41+
42+
def _query_introspector(api: str, params: dict) -> dict:
  """Queries a FuzzIntrospector API endpoint and returns its JSON payload.

  Args:
    api: Full URL of the endpoint to query.
    params: Query-string parameters sent with the GET request.

  Returns:
    The decoded JSON response, or an empty dict when the request cannot be
    completed, the server answers with an error status, or the body is not
    valid JSON. Every failure is logged.
  """
  try:
    resp = requests.get(api, params, timeout=TIMEOUT)
  except requests.RequestException as e:
    # Timeouts and connection errors are treated like any other failed query.
    logging.error('Failed to query FI at %s with %s: %s', api, params, e)
    return {}

  if not resp.ok:
    logging.error(
        'Failed to get data from FI\n'
        '-----------Response received------------\n'
        '%s\n'
        '------------End of response-------------',
        # Never let an undecodable error page mask the original failure.
        resp.content.decode('utf-8', errors='replace').strip())
    return {}

  try:
    return resp.json()
  except ValueError as e:
    # requests raises a ValueError subclass when the body is not JSON.
    logging.error('Failed to parse FI response from %s with %s: %s', api,
                  params, e)
    return {}
3655

3756

3857
def query_introspector_for_unreached_functions(project: str) -> list[dict]:
  """Queries FuzzIntrospector API for unreached functions in |project|.

  Args:
    project: Name of the project to look up.

  Returns:
    The non-empty list stored under 'functions' in the API response.

  Raises:
    SystemExit: With status 1 when the response contains no functions; the
      'extended_msgs' from the response are logged first.
  """
  data = _query_introspector(INTROSPECTOR_FUNCTION, {'project': project})
  functions = data.get('functions')
  if functions:
    return functions
  # |extended_msgs| may be absent (or null) when the query itself failed.
  logging.error('No functions found from FI for project %s:\n %s', project,
                '\n '.join(data.get('extended_msgs') or []))
  sys.exit(1)
5066

5167

52-
def query_introspector_cfg(project: str) -> dict:
  """Queries FuzzIntrospector API for CFG.

  Args:
    project: Name of the project to look up.

  Returns:
    The value stored under 'project' in the API response, or an empty dict
    when that key is missing.
  """
  data = _query_introspector(INTROSPECTOR_CFG, {'project': project})
  return data.get('project', {})
73+
74+
75+
def query_introspector_function_source(project: str, func_sig: str) -> str:
  """Queries FuzzIntrospector API for source code of |func_sig|.

  Args:
    project: Name of the project that contains the function.
    func_sig: Signature of the function whose source is requested.

  Returns:
    The value stored under 'source' in the API response, or an empty string
    (after logging an error) when it is missing or empty.
  """
  data = _query_introspector(INTROSPECTOR_SOURCE, {
      'project': project,
      'function_signature': func_sig
  })
  source = data.get('source') or ''
  if not source:
    logging.error('No function source found for %s in %s: %s', func_sig,
                  project, data)

  return source
87+
88+
89+
def query_introspector_cross_references(project: str,
                                        func_sig: str) -> list[str]:
  """Queries FuzzIntrospector API for source code of the functions
  cross-referenced with |func_sig|.

  Args:
    project: Name of the project that contains the function.
    func_sig: Signature of the function whose cross-references are requested.

  Returns:
    Source code of each call site's 'dst_func' that FI could resolve. Call
    sites whose name, signature, or source cannot be found are skipped.
  """
  data = _query_introspector(INTROSPECTOR_XREF, {
      'project': project,
      'function_signature': func_sig
  })
  call_sites = data.get('callsites') or []

  xref_source = []
  for cs in call_sites:
    # NOTE(review): confirm against the FI API that 'dst_func' (rather than
    # the other end of the call site) is the function wanted here.
    name = cs.get('dst_func')
    if not name:
      continue
    sig = query_introspector_function_signature(project, name)
    if not sig:
      # Without a signature the source query cannot succeed; skip it.
      continue
    source = query_introspector_function_source(project, sig)
    if source:
      xref_source.append(source)
  return xref_source
106+
107+
108+
def query_introspector_type_info(project: str, type_name: str) -> dict:
  """Queries FuzzIntrospector API for information of |type_name|.

  Args:
    project: Name of the project that declares the type.
    type_name: Name of the type to look up.

  Returns:
    The value stored under 'type_data' in the API response, or an empty dict
    (after logging an error) when it is missing or empty.
  """
  data = _query_introspector(INTROSPECTOR_TYPE, {
      'project': project,
      'name': type_name
  })
  type_info = data.get('type_data') or {}
  if not type_info:
    logging.error('No type info found from FI for %s in %s: %s', type_name,
                  project, data)

  return type_info
120+
121+
122+
def query_introspector_function_signature(project: str,
                                          function_name: str) -> str:
  """Queries FuzzIntrospector API for signature of |function_name|.

  Args:
    project: Name of the project that contains the function.
    function_name: Name of the function, sent as the 'function' parameter.

  Returns:
    The value stored under 'signature' in the API response, or an empty
    string (after logging an error) when it is missing or empty.
  """
  data = _query_introspector(INTROSPECTOR_FUNC_SIG, {
      'project': project,
      'function': function_name
  })
  func_sig = data.get('signature') or ''
  if not func_sig:
    logging.error('No signature found from FI for %s in %s: %s', function_name,
                  project, data)

  return func_sig
58135

59136

60137
def get_unreached_functions(project):
@@ -88,54 +165,75 @@ def clean_type(name: str) -> str:
88165
return name
89166

90167

91-
def _get_raw_return_type(function: dict) -> str:
168+
def _get_raw_return_type(function: dict, project: str) -> str:
92169
"""Returns the raw function type."""
93-
return function.get('return-type') or function.get('return_type', '')
170+
return_type = function.get('return-type') or function.get('return_type', '')
171+
if not return_type:
172+
logging.error(
173+
'Missing return type in project: %s\n'
174+
' raw_function_name: %s', project,
175+
get_raw_function_name(function, project))
176+
return return_type
94177

95178

96-
def _get_clean_return_type(function: dict, project: str) -> str:
  """Returns the cleaned return type of |function|.

  Args:
    function: Function info dict passed on to _get_raw_return_type().
    project: Project name, passed on for error logging.

  Returns:
    'void' when the stripped raw type is 'N/A', otherwise the raw type after
    clean_type().
  """
  raw_return_type = _get_raw_return_type(function, project).strip()
  if raw_return_type == 'N/A':
    # Bug in introspector: Unable to distinguish between bool and void right
    # now. More likely to be void for function return arguments.
    return 'void'
  return clean_type(raw_return_type)
104187

105188

106-
def get_raw_function_name(function: dict, project: str) -> str:
  """Returns the raw function name of |function|.

  Args:
    function: Function info dict; the name is read from 'raw-function-name'
      first and from 'raw_function_name' otherwise.
    project: Project name, used only in the error log.

  Returns:
    The raw function name, or an empty string (after logging an error) when
    neither key holds a value.
  """
  # The trailing `or ''` keeps the result a str even for an explicit None.
  raw_name = (function.get('raw-function-name') or
              function.get('raw_function_name') or '')
  if not raw_name:
    logging.error('No raw function name in project: %s for function: %s',
                  project, function)
  return raw_name
110197

111198

112-
def _get_clean_arg_types(function: dict, project: str) -> list[str]:
  """Returns the cleaned function argument types.

  Args:
    function: Function info dict; the types are read from 'arg-types' first
      and from 'function_arguments' otherwise.
    project: Project name, used only in the error log.

  Returns:
    Each raw argument type after clean_type(); an empty list (after logging
    an error) when neither key holds a value.
  """
  # The trailing `or []` keeps the comprehension safe for an explicit None.
  raw_arg_types = (function.get('arg-types') or
                   function.get('function_arguments') or [])
  if not raw_arg_types:
    logging.error(
        'Missing argument types in project: %s\n'
        ' raw_function_name: %s', project,
        get_raw_function_name(function, project))
  return [clean_type(arg_type) for arg_type in raw_arg_types]
117209

118210

119-
def _get_arg_names(function: dict) -> list[str]:
120-
"""Returns the cleaned function argument types."""
121-
return (function.get('arg-names') or
122-
function.get('function_argument_names', ''))
211+
def _get_arg_names(function: dict, project: str) -> list[str]:
212+
"""Returns the function argument names."""
213+
arg_names = (function.get('arg-names') or
214+
function.get('function_argument_names', []))
215+
if not arg_names:
216+
logging.error(
217+
'Missing argument names in project: %s\n'
218+
' raw_function_name: %s', project,
219+
get_raw_function_name(function, project))
220+
return arg_names
123221

124222

125223
def get_function_signature(function: dict, project: str) -> str:
  """Returns the function signature.

  Args:
    function: Function info dict; the signature is read from
      'function_signature'.
    project: Project name, used only in the error log.

  Returns:
    The signature, or an empty string (after logging an error) when the key
    is missing or empty.
  """
  function_signature = function.get('function_signature') or ''
  if not function_signature:
    # Both arguments need a placeholder; without the second '%s' the logging
    # module reports a formatting error instead of emitting this message.
    logging.error(
        'Missing function signature in project: %s\n'
        ' raw_function_name: %s', project,
        get_raw_function_name(function, project))
  return function_signature
134232

135233

136234
# TODO(dongge): Remove this function when FI fixes it.
137235
def _parse_type_from_raw_tagged_type(tagged_type: str) -> str:
138-
"""Returns type name from |targged_type| such as struct.TypeA"""
236+
"""Returns type name from |tagged_type| such as struct.TypeA"""
139237
# Assume: Types do not contain dot(.).
140238
return tagged_type.split('.')[-1]
141239

@@ -190,11 +288,11 @@ def populate_benchmarks_using_introspector(project: str, language: str,
190288
project,
191289
language,
192290
function_signature,
193-
_get_raw_function_name(function),
194-
_get_clean_return_type(function),
291+
get_raw_function_name(function, project),
292+
_get_clean_return_type(function, project),
195293
_group_function_params(
196-
_get_clean_arg_types(function),
197-
_get_arg_names(function)),
294+
_get_clean_arg_types(function, project),
295+
_get_arg_names(function, project)),
198296
harness,
199297
target_name,
200298
function_dict=function))
@@ -260,9 +358,9 @@ def _contains_function(funcs: List[Dict], target_func: Dict):
260358
return False
261359

262360

263-
def _postprocess_function(target_func: dict, project_name: str):
  """Post-processes target function in place.

  Replaces 'return-type' with its cleaned form and 'function-name' with its
  demangled form.

  Args:
    target_func: Function info dict to update; must contain 'function-name'.
    project_name: Project name, passed on for error logging.
  """
  target_func['return-type'] = _get_clean_return_type(target_func,
                                                      project_name)
  target_func['function-name'] = demangle(target_func['function-name'])
267365

268366

@@ -298,7 +396,7 @@ def get_project_funcs(project_name: str) -> Dict[str, List[Dict]]:
298396
fuzz_target_funcs[fuzz_target_file] = []
299397
if _contains_function(fuzz_target_funcs[fuzz_target_file], target_func):
300398
continue
301-
_postprocess_function(target_func)
399+
_postprocess_function(target_func, project_name)
302400
fuzz_target_funcs[fuzz_target_file].append(target_func)
303401

304402
# Sort functions in each target file by their complexity.
@@ -313,7 +411,7 @@ def get_project_funcs(project_name: str) -> Dict[str, List[Dict]]:
313411
if __name__ == '__main__':
314412
logging.basicConfig(level=logging.INFO)
315413

316-
#TODO(Dongge): Use argparser.
414+
# TODO(Dongge): Use argparser.
317415
cur_project = sys.argv[1]
318416
max_num_function = 3
319417
if len(sys.argv) > 2:

data_prep/project_targets.py

Lines changed: 6 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,12 @@
1919

2020
import argparse
2121
import json
22-
import logging
2322
import os
2423
import re
2524
import sys
2625
from multiprocessing.pool import ThreadPool
2726
from typing import Dict, List
2827

29-
import requests
3028
from google.cloud import storage
3129

3230
from data_prep import introspector, project_src
@@ -86,40 +84,6 @@ def _match_target_path_content(target_paths: List[str],
8684
return path_contents
8785

8886

89-
# TODO(Jim): Replace the same function in introspector.py with this.
90-
# TODO(Jim): Pass project name to this function and log it if raw_name is not
91-
# found. Do the same for similar functions, e.g.,:
92-
# _get_raw_return_type, _get_arg_names/types, etc.
93-
def _get_raw_function_name(function: dict) -> str:
94-
"""Returns the raw function name."""
95-
raw_name = (function.get('raw-function-name') or
96-
function.get('raw_function_name', ''))
97-
if not raw_name:
98-
logging.error('No raw function name in function: %s', function)
99-
return raw_name
100-
101-
102-
# Merge this function into introspector.py, like other APIs.
103-
def _get_function_signature_from_api(func_info: dict, project_name: str):
104-
"""Requests function signature from FuzzIntrospector API."""
105-
raw_function_name = _get_raw_function_name(func_info)
106-
107-
function_signature_api = (
108-
f'{introspector.INTROSPECTOR_ENDPOINT}/function-signature')
109-
resp = requests.get(function_signature_api,
110-
params={
111-
'project': project_name,
112-
'function': raw_function_name
113-
},
114-
timeout=introspector.TIMEOUT)
115-
data = resp.json()
116-
function = data.get('signature', '')
117-
if not function:
118-
logging.error('No function signature found from FI for project %s: %s',
119-
project_name, data)
120-
return function
121-
122-
12387
def _bucket_match_target_content_signatures(
12488
target_funcs: Dict[str, List[Dict]], fuzz_target_dir: str,
12589
project_name: str) -> Dict[str, List[str]]:
@@ -156,7 +120,9 @@ def _bucket_match_target_content_signatures(
156120
target_content_signature_dict[content] = []
157121

158122
signatures = [
159-
_get_function_signature_from_api(func_info, project_name)
123+
introspector.query_introspector_function_signature(
124+
project_name,
125+
introspector.get_raw_function_name(func_info, project_name))
160126
for func_info in functions
161127
]
162128
target_content_signature_dict[content].extend(signatures)
@@ -272,7 +238,9 @@ def _match_target_content_signatures(
272238
target_content_signature_dict[content] = []
273239

274240
signatures = [
275-
_get_function_signature_from_api(func_info, project_name)
241+
introspector.query_introspector_function_signature(
242+
project_name,
243+
introspector.get_raw_function_name(func_info, project_name))
276244
for func_info in functions
277245
]
278246
target_content_signature_dict[content].extend(signatures)

0 commit comments

Comments
 (0)