Skip to content

Commit b33eac4

Browse files
committed
csfilter-kfp: script to filter known false positives
Resolves: https://issues.redhat.com/browse/OSH-736
1 parent a6c08ed commit b33eac4

16 files changed

+51589
-3
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ cmake_minimum_required(VERSION 3.15)
1919
project(csdiff CXX)
2020
enable_testing()
2121

22-
# C/C++ sources
22+
# source code
2323
add_subdirectory(src)
2424

2525
# regression tests

make-srpm.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ make version.cc
202202
%doc README
203203
%license COPYING
204204
%{_bindir}/csdiff
205+
%{_bindir}/csfilter-kfp
205206
%{_bindir}/csgrep
206207
%{_bindir}/cshtml
207208
%{_bindir}/cslinker

src/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ install(TARGETS
8282
cstrans-df-run
8383
DESTINATION ${CMAKE_INSTALL_BINDIR})
8484

85+
# install the csfilter-kfp script
86+
install(PROGRAMS
87+
csfilter-kfp
88+
DESTINATION ${CMAKE_INSTALL_BINDIR})
89+
8590
# optionally build statically linked csgrep-static
8691
option(CSGREP_STATIC "Set to ON to build the csgrep-static executable" OFF)
8792
if(CSGREP_STATIC)

src/csfilter-kfp

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (C) 2024 Red Hat, Inc.
4+
#
5+
# This file is part of csdiff.
6+
#
7+
# csdiff is free software: you can redistribute it and/or modify
8+
# it under the terms of the GNU General Public License as published by
9+
# the Free Software Foundation, either version 3 of the License, or
10+
# any later version.
11+
#
12+
# csdiff is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU General Public License
18+
# along with csdiff. If not, see <http://www.gnu.org/licenses/>.
19+
20+
import argparse
21+
import os
22+
import re
23+
import subprocess
24+
import sys
25+
26+
27+
# if neither --kfp-dir nor --kfp-git-url is specified, use the known-false-positives RPM package
28+
DEFAULT_KFP_DIR = "/usr/share/csmock/known-false-positives.d"
29+
DEFAULT_KFP_JSON = "/usr/share/csmock/known-false-positives.js"
30+
31+
32+
def construct_init_cmd(args):
    """Return the shell prologue that every generated filtering script starts with.

    The prologue sets the shell options the rest of the script relies on,
    creates a temporary directory exported as ``td`` and registers an EXIT
    trap that removes it.

    Args:
        args: parsed command-line arguments; only ``args.verbose`` is read.

    Returns:
        The prologue as a string of newline-terminated shell commands.
    """
    # make bash exit on error
    cmd = 'set -e\n'

    # make bash propagate exit code from piped commands
    cmd += 'set -o pipefail\n'

    # make bash expand empty globs
    cmd += 'shopt -s nullglob\n'

    # create a temporary directory with an automatic destructor
    #
    # The assignment is kept separate from `export` on purpose: the exit
    # status of `export td=$(mktemp ...)` is the status of `export` itself,
    # so a failing mktemp would slip past `set -e` and leave ${td} empty.
    cmd += 'td=$(mktemp -d /tmp/tmp-csfilter-kfp.XXXXXXXXXX)\n'
    cmd += 'export td\n'
    cmd += 'trap "rm -fr \'${td}\'" EXIT\n'

    if args.verbose:
        # run shell in XTRACE mode
        cmd += 'set -x\n'

    return cmd
51+
52+
53+
def construct_git_cmd(kfp_git_url):
    """Return shell commands that clone the KFP git repository into ``${td}/kfp``.

    Args:
        kfp_git_url: clone URL, optionally followed by ``#`` and a
            lowercase hexadecimal revision to check out.

    Returns:
        Newline-terminated shell commands.  With a revision, a full clone is
        followed by ``git reset --hard`` to that revision; without one,
        a shallow clone of the default branch is produced.
    """
    # imported locally so that this function does not depend on file-level imports
    import shlex

    # split kfp_git_url into the clone URL and (optional) revision
    m = re.match("^(.*)#([0-9a-f]+)", kfp_git_url)
    if m:
        # checkout a specific revision
        #
        # the URL comes from the command line, so it is quoted for the shell;
        # the revision needs no quoting because the regex restricts it to [0-9a-f]
        url = shlex.quote(m.group(1))
        rev = m.group(2)
        return f'git clone {url} "${{td}}/kfp"\n' \
               f'(cd "${{td}}/kfp" && git reset -q --hard {rev})\n'

    # shallow clone of the default branch
    return f'git clone --depth 1 {shlex.quote(kfp_git_url)} "${{td}}/kfp"\n'
63+
64+
65+
def construct_prep_cmd(args):
    """Return shell commands that make the KFP data available under ``${td}``.

    The source of known false positives is picked in this order:
    ``args.kfp_git_url`` (git clone), ``args.kfp_dir`` (symlink), and finally
    the system-wide files ``DEFAULT_KFP_DIR``/``DEFAULT_KFP_JSON`` if the
    latter exists.  Unless the system-wide JSON file is used, the returned
    commands also build ``${td}/kfp.json`` with csgrep from the
    ``ignore.err`` and ``true-positives-ignore.err`` files one directory
    level below ``${td}/kfp``.

    Args:
        args: parsed command-line arguments; ``args.kfp_git_url`` and
            ``args.kfp_dir`` are read.

    Returns:
        Newline-terminated shell commands.

    Raises:
        RuntimeError: if no source of known false positives is available.
    """
    # imported locally so that this function does not depend on file-level imports
    import shlex

    # check which KFP will be used
    have_kfp_json = False
    if args.kfp_git_url:
        # clone git repo
        cmd = construct_git_cmd(args.kfp_git_url)
    elif args.kfp_dir:
        # symlink a directory (the path is user input, so quote it for the shell)
        cmd = f'ln -s {shlex.quote(args.kfp_dir)} "${{td}}/kfp"\n'
    elif os.path.isfile(DEFAULT_KFP_JSON):
        # create symlinks to the known-false-positives RPM package installed on the system
        cmd = f'ln -s "{DEFAULT_KFP_DIR}" "${{td}}/kfp"\n' \
              f'ln -s "{DEFAULT_KFP_JSON}" "${{td}}/kfp.json"\n'
        have_kfp_json = True
    else:
        raise RuntimeError("no source of KFP specified, please use --kfp-dir or --kfp-git-url"
                           " (or install the known-false-positives RPM package)")

    if not have_kfp_json:
        # create all-in-one kfp.json file from files in ${td}/kfp
        #
        # the empty file is always passed along with the globs, which may
        # expand to nothing because the prologue enables nullglob
        cmd += 'touch "${td}/empty.err"\n'
        cmd += '(cd "${td}/kfp" && csgrep --mode=json --remove-duplicates "${td}/empty.err"'
        cmd += ' */ignore.err */true-positives-ignore.err >"${td}/kfp.json")\n'

    return cmd
90+
91+
92+
def construct_path_filter(args):
93+
if args.project_nvr is None:
94+
# TODO: read project_nvr from scan properties if available
95+
return ' cat\n'
96+
97+
# cut off the `-version-release` or `-version` suffix to obtain package name where `version` can be
98+
# a number optionally prefixed by `v` or a full-size SHA1 hash encoded in lowercase as, for example,
99+
# in `project-koku-koku-cbe5e5c3355c1e140aa1cca7377aebe09d8d8466`
100+
proj = re.sub("-(([v]?[0-9][^-]*)|([0-9a-f]{40}))(-[0-9][^-]*)?$", "", args.project_nvr)
101+
102+
# validate the resulting project name
103+
if not re.match("^[A-Za-z0-9-_]+$", proj):
104+
raise RuntimeError(f"invalid project name: {proj}")
105+
106+
# generate a script that will construct the filter at run-time
107+
cmd = f' ep="${{td}}/kfp/{proj}/exclude-paths.txt"\n'
108+
cmd += ' re=\n'
109+
cmd += ' while read line; do\n'
110+
cmd += ' re="${re}|(${line})"\n'
111+
cmd += ' done < <(grep -Esv "^(#|\\\\$)" "$ep")\n'
112+
cmd += ' if test -n "$re"; then\n'
113+
cmd += ' csgrep --mode=json --invert-match --path="${re#|}"\n'
114+
cmd += ' else\n'
115+
cmd += ' cat\n'
116+
cmd += ' fi\n'
117+
return cmd
118+
119+
120+
def construct_filter_cmd(args):
    """Assemble the complete shell script that filters known false positives.

    The script consists of the prologue from construct_init_cmd(), the KFP
    preparation from construct_prep_cmd(), a csgrep call that stores the
    input as ``${td}/input.json``, a ``path_filter`` shell function built
    from construct_path_filter(), a csdiff call piped through that function
    into ``${td}/output.json`` and a final csgrep call that prints the result.

    Args:
        args: parsed command-line arguments; ``input_file``,
            ``record_excluded``, ``json_output``, ``kfp_dir`` and
            ``kfp_git_url`` are read here, the remaining ones by the helpers.

    Returns:
        The shell script as a single string.

    Raises:
        RuntimeError: propagated from the helper functions.
    """
    # imported locally so that this function does not depend on file-level imports
    import shlex

    # set shell options and create a temporary directory ${td}
    cmd = construct_init_cmd(args)

    # prepare the KFP data from the specified source
    cmd += construct_prep_cmd(args)

    # read the whole input into a JSON file
    # (file names given on the command line are quoted for the shell)
    cmd += 'csgrep --mode=json'
    if args.input_file:
        cmd += f' {shlex.quote(args.input_file)}'
    cmd += ' >"${td}/input.json"\n'

    # define path-based filter
    path_filter = construct_path_filter(args)
    cmd += f'path_filter() {{\n{path_filter}}}\n'

    # exclude individual findings
    cmd += 'csdiff --show-internal "${td}/kfp.json" "${td}/input.json"'

    # exclude paths in the scan results
    cmd += ' | path_filter >"${td}/output.json"\n'

    if args.record_excluded:
        # record excluded findings to the specified file
        cmd += 'csdiff "${td}/output.json" "${td}/input.json"'
        cmd += f' >{shlex.quote(args.record_excluded)}\n'

    if not args.json_output:
        # export plain-text format
        cmd += 'csgrep "${td}/output.json"\n'
        return cmd

    # export JSON format
    cmd += 'csgrep --mode=json "${td}/output.json"'

    # optionally record the source of known-false-positives
    if args.kfp_dir:
        prop = f"known-false-positives-dir:{args.kfp_dir}"
        cmd += f' --set-scan-prop={shlex.quote(prop)}'
    elif args.kfp_git_url:
        prop = f"known-false-positives-git-url:{args.kfp_git_url}"
        cmd += f' --set-scan-prop={shlex.quote(prop)}'
    cmd += '\n'

    return cmd
164+
165+
166+
def main():
    """Command-line entry point: parse arguments, build the script, run it.

    With ``--dry-run`` the generated script is printed and the process exits
    with status 0.  Otherwise the script is executed by bash and, if it
    fails, the process exits with the return code reported by subprocess.
    """
    # initialize argument parser
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "input_file", nargs="?",
        help="optional name of the input file (standard input is used by default)")

    # source of known-false-positives
    kfp_source = parser.add_mutually_exclusive_group()
    kfp_source.add_argument(
        "--kfp-dir",
        help="known false positives directory")
    kfp_source.add_argument(
        "--kfp-git-url",
        help="known false positives git URL (optionally taking a revision delimited by #)")

    parser.add_argument(
        "--project-nvr",
        help="Name-Version-Release (NVR) of the scanned project, used to match path exclusions")

    parser.add_argument(
        "--record-excluded",
        help="file to store all excluded findings to")

    # isatty() is asked on the stream object so that a stdout replacement
    # without a file descriptor does not make the option setup fail
    parser.add_argument(
        "--json-output", action="store_true", default=(not sys.stdout.isatty()),
        help="produce JSON output (default if stdout is not connected to a terminal)")

    parser.add_argument(
        "-v", "--verbose", action="store_true",
        help="run shell in XTRACE mode while executing the filtering script")

    parser.add_argument(
        "-n", "--dry-run", action="store_true",
        help="do not execute anything, only print the shell script that would be executed")

    # parse command-line arguments
    args = parser.parse_args()

    # if --kfp-dir is used, check that a directory was given
    if args.kfp_dir:
        if not os.path.isdir(args.kfp_dir):
            parser.error(f"'{args.kfp_dir}' given to --kfp-dir is not a directory")

        # get rid of relative paths
        args.kfp_dir = os.path.realpath(args.kfp_dir)

    # construct the command to filter
    try:
        cmd = construct_filter_cmd(args)
    except RuntimeError as e:
        parser.error(str(e))

    if args.dry_run:
        # print the command and exit successfully
        print(cmd, end='')
        sys.exit(0)

    # run the command
    #
    # The script relies on bash features (`set -o pipefail`, `shopt` and
    # process substitution), so it is handed to bash explicitly instead of
    # the default /bin/sh used by shell=True.
    try:
        subprocess.run(["bash", "-c", cmd], check=True)
    except subprocess.CalledProcessError as e:
        sys.exit(e.returncode)


if __name__ == "__main__":
    main()

tests/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ set(jsfilter "sed -e 's|\"version\": \"[^\"]*\"|\"version\": \"\"|g'")
2929

3030
macro(add_test_wrap test_name cmd)
3131
add_test("${test_name}" bash -c "${cmd}")
32-
set_tests_properties(${test_name} PROPERTIES
33-
ENVIRONMENT "PROJECT_ROOT=${CMAKE_SOURCE_DIR}")
32+
set_tests_properties(${test_name} PROPERTIES ENVIRONMENT
33+
"PATH=${CMAKE_BINARY_DIR}/src:$ENV{PATH};PROJECT_ROOT=${CMAKE_SOURCE_DIR}")
3434

3535
set_tests_properties(${test_name} PROPERTIES COST ${test_cost})
3636
math(EXPR test_cost "${test_cost} - 1")
@@ -45,6 +45,7 @@ endmacro()
4545
set(test_cost 1048576)
4646

4747
add_subdirectory(csdiff)
48+
add_subdirectory(csfilter-kfp)
4849
add_subdirectory(csgrep)
4950
add_subdirectory(cshtml)
5051
add_subdirectory(cslinker)

tests/csfilter-kfp/0001-args.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
--kfp-dir "$PROJECT_ROOT/tests/csfilter-kfp/0001-kfp" --project-nvr project-koku-koku-cbe5e5c3355c1e140aa1cca7377aebe09d8d8466
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.*/test/.*
2+
.*/tests/.*
3+
.*testing/.*
4+
.*/[^/]*test_[^/]*\.py$
5+
.*docker-compose.*
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
Error: SNYK_CODE_WARNING (CWE-89):
2+
project-koku-koku-5c7647f/koku/masu/api/db_performance/dbp_views.py:483:24: error[python/Sqli]: Unsanitized input from the HTTP request body flows into execute, where it is used in an SQL query. This may result in an SQL Injection vulnerability.
3+
# 481| with DBPerformanceStats(get_identity_username(request), CONFIGURATOR) as dbp:
4+
# 482| try:
5+
# 483|-> data = dbp.explain_sql(query_params["sql_statement"])
6+
# 484| except ProgrammingError as e:
7+
# 485| data = {"query_plan": f"{type(e).__name__}: {str(e)}"}
8+
# dbp.explain_sql parses and sanitizes the inputted query params. The query is then run through a read-only db connection.
9+
10+
Error: IDENTIFIER_TYPO (CWE-688):
11+
project-koku-koku-30de2cf/koku/api/settings/tags/mapping/utils.py:96: identifier_typo: Using "provider__uuid" appears to be a typo:
12+
* Identifier "provider__uuid" is only known to be referenced here, or in copies of this code.
13+
* Identifier "provider_uuid" is referenced elsewhere at least 216 times.
14+
project-koku-koku-30de2cf/koku/api/settings/tags/mapping/utils.py:96: remediation: Should identifier "provider__uuid" be replaced by "provider_uuid"?
15+
project-koku-koku-30de2cf/dev/scripts/trino_query.py:11: identifier_use: Example 1: Using identifier "provider_uuid" (2 total uses in this function).
16+
project-koku-koku-30de2cf/koku/api/provider/models.py:224: identifier_use: Example 2: Using identifier "provider_uuid".
17+
project-koku-koku-30de2cf/koku/api/report/ocp/query_handler.py:147: identifier_use: Example 3: Using identifier "provider_uuid".
18+
project-koku-koku-30de2cf/koku/api/test_utils.py:303: identifier_use: Example 4: Using identifier "provider_uuid".
19+
project-koku-koku-30de2cf/koku/cost_models/cost_model_manager.py:123: identifier_use: Example 5: Using identifier "provider_uuid".
20+
# 94| provider_uuids = (
21+
# 95| OCPUsageReportPeriod.objects.filter(cluster_id__in=clusters, report_period_start=start_date)
22+
# 96|-> .values_list("provider__uuid", flat=True)
23+
# 97| .distinct()
24+
# 98| )
25+
# This is Django syntax to query through a foreign key.
26+
27+
Error: SNYK_CODE_WARNING (CWE-89):
28+
project-koku-koku-cf77b7a/koku/masu/api/trino.py:56:13: error[python/Sqli]: Unsanitized input from the HTTP request body flows into execute, where it is used in an SQL query. This may result in an SQL Injection vulnerability.
29+
# 54| ) as conn:
30+
# 55| cur = conn.cursor()
31+
# 56|-> cur.execute(query)
32+
# 57| cols = [des[0] for des in cur.description]
33+
# 58| rows = cur.fetchall()
34+
# this trino connection has been converted to a READONLY connection

0 commit comments

Comments
 (0)