From 2082a65012fe799aa7d59b057d0a9d83aeca47e0 Mon Sep 17 00:00:00 2001 From: bruntib Date: Tue, 24 Oct 2023 01:05:25 +0200 Subject: [PATCH] [feat] Introduce review status config file The review statuses can be provided by a yaml config file. This file can be used for setting the review status of reports in a given directory or a checker name. --- analyzer/codechecker_analyzer/cmd/analyze.py | 21 +++ analyzer/codechecker_analyzer/cmd/check.py | 10 ++ analyzer/codechecker_analyzer/cmd/parse.py | 9 ++ codechecker_common/review_status_handler.py | 141 +++++++++++++++++- codechecker_common/util.py | 1 + docs/analyzer/user_guide.md | 132 +++++++++++----- .../report/__init__.py | 3 +- .../report/output/html/html.py | 3 +- .../report/output/plaintext.py | 3 +- web/client/codechecker_client/cmd/store.py | 4 + .../codechecker_client/cmd_line_client.py | 15 +- .../codechecker_server/api/mass_store_run.py | 27 ++-- 12 files changed, 312 insertions(+), 57 deletions(-) diff --git a/analyzer/codechecker_analyzer/cmd/analyze.py b/analyzer/codechecker_analyzer/cmd/analyze.py index dd3fe1f8fe..13b9342671 100644 --- a/analyzer/codechecker_analyzer/cmd/analyze.py +++ b/analyzer/codechecker_analyzer/cmd/analyze.py @@ -193,6 +193,14 @@ def add_arguments_to_parser(parser): "Example: '/path/to/main.cpp', 'lib/*.cpp', " "*/test*'.") + parser.add_argument('--review-status-config', + dest="review_status_config", + required=False, + default=argparse.SUPPRESS, + help="Path of review_status.yaml config file which " + "contains review status settings assigned to " + "specific directories, checkers, bug hashes.") + parser.add_argument('-o', '--output', dest="output_path", required=True, @@ -853,6 +861,18 @@ def __update_skip_file(args): shutil.copyfile(args.skipfile, skip_file_to_send) +def __update_review_status_config(args): + rs_config_to_send = os.path.join(args.output_path, 'review_status.yaml') + + if os.path.exists(rs_config_to_send): + os.remove(rs_config_to_send) + + if 'review_status_config' in args: + LOG.debug("Copying review status config file %s to %s", + args.review_status_config, rs_config_to_send) + shutil.copyfile(args.review_status_config, rs_config_to_send) + + def __cleanup_metadata(metadata_prev, metadata): """ Cleanup metadata. @@ -1098,6 +1118,7 @@ def main(args): compile_cmd_count) __update_skip_file(args) + __update_review_status_config(args) LOG.debug("Cleanup metadata file started.") __cleanup_metadata(metadata_prev, metadata) diff --git a/analyzer/codechecker_analyzer/cmd/check.py b/analyzer/codechecker_analyzer/cmd/check.py index e116e041e0..3c70c212ee 100644 --- a/analyzer/codechecker_analyzer/cmd/check.py +++ b/analyzer/codechecker_analyzer/cmd/check.py @@ -400,6 +400,15 @@ def add_arguments_to_parser(parser): "the analysis is considered as a failed " "one.") + analyzer_opts.add_argument('--review-status-config', + dest="review_status_config", + required=False, + default=argparse.SUPPRESS, + help="Path of review_status.yaml config file " + "which contains review status settings " + "assigned to specific directories, " + "checkers, bug hashes.") + clang_has_z3 = analyzer_types.is_z3_capable() if clang_has_z3: @@ -872,6 +881,7 @@ def __update_if_key_exists(source, target, key): 'disable_all', 'ordered_checkers', # --enable and --disable. 'timeout', + 'review_status_config', 'compile_uniqueing', 'report_hash', 'enable_z3', diff --git a/analyzer/codechecker_analyzer/cmd/parse.py b/analyzer/codechecker_analyzer/cmd/parse.py index 458be06e2b..89eae4b91a 100644 --- a/analyzer/codechecker_analyzer/cmd/parse.py +++ b/analyzer/codechecker_analyzer/cmd/parse.py @@ -394,6 +394,15 @@ def get_output_file_path(default_file_name: str) -> Optional[str]: context.checker_labels) for dir_path, file_paths in report_file.analyzer_result_files(args.input): + review_status_cfg = os.path.join(dir_path, 'review_status.yaml') + if os.path.isfile(review_status_cfg): + try: + review_status_handler.set_review_status_config( + review_status_cfg) + except ValueError as err: + LOG.error(err) + sys.exit(1) + metadata = get_metadata(dir_path) if metadata and 'files' in args: diff --git a/codechecker_common/review_status_handler.py b/codechecker_common/review_status_handler.py index 2febf4e021..a6b71bec38 100644 --- a/codechecker_common/review_status_handler.py +++ b/codechecker_common/review_status_handler.py @@ -7,8 +7,10 @@ # ------------------------------------------------------------------------- +import fnmatch import os from typing import List, Optional +import yaml from codechecker_report_converter.report import Report, SourceReviewStatus from codechecker_common.logger import get_logger @@ -33,13 +35,31 @@ class ReviewStatusHandler: 3. Review status rule: A mapping between bug hashes and review statuses set in the GUI. - TODO: Option 2. will be handled in a future commit. TODO: Option 3. should also be covered by this handler class. """ + + REVIEW_STATUS_OPTIONS = [ + 'false_positive', + 'suppress', + 'confirmed', + 'intentional'] + + ALLOWED_FIELDS = [ + 'filepath_filter', + 'checker_filter', + 'report_hash_filter', + 'review_status', + 'message'] + def __init__(self, source_root=''): + """ + TODO: What happens if multiple report directories are stored? + """ + self.__review_status_yaml = None self.__source_root = source_root self.__source_comment_warnings = [] self.__source_commets = {} + self.__data = None def __parse_codechecker_review_comment( self, @@ -63,6 +83,46 @@ def __parse_codechecker_review_comment( return src_comment_data + def __validate_review_status_yaml_data(self): + """ + This function validates the data read from review_status.yaml file and + raises ValueError with the description of the error if the format is + invalid. + """ + if not isinstance(self.__data, list): + raise ValueError( + f"{self.__review_status_yaml} should be a list of review " + "status descriptor objects.") + + for item in self.__data: + if not isinstance(item, dict): + raise ValueError( + f"Format error in {self.__review_status_yaml}: {item} " + "should be a review status descriptor object.") + + for field in item: + if field not in ReviewStatusHandler.ALLOWED_FIELDS: + raise ValueError( + f"Format error in {self.__review_status_yaml}: field " + f"'{field}' is not allowed. Available fields are: " + f"{', '.join(ReviewStatusHandler.ALLOWED_FIELDS)}") + + if 'review_status' not in item: + raise ValueError( + f"Format error in {self.__review_status_yaml}: " + f"'review_status' field is missing from {item}.") + + if item['review_status'] \ + not in ReviewStatusHandler.REVIEW_STATUS_OPTIONS: + raise ValueError( + f"Invalid review status field: {item['review_status']} at " + f"{item} in {self.__review_status_yaml}. Available " + f"options are: " + f"{', '.join(ReviewStatusHandler.REVIEW_STATUS_OPTIONS)}.") + + if item['review_status'] == 'suppress': + item['review_status'] = 'false_positive' + def get_review_status(self, report: Report) -> SourceReviewStatus: """ Return the review status of the report based on source code comments. @@ -71,13 +131,84 @@ def get_review_status(self, report: Report) -> SourceReviewStatus: review status returns. This function throws ValueError if the review status is ambiguous (see get_review_status_from_source()). """ - rs_from_source = self.get_review_status_from_source(report) + # 1. Check if the report has in-source review status setting. + review_status = self.get_review_status_from_source(report) + + if review_status: + return review_status - if rs_from_source: - return rs_from_source + # 2. Check if the report has review status setting in the yaml config. + if self.__data: + review_status = self.get_review_status_from_config(report) + if review_status: + return review_status + # 3. Return "unreviewed" otherwise. return SourceReviewStatus(bug_hash=report.report_hash) + def set_review_status_config(self, config_file): + """ + Set the location of the review_status.yaml config file. + + When the content of multiple report directories are parsed then they + may contain separate config files. These need to be given before + parsing each report folders. + """ + self.__review_status_yaml = config_file + + with open(self.__review_status_yaml, + encoding='utf-8', errors='ignore') as f: + # TODO: Validate format. + # - Can filepath be a list? + # TODO: May throw yaml.scanner.ScannerError. + try: + self.__data = yaml.safe_load(f) + except yaml.scanner.ScannerError as err: + raise ValueError( + f"Invalid YAML format in {self.__review_status_yaml}:\n" + f"{err}") + + self.__validate_review_status_yaml_data() + + def get_review_status_from_config( + self, + report: Report + ) -> Optional[SourceReviewStatus]: + """ + Return the review status of the given report based on the config file + set by set_review_status_config(). If not config file set, or no + setting matches the report then None returns. + """ + assert self.__data is not None, \ + "Review status config file has to be set with " \ + "set_review_status_config()." + + # TODO: Document "in_source". + for item in self.__data: + if 'filepath_filter' in item and not fnmatch.fnmatch( + report.file.original_path, item['filepath_filter']): + continue + if 'checker_filter' in item and \ + report.checker_name != item['checker_filter']: + continue + if 'report_hash_filter' in item and \ + not report.report_hash.startswith( + item['report_hash_filter']): + continue + + if any(filt in item for filt in + ['filepath_filter', 'checker_filter', + 'report_hash_filter']): + return SourceReviewStatus( + status=item['review_status'], + message=item['message'] + .encode(encoding='utf-8', errors='ignore') + if 'message' in item else b'', + bug_hash=report.report_hash, + in_source=True) + + return None + def get_review_status_from_source( self, report: Report @@ -92,6 +223,8 @@ def get_review_status_from_source( contains information about the problem in a string. - If the soure file changed (which means that the source comments may have replaced, changed or removed) then None returns. + - If no review status belongs to the report in the source code, then + return None. TODO: This function either returns a SourceReviewStatus, or raises an exception or returns None. This is too many things that a caller needs to handle. The reason is that according to the current behaviors, diff --git a/codechecker_common/util.py b/codechecker_common/util.py index a58631a2cf..08743f8f35 100644 --- a/codechecker_common/util.py +++ b/codechecker_common/util.py @@ -86,6 +86,7 @@ def load_json(path: str, default=None, lock=False, display_warning=True): return ret + def get_linef(fp: TextIO, line_no: int) -> str: """'fp' should be (readable) file object. Return the line content at line_no or an empty line diff --git a/docs/analyzer/user_guide.md b/docs/analyzer/user_guide.md index ca48db0c28..78f69eea7d 100644 --- a/docs/analyzer/user_guide.md +++ b/docs/analyzer/user_guide.md @@ -47,15 +47,17 @@ - [`checkers`](#checkers) - [`analyzers`](#analyzers) - [Configuring Clang version](#configuring-clang-version) - - [Suppressing False positives (source code comments for review status)](#suppressing-false-positives-source-code-comments-for-review-status) - - [Supported formats](#supported-formats) - - [Change review status of a specific checker result](#change-review-status-of-a-specific-checker-result) - - [Change review status of a specific checker result by using a substring of the checker name](#change-review-status-of-a-specific-checker-result-by-using-a-substring-of-the-checker-name) - - [Change review status of all checker result](#change-review-status-of-all-checker-result) - - [Change review status of all checker result with C style comment](#change-review-status-of-all-checker-result-with-c-style-comment) - - [Multi line comments](#multi-line-comments) - - [Multi line C style comments](#multi-line-c-style-comments) - - [Change review status for multiple checker results in the same line](#change-review-status-for-multiple-checker-results-in-the-same-line) + - [Review status handling](#review-status-handling) + - [Setting with source code comments](#setting-with-source-code-comments) + - [Supported formats](#supported-formats) + - [Change review status of a specific checker result](#change-review-status-of-a-specific-checker-result) + - [Change review status of a specific checker result by using a substring of the checker name](#change-review-status-of-a-specific-checker-result-by-using-a-substring-of-the-checker-name) + - [Change review status of all checker results](#change-review-status-of-all-checker-results) + - [Change review status of all checker results with C style comment](#change-review-status-of-all-checker-results-with-c-style-comment) + - [Multi line comments](#multi-line-comments) + - [Multi line C style comments](#multi-line-c-style-comments) + - [Change review status for multiple checker results in the same line](#change-review-status-for-multiple-checker-results-in-the-same-line) + - [Setting with config file](#setting-with-config-file) ## Overview CodeChecker command line tooling provides sub-commands to perform **static code analysis** and to store reports into a **web-based storage**. @@ -68,7 +70,7 @@ The analysis related use-cases can be fully performed without a web-server. * [Execuing Static Analysis](#analyze) * [Showing analysis results in the command line](#parse) * [Applying code fixes](#fixit) -* [Suppressing false positives with source-code comments](#suppressing-false-positives-source-code-comments-for-review-status) +* [Suppressing false positives with source-code comments](#setting-with-source-code-comments) ## Quick Analysis @@ -2419,41 +2421,53 @@ Clang based tools search by default for in a path relative to the tool binary. `$(dirname /path/to/tool)/../lib/clang/8.0.0/include` -## Suppressing False positives (source code comments for review status) +## Review status handling + +Users can categorize CodeChecker reports by setting their _review status_. A +report can be _false positive_ or it can also indicate a _confirmed_ bug. +Sometimes a bug is _intentional_ in a test code. Developers can review the +reports and assign such a status accordingly with a short comment message. A +report is _unreviewed_ by default. + +The review status is not only a stateless indication of a report, but takes +effect in situations when CodeChecker needs to determine the set of outstanding +bugs. For example, a report with _intentional_ or _false positive_ detection +status is not considered outstanding, because the analyzed project's developers +don't need to worry about them. + +### Setting with source code comments Source code comments can be used in the source files to change the review -status of a specific or all checker results found in a particular line of code. -Source code comment should be above the line where the defect was found, and +status of a specific report found in a particular line of code. +Source code comments should be above the line where the defect was found, and __no__ empty lines are allowed between the line with the bug and the source code comment. -Comment lines staring with `//` or C style `/**/` comments are supported. +Comment lines staring with `//` or C style `/**/` comments are both supported. Watch out for the comment format! -### Supported formats +#### Supported formats The source code comment has the following format: -```sh -// codechecker comment type [checker name] comment +```c +// codechecker_ [] comment ``` -Multiple source code comment types are allowed: +Several source code comment types are allowed: - * `codechecker_suppress` - * `codechecker_false_positive` - * `codechecker_intentional` - * `codechecker_confirmed` - -Source code comment change the `review status` of a bug in the following form: - - * `codechecker_suppress` and `codechecker_false_positive` to `False positive` - * `codechecker_intentional` to `Intentional` - * `codechecker_confirmed` to `Confirmed`. - -Note: `codechecker_suppress` does the same as `codechecker_false_positive`. + * `codechecker_suppress`: Sets the review status to _false positive_. These + reports are not considered outstanding. The limitations of analyzers may + cause false positive reports, these can be categorized by this status value. + * `codechecker_false_positive`: Same as `codechecker_suppress`. + * `codechecker_intentional`: Sets the review status to _intentional_. These + reports are not considered outstanding. For example a bug may be intentional + in a test code. + * `codechecker_confirmed`: Sets the review status to _confirmed_. This is an + indication for developers to deal with this report. Such a report is + considered outstanding. You can read more about review status [here](https://github.com/Ericsson/codechecker/blob/master/www/userguide/userguide.md#userguide-review-status) -### Change review status of a specific checker result +#### Change review status of a specific checker result ```cpp void test() { int x; @@ -2462,7 +2476,7 @@ void test() { } ``` -### Change review status of a specific checker result by using a substring of the checker name +#### Change review status of a specific checker result by using a substring of the checker name There is no need to specify the whole checker name in the source code comment like `deadcode.DeadStores`, because it will not be resilient to package name changes. You are able to specify only a substring of the checker name for the @@ -2476,7 +2490,7 @@ void test() { ``` -### Change review status of all checker result +#### Change review status of all checker results ```cpp void test() { int x; @@ -2485,7 +2499,7 @@ void test() { } ``` -### Change review status of all checker result with C style comment +#### Change review status of all checker results with C style comment ```cpp void test() { int x; @@ -2494,7 +2508,7 @@ void test() { } ``` -### Multi line comments +#### Multi line comments ```cpp void test() { int x; @@ -2507,7 +2521,7 @@ void test() { } ``` -### Multi line C style comments +#### Multi line C style comments ```cpp void test() { int x; @@ -2534,7 +2548,7 @@ void test() { } ``` -### Change review status for multiple checker results in the same line +#### Change review status for multiple checker results in the same line You can change multiple checker reports with a single source code comment: ```cpp @@ -2545,7 +2559,7 @@ void test() { ``` The limitation of this format is that you can't use separate status or message -for checkers. To solve this problem you can use one of the following format: +for checkers. To solve this problem you can use one of the following formats: ```cpp void test_simple() { @@ -2579,3 +2593,45 @@ void testError2() { int x = 1 / 0; } ``` + +### Setting with config file + +Review status can be configured by a config file in YAML format. This config +file has to represent a list of review status settings: + +```yaml +- filepath_filter: /path/to/project/test/* + checker_filter: core.DivideZero + message: Division by zero in test files is automatically intentional. + review_status: intentional +- filepath_filter: /path/to/project/important/module/* + message: All reports in this module should be investigated. + review_status: confirmed +- filepath_filter: "*/project/test/*" + message: If a filter starts with asterix, then it should be quoted due to YAML format. + review_status: suppress +- report_hash_filter: b85851b34789e35c6acfa1a4aaf65382 + message: This report is false positive. + review_status: false_positive +``` + +The fields of a review status settings are: + +- `filepath_filter` (optional): A glob to a path where the given review status + is applied. A [https://docs.python.org/3/library/glob.html](glob) is a path + that may contain shell-style wildcards: `*` substitutes zero or more + characters, `?` substitutes exactly one character. This filter option is + applied on the full path of a source file, even if `--trim-path-prefix` flag + is used later. +- `checker_filter` (optional): Set the review status for only these checkers' + reports. +- `report_hash_filter` (optional): Set the review status for only the checkers + having this report hash. A prefix match is applied on report hashes, so it is + enough to provide the beginning of a hash. Make sure to use a quite long + prefix so it covers one specific report. +- `message` (optional): A comment message that describes the reason of the + setting. +- `review_status`: The review status to set. + +If none of the `_filter` options is provided, then that setting is not applied +on any report. diff --git a/tools/report-converter/codechecker_report_converter/report/__init__.py b/tools/report-converter/codechecker_report_converter/report/__init__.py index 71f8cf8c34..fc3512577b 100644 --- a/tools/report-converter/codechecker_report_converter/report/__init__.py +++ b/tools/report-converter/codechecker_report_converter/report/__init__.py @@ -489,7 +489,8 @@ def to_json(self) -> Dict: "analyzer_name": self.analyzer_name, "category": self.category, "type": self.type, - "review_status": self.review_status.status, + "review_status": self.review_status.status + if self.review_status else '', "bug_path_events": [e.to_json() for e in self.bug_path_events], "bug_path_positions": [ p.to_json() for p in self.bug_path_positions], diff --git a/tools/report-converter/codechecker_report_converter/report/output/html/html.py b/tools/report-converter/codechecker_report_converter/report/output/html/html.py index 819109e25f..fd4ef21342 100644 --- a/tools/report-converter/codechecker_report_converter/report/output/html/html.py +++ b/tools/report-converter/codechecker_report_converter/report/output/html/html.py @@ -249,7 +249,8 @@ def to_macro_expansions( 'events': to_bug_path_events(report.bug_path_events), 'macros': to_macro_expansions(report.macro_expansions), 'notes': to_bug_path_events(report.notes), - 'reviewStatus': report.review_status.formatted_status(), + 'reviewStatus': report.review_status.formatted_status() + if report.review_status else '', 'severity': self.get_severity(report.checker_name) }) diff --git a/tools/report-converter/codechecker_report_converter/report/output/plaintext.py b/tools/report-converter/codechecker_report_converter/report/output/plaintext.py index e838770538..c013e13314 100644 --- a/tools/report-converter/codechecker_report_converter/report/output/plaintext.py +++ b/tools/report-converter/codechecker_report_converter/report/output/plaintext.py @@ -64,7 +64,8 @@ def format_report(report: Report, content_is_not_changed: bool) -> str: out = f"[{report.severity}] {file_path}:{report.line}:{report.column}: " \ f"{report.message} [{report.checker_name}]" - if content_is_not_changed and report.review_status.status != 'unreviewed': + if content_is_not_changed and report.review_status and \ + report.review_status.status != 'unreviewed': out += f" [{report.review_status.formatted_status()}]" return out diff --git a/web/client/codechecker_client/cmd/store.py b/web/client/codechecker_client/cmd/store.py index 4328277444..668cc75adc 100644 --- a/web/client/codechecker_client/cmd/store.py +++ b/web/client/codechecker_client/cmd/store.py @@ -449,6 +449,10 @@ def assemble_zip(inputs, files_to_compress.add(skip_file_path) + review_status_file_path = os.path.join(dir_path, 'review_status.yaml') + if os.path.exists(review_status_file_path): + files_to_compress.add(review_status_file_path) + LOG.debug("Processing report files ...") # Currently ProcessPoolExecutor fails completely in windows. diff --git a/web/client/codechecker_client/cmd_line_client.py b/web/client/codechecker_client/cmd_line_client.py index e18f42d90f..78857207f0 100644 --- a/web/client/codechecker_client/cmd_line_client.py +++ b/web/client/codechecker_client/cmd_line_client.py @@ -201,10 +201,21 @@ def get_report_dir_results( Absolute paths are expected to the given report directories. """ all_reports = [] - review_status_handler = ReviewStatusHandler() processed_path_hashes = set() - for _, file_paths in report_file.analyzer_result_files(report_dirs): + for report_dir, file_paths in \ + report_file.analyzer_result_files(report_dirs): + + review_status_handler = ReviewStatusHandler() + review_status_cfg = os.path.join(report_dir, 'review_status.yaml') + if os.path.isfile(review_status_cfg): + try: + review_status_handler.set_review_status_config( + review_status_cfg) + except ValueError as err: + LOG.error(err) + sys.exit(1) + for file_path in file_paths: # Get reports. reports = report_file.get_reports(file_path, checker_labels) diff --git a/web/server/codechecker_server/api/mass_store_run.py b/web/server/codechecker_server/api/mass_store_run.py index 8eb9be73ba..f311f9dc69 100644 --- a/web/server/codechecker_server/api/mass_store_run.py +++ b/web/server/codechecker_server/api/mass_store_run.py @@ -895,19 +895,20 @@ def get_missing_file_ids(report: Report) -> List[str]: analyzer_name = mip.checker_to_analyzer.get( report.checker_name, report.analyzer_name) - rs_from_source = SourceReviewStatus() + review_status = SourceReviewStatus() + try: - rs_from_source = \ - review_status_handler.get_review_status(report) - rs_from_source.author = self.user_name - rs_from_source.date = run_history_time + review_status = review_status_handler.get_review_status(report) except ValueError as err: self.__wrong_src_code_comments.append(str(err)) + review_status.author = self.user_name + review_status.date = run_history_time + # False positive and intentional reports are considered as closed # reports which is indicated with non-null "fixed_at" date. fixed_at = None - if rs_from_source.status in ['false_positive', 'intentional']: + if review_status.status in ['false_positive', 'intentional']: # Keep in mind that now this is not handling review status # rules, only review status source code comments if old_report and old_report.review_status in \ @@ -919,11 +920,11 @@ def get_missing_file_ids(report: Report) -> List[str]: self.__check_report_count() report_id = self.__add_report( session, run_id, report, file_path_to_id, - rs_from_source, detection_status, detected_at, + review_status, detection_status, detected_at, run_history_time, analysis_info, analyzer_name, fixed_at) self.__new_report_hashes[report.report_hash] = \ - rs_from_source.status + review_status.status self.__already_added_report_hashes.add(report_path_hash) LOG.debug("Storing report done. ID=%d", report_id) @@ -1061,13 +1062,19 @@ def get_skip_handler( # Processing analyzer result files. processed_result_file_count = 0 - review_status_handler = ReviewStatusHandler(source_root) - for root_dir_path, _, report_file_paths in os.walk(report_dir): LOG.debug("Get reports from '%s' directory", root_dir_path) skip_handler = get_skip_handler(root_dir_path) + review_status_handler = ReviewStatusHandler(source_root) + + review_status_cfg = \ + os.path.join(root_dir_path, 'review_status.yaml') + if os.path.isfile(review_status_cfg): + review_status_handler.set_review_status_config( + review_status_cfg) + mip = self.__mips[root_dir_path] enabled_checkers.update(mip.enabled_checkers) disabled_checkers.update(mip.disabled_checkers)