[feat] Introduce review status config file

The review statuses can be provided by a yaml config file. This file can be used for setting the review status of reports in a given directory or a checker name.
bruntib · Oct 26, 2023 · 2082a65 · 2082a65
1 parent 24bb998
commit 2082a65
Show file tree

Hide file tree

Showing 12 changed files with 312 additions and 57 deletions.
diff --git a/analyzer/codechecker_analyzer/cmd/analyze.py b/analyzer/codechecker_analyzer/cmd/analyze.py
@@ -193,6 +193,14 @@ def add_arguments_to_parser(parser):
                                 "Example: '/path/to/main.cpp', 'lib/*.cpp', "
                                 "*/test*'.")
 
+    parser.add_argument('--review-status-config',
+                        dest="review_status_config",
+                        required=False,
+                        default=argparse.SUPPRESS,
+                        help="Path of review_status.yaml config file which "
+                             "contains review status settings assigned to "
+                             "specific directories, checkers, bug hashes.")
+
     parser.add_argument('-o', '--output',
                         dest="output_path",
                         required=True,
@@ -853,6 +861,18 @@ def __update_skip_file(args):
         shutil.copyfile(args.skipfile, skip_file_to_send)
 
 
+def __update_review_status_config(args):
+    rs_config_to_send = os.path.join(args.output_path, 'review_status.yaml')
+
+    if os.path.exists(rs_config_to_send):
+        os.remove(rs_config_to_send)
+
+    if 'review_status_config' in args:
+        LOG.debug("Copying review status config file %s to %s",
+                  args.review_status_config, rs_config_to_send)
+        shutil.copyfile(args.review_status_config, rs_config_to_send)
+
+
 def __cleanup_metadata(metadata_prev, metadata):
     """ Cleanup metadata.
 
@@ -1098,6 +1118,7 @@ def main(args):
                               compile_cmd_count)
 
     __update_skip_file(args)
+    __update_review_status_config(args)
 
     LOG.debug("Cleanup metadata file started.")
     __cleanup_metadata(metadata_prev, metadata)

diff --git a/analyzer/codechecker_analyzer/cmd/check.py b/analyzer/codechecker_analyzer/cmd/check.py
@@ -400,6 +400,15 @@ def add_arguments_to_parser(parser):
                                     "the analysis is considered as a failed "
                                     "one.")
 
+    analyzer_opts.add_argument('--review-status-config',
+                               dest="review_status_config",
+                               required=False,
+                               default=argparse.SUPPRESS,
+                               help="Path of review_status.yaml config file "
+                                    "which contains review status settings "
+                                    "assigned to specific directories, "
+                                    "checkers, bug hashes.")
+
     clang_has_z3 = analyzer_types.is_z3_capable()
 
     if clang_has_z3:
@@ -872,6 +881,7 @@ def __update_if_key_exists(source, target, key):
                           'disable_all',
                           'ordered_checkers',  # --enable and --disable.
                           'timeout',
+                          'review_status_config',
                           'compile_uniqueing',
                           'report_hash',
                           'enable_z3',

diff --git a/analyzer/codechecker_analyzer/cmd/parse.py b/analyzer/codechecker_analyzer/cmd/parse.py
@@ -394,6 +394,15 @@ def get_output_file_path(default_file_name: str) -> Optional[str]:
             context.checker_labels)
 
     for dir_path, file_paths in report_file.analyzer_result_files(args.input):
+        review_status_cfg = os.path.join(dir_path, 'review_status.yaml')
+        if os.path.isfile(review_status_cfg):
+            try:
+                review_status_handler.set_review_status_config(
+                    review_status_cfg)
+            except ValueError as err:
+                LOG.error(err)
+                sys.exit(1)
+
         metadata = get_metadata(dir_path)
 
         if metadata and 'files' in args:

diff --git a/codechecker_common/review_status_handler.py b/codechecker_common/review_status_handler.py
@@ -7,8 +7,10 @@
 # -------------------------------------------------------------------------
 
 
+import fnmatch
 import os
 from typing import List, Optional
+import yaml
 
 from codechecker_report_converter.report import Report, SourceReviewStatus
 from codechecker_common.logger import get_logger
@@ -33,13 +35,31 @@ class ReviewStatusHandler:
     3. Review status rule:
         A mapping between bug hashes and review statuses set in the GUI.
 
-    TODO: Option 2. will be handled in a future commit.
     TODO: Option 3. should also be covered by this handler class.
     """
+
+    REVIEW_STATUS_OPTIONS = [
+        'false_positive',
+        'suppress',
+        'confirmed',
+        'intentional']
+
+    ALLOWED_FIELDS = [
+        'filepath_filter',
+        'checker_filter',
+        'report_hash_filter',
+        'review_status',
+        'message']
+
     def __init__(self, source_root=''):
+        """
+        TODO: What happens if multiple report directories are stored?
+        """
+        self.__review_status_yaml = None
         self.__source_root = source_root
         self.__source_comment_warnings = []
         self.__source_commets = {}
+        self.__data = None
 
     def __parse_codechecker_review_comment(
         self,
@@ -63,6 +83,46 @@ def __parse_codechecker_review_comment(
 
         return src_comment_data
 
+    def __validate_review_status_yaml_data(self):
+        """
+        This function validates the data read from review_status.yaml file and
+        raises ValueError with the description of the error if the format is
+        invalid.
+        """
+        if not isinstance(self.__data, list):
+            raise ValueError(
+                f"{self.__review_status_yaml} should be a list of review "
+                "status descriptor objects.")
+
+        for item in self.__data:
+            if not isinstance(item, dict):
+                raise ValueError(
+                    f"Format error in {self.__review_status_yaml}: {item} "
+                    "should be a review status descriptor object.")
+
+            for field in item:
+                if field not in ReviewStatusHandler.ALLOWED_FIELDS:
+                    raise ValueError(
+                        f"Format error in {self.__review_status_yaml}: field "
+                        f"'{field}' is not allowed. Available fields are: "
+                        f"{', '.join(ReviewStatusHandler.ALLOWED_FIELDS)}")
+
+            if 'review_status' not in item:
+                raise ValueError(
+                    f"Format error in {self.__review_status_yaml}: "
+                    f"'review_status' field is missing from {item}.")
+
+            if item['review_status'] \
+                    not in ReviewStatusHandler.REVIEW_STATUS_OPTIONS:
+                raise ValueError(
+                    f"Invalid review status field: {item['review_status']} at "
+                    f"{item} in {self.__review_status_yaml}. Available "
+                    f"options are: "
+                    f"{', '.join(ReviewStatusHandler.REVIEW_STATUS_OPTIONS)}.")
+
+            if item['review_status'] == 'suppress':
+                item['review_status'] = 'false_positive'
+
     def get_review_status(self, report: Report) -> SourceReviewStatus:
         """
         Return the review status of the report based on source code comments.
@@ -71,13 +131,84 @@ def get_review_status(self, report: Report) -> SourceReviewStatus:
         review status returns. This function throws ValueError if the review
         status is ambiguous (see get_review_status_from_source()).
         """
-        rs_from_source = self.get_review_status_from_source(report)
+        # 1. Check if the report has in-source review status setting.
+        review_status = self.get_review_status_from_source(report)
+
+        if review_status:
+            return review_status
 
-        if rs_from_source:
-            return rs_from_source
+        # 2. Check if the report has review status setting in the yaml config.
+        if self.__data:
+            review_status = self.get_review_status_from_config(report)
+            if review_status:
+                return review_status
 
+        # 3. Return "unreviewed" otherwise.
         return SourceReviewStatus(bug_hash=report.report_hash)
 
+    def set_review_status_config(self, config_file):
+        """
+        Set the location of the review_status.yaml config file.
+
+        When the content of multiple report directories are parsed then they
+        may contain separate config files. These need to be given before
+        parsing each report folders.
+        """
+        self.__review_status_yaml = config_file
+
+        with open(self.__review_status_yaml,
+                  encoding='utf-8', errors='ignore') as f:
+            # TODO: Validate format.
+            #  - Can filepath be a list?
+            # TODO: May throw yaml.scanner.ScannerError.
+            try:
+                self.__data = yaml.safe_load(f)
+            except yaml.scanner.ScannerError as err:
+                raise ValueError(
+                    f"Invalid YAML format in {self.__review_status_yaml}:\n"
+                    f"{err}")
+
+        self.__validate_review_status_yaml_data()
+
+    def get_review_status_from_config(
+        self,
+        report: Report
+    ) -> Optional[SourceReviewStatus]:
+        """
+        Return the review status of the given report based on the config file
+        set by set_review_status_config(). If not config file set, or no
+        setting matches the report then None returns.
+        """
+        assert self.__data is not None, \
+            "Review status config file has to be set with " \
+            "set_review_status_config()."
+
+        # TODO: Document "in_source".
+        for item in self.__data:
+            if 'filepath_filter' in item and not fnmatch.fnmatch(
+                    report.file.original_path, item['filepath_filter']):
+                continue
+            if 'checker_filter' in item and \
+                    report.checker_name != item['checker_filter']:
+                continue
+            if 'report_hash_filter' in item and \
+                    not report.report_hash.startswith(
+                        item['report_hash_filter']):
+                continue
+
+            if any(filt in item for filt in
+                   ['filepath_filter', 'checker_filter',
+                    'report_hash_filter']):
+                return SourceReviewStatus(
+                    status=item['review_status'],
+                    message=item['message']
+                    .encode(encoding='utf-8', errors='ignore')
+                    if 'message' in item else b'',
+                    bug_hash=report.report_hash,
+                    in_source=True)
+
+        return None
+
     def get_review_status_from_source(
         self,
         report: Report
@@ -92,6 +223,8 @@ def get_review_status_from_source(
             contains information about the problem in a string.
         - If the soure file changed (which means that the source comments may
             have replaced, changed or removed) then None returns.
+        - If no review status belongs to the report in the source code, then
+            return None.
         TODO: This function either returns a SourceReviewStatus, or raises an
         exception or returns None. This is too many things that a caller needs
         to handle. The reason is that according to the current behaviors,

diff --git a/codechecker_common/util.py b/codechecker_common/util.py
@@ -86,6 +86,7 @@ def load_json(path: str, default=None, lock=False, display_warning=True):
 
     return ret
 
+
 def get_linef(fp: TextIO, line_no: int) -> str:
     """'fp' should be (readable) file object.
     Return the line content at line_no or an empty line