[NFC] ReviewStatusHandler for source code comments

Currently there are 2 ways of handling review statuses for reports: - In source code comments: // codechecker_suppress [checker_name] message... - In the GUI: This sets a "review status rule" which means a relationship between bug hashes and review statuses. All reports with the same bug hash get the same review status. We are planning a 3rd way of setting review statuses: A review_status.yaml config file which contains review status mappings to files or directories. The long-term plan is to create a ReviewStatusHandler class that handles the review status of a report, in all three scenarios above. This commit is just a refactoring which moves the 1st scenario-related code (review status from source code comment) to the ReviewStatusHandler class.
bruntib · Oct 23, 2023 · 39e0f7b · 39e0f7b
1 parent 3c4c8ec
commit 39e0f7b
Show file tree

Hide file tree

Showing 2 changed files with 151 additions and 111 deletions.
diff --git a/codechecker_common/review_status_handler.py b/codechecker_common/review_status_handler.py
@@ -0,0 +1,136 @@
+# -------------------------------------------------------------------------
+#
+#  Part of the CodeChecker project, under the Apache License v2.0 with
+#  LLVM Exceptions. See LICENSE for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# -------------------------------------------------------------------------
+
+
+from dataclasses import dataclass
+from datetime import datetime
+import os
+from typing import Optional
+
+from logger import get_logger
+from codechecker_report_converter.report import Report
+from codechecker_report_converter.source_code_comment_handler import \
+    SourceCodeCommentHandler, SpellException, contains_codechecker_comment, \
+    SourceCodeComments
+
+
+LOG = get_logger('system')
+
+
+@dataclass
+class SourceReviewStatus:
+    """
+    Helper class for handling in source review statuses.
+    Collect the same info as a review status rule.
+
+    TODO: rename this class, because this not only represents review statuses
+          in source code comments but in review status yaml too.
+    """
+    status: str = "unreviewed"
+    message: bytes = b""
+    bug_hash: str = ""
+    in_source: bool = False
+    author: Optional[str] = None
+    date: Optional[datetime] = None
+
+
+class ReviewStatusHandler:
+    """
+    This class helps to determine the review status of a report. The review
+    status may come either from several sources:
+
+    1. Source code comment:
+        // codechecker_suppress [core.DivideZero] This is a false positive.
+    2. Review status configuration file:
+        <report_folder>/review_status.yaml contains review status settings on
+        file path or bug has granularity.
+    3. Review status rule:
+        A mapping between bug hashes and review statuses set in the GUI.
+
+    TODO: Option 2. will be handled in a future commit.
+    TODO: Option 3. should also be covered by this handler class.
+    """
+    def __init__(self, source_root):
+        self.__source_root = source_root
+
+    def __parse_codechecker_review_comment(
+        self,
+        source_file_name: str,
+        report_line: int,
+        checker_name: str
+    ) -> SourceCodeComments:
+        """Parse the CodeChecker review comments from a source file at a given
+        position.  Returns an empty list if there are no comments.
+        """
+        src_comment_data = []
+        with open(source_file_name, encoding='utf-8', errors='ignore') as f:
+            if contains_codechecker_comment(f):
+                sc_handler = SourceCodeCommentHandler()
+                try:
+                    src_comment_data = sc_handler.filter_source_line_comments(
+                        f, report_line, checker_name)
+                except SpellException as ex:
+                    LOG.warning("File %s contains %s", source_file_name, ex)
+
+        return src_comment_data
+
+    def get_review_status_from_source(
+        self,
+        report: Report
+    ) -> SourceReviewStatus:
+        """
+        Return the review status set in the source code belonging to
+        the given report.
+
+        - Return the review status if it is set in the source code.
+        - If the review status is ambiguous (i.e. multiple source code
+            comments belong to it) then a ValueError exception is raised which
+            contains information about the problem in a string.
+        # TODO: If not found then return None or unreviewed by default?
+        """
+        # The original file path is needed here, not the trimmed, because
+        # the source files are extracted as the original file path.
+        source_file_name = os.path.realpath(os.path.join(
+            self.__source_root, report.file.original_path.strip("/")))
+
+        if os.path.isfile(source_file_name):
+            src_comment_data = self.__parse_codechecker_review_comment(
+                source_file_name, report.line, report.checker_name)
+
+            if len(src_comment_data) == 1:
+                data = src_comment_data[0]
+                status, message = data.status, bytes(data.message, 'utf-8')
+
+                review_status = SourceReviewStatus(
+                    status=status,
+                    message=message,
+                    bug_hash=report.report_hash,
+                    in_source=True
+                )
+
+                return review_status
+            elif len(src_comment_data) > 1:
+                LOG.warning(
+                    "Multiple source code comment can be found "
+                    "for '%s' checker in '%s' at line %s. "
+                    "This suppression will not be taken into account!",
+                    report.checker_name, source_file_name, report.line)
+
+                raise ValueError(
+                    f"{source_file_name}|{report.line}|{report.checker_name}")
+
+        # A better way to handle reports where the review status is not
+        # set in the source is to return None, and set the reviews status info
+        # at report addition time.
+        return SourceReviewStatus(
+            status="unreviewed",
+            message=b'',
+            bug_hash=report.report_hash,
+            in_source=False
+        )
+
diff --git a/web/server/codechecker_server/api/mass_store_run.py b/web/server/codechecker_server/api/mass_store_run.py
@@ -26,13 +26,13 @@
 
 from codechecker_common import skiplist_handler
 from codechecker_common.logger import get_logger
+from codechecker_common.review_status_handler import ReviewStatusHandler, \
+    SourceReviewStatus
 from codechecker_common.util import load_json
 
 from codechecker_report_converter.util import trim_path_prefixes
 from codechecker_report_converter.report import report_file, Report
 from codechecker_report_converter.report.hash import get_report_path_hash
-from codechecker_report_converter.source_code_comment_handler import \
-    SourceCodeCommentHandler, SpellException, contains_codechecker_comment
 
 from ..database import db_cleanup
 from ..database.config_db_model import Product
@@ -65,10 +65,6 @@ def __exit__(self, *args):
                  self.__msg, round(time.time() - self.__start_time, 2))
 
 
-# FIXME: when these types are introduced we need to use those.
-SourceLineComments = List[Any]
-
-
 def unzip(b64zip: str, output_dir: str) -> int:
     """
     This function unzips the base64 encoded zip file. This zip is extracted
@@ -101,27 +97,6 @@ def get_file_content(file_path: str) -> bytes:
         return f.read()
 
 
-def parse_codechecker_review_comment(
-    source_file_name: str,
-    report_line: int,
-    checker_name: str
-) -> SourceLineComments:
-    """Parse the CodeChecker review comments from a source file at a given
-    position.  Returns an empty list if there are no comments.
-    """
-    src_comment_data = []
-    with open(source_file_name, encoding='utf-8', errors='ignore') as f:
-        if contains_codechecker_comment(f):
-            sc_handler = SourceCodeCommentHandler()
-            try:
-                src_comment_data = sc_handler.filter_source_line_comments(
-                    f, report_line, checker_name)
-            except SpellException as ex:
-                LOG.warning("File %s contains %s", source_file_name, ex)
-
-    return src_comment_data
-
-
 def add_file_record(
     session: DBSession,
     file_path: str,
@@ -216,29 +191,6 @@ def get_blame_file_data(
     return blame_info, remote_url, tracking_branch
 
 
-class SourceReviewStatus:
-    """
-    Helper class for handling in source review statuses.
-    Collect the same info as a review status rule.
-    FIXME Change this to dataclass when available
-    """
-    def __init__(
-        self,
-        status: str,
-        message: bytes,
-        bug_hash: Any,
-        author: str,
-        date: datetime,
-        in_source: bool,
-    ):
-        self.status = status
-        self.message = message
-        self.bug_hash = bug_hash
-        self.author = author
-        self.date = date
-        self.in_source = in_source
-
-
 class MassStoreRun:
     def __init__(
         self,
@@ -876,11 +828,11 @@ def __process_report_file(
         self,
         report_file_path: str,
         session: DBSession,
-        source_root: str,
         run_id: int,
         file_path_to_id: Dict[str, int],
         run_history_time: datetime,
         skip_handler: skiplist_handler.SkipListHandler,
+        review_status_handler: ReviewStatusHandler,
         hash_map_reports: Dict[str, List[Any]]
     ) -> bool:
         """
@@ -891,63 +843,6 @@ def __process_report_file(
         if not reports:
             return True
 
-        def get_review_status_from_source(
-                report: Report) -> SourceReviewStatus:
-            """
-            Return the review status set in the source code belonging to
-            the given report.
-
-            - Return the review status if it is set in the source code.
-            - If the review status is ambiguous (i.e. multiple source code
-              comments belong to it) then (unreviewed, in_source=False)
-              returns.
-            """
-            # The original file path is needed here, not the trimmed, because
-            # the source files are extracted as the original file path.
-            source_file_name = os.path.realpath(os.path.join(
-                source_root, report.file.original_path.strip("/")))
-
-            if os.path.isfile(source_file_name):
-                src_comment_data = parse_codechecker_review_comment(
-                    source_file_name, report.line, report.checker_name)
-
-                if len(src_comment_data) == 1:
-                    data = src_comment_data[0]
-                    status, message = data.status, bytes(data.message, 'utf-8')
-
-                    review_status = SourceReviewStatus(
-                        status=status,
-                        message=message,
-                        bug_hash=report.report_hash,
-                        author=self.user_name,
-                        date=run_history_time,
-                        in_source=True
-                    )
-
-                    return review_status
-                elif len(src_comment_data) > 1:
-                    LOG.warning(
-                        "Multiple source code comment can be found "
-                        "for '%s' checker in '%s' at line %s. "
-                        "This suppression will not be taken into account!",
-                        report.checker_name, source_file_name, report.line)
-
-                    self.__wrong_src_code_comments.append(
-                        f"{source_file_name}|{report.line}|"
-                        f"{report.checker_name}")
-
-            # A better way to handle reports where the review status is not
-            # set in the source is to return None, and set the reviews status
-            # and set the review status info at report addition time.
-            return SourceReviewStatus(
-                status="unreviewed",
-                message=b'',
-                bug_hash=report.report_hash,
-                author=self.user_name,
-                date=run_history_time,
-                in_source=False
-            )
-
         def get_missing_file_ids(report: Report) -> List[str]:
             """ Returns file paths which database file id is missing. """
             missing_ids_for_files = []
@@ -1000,7 +895,14 @@ def get_missing_file_ids(report: Report) -> List[str]:
             analyzer_name = mip.checker_to_analyzer.get(
                 report.checker_name, report.analyzer_name)
 
-            rs_from_source = get_review_status_from_source(report)
+            rs_from_source = SourceReviewStatus()
+            try:
+                rs_from_source = \
+                    review_status_handler.get_review_status_from_source(report)
+                rs_from_source.author = self.user_name
+                rs_from_source.date = run_history_time
+            except ValueError as err:
+                self.__wrong_src_code_comments.append(str(err))
 
             # False positive and intentional reports are considered as closed
             # reports which is indicated with non-null "fixed_at" date.
@@ -1159,6 +1061,8 @@ def get_skip_handler(
         # Processing analyzer result files.
         processed_result_file_count = 0
 
+        review_status_handler = ReviewStatusHandler(source_root)
+
         for root_dir_path, _, report_file_paths in os.walk(report_dir):
             LOG.debug("Get reports from '%s' directory", root_dir_path)
 
@@ -1176,9 +1080,9 @@ def get_skip_handler(
 
                 report_file_path = os.path.join(root_dir_path, f)
                 self.__process_report_file(
-                    report_file_path, session, source_root, run_id,
+                    report_file_path, session, run_id,
                     file_path_to_id, run_history_time,
-                    skip_handler, report_to_report_id)
+                    skip_handler, review_status_handler, report_to_report_id)
                 processed_result_file_count += 1
 
         session.flush()