[NFC] Move source code comment handler from report-converter to codec…

…hecker_common The class SourceCodeCommentHandler which parses in-source codechecker suppressions and review status settings has been moved to codechecker_common. The reason is that we'd like to gather all review status setter methods to one place. So either Report objects handle the logic of in-source suppressions, review status config (future feature) and review status rules, or none of these. Since report-converter is a standalone tool, it can't depend on CodeChecker modules (for example the ones which determine review status rules). For this reason the source code comment handler moves to CodeChecker. (TODO: Unfortunately plaintext.convert() gets a ReviewStatusHandler object as parameter, so report-converter depends on CodeChecker. This dependency has to be eliminated later.)
bruntib · Oct 24, 2023 · 24bb998 · 24bb998
1 parent 39e0f7b
commit 24bb998
Show file tree

Hide file tree

Showing 23 changed files with 207 additions and 213 deletions.
diff --git a/analyzer/codechecker_analyzer/cmd/check.py b/analyzer/codechecker_analyzer/cmd/check.py
@@ -24,7 +24,7 @@
     analyzer_config, checker_config
 
 from codechecker_common import arg, cmd_config, logger
-from codechecker_report_converter.source_code_comment_handler import \
+from codechecker_common.source_code_comment_handler import \
     REVIEW_STATUS_VALUES
 
 from codechecker_analyzer.cmd.analyze import \

diff --git a/analyzer/codechecker_analyzer/cmd/parse.py b/analyzer/codechecker_analyzer/cmd/parse.py
@@ -25,15 +25,16 @@
 from codechecker_report_converter.report.output.html import \
     html as report_to_html
 from codechecker_report_converter.report.statistics import Statistics
-from codechecker_report_converter.source_code_comment_handler import \
-    REVIEW_STATUS_VALUES
 
 
 from codechecker_analyzer import analyzer_context, suppress_handler
 
 from codechecker_common import arg, logger, cmd_config
+from codechecker_common.review_status_handler import ReviewStatusHandler
 from codechecker_common.skiplist_handler import SkipListHandler, \
     SkipListHandlers
+from codechecker_common.source_code_comment_handler import \
+    REVIEW_STATUS_VALUES
 from codechecker_common.util import load_json
 
 
@@ -384,6 +385,7 @@ def get_output_file_path(default_file_name: str) -> Optional[str]:
     processed_path_hashes = set()
     processed_file_paths = set()
     print_steps = 'print_steps' in args
+    review_status_handler = ReviewStatusHandler()
 
     html_builder: Optional[report_to_html.HtmlBuilder] = None
     if export == 'html':
@@ -413,6 +415,20 @@ def get_output_file_path(default_file_name: str) -> Optional[str]:
             reports = report_file.get_reports(
                 file_path, context.checker_labels, file_cache)
 
+            for report in reports:
+                try:
+                    # TODO: skip_handler is used later in reports_helper.skip()
+                    # too. However, skipped reports shouldn't check source code
+                    # comments because they potentially raise an exception.
+                    # Skipped files shouldn't raise an exception, also, "skip"
+                    # shouldn't be checked twice.
+                    if not report.skip(skip_handlers):
+                        report.review_status = \
+                            review_status_handler.get_review_status(report)
+                except ValueError as err:
+                    LOG.error(err)
+                    sys.exit(1)
+
             reports = reports_helper.skip(
                 reports, processed_path_hashes, skip_handlers, suppr_handler,
                 src_comment_status_filter)
@@ -434,13 +450,17 @@ def get_output_file_path(default_file_name: str) -> Optional[str]:
                 file_report_map = plaintext.get_file_report_map(
                     reports, file_path, metadata)
                 plaintext.convert(
+                    review_status_handler,
                     file_report_map, processed_file_paths, print_steps)
             elif export == 'html':
                 print(f"Parsing input file '{file_path}'.")
                 report_to_html.convert(
                     file_path, reports, output_dir_path,
                     html_builder)
 
+    for warning in review_status_handler.source_comment_warnings():
+        LOG.warning(warning)
+
     if export is None:  # Plain text output
         statistics.write()
     elif export == 'html':

diff --git a/analyzer/codechecker_analyzer/suppress_file_handler.py b/analyzer/codechecker_analyzer/suppress_file_handler.py
@@ -24,7 +24,7 @@
 import re
 
 from codechecker_common.logger import get_logger
-from codechecker_report_converter.source_code_comment_handler import \
+from codechecker_common.source_code_comment_handler import \
     SourceCodeCommentHandler
 
 LOG = get_logger('system')

diff --git a/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py b/analyzer/tests/functional/analyze_and_parse/test_analyze_and_parse.py
@@ -232,13 +232,21 @@ def check_one_file(self, path, mode):
 
             # Replace full path only to file name on the following
             # formatted lines:
+            # 1.
             # [severity] /a/b/x.cpp:line:col: message [checker]
             # The replacement on this line will be the following:
             # [severity] x.cpp:line:col: message [checker]
+            # 2.
+            # [] - /a/b/x.cpp contains misspelled ...
+            # The replacement on this line will be the following:
+            # [] - x.cpp contains misspelled ...
             sep = re.escape(os.sep)
             line = re.sub(r'^(\[\w+\]\s)(?P<path>.+{0})'
                           r'(.+\:\d+\:\d+\:\s.*\s\[.*\])$'.format(sep),
                           r'\1\3', line)
+            line = re.sub(r'^\[\] - (?P<path>.+{0})'
+                          r'(.+ contains misspelled.+)'.format(sep),
+                          r'[] - \2', line)
 
             if not any([line.startswith(prefix) for prefix
                         in skip_prefixes]):

diff --git a/analyzer/tests/functional/analyze_and_parse/test_files/multi_error_suppress_typo.output b/analyzer/tests/functional/analyze_and_parse/test_files/multi_error_suppress_typo.output
@@ -22,13 +22,13 @@ CHECK#CodeChecker check --build "make multi_error_suppress_typo" --output $OUTPU
   return x/0;
           ^
 
-[] - multi_error_suppress_typo.cpp contains misspelled review status comment @9: // codechecker_suppressssss [all] some comment
 [LOW] multi_error_suppress_typo.cpp:10:3: Value stored to 'y' is never read [deadcode.DeadStores]
   y = 7;
   ^
 
 Found 2 defect(s) in multi_error_suppress_typo.cpp
 
+[] - multi_error_suppress_typo.cpp contains misspelled review status comment @9: // codechecker_suppressssss [all] some comment
 
 ----==== Severity Statistics ====----
 ----------------------------

diff --git a/...ource_code_comment_test_files/test_file_1 → ...ource_code_comment_test_files/test_file_1 b/...ource_code_comment_test_files/test_file_1 → ...ource_code_comment_test_files/test_file_1
diff --git a/...ource_code_comment_test_files/test_file_2 → ...ource_code_comment_test_files/test_file_2 b/...ource_code_comment_test_files/test_file_2 → ...ource_code_comment_test_files/test_file_2
diff --git a/...ource_code_comment_test_files/test_file_3 → ...ource_code_comment_test_files/test_file_3 b/...ource_code_comment_test_files/test_file_3 → ...ource_code_comment_test_files/test_file_3
diff --git a/..._code_comment/test_source_code_comment.py → ...er/tests/unit/test_source_code_comment.py b/..._code_comment/test_source_code_comment.py → ...er/tests/unit/test_source_code_comment.py
@@ -14,7 +14,7 @@
 import os
 import unittest
 
-from codechecker_report_converter.source_code_comment_handler import \
+from codechecker_common.source_code_comment_handler import \
     SourceCodeComment, SourceCodeCommentHandler
 
 

diff --git a/codechecker_common/review_status_handler.py b/codechecker_common/review_status_handler.py
@@ -7,38 +7,19 @@
 # -------------------------------------------------------------------------
 
 
-from dataclasses import dataclass
-from datetime import datetime
 import os
-from typing import Optional
+from typing import List, Optional
 
-from logger import get_logger
-from codechecker_report_converter.report import Report
-from codechecker_report_converter.source_code_comment_handler import \
+from codechecker_report_converter.report import Report, SourceReviewStatus
+from codechecker_common.logger import get_logger
+from codechecker_common.source_code_comment_handler import \
     SourceCodeCommentHandler, SpellException, contains_codechecker_comment, \
-    SourceCodeComments
+    SourceCodeComment, SourceCodeComments
 
 
 LOG = get_logger('system')
 
 
-@dataclass
-class SourceReviewStatus:
-    """
-    Helper class for handling in source review statuses.
-    Collect the same info as a review status rule.
-
-    TODO: rename this class, because this not only represents review statuses
-          in source code comments but in review status yaml too.
-    """
-    status: str = "unreviewed"
-    message: bytes = b""
-    bug_hash: str = ""
-    in_source: bool = False
-    author: Optional[str] = None
-    date: Optional[datetime] = None
-
-
 class ReviewStatusHandler:
     """
     This class helps to determine the review status of a report. The review
@@ -55,8 +36,10 @@ class ReviewStatusHandler:
     TODO: Option 2. will be handled in a future commit.
     TODO: Option 3. should also be covered by this handler class.
     """
-    def __init__(self, source_root):
+    def __init__(self, source_root=''):
         self.__source_root = source_root
+        self.__source_comment_warnings = []
+        self.__source_commets = {}
 
     def __parse_codechecker_review_comment(
         self,
@@ -75,62 +58,93 @@ def __parse_codechecker_review_comment(
                     src_comment_data = sc_handler.filter_source_line_comments(
                         f, report_line, checker_name)
                 except SpellException as ex:
-                    LOG.warning("File %s contains %s", source_file_name, ex)
+                    self.__source_comment_warnings.append(
+                        f"{source_file_name} contains {ex}")
 
         return src_comment_data
 
+    def get_review_status(self, report: Report) -> SourceReviewStatus:
+        """
+        Return the review status of the report based on source code comments.
+
+        If the review status is not set in the source code then "unreviewed"
+        review status returns. This function throws ValueError if the review
+        status is ambiguous (see get_review_status_from_source()).
+        """
+        rs_from_source = self.get_review_status_from_source(report)
+
+        if rs_from_source:
+            return rs_from_source
+
+        return SourceReviewStatus(bug_hash=report.report_hash)
+
     def get_review_status_from_source(
         self,
         report: Report
-    ) -> SourceReviewStatus:
+    ) -> Optional[SourceReviewStatus]:
         """
-        Return the review status set in the source code belonging to
+        Return the review status based on the source code comment belonging to
         the given report.
 
         - Return the review status if it is set in the source code.
         - If the review status is ambiguous (i.e. multiple source code
             comments belong to it) then a ValueError exception is raised which
             contains information about the problem in a string.
-        # TODO: If not found then return None or unreviewed by default?
+        - If the soure file changed (which means that the source comments may
+            have replaced, changed or removed) then None returns.
+        TODO: This function either returns a SourceReviewStatus, or raises an
+        exception or returns None. This is too many things that a caller needs
+        to handle. The reason is that according to the current behaviors,
+        ambiguous comment results a hard error, and changed files result a
+        warning with "unreviewed" status. In the future we could implement a
+        logic where we take the first comment into account in case of ambiguity
+        or return "unreviewed" with a warning, like changed files.
         """
         # The original file path is needed here, not the trimmed, because
         # the source files are extracted as the original file path.
+        # TODO: Should original_path be strip('/') at store?
         source_file_name = os.path.realpath(os.path.join(
-            self.__source_root, report.file.original_path.strip("/")))
+            self.__source_root, report.file.original_path))
+
+        if source_file_name in report.changed_files:
+            return None
 
         if os.path.isfile(source_file_name):
             src_comment_data = self.__parse_codechecker_review_comment(
                 source_file_name, report.line, report.checker_name)
 
             if len(src_comment_data) == 1:
                 data = src_comment_data[0]
-                status, message = data.status, bytes(data.message, 'utf-8')
+                status, message = data.status, data.message
+                self.__source_commets[report] = data
 
-                review_status = SourceReviewStatus(
+                return SourceReviewStatus(
                     status=status,
-                    message=message,
+                    message=message.encode('utf-8'),
                     bug_hash=report.report_hash,
-                    in_source=True
-                )
-
-                return review_status
+                    in_source=True)
             elif len(src_comment_data) > 1:
-                LOG.warning(
-                    "Multiple source code comment can be found "
-                    "for '%s' checker in '%s' at line %s. "
-                    "This suppression will not be taken into account!",
-                    report.checker_name, source_file_name, report.line)
-
                 raise ValueError(
-                    f"{source_file_name}|{report.line}|{report.checker_name}")
-
-        # A better way to handle reports where the review status is not
-        # set in the source is to return None, and set the reviews status info
-        # at report addition time.
-        return SourceReviewStatus(
-            status="unreviewed",
-            message=b'',
-            bug_hash=report.report_hash,
-            in_source=False
-        )
+                    f"Multiple source code comments can be found for "
+                    f"'{report.checker_name}' checker in '{source_file_name}' "
+                    f"at line {report.line}.")
+
+        return None
 
+    def source_comment_warnings(self) -> List[str]:
+        """
+        Sometimes it is not intuitive why the given review status is determined
+        for a report. For example, if an in-source suppression is misspelled,
+        then the report remains unreviewed:
+        // codechecker_suppresssss [<checker name>] comment
+        This function returns a list of warning messages on these unintuitive
+        behaviors.
+        """
+        return self.__source_comment_warnings
+
+    def source_comment(self, report: Report) -> Optional[SourceCodeComment]:
+        """
+        This ReviewStatusHandler class is caching source comments so they are
+        read and parsed only once for each report.
+        """
+        return self.__source_commets.get(report)
diff --git a/..._converter/source_code_comment_handler.py → ...ker_common/source_code_comment_handler.py b/..._converter/source_code_comment_handler.py → ...ker_common/source_code_comment_handler.py
diff --git a/codechecker_common/util.py b/codechecker_common/util.py
@@ -12,6 +12,7 @@
 
 import itertools
 import json
+from typing import TextIO
 import portalocker
 
 from codechecker_common.logger import get_logger
@@ -84,3 +85,15 @@ def load_json(path: str, default=None, lock=False, display_warning=True):
             LOG.warning(ex)
 
     return ret
+
+def get_linef(fp: TextIO, line_no: int) -> str:
+    """'fp' should be (readable) file object.
+    Return the line content at line_no or an empty line
+    if there is less lines than line_no.
+    """
+    fp.seek(0)
+    for line in fp:
+        line_no -= 1
+        if line_no == 0:
+            return line
+    return ''