Skip to content

Commit

Permalink
[NFC] Move source code comment handler from report-converter to codec…
Browse files Browse the repository at this point in the history
…hecker_common

The class SourceCodeCommentHandler which parses in-source codechecker
suppressions and review status settings has been moved to
codechecker_common. The reason is that we'd like to gather all review
status setter methods to one place. So either Report objects handle the
logic of in-source suppressions, review status config (future feature)
and review status rules, or none of these. Since report-converter is a
standalone tool, it can't depend on CodeChecker modules (for example the
ones which determine review status rules). For this reason the source
code comment handler moves to CodeChecker.

(TODO: Unfortunately plaintext.convert() gets a ReviewStatusHandler
object as parameter, so report-converter depends on CodeChecker. This
dependency has to be eliminated later.)
  • Loading branch information
bruntib committed Oct 24, 2023
1 parent 39e0f7b commit 24bb998
Show file tree
Hide file tree
Showing 23 changed files with 207 additions and 213 deletions.
2 changes: 1 addition & 1 deletion analyzer/codechecker_analyzer/cmd/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
analyzer_config, checker_config

from codechecker_common import arg, cmd_config, logger
from codechecker_report_converter.source_code_comment_handler import \
from codechecker_common.source_code_comment_handler import \
REVIEW_STATUS_VALUES

from codechecker_analyzer.cmd.analyze import \
Expand Down
24 changes: 22 additions & 2 deletions analyzer/codechecker_analyzer/cmd/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,16 @@
from codechecker_report_converter.report.output.html import \
html as report_to_html
from codechecker_report_converter.report.statistics import Statistics
from codechecker_report_converter.source_code_comment_handler import \
REVIEW_STATUS_VALUES


from codechecker_analyzer import analyzer_context, suppress_handler

from codechecker_common import arg, logger, cmd_config
from codechecker_common.review_status_handler import ReviewStatusHandler
from codechecker_common.skiplist_handler import SkipListHandler, \
SkipListHandlers
from codechecker_common.source_code_comment_handler import \
REVIEW_STATUS_VALUES
from codechecker_common.util import load_json


Expand Down Expand Up @@ -384,6 +385,7 @@ def get_output_file_path(default_file_name: str) -> Optional[str]:
processed_path_hashes = set()
processed_file_paths = set()
print_steps = 'print_steps' in args
review_status_handler = ReviewStatusHandler()

html_builder: Optional[report_to_html.HtmlBuilder] = None
if export == 'html':
Expand Down Expand Up @@ -413,6 +415,20 @@ def get_output_file_path(default_file_name: str) -> Optional[str]:
reports = report_file.get_reports(
file_path, context.checker_labels, file_cache)

for report in reports:
try:
# TODO: skip_handler is used later in reports_helper.skip()
# too. However, skipped reports shouldn't check source code
# comments because they potentially raise an exception.
# Skipped files shouldn't raise an exception, also, "skip"
# shouldn't be checked twice.
if not report.skip(skip_handlers):
report.review_status = \
review_status_handler.get_review_status(report)
except ValueError as err:
LOG.error(err)
sys.exit(1)

reports = reports_helper.skip(
reports, processed_path_hashes, skip_handlers, suppr_handler,
src_comment_status_filter)
Expand All @@ -434,13 +450,17 @@ def get_output_file_path(default_file_name: str) -> Optional[str]:
file_report_map = plaintext.get_file_report_map(
reports, file_path, metadata)
plaintext.convert(
review_status_handler,
file_report_map, processed_file_paths, print_steps)
elif export == 'html':
print(f"Parsing input file '{file_path}'.")
report_to_html.convert(
file_path, reports, output_dir_path,
html_builder)

for warning in review_status_handler.source_comment_warnings():
LOG.warning(warning)

if export is None: # Plain text output
statistics.write()
elif export == 'html':
Expand Down
2 changes: 1 addition & 1 deletion analyzer/codechecker_analyzer/suppress_file_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import re

from codechecker_common.logger import get_logger
from codechecker_report_converter.source_code_comment_handler import \
from codechecker_common.source_code_comment_handler import \
SourceCodeCommentHandler

LOG = get_logger('system')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,13 +232,21 @@ def check_one_file(self, path, mode):

# Replace full path only to file name on the following
# formatted lines:
# 1.
# [severity] /a/b/x.cpp:line:col: message [checker]
# The replacement on this line will be the following:
# [severity] x.cpp:line:col: message [checker]
# 2.
# [] - /a/b/x.cpp contains misspelled ...
# The replacement on this line will be the following:
# [] - x.cpp contains misspelled ...
sep = re.escape(os.sep)
line = re.sub(r'^(\[\w+\]\s)(?P<path>.+{0})'
r'(.+\:\d+\:\d+\:\s.*\s\[.*\])$'.format(sep),
r'\1\3', line)
line = re.sub(r'^\[\] - (?P<path>.+{0})'
r'(.+ contains misspelled.+)'.format(sep),
r'[] - \2', line)

if not any([line.startswith(prefix) for prefix
in skip_prefixes]):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ CHECK#CodeChecker check --build "make multi_error_suppress_typo" --output $OUTPU
return x/0;
^

[] - multi_error_suppress_typo.cpp contains misspelled review status comment @9: // codechecker_suppressssss [all] some comment
[LOW] multi_error_suppress_typo.cpp:10:3: Value stored to 'y' is never read [deadcode.DeadStores]
y = 7;
^

Found 2 defect(s) in multi_error_suppress_typo.cpp

[] - multi_error_suppress_typo.cpp contains misspelled review status comment @9: // codechecker_suppressssss [all] some comment

----==== Severity Statistics ====----
----------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import os
import unittest

from codechecker_report_converter.source_code_comment_handler import \
from codechecker_common.source_code_comment_handler import \
SourceCodeComment, SourceCodeCommentHandler


Expand Down
122 changes: 68 additions & 54 deletions codechecker_common/review_status_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,38 +7,19 @@
# -------------------------------------------------------------------------


from dataclasses import dataclass
from datetime import datetime
import os
from typing import Optional
from typing import List, Optional

from logger import get_logger
from codechecker_report_converter.report import Report
from codechecker_report_converter.source_code_comment_handler import \
from codechecker_report_converter.report import Report, SourceReviewStatus
from codechecker_common.logger import get_logger
from codechecker_common.source_code_comment_handler import \
SourceCodeCommentHandler, SpellException, contains_codechecker_comment, \
SourceCodeComments
SourceCodeComment, SourceCodeComments


LOG = get_logger('system')


@dataclass
class SourceReviewStatus:
"""
Helper class for handling in source review statuses.
Collect the same info as a review status rule.
TODO: rename this class, because this not only represents review statuses
in source code comments but in review status yaml too.
"""
status: str = "unreviewed"
message: bytes = b""
bug_hash: str = ""
in_source: bool = False
author: Optional[str] = None
date: Optional[datetime] = None


class ReviewStatusHandler:
"""
This class helps to determine the review status of a report. The review
Expand All @@ -55,8 +36,10 @@ class ReviewStatusHandler:
TODO: Option 2. will be handled in a future commit.
TODO: Option 3. should also be covered by this handler class.
"""
def __init__(self, source_root):
def __init__(self, source_root=''):
self.__source_root = source_root
self.__source_comment_warnings = []
self.__source_commets = {}

def __parse_codechecker_review_comment(
self,
Expand All @@ -75,62 +58,93 @@ def __parse_codechecker_review_comment(
src_comment_data = sc_handler.filter_source_line_comments(
f, report_line, checker_name)
except SpellException as ex:
LOG.warning("File %s contains %s", source_file_name, ex)
self.__source_comment_warnings.append(
f"{source_file_name} contains {ex}")

return src_comment_data

def get_review_status(self, report: Report) -> SourceReviewStatus:
"""
Return the review status of the report based on source code comments.
If the review status is not set in the source code then "unreviewed"
review status returns. This function throws ValueError if the review
status is ambiguous (see get_review_status_from_source()).
"""
rs_from_source = self.get_review_status_from_source(report)

if rs_from_source:
return rs_from_source

return SourceReviewStatus(bug_hash=report.report_hash)

def get_review_status_from_source(
self,
report: Report
) -> SourceReviewStatus:
) -> Optional[SourceReviewStatus]:
"""
Return the review status set in the source code belonging to
Return the review status based on the source code comment belonging to
the given report.
- Return the review status if it is set in the source code.
- If the review status is ambiguous (i.e. multiple source code
comments belong to it) then a ValueError exception is raised which
contains information about the problem in a string.
# TODO: If not found then return None or unreviewed by default?
- If the soure file changed (which means that the source comments may
have replaced, changed or removed) then None returns.
TODO: This function either returns a SourceReviewStatus, or raises an
exception or returns None. This is too many things that a caller needs
to handle. The reason is that according to the current behaviors,
ambiguous comment results a hard error, and changed files result a
warning with "unreviewed" status. In the future we could implement a
logic where we take the first comment into account in case of ambiguity
or return "unreviewed" with a warning, like changed files.
"""
# The original file path is needed here, not the trimmed, because
# the source files are extracted as the original file path.
# TODO: Should original_path be strip('/') at store?
source_file_name = os.path.realpath(os.path.join(
self.__source_root, report.file.original_path.strip("/")))
self.__source_root, report.file.original_path))

if source_file_name in report.changed_files:
return None

if os.path.isfile(source_file_name):
src_comment_data = self.__parse_codechecker_review_comment(
source_file_name, report.line, report.checker_name)

if len(src_comment_data) == 1:
data = src_comment_data[0]
status, message = data.status, bytes(data.message, 'utf-8')
status, message = data.status, data.message
self.__source_commets[report] = data

review_status = SourceReviewStatus(
return SourceReviewStatus(
status=status,
message=message,
message=message.encode('utf-8'),
bug_hash=report.report_hash,
in_source=True
)

return review_status
in_source=True)
elif len(src_comment_data) > 1:
LOG.warning(
"Multiple source code comment can be found "
"for '%s' checker in '%s' at line %s. "
"This suppression will not be taken into account!",
report.checker_name, source_file_name, report.line)

raise ValueError(
f"{source_file_name}|{report.line}|{report.checker_name}")

# A better way to handle reports where the review status is not
# set in the source is to return None, and set the reviews status info
# at report addition time.
return SourceReviewStatus(
status="unreviewed",
message=b'',
bug_hash=report.report_hash,
in_source=False
)
f"Multiple source code comments can be found for "
f"'{report.checker_name}' checker in '{source_file_name}' "
f"at line {report.line}.")

return None

def source_comment_warnings(self) -> List[str]:
"""
Sometimes it is not intuitive why the given review status is determined
for a report. For example, if an in-source suppression is misspelled,
then the report remains unreviewed:
// codechecker_suppresssss [<checker name>] comment
This function returns a list of warning messages on these unintuitive
behaviors.
"""
return self.__source_comment_warnings

def source_comment(self, report: Report) -> Optional[SourceCodeComment]:
"""
This ReviewStatusHandler class is caching source comments so they are
read and parsed only once for each report.
"""
return self.__source_commets.get(report)
13 changes: 13 additions & 0 deletions codechecker_common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import itertools
import json
from typing import TextIO
import portalocker

from codechecker_common.logger import get_logger
Expand Down Expand Up @@ -84,3 +85,15 @@ def load_json(path: str, default=None, lock=False, display_warning=True):
LOG.warning(ex)

return ret

def get_linef(fp: TextIO, line_no: int) -> str:
"""'fp' should be (readable) file object.
Return the line content at line_no or an empty line
if there is less lines than line_no.
"""
fp.seek(0)
for line in fp:
line_no -= 1
if line_no == 0:
return line
return ''
Loading

0 comments on commit 24bb998

Please sign in to comment.