Skip to content

Commit

Permalink
[NFC] ReviewStatusHandler for source code comments
Browse files Browse the repository at this point in the history
Currently there are 2 ways of handling review statuses for reports:
- In source code comments:
  // codechecker_suppress [checker_name] message...
- In the GUI:
  This sets a "review status rule" which means a relationship between
  bug hashes and review statuses. All reports with the same bug hash get
  the same review status.

We are planning a 3rd way of setting review statuses:
  A review_status.yaml config file which contains review status mappings
  to files or directories.

The long-term plan is to create a ReviewStatusHandler class that handles
the review status of a report, in all three scenarios above.

This commit is just a refactoring which moves the 1st scenario-related
code (review status from source code comment) to the ReviewStatusHandler
class.
  • Loading branch information
bruntib committed Oct 23, 2023
1 parent 3c4c8ec commit 39e0f7b
Show file tree
Hide file tree
Showing 2 changed files with 151 additions and 111 deletions.
136 changes: 136 additions & 0 deletions codechecker_common/review_status_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------


from dataclasses import dataclass
from datetime import datetime
import os
from typing import Optional

from logger import get_logger
from codechecker_report_converter.report import Report
from codechecker_report_converter.source_code_comment_handler import \
SourceCodeCommentHandler, SpellException, contains_codechecker_comment, \
SourceCodeComments


LOG = get_logger('system')


@dataclass
class SourceReviewStatus:
"""
Helper class for handling in source review statuses.
Collect the same info as a review status rule.
TODO: rename this class, because this not only represents review statuses
in source code comments but in review status yaml too.
"""
status: str = "unreviewed"
message: bytes = b""
bug_hash: str = ""
in_source: bool = False
author: Optional[str] = None
date: Optional[datetime] = None


class ReviewStatusHandler:
"""
This class helps to determine the review status of a report. The review
status may come either from several sources:
1. Source code comment:
// codechecker_suppress [core.DivideZero] This is a false positive.
2. Review status configuration file:
<report_folder>/review_status.yaml contains review status settings on
file path or bug has granularity.
3. Review status rule:
A mapping between bug hashes and review statuses set in the GUI.
TODO: Option 2. will be handled in a future commit.
TODO: Option 3. should also be covered by this handler class.
"""
def __init__(self, source_root):
self.__source_root = source_root

def __parse_codechecker_review_comment(
self,
source_file_name: str,
report_line: int,
checker_name: str
) -> SourceCodeComments:
"""Parse the CodeChecker review comments from a source file at a given
position. Returns an empty list if there are no comments.
"""
src_comment_data = []
with open(source_file_name, encoding='utf-8', errors='ignore') as f:
if contains_codechecker_comment(f):
sc_handler = SourceCodeCommentHandler()
try:
src_comment_data = sc_handler.filter_source_line_comments(
f, report_line, checker_name)
except SpellException as ex:
LOG.warning("File %s contains %s", source_file_name, ex)

return src_comment_data

def get_review_status_from_source(
self,
report: Report
) -> SourceReviewStatus:
"""
Return the review status set in the source code belonging to
the given report.
- Return the review status if it is set in the source code.
- If the review status is ambiguous (i.e. multiple source code
comments belong to it) then a ValueError exception is raised which
contains information about the problem in a string.
# TODO: If not found then return None or unreviewed by default?
"""
# The original file path is needed here, not the trimmed, because
# the source files are extracted as the original file path.
source_file_name = os.path.realpath(os.path.join(
self.__source_root, report.file.original_path.strip("/")))

if os.path.isfile(source_file_name):
src_comment_data = self.__parse_codechecker_review_comment(
source_file_name, report.line, report.checker_name)

if len(src_comment_data) == 1:
data = src_comment_data[0]
status, message = data.status, bytes(data.message, 'utf-8')

review_status = SourceReviewStatus(
status=status,
message=message,
bug_hash=report.report_hash,
in_source=True
)

return review_status
elif len(src_comment_data) > 1:
LOG.warning(
"Multiple source code comment can be found "
"for '%s' checker in '%s' at line %s. "
"This suppression will not be taken into account!",
report.checker_name, source_file_name, report.line)

raise ValueError(
f"{source_file_name}|{report.line}|{report.checker_name}")

# A better way to handle reports where the review status is not
# set in the source is to return None, and set the reviews status info
# at report addition time.
return SourceReviewStatus(
status="unreviewed",
message=b'',
bug_hash=report.report_hash,
in_source=False
)

126 changes: 15 additions & 111 deletions web/server/codechecker_server/api/mass_store_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@

from codechecker_common import skiplist_handler
from codechecker_common.logger import get_logger
from codechecker_common.review_status_handler import ReviewStatusHandler, \
SourceReviewStatus
from codechecker_common.util import load_json

from codechecker_report_converter.util import trim_path_prefixes
from codechecker_report_converter.report import report_file, Report
from codechecker_report_converter.report.hash import get_report_path_hash
from codechecker_report_converter.source_code_comment_handler import \
SourceCodeCommentHandler, SpellException, contains_codechecker_comment

from ..database import db_cleanup
from ..database.config_db_model import Product
Expand Down Expand Up @@ -65,10 +65,6 @@ def __exit__(self, *args):
self.__msg, round(time.time() - self.__start_time, 2))


# FIXME: when these types are introduced we need to use those.
SourceLineComments = List[Any]


def unzip(b64zip: str, output_dir: str) -> int:
"""
This function unzips the base64 encoded zip file. This zip is extracted
Expand Down Expand Up @@ -101,27 +97,6 @@ def get_file_content(file_path: str) -> bytes:
return f.read()


def parse_codechecker_review_comment(
source_file_name: str,
report_line: int,
checker_name: str
) -> SourceLineComments:
"""Parse the CodeChecker review comments from a source file at a given
position. Returns an empty list if there are no comments.
"""
src_comment_data = []
with open(source_file_name, encoding='utf-8', errors='ignore') as f:
if contains_codechecker_comment(f):
sc_handler = SourceCodeCommentHandler()
try:
src_comment_data = sc_handler.filter_source_line_comments(
f, report_line, checker_name)
except SpellException as ex:
LOG.warning("File %s contains %s", source_file_name, ex)

return src_comment_data


def add_file_record(
session: DBSession,
file_path: str,
Expand Down Expand Up @@ -216,29 +191,6 @@ def get_blame_file_data(
return blame_info, remote_url, tracking_branch


class SourceReviewStatus:
"""
Helper class for handling in source review statuses.
Collect the same info as a review status rule.
FIXME Change this to dataclass when available
"""
def __init__(
self,
status: str,
message: bytes,
bug_hash: Any,
author: str,
date: datetime,
in_source: bool,
):
self.status = status
self.message = message
self.bug_hash = bug_hash
self.author = author
self.date = date
self.in_source = in_source


class MassStoreRun:
def __init__(
self,
Expand Down Expand Up @@ -876,11 +828,11 @@ def __process_report_file(
self,
report_file_path: str,
session: DBSession,
source_root: str,
run_id: int,
file_path_to_id: Dict[str, int],
run_history_time: datetime,
skip_handler: skiplist_handler.SkipListHandler,
review_status_handler: ReviewStatusHandler,
hash_map_reports: Dict[str, List[Any]]
) -> bool:
"""
Expand All @@ -891,63 +843,6 @@ def __process_report_file(
if not reports:
return True

def get_review_status_from_source(
report: Report) -> SourceReviewStatus:
"""
Return the review status set in the source code belonging to
the given report.
- Return the review status if it is set in the source code.
- If the review status is ambiguous (i.e. multiple source code
comments belong to it) then (unreviewed, in_source=False)
returns.
"""
# The original file path is needed here, not the trimmed, because
# the source files are extracted as the original file path.
source_file_name = os.path.realpath(os.path.join(
source_root, report.file.original_path.strip("/")))

if os.path.isfile(source_file_name):
src_comment_data = parse_codechecker_review_comment(
source_file_name, report.line, report.checker_name)

if len(src_comment_data) == 1:
data = src_comment_data[0]
status, message = data.status, bytes(data.message, 'utf-8')

review_status = SourceReviewStatus(
status=status,
message=message,
bug_hash=report.report_hash,
author=self.user_name,
date=run_history_time,
in_source=True
)

return review_status
elif len(src_comment_data) > 1:
LOG.warning(
"Multiple source code comment can be found "
"for '%s' checker in '%s' at line %s. "
"This suppression will not be taken into account!",
report.checker_name, source_file_name, report.line)

self.__wrong_src_code_comments.append(
f"{source_file_name}|{report.line}|"
f"{report.checker_name}")

# A better way to handle reports where the review status is not
# set in the source is to return None, and set the reviews status
# and set the review status info at report addition time.
return SourceReviewStatus(
status="unreviewed",
message=b'',
bug_hash=report.report_hash,
author=self.user_name,
date=run_history_time,
in_source=False
)

def get_missing_file_ids(report: Report) -> List[str]:
""" Returns file paths which database file id is missing. """
missing_ids_for_files = []
Expand Down Expand Up @@ -1000,7 +895,14 @@ def get_missing_file_ids(report: Report) -> List[str]:
analyzer_name = mip.checker_to_analyzer.get(
report.checker_name, report.analyzer_name)

rs_from_source = get_review_status_from_source(report)
rs_from_source = SourceReviewStatus()
try:
rs_from_source = \
review_status_handler.get_review_status_from_source(report)
rs_from_source.author = self.user_name
rs_from_source.date = run_history_time
except ValueError as err:
self.__wrong_src_code_comments.append(str(err))

# False positive and intentional reports are considered as closed
# reports which is indicated with non-null "fixed_at" date.
Expand Down Expand Up @@ -1159,6 +1061,8 @@ def get_skip_handler(
# Processing analyzer result files.
processed_result_file_count = 0

review_status_handler = ReviewStatusHandler(source_root)

for root_dir_path, _, report_file_paths in os.walk(report_dir):
LOG.debug("Get reports from '%s' directory", root_dir_path)

Expand All @@ -1176,9 +1080,9 @@ def get_skip_handler(

report_file_path = os.path.join(root_dir_path, f)
self.__process_report_file(
report_file_path, session, source_root, run_id,
report_file_path, session, run_id,
file_path_to_id, run_history_time,
skip_handler, report_to_report_id)
skip_handler, review_status_handler, report_to_report_id)
processed_result_file_count += 1

session.flush()
Expand Down

0 comments on commit 39e0f7b

Please sign in to comment.