From dd017cdaed81257cd0d1a7a74032bdc20234d880 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A1rton=20Csord=C3=A1s?= <csordasmarton92@gmail.com>
Date: Thu, 27 Jan 2022 14:55:39 +0100
Subject: [PATCH] [analyzer] Handle relative file paths in compilation database

Let's suppose that we have the following directory structure:
```
|- .codechecker
  |- compilation_commands.json
  |- reports
|- main.cpp
```

And all the file paths in the compilation database are relative:
```json
[
  {
    "file": "main.cpp",
    "command": "g++ -c main.cpp -o /dev/null",
    "directory": "."
  }
]
```

If we run the `CodeChecker analyze` command on this project, the ClangSA analyzer
will generate a plist file in which the file path is relative to the directory
from which the analyzer was invoked.

After the plist files are produced, the report converter will try to post-process
these files and create absolute paths from the relative paths.

With this patch, absolute paths are created by joining the following values, in this order:
- Current directory (`os.getcwd()`) where the CodeChecker command was executed.
- The value of the `directory` key from the build action (in the example above it is `.`).
- File path in the plist file.

If any of the values listed above is already an absolute path, the join discards
the values that precede it (this is how `os.path.join` behaves).
---
 .../analyzers/clangsa/result_handler.py       |  3 +-
 .../analyzers/result_handler_base.py          |  6 ++
 .../tests/functional/analyze/test_analyze.py  | 62 +++++++++++++++++++
 docs/analyzer/user_guide.md                   |  4 ++
 .../report/parser/plist.py                    | 15 +++--
 .../report/report_file.py                     |  5 +-
 6 files changed, 86 insertions(+), 9 deletions(-)

diff --git a/analyzer/codechecker_analyzer/analyzers/clangsa/result_handler.py b/analyzer/codechecker_analyzer/analyzers/clangsa/result_handler.py
index cd1d5cf221..0b232c5e33 100644
--- a/analyzer/codechecker_analyzer/analyzers/clangsa/result_handler.py
+++ b/analyzer/codechecker_analyzer/analyzers/clangsa/result_handler.py
@@ -41,7 +41,8 @@ def postprocess_result(self, skip_handler: Optional[SkipListHandler]):
         """
         if os.path.exists(self.analyzer_result_file):
             reports = report_file.get_reports(
-                self.analyzer_result_file, self.checker_labels)
+                self.analyzer_result_file, self.checker_labels,
+                source_dir_path=self.source_dir_path)
             reports = [r for r in reports if not r.skip(skip_handler)]
 
             hash_type = None
diff --git a/analyzer/codechecker_analyzer/analyzers/result_handler_base.py b/analyzer/codechecker_analyzer/analyzers/result_handler_base.py
index fc5e6acedd..4ae2b0014e 100644
--- a/analyzer/codechecker_analyzer/analyzers/result_handler_base.py
+++ b/analyzer/codechecker_analyzer/analyzers/result_handler_base.py
@@ -75,6 +75,12 @@ def buildaction(self):
         """
         return self.__buildaction
 
+    @property
+    def source_dir_path(self):
+        """ Get directory path of the compiled source file. """
+        return os.path.normpath(os.path.join(
+            os.getcwd(), self.__buildaction.directory))
+
     @property
     def workspace(self):
         """
diff --git a/analyzer/tests/functional/analyze/test_analyze.py b/analyzer/tests/functional/analyze/test_analyze.py
index b3f1ceed31..8456676583 100644
--- a/analyzer/tests/functional/analyze/test_analyze.py
+++ b/analyzer/tests/functional/analyze/test_analyze.py
@@ -22,6 +22,7 @@
 
 from libtest import env
 
+from codechecker_report_converter.report import report_file
 from codechecker_analyzer.analyzers.clangsa import version
 
 
@@ -1060,3 +1061,64 @@ def test_invalid_compilation_database(self):
         process.communicate()
 
         self.assertEqual(process.returncode, 1)
+
+    def test_compilation_db_relative_file_path(self):
+        """
+        Test relative path in compilation database.
+
+        If the file/directory paths in the compilation database are relative
+        ClangSA analyzer will generate plist files where the file paths are
+        also relative to the current directory where the analyzer was executed.
+        After the plist files are created, report converter will try to
+        post-process these files and creates absolute paths from the relative
+        paths. This test will check whether these files paths are exist.
+        """
+        test_dir = os.path.join(self.test_workspace, "test_rel_file_path")
+        os.makedirs(test_dir)
+
+        source_file_name = "success.c"
+        shutil.copy(os.path.join(self.test_dir, source_file_name), test_dir)
+
+        cc_files_dir_path = os.path.join(test_dir, "codechecker_files")
+        os.makedirs(cc_files_dir_path, exist_ok=True)
+
+        build_json = os.path.join(cc_files_dir_path, "build.json")
+        report_dir = os.path.join(cc_files_dir_path, "reports")
+
+        # Create a compilation database.
+        build_log = [{
+            "directory": ".",
+            "command": f"cc -c {source_file_name} -o /dev/null",
+            "file": source_file_name}]
+
+        with open(build_json, 'w',
+                  encoding="utf-8", errors="ignore") as outfile:
+            json.dump(build_log, outfile)
+
+        # Analyze the project
+        analyze_cmd = [
+            self._codechecker_cmd, "analyze",
+            build_json,
+            "--report-hash", "context-free-v2",
+            "-o", report_dir,
+            "--clean"]
+
+        process = subprocess.Popen(
+            analyze_cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            cwd=test_dir,
+            encoding="utf-8",
+            errors="ignore")
+        process.communicate()
+
+        errcode = process.returncode
+        self.assertEqual(errcode, 0)
+
+        # Test that file paths in plist files are exist.
+        plist_files = glob.glob(os.path.join(report_dir, '*.plist'))
+        for plist_file in plist_files:
+            reports = report_file.get_reports(plist_file)
+            for r in reports:
+                for file in r.files:
+                    self.assertTrue(os.path.exists(file.original_path))
diff --git a/docs/analyzer/user_guide.md b/docs/analyzer/user_guide.md
index 467bd33630..2eca406e77 100644
--- a/docs/analyzer/user_guide.md
+++ b/docs/analyzer/user_guide.md
@@ -821,6 +821,10 @@ Example:
 CodeChecker analyze ../codechecker_myProject_build.log -o my_plists
 ```
 
+**Note**: If your compilation database log file contains relative paths, you
+have to make sure that you run the analysis command from the same directory
+in which the logger was run (i.e. the directory those paths are relative to).
+
 `CodeChecker analyze` supports a myriad of fine-tuning arguments, explained
 below:
 
diff --git a/tools/report-converter/codechecker_report_converter/report/parser/plist.py b/tools/report-converter/codechecker_report_converter/report/parser/plist.py
index 54104b37cd..2725559b97 100644
--- a/tools/report-converter/codechecker_report_converter/report/parser/plist.py
+++ b/tools/report-converter/codechecker_report_converter/report/parser/plist.py
@@ -166,7 +166,7 @@ def parse(fp: BinaryIO):
 
 def get_file_index_map(
     plist: Any,
-    analyzer_result_dir_path: str,
+    source_dir_path: str,
     file_cache: Dict[str, File]
 ) -> Dict[int, File]:
     """ Get file index map from the given plist object. """
@@ -174,7 +174,7 @@ def get_file_index_map(
 
     for i, orig_file_path in enumerate(plist.get('files', [])):
         file_path = os.path.normpath(os.path.join(
-            analyzer_result_dir_path, orig_file_path))
+            source_dir_path, orig_file_path))
         file_index_map[i] = get_or_create_file(file_path, file_cache)
 
     return file_index_map
@@ -183,11 +183,15 @@ def get_file_index_map(
 class Parser(BaseParser):
     def get_reports(
         self,
-        analyzer_result_file_path: str
+        analyzer_result_file_path: str,
+        source_dir_path: Optional[str] = None
     ) -> List[Report]:
         """ Get reports from the given analyzer result file. """
         reports: List[Report] = []
 
+        if not source_dir_path:
+            source_dir_path = os.path.dirname(analyzer_result_file_path)
+
         try:
             with open(analyzer_result_file_path, 'rb') as fp:
                 plist = parse(fp)
@@ -196,10 +200,9 @@ def get_reports(
                 return reports
 
             metadata = plist.get('metadata')
-            analyzer_result_dir_path = os.path.dirname(
-                analyzer_result_file_path)
+
             files = get_file_index_map(
-                plist, analyzer_result_dir_path, self._file_cache)
+                plist, source_dir_path, self._file_cache)
 
             for diag in plist.get('diagnostics', []):
                 report = self.__create_report(
diff --git a/tools/report-converter/codechecker_report_converter/report/report_file.py b/tools/report-converter/codechecker_report_converter/report/report_file.py
index afffecfe0f..3bf6457bb0 100644
--- a/tools/report-converter/codechecker_report_converter/report/report_file.py
+++ b/tools/report-converter/codechecker_report_converter/report/report_file.py
@@ -46,13 +46,14 @@ def get_parser(
 def get_reports(
     analyzer_result_file_path: str,
     checker_labels: Optional[CheckerLabels] = None,
-    file_cache: Optional[Dict[str, File]] = None
+    file_cache: Optional[Dict[str, File]] = None,
+    source_dir_path: Optional[str] = None
 ) -> List[Report]:
     """ Get reports from the given report file. """
     parser = get_parser(analyzer_result_file_path, checker_labels, file_cache)
 
     if parser:
-        return parser.get_reports(analyzer_result_file_path)
+        return parser.get_reports(analyzer_result_file_path, source_dir_path)
 
     return []