Skip to content

Commit

Permalink
Initial implementation.
Browse files Browse the repository at this point in the history
  • Loading branch information
yuriisk committed Jun 16, 2020
1 parent 6ed2349 commit 47ab7d2
Show file tree
Hide file tree
Showing 9 changed files with 617 additions and 2 deletions.
28 changes: 26 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,26 @@
# clang-tidy-reformatter
Simple python script to reformat clang-tidy output.
# Clang-Tidy Converter

Python3 script to convert Clang-Tidy output to [Code Climate JSON](https://github.com/codeclimate/platform/blob/master/spec/analyzers/SPEC.md#issue).

## Usage

`python3 -m clang_tidy_converter [-h] [-r PROJECT_ROOT] [-l]`

Reads Clang-Tidy output from `STDIN` and prints Code Climate JSON to `STDOUT`.

### Arguments

* `-h, --help` - show help message and exit.
* `-r PROJECT_ROOT, --project_root PROJECT_ROOT` - output file paths relative to `PROJECT_ROOT`. E.g. if Clang-Tidy reports the file path '/home/user/projects/A/src/main.cpp' and `PROJECT_ROOT` is set to '/home/user/projects/A', the Code Climate JSON refers to the file as 'src/main.cpp'.
* `-l, --use_location_lines` - use _line-based_ locations instead of _position-based_ ones, as defined in the _Locations_ section of the Code Climate specification.

## Example

A GitLab code quality report is a JSON file that implements a subset of the Code Climate specification, so this script can be used to convert Clang-Tidy output to a GitLab code quality report. The following command does it:

```bash
clang-tidy /path/to/my/project/file.cpp \
| python3 -m clang_tidy_converter --project_root /path/to/my/project \
--use_location_lines \
> gl-code-quality-report.json
```
2 changes: 2 additions & 0 deletions clang_tidy_converter/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .formatter import *
from .parser import *
31 changes: 31 additions & 0 deletions clang_tidy_converter/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python3

from .formatter import CodeClimateFormatter
from .parser import ClangTidyParser
from argparse import ArgumentParser
import os
import sys

def create_argparser():
    """Build the command-line parser for the converter.

    Returns:
        An ``ArgumentParser`` with two options: ``--project_root`` (a string
        that defaults to ``''``) and ``--use_location_lines`` (a flag that
        defaults to ``False``).
    """
    arg_parser = ArgumentParser(
        description='Reads Clang-Tidy output from STDIN and prints Code Climate JSON to STDOUT.',
    )
    arg_parser.add_argument(
        '-r', '--project_root',
        default='',
        help='output file paths relative to PROJECT_ROOT',
    )
    arg_parser.add_argument(
        '-l', '--use_location_lines',
        action='store_true',
        help='use line-based locations instead of position-based as defined in Locations section of Code Climate specification.',
    )
    return arg_parser

def main(args):
    """Convert Clang-Tidy output from STDIN into Code Climate JSON on STDOUT.

    Args:
        args: Parsed command-line namespace. ``project_root`` is read here;
            the whole namespace is also handed to the formatter.
    """
    tidy_parser = ClangTidyParser()
    stdin_lines = sys.stdin.readlines()
    messages = tidy_parser.parse(stdin_lines)

    # An empty project root means the reported paths are left untouched.
    if args.project_root:
        convert_paths_to_relative(messages, args.project_root)

    report = CodeClimateFormatter().format(messages, args)
    print(report)

def convert_paths_to_relative(messages, root_dir):
    """Rewrite message file paths, in place, relative to ``root_dir``.

    Every message, and recursively every message reachable through its
    ``children``, gets ``filepath`` replaced by
    ``os.path.relpath(filepath, root_dir)``.

    Args:
        messages: Top-level messages to update.
        root_dir: Directory the new paths are expressed relative to.
    """
    # Explicit stack, filled in reverse so messages are visited in the same
    # parent-before-children, left-to-right order as a recursive walk.
    pending = list(messages)[::-1]
    while pending:
        current = pending.pop()
        current.filepath = os.path.relpath(current.filepath, root_dir)
        pending.extend(list(current.children)[::-1])

if __name__ == "__main__":
    # Script entry point: parse the command line, then run the conversion.
    cli_args = create_argparser().parse_args()
    main(cli_args)
1 change: 1 addition & 0 deletions clang_tidy_converter/formatter/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .code_climate_formatter import CodeClimateFormatter
140 changes: 140 additions & 0 deletions clang_tidy_converter/formatter/code_climate_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/usr/bin/env python3

import json
import hashlib

from ..parser import ClangMessage

def remove_duplicates(l):
    """Return the distinct items of ``l`` as a list, in first-seen order.

    Going through a ``set`` would return the items in an arbitrary order that
    can change between interpreter runs for strings (hash randomization),
    which makes generated reports non-reproducible. ``dict.fromkeys`` keeps
    insertion order while still dropping repeats.

    Args:
        l: Iterable of hashable items.

    Returns:
        A new list with each distinct item once, ordered by first occurrence.
    """
    return list(dict.fromkeys(l))

class CodeClimateFormatter:
    """Render parsed Clang-Tidy messages as Code Climate issue JSON."""

    def __init__(self):
        pass

    def format(self, messages, args):
        """Serialize every message as one JSON issue.

        Each issue is dumped with an indent of 2 and followed by a NUL
        character and a newline; the pieces are concatenated in input order.

        Args:
            messages: Top-level messages to serialize.
            args: Options namespace; ``use_location_lines`` selects the
                location format.

        Returns:
            The concatenated issues, or an empty string for no messages.
        """
        chunks = [
            json.dumps(self._format_message(message, args), indent=2) + '\0\n'
            for message in messages
        ]
        return "".join(chunks)

    def _format_message(self, message, args):
        """Build the issue dictionary for a single top-level message."""
        issue = {'type': 'issue'}
        issue['check_name'] = message.diagnostic_name
        issue['description'] = message.message
        issue['content'] = self._extract_content(message, args)
        issue['categories'] = self._extract_categories(message, args)
        issue['location'] = self._extract_location(message, args)
        issue['trace'] = self._extract_trace(message, args)
        issue['severity'] = self._extract_severity(message, args)
        issue['fingerprint'] = self._generate_fingerprint(message)
        return issue

    def _extract_content(self, message, args):
        """Return the issue content: detail lines and child text in a fenced block."""
        body_lines = ['```']
        body_lines.extend(message.details_lines)
        body_lines.extend(self._messages_to_text(message.children))
        body_lines.append('```')
        return {'body': '\n'.join(body_lines)}

    def _messages_to_text(self, messages):
        """Flatten messages and their descendants into text lines, depth first."""
        lines = []
        for msg in messages:
            lines.append(f'{msg.filepath}:{msg.line}:{msg.column}: {msg.message}')
            lines += msg.details_lines
            lines += self._messages_to_text(msg.children)
        return lines

    def _extract_categories(self, message, args):
        """Derive issue categories from substrings of the diagnostic name.

        Falls back to 'Bug Risk' when nothing matches; repeats are removed.
        """
        # (substring of the diagnostic name, category it implies), checked in order.
        keyword_categories = (
            ('bugprone', 'Bug Risk'),
            ('modernize', 'Compatibility'),
            ('portability', 'Compatibility'),
            ('performance', 'Performance'),
            ('readability', 'Clarity'),
            ('cloexec', 'Security'),
            ('security', 'Security'),
            ('naming', 'Style'),
            ('misc', 'Style'),
            ('cppcoreguidelines', 'Style'),
            ('hicpp', 'Style'),
            ('simplify', 'Complexity'),
            ('redundant', 'Duplication'),
        )
        name = message.diagnostic_name
        found = [category for keyword, category in keyword_categories if keyword in name]
        # This one check is matched by prefix rather than by substring.
        if name.startswith('boost-use-to-string'):
            found.append('Compatibility')
        if not found:
            found.append('Bug Risk')
        return remove_duplicates(found)

    def _extract_trace(self, message, args):
        """Return the trace entry listing the locations of all descendants."""
        return {'locations': self._extract_other_locations(message, args)}

    def _extract_other_locations(self, message, args):
        """Collect locations of every descendant of ``message``, depth first."""
        collected = []
        for child in message.children:
            collected.append(self._extract_location(child, args))
            collected += self._extract_other_locations(child, args)
        return collected

    def _extract_location(self, message, args):
        """Return the location of ``message`` in line-based or position-based form."""
        if args.use_location_lines:
            span_key = 'lines'
            span = {'begin': message.line}
        else:
            span_key = 'positions'
            span = {'begin': {'line': message.line, 'column': message.column}}
        return {'path': message.filepath, span_key: span}

    def _extract_severity(self, message, args):
        """Map the message level to a severity string.

        Returns ``None`` when the level is not one of the five mapped levels.
        """
        level_severities = (
            (ClangMessage.Level.NOTE, 'info'),
            (ClangMessage.Level.REMARK, 'minor'),
            (ClangMessage.Level.WARNING, 'major'),
            (ClangMessage.Level.ERROR, 'critical'),
            (ClangMessage.Level.FATAL, 'blocker'),
        )
        for level, severity in level_severities:
            if message.level == level:
                return severity
        return None

    def _generate_fingerprint(self, message):
        """Return an MD5 hex digest identifying the message and its children.

        The digest covers the file path, line, column, message text and
        diagnostic name, followed by the fingerprint of each child in order.
        """
        digest = hashlib.md5()
        own_fields = (
            message.filepath,
            str(message.line),
            str(message.column),
            message.message,
            message.diagnostic_name,
        )
        for field in own_fields:
            digest.update(field.encode('utf8'))
        for child in message.children:
            digest.update(self._generate_fingerprint(child).encode('utf-8'))
        return digest.hexdigest()
1 change: 1 addition & 0 deletions clang_tidy_converter/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .clang_tidy_parser import ClangTidyParser, ClangMessage
81 changes: 81 additions & 0 deletions clang_tidy_converter/parser/clang_tidy_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3

from enum import Enum
import re

class ClangMessage:
    """One diagnostic parsed from Clang-Tidy output, with optional nested children."""

    class Level(Enum):
        """Diagnostic level; ``UNKNOWN`` stands for an unrecognised level string."""
        UNKNOWN = 0
        NOTE = 1
        REMARK = 2
        WARNING = 3
        ERROR = 4
        FATAL = 5

    def __init__(self, filepath=None, line=-1, column=-1, level=Level.UNKNOWN,
                 message=None, diagnostic_name=None, details_lines=None, children=None):
        """Store the diagnostic fields.

        ``None`` for a text field becomes ``''`` and ``None`` for a list field
        becomes a fresh empty list, so instances never share list state.
        """
        self.filepath = '' if filepath is None else filepath
        self.line = line
        self.column = column
        self.level = level
        self.message = '' if message is None else message
        self.diagnostic_name = '' if diagnostic_name is None else diagnostic_name
        self.details_lines = [] if details_lines is None else details_lines
        self.children = [] if children is None else children

    @staticmethod
    def levelFromString(levelString):
        """Translate a lower-case level word into a ``Level`` member.

        Returns ``Level.UNKNOWN`` for anything other than 'note', 'remark',
        'warning', 'error' or 'fatal'.
        """
        known_levels = (
            ('note', ClangMessage.Level.NOTE),
            ('remark', ClangMessage.Level.REMARK),
            ('warning', ClangMessage.Level.WARNING),
            ('error', ClangMessage.Level.ERROR),
            ('fatal', ClangMessage.Level.FATAL),
        )
        for text, level in known_levels:
            if levelString == text:
                return level
        return ClangMessage.Level.UNKNOWN

class ClangTidyParser:
    """Parse raw Clang-Tidy output lines into a tree of ``ClangMessage`` objects."""

    # "<path>:<line>:<column>: <level>: <text>" with an optional trailing " [check-name]".
    MESSAGE_REGEX = re.compile(r"^(?P<filepath>.+):(?P<line>\d+):(?P<column>\d+): (?P<level>\S+): (?P<message>.*?)( \[(?P<diagnostic_name>.*)\])?$")
    # Location-less generic errors (e.g. "error: -mapcs-frame not supported") are skipped.
    IGNORE_REGEX = re.compile(r"^error:.*$")

    def __init__(self):
        pass

    def parse(self, lines):
        """Turn output lines into top-level messages with notes attached as children.

        Lines may carry their line terminator (as produced by ``readlines()``)
        or not. Terminators are stripped first, so detail lines are stored
        without them and a CRLF ending cannot prevent the trailing
        ``[check-name]`` from being recognised.

        A line that is not a diagnostic, or whose level is unknown, is stored
        as a detail line of the preceding diagnostic. Such a line appearing
        before any diagnostic has nothing to belong to and is dropped.

        Args:
            lines: Iterable of output lines.

        Returns:
            List of top-level ``ClangMessage`` objects.
        """
        messages = []
        for raw_line in lines:
            line = raw_line.rstrip('\r\n')
            if self._is_ignored(line):
                continue
            message = self._parse_message(line)
            if message is not None and message.level != ClangMessage.Level.UNKNOWN:
                messages.append(message)
            elif messages:
                messages[-1].details_lines.append(line)
            # Otherwise: stray text ahead of the first diagnostic; skip it
            # instead of failing on an empty message list.
        return self._group_messages(messages)

    def _parse_message(self, line):
        """Return a ``ClangMessage`` for a diagnostic line, or ``None`` if it is not one."""
        regex_res = self.MESSAGE_REGEX.match(line)
        if regex_res is None:
            return None
        return ClangMessage(
            filepath=regex_res.group('filepath'),
            line=int(regex_res.group('line')),
            column=int(regex_res.group('column')),
            level=ClangMessage.levelFromString(regex_res.group('level')),
            message=regex_res.group('message'),
            diagnostic_name=regex_res.group('diagnostic_name')
        )

    def _is_ignored(self, line):
        """Tell whether the line is a generic error that should be skipped."""
        return self.IGNORE_REGEX.match(line) is not None

    def _group_messages(self, messages):
        """Attach each note to the closest preceding non-note message.

        A note with no preceding message is kept as a top-level entry so the
        diagnostic is not lost (and no empty-list lookup happens).
        """
        grouped_messages = []
        for msg in messages:
            if msg.level == ClangMessage.Level.NOTE and grouped_messages:
                grouped_messages[-1].children.append(msg)
            else:
                grouped_messages.append(msg)
        return grouped_messages
101 changes: 101 additions & 0 deletions tests/test_clang_tidy_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env python3

import unittest

from clang_tidy_converter import ClangTidyParser, ClangMessage

class ClangTidyParserTest(unittest.TestCase):
    """Unit tests for ``ClangTidyParser.parse``."""

    # Source snippet lines that follow the leak diagnostic in Clang-Tidy output.
    LEAK_DETAILS = [' return new SomeFunction(',
                    ' ^']

    @staticmethod
    def _leak_line(level):
        """Return the sample memory-leak diagnostic line with the given level word."""
        return f'/usr/lib/include/some_include.h:1039:3: {level}: Potential memory leak [clang-analyzer-cplusplus.NewDeleteLeaks]'

    def _assert_leak_warning(self, msg):
        """Check the fields shared by every parsed sample leak warning."""
        self.assertEqual('/usr/lib/include/some_include.h', msg.filepath)
        self.assertEqual(1039, msg.line)
        self.assertEqual(3, msg.column)
        self.assertEqual(ClangMessage.Level.WARNING, msg.level)
        self.assertEqual('Potential memory leak', msg.message)
        self.assertEqual('clang-analyzer-cplusplus.NewDeleteLeaks', msg.diagnostic_name)

    def test_warning_message(self):
        messages = ClangTidyParser().parse([self._leak_line('warning')])
        self.assertEqual(1, len(messages))
        msg = messages[0]
        self._assert_leak_warning(msg)
        self.assertEqual([], msg.details_lines)
        self.assertEqual([], msg.children)

    def test_remark_message_level(self):
        messages = ClangTidyParser().parse([self._leak_line('remark')])
        self.assertEqual(ClangMessage.Level.REMARK, messages[0].level)

    def test_error_message_level(self):
        messages = ClangTidyParser().parse([self._leak_line('error')])
        self.assertEqual(ClangMessage.Level.ERROR, messages[0].level)

    def test_fatal_message_level(self):
        messages = ClangTidyParser().parse([self._leak_line('fatal')])
        self.assertEqual(ClangMessage.Level.FATAL, messages[0].level)

    def test_unknown_message_level(self):
        # A line with an unrecognised level is kept as detail text of the
        # diagnostic before it.
        unknown_line = self._leak_line('smth')
        messages = ClangTidyParser().parse([self._leak_line('fatal'), unknown_line])
        self.assertEqual(1, len(messages))
        self.assertEqual([unknown_line], messages[0].details_lines)

    def test_multiline_warning_message(self):
        output = [self._leak_line('warning')] + self.LEAK_DETAILS
        messages = ClangTidyParser().parse(output)
        self.assertEqual(1, len(messages))
        msg = messages[0]
        self._assert_leak_warning(msg)
        self.assertEqual(self.LEAK_DETAILS, msg.details_lines)
        self.assertEqual([], msg.children)

    def test_warning_message_children(self):
        note_details = [' auto sf = OtherFunction( a, b, c );',
                        ' ^']
        output = [self._leak_line('warning')]
        output += self.LEAK_DETAILS
        output.append("/home/user/some_source.cpp:267:15: note: Calling 'OtherFunction'")
        output += note_details
        messages = ClangTidyParser().parse(output)
        self.assertEqual(1, len(messages))
        msg = messages[0]
        self._assert_leak_warning(msg)
        self.assertEqual(self.LEAK_DETAILS, msg.details_lines)
        self.assertEqual(1, len(msg.children))
        child = msg.children[0]
        self.assertEqual('/home/user/some_source.cpp', child.filepath)
        self.assertEqual(267, child.line)
        self.assertEqual(15, child.column)
        self.assertEqual(ClangMessage.Level.NOTE, child.level)
        self.assertEqual("Calling 'OtherFunction'", child.message)
        self.assertEqual('', child.diagnostic_name)
        self.assertEqual(note_details, child.details_lines)
        self.assertEqual([], child.children)

    def test_ignorance_of_generic_errors(self):
        messages = ClangTidyParser().parse(['error: -mapcs-frame not supported'])
        self.assertEqual([], messages)


if __name__ == '__main__':
    unittest.main()
Loading

0 comments on commit 47ab7d2

Please sign in to comment.