Skip to content

Commit a87bf6e

Browse files
authored
Feature/comment block to struct (#3)
* Fixed commentBlock and enum parsing * Added commentAttributes to structure * Added commentAttributes for Enums * Improved parsing of enum values * Fixed commentAttributes to support whole line values. * Added enum commentAttributes support * Improved regex * Merging same comment values with same comment key. * Adding unit test Signed-off-by: Cervenka Dusan <cervenka@acrios.com>
1 parent f2d2966 commit a87bf6e

File tree

2 files changed

+143
-54
lines changed

2 files changed

+143
-54
lines changed

dissect/cstruct/cstruct.py

Lines changed: 76 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class {name}(Structure):
7070
def __init__(self, cstruct, structure, source=None):
7171
self.structure = structure
7272
self.source = source
73-
super({name}, self).__init__(cstruct, structure.name, structure.fields)
73+
super({name}, self).__init__(cstruct, structure.name, structure.fields, structure.commentAttributes)
7474
7575
def _read(self, stream):
7676
r = OrderedDict()
@@ -333,43 +333,57 @@ def _constants(self, data):
333333

334334
def _enums(self, data):
335335
r = re.finditer(
336-
r'enum\s+(?P<name>[^\s:{]+)\s*(:\s*(?P<type>[^\s]+)\s*)?\{(?P<values>[^}]+)\}\s*;',
336+
r'(?P<commentBlock>\/\*(\*(?!\/)|[^*])*\*\/)?[ \t\r\n]*enum\s+(?P<name>[^\s:{]+)\s*(:\s*(?P<type>[^\s]+)\s*)?\{(?P<values>[^}]+)\}\s*;',
337337
data,
338338
)
339339
for t in r:
340340
d = t.groupdict()
341341

342-
nextval = 0
343-
values = {}
344-
for line in d['values'].split('\n'):
345-
line, sep, comment = line.partition("//")
346-
for v in line.split(","):
347-
key, sep, val = v.partition("=")
348-
key = key.strip()
349-
val = val.strip()
350-
if not key:
351-
continue
352-
if not val:
353-
val = nextval
354-
else:
355-
val = Expression(self.cstruct, val).evaluate({})
356-
357-
nextval = val + 1
358-
359-
values[key] = val
360-
361342
if not d['type']:
362343
d['type'] = 'uint32'
363344

345+
values, valuesDetails = self._parse_fields_enums(d['values'])
346+
347+
commentAttributes = self.parse_comment_block(d['commentBlock'])
348+
364349
enum = Enum(
365-
self.cstruct, d['name'], self.cstruct.resolve(d['type']), values
350+
self.cstruct, d['name'], self.cstruct.resolve(d['type']), values, valuesDetails, commentAttributes
366351
)
367352
self.cstruct.addtype(enum.name, enum)
368353

354+
def _parse_fields_enums(self, s):
355+
nextval = 0
356+
values = {}
357+
valuesDetails = {}
358+
fields = re.finditer(
359+
r'(\n?[ ]*\/\/[^\n]*\n)*[ ]*(?P<commentBlock>\/\*(\*(?!\/)|[^*])*\*\/)?[ \t\r\n]*(?P<value>[a-zA-z][^\n,/]*),?[ ]*(?P<commentBlockAfter>\/\*(\*(?!\/)|[^*])*\*\/)?(\n?[ ]*\/\/[^\n]*\n*)*',
360+
s,
361+
)
362+
363+
for f in fields:
364+
d = f.groupdict()
365+
366+
# Ignore for now
367+
commentAttributes = self.parse_comment_block(d['commentBlock'])
368+
369+
field = re.finditer(
370+
r'(?P<key>[a-zA-z][^ =]*)[ ]*=?[ ]*(?P<value>[^ ]+)?',
371+
d["value"],
372+
)
373+
374+
f = list(field)[0].groupdict()
375+
376+
values[f["key"]] = Expression(self.cstruct, f["value"]).evaluate({}) if f["value"] != None else nextval
377+
378+
nextval = values[f["key"]] + 1
379+
valuesDetails[f["key"]] = {"value":values[f["key"]], "commentAttributes":commentAttributes}
380+
381+
return values, valuesDetails
382+
369383
def _structs(self, data):
370384
compiler = Compiler(self.cstruct)
371385
r = re.finditer(
372-
r'(#(?P<flags>(?:compile))\s+)?((?P<typedef>typedef)\s+)?(?P<type>[^\s]+)\s+(__attribute__\(\([^)]+\)\)\s*)?(?P<name>[^\s]+)?(?P<fields>\s*\{(\s*//[^\n]*|/\*[^*]*\*/|[^}])+\}(?P<defs>\s+[^;\n]+)?)?\s*;',
386+
r'(?P<commentBlock>\/\*(\*(?!\/)|[^*])*\*\/)?[ \t\r\n]*(#(?P<flags>(?:compile))\s+)?((?P<typedef>typedef)\s+)?(?P<type>[^\s]+)\s+(__attribute__\(\([^)]+\)\)\s*)?(?P<name>[^\s]+)?(?P<fields>\s*\{(\s*//[^\n]*|/\*[^*]*\*/|[^}])+\}(?P<defs>\s+[^;\n]+)?)?\s*;',
373387
data,
374388
)
375389
for t in r:
@@ -383,8 +397,9 @@ def _structs(self, data):
383397
raise ParserError("No name for struct")
384398

385399
if d['type'] == 'struct':
386-
data = self._parse_fields(d['fields'][1:-1].strip())
387-
st = Structure(self.cstruct, name, data)
400+
data = self._parse_fields_struct(d['fields'][1:-1].strip())
401+
commentAttributes = self.parse_comment_block(d['commentBlock'])
402+
st = Structure(self.cstruct, name, data, commentAttributes)
388403
if d['flags'] == 'compile' or self.compiled:
389404
st = compiler.compile(st)
390405
elif d['typedef'] == 'typedef':
@@ -400,10 +415,9 @@ def _structs(self, data):
400415
td = td.strip()
401416
self.cstruct.addtype(td, st)
402417

403-
def _parse_fields(self, s):
404-
commentAttributes = {}
418+
def _parse_fields_struct(self, s):
405419
fields = re.finditer(
406-
r'(?P<commentBlock>\/\*(\*(?!\/)|[^*])*\*\/)?[ \t\r\n]*(?P<type>[^\s]+)\s+(?P<name>[^\s\[:]+)(\s*:\s*(?P<bits>\d+))?(\[(?P<count>[^;\n]*)\])?;',
420+
r'(\n?[ ]*\/\/[^\n]*\n)*[ ]*(?P<commentBlock>\/\*(\*(?!\/)|[^*])*\*\/)?[ \t\r\n]*(?P<type>[^\s]+)\s+(?P<name>[^\s\[:]+)(\s*:\s*(?P<bits>\d+))?(\[(?P<count>[^;\n]*)\])?;[ ]*(?P<commentBlockAfter>\/\*(\*(?!\/)|[^*])*\*\/)?(\n?[ ]*\/\/[^\n]*\n*)*',
407421
s,
408422
)
409423
r = []
@@ -412,20 +426,7 @@ def _parse_fields(self, s):
412426
if d['type'].startswith('//'):
413427
continue
414428

415-
commentAttributes={}
416-
417-
#parse the comment header
418-
if d['commentBlock'] is not None and d['commentBlock'].startswith('/*'):
419-
commentfields = re.finditer(
420-
r'@(?P<commentType>[^@,;:\\]+):[ \t]*(?P<commentVal>[^@,;:\s\\]+)',
421-
d['commentBlock'],
422-
)
423-
for cf in commentfields:
424-
cd=cf.groupdict()
425-
try:
426-
commentAttributes[cd['commentType']]=cd['commentVal']
427-
except Exception:
428-
pass
429+
commentAttributes = self.parse_comment_block(d['commentBlock'])
429430

430431
type_ = self.cstruct.resolve(d['type'])
431432

@@ -449,11 +450,32 @@ def _parse_fields(self, s):
449450
d['name'] = d['name'][1:]
450451
type_ = Pointer(self.cstruct, type_)
451452

452-
field = Field(d['name'], type_, int(d['bits']) if d['bits'] else None, commentAttributes=commentAttributes)
453+
field = StructureField(d['name'], type_, int(d['bits']) if d['bits'] else None, commentAttributes=commentAttributes)
453454
r.append(field)
454455

455456
return r
456457

458+
def parse_comment_block(self,s):
459+
commentAttributes={}
460+
461+
#parse the comment header
462+
if s is not None and s.startswith('/*'):
463+
commentfields = re.finditer(
464+
r'@(?P<commentType>[^@,;:\\]+):[ \t]*(?P<commentVal>[^@\n]+)',
465+
s,
466+
)
467+
for cf in commentfields:
468+
cd=cf.groupdict()
469+
try:
470+
oldData = commentAttributes.get(cd['commentType'],"")
471+
if "" != oldData:
472+
oldData += " "
473+
commentAttributes[cd['commentType']]=oldData + cd['commentVal']
474+
except Exception:
475+
pass
476+
477+
return commentAttributes
478+
457479
def _lookups(self, data, consts):
458480
r = re.finditer(r'\$(?P<name>[^\s]+) = ({[^}]+})\w*\n', data)
459481

@@ -757,11 +779,12 @@ def __repr__(self):
757779
class Structure(BaseType):
758780
"""Type class for structures."""
759781

760-
def __init__(self, cstruct, name, fields=None):
782+
def __init__(self, cstruct, name, fields=None, commentAttributes={}):
761783
self.name = name
762784
self.size = None
763785
self.lookup = OrderedDict()
764786
self.fields = fields if fields else []
787+
self.commentAttributes = commentAttributes
765788

766789

767790
for f in self.fields:
@@ -885,7 +908,7 @@ def add_fields(self, name, type_, offset=None, commentAttributes={}):
885908
type_: The field type.
886909
offset: The field offset.
887910
"""
888-
field = Field(name, type_, offset=offset, commentAttributes=commentAttributes)
911+
field = StructureField(name, type_, offset=offset, commentAttributes=commentAttributes)
889912
self.fields.append(field)
890913
self.lookup[name] = field
891914
self.size = None
@@ -915,7 +938,7 @@ def __repr__(self):
915938
def show(self, indent=0):
916939
"""Pretty print this structure."""
917940
if indent == 0:
918-
print("struct {}".format(self.name))
941+
print("{} struct {}".format(self.commentAttributes, self.name))
919942

920943
for field in self.fields:
921944
if field.offset is None:
@@ -983,7 +1006,7 @@ def reset(self):
9831006
self._remaining = 0
9841007

9851008

986-
class Field(object):
1009+
class StructureField(object):
9871010
"""Holds a structure field."""
9881011

9891012
def __init__(self, name, type_, bits=None, offset=None, commentAttributes={}):
@@ -1312,9 +1335,11 @@ class Enum(RawType):
13121335
};
13131336
"""
13141337

1315-
def __init__(self, cstruct, name, type_, values):
1338+
def __init__(self, cstruct, name, type_, values, valuesDetails, commentAttributes={}):
13161339
self.type = type_
13171340
self.values = values
1341+
self.valuesDetails = valuesDetails
1342+
self.commentAttributes = commentAttributes
13181343
self.reverse = {}
13191344

13201345
for k, v in values.items():
@@ -1365,6 +1390,9 @@ def __getattr__(self, attr):
13651390
def __contains__(self, attr):
13661391
return attr in self.values
13671392

1393+
def __repr__(self):
1394+
return '<Enum {}>'.format(self.name)
1395+
13681396

13691397
class EnumInstance(object):
13701398
"""Implements a value instance of an Enum"""

tests/test_basic.py

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,15 @@ def test_enum_comments():
257257
assert c.Inline.foo == 9
258258
assert c.Inline.bar == 10
259259

260+
assert c.Inline.valuesDetails["hello"]["value"] == 7
261+
assert c.Inline.valuesDetails["hello"]["commentAttributes"] == {}
262+
assert c.Inline.valuesDetails["world"]["value"] == 8
263+
assert c.Inline.valuesDetails["world"]["commentAttributes"] == {}
264+
assert c.Inline.valuesDetails["foo"]["value"] == 9
265+
assert c.Inline.valuesDetails["foo"]["commentAttributes"] == {}
266+
assert c.Inline.valuesDetails["bar"]["value"] == 10
267+
assert c.Inline.valuesDetails["bar"]["commentAttributes"] == {}
268+
260269
assert c.Test.a == 2
261270
assert c.Test.b == 3
262271
assert c.Test.c == 4
@@ -772,35 +781,87 @@ def test_dumpstruct(capsys):
772781
assert captured_1.out == captured_2.out
773782

774783

775-
def test_commentfieldparse(capsys):
784+
def test_comment_field_parse_struct(capsys):
776785
c = cstruct.cstruct()
777786
c.load("""
778-
/*discardedCom1*/
787+
/*
788+
* @comment: Hello,
789+
* @comment: how are you?
790+
*/
779791
struct test{
792+
// int notAnStruct;
780793
/*
781-
* @scale: 0.001
782-
* @unit: µtestUnit1
783-
*/
794+
* @comment: I am fine.
795+
* @comment: Thank you.
796+
* @scale: 0.001
797+
* @unit: µtestUnit1
798+
*/
784799
int testVar1;
800+
// int testVar1;
785801
int testVar2;
786802
/* dicardedCom2
787803
* @scale: 5
788804
* @unit: %testUnit2
789805
*/
790806
int testVar3;
807+
// int notAnStruct;
791808
};
792809
""", compiled=False)
793810

794811
assert c.test.name == 'test'
812+
assert c.test.commentAttributes['comment'] == 'Hello, how are you?'
795813

796814
assert 'testVar1' in c.test.lookup
797815
assert 'testVar2' in c.test.lookup
798816
assert 'testVar2' in c.test.lookup
799817

818+
assert c.test.lookup['testVar1'].commentAttributes['comment'] == 'I am fine. Thank you.'
800819
assert c.test.lookup['testVar1'].commentAttributes['scale'] == '0.001'
801820
assert c.test.lookup['testVar1'].commentAttributes['unit'] == 'µtestUnit1'
802821

803822
assert c.test.lookup['testVar2'].commentAttributes == {}
804823

805824
assert c.test.lookup['testVar3'].commentAttributes['scale'] == '5'
806-
assert c.test.lookup['testVar3'].commentAttributes['unit'] == '%testUnit2'
825+
assert c.test.lookup['testVar3'].commentAttributes['unit'] == '%testUnit2'
826+
827+
assert "notAnStruct" not in c.test.lookup
828+
829+
def test_comment_field_parse_enum(capsys):
830+
c = cstruct.cstruct()
831+
c.load("""
832+
/*discardedCom1*/
833+
enum test{
834+
// notAnEnum=4,
835+
/*
836+
* @comment: Comments are working
837+
*/
838+
testEnumVar1=5,
839+
// testEnumVar1,
840+
testEnumVar2,
841+
/*
842+
* @comment: Comments are working 2
843+
*/
844+
testEnumVar3,
845+
// notAnEnum=14,
846+
};
847+
""", compiled=False)
848+
849+
assert c.test.name == 'test'
850+
assert 'comment' not in c.test.commentAttributes
851+
852+
assert 'testEnumVar1' in c.test.values
853+
assert 'testEnumVar1' in c.test.valuesDetails
854+
assert 'testEnumVar2' in c.test.values
855+
assert 'testEnumVar2' in c.test.valuesDetails
856+
assert 'testEnumVar3' in c.test.values
857+
assert 'testEnumVar3' in c.test.valuesDetails
858+
859+
860+
assert c.test.valuesDetails['testEnumVar1']["commentAttributes"]['comment'] == 'Comments are working'
861+
862+
assert c.test.valuesDetails['testEnumVar2']["commentAttributes"] == {}
863+
864+
assert c.test.valuesDetails['testEnumVar3']["commentAttributes"]['comment'] == 'Comments are working 2'
865+
866+
assert "notAnEnum" not in c.test.values
867+
assert "notAnEnum" not in c.test.valuesDetails

0 commit comments

Comments
 (0)