From b8deecdaf937629a9926acfaebcec503d9b7c762 Mon Sep 17 00:00:00 2001 From: Andrei Litvin Date: Fri, 27 Oct 2023 18:35:17 -0400 Subject: [PATCH] Add more lenient parsing (DM XML scraper workarounds) (#30065) * More lenient parsing: naming and types * Even better logic * Lenient parsing updates for more type logic * Another constant * Update the test * Restyle * Merge with master with better diffing --- .../data_model_xml/handlers/parsing.py | 31 ++++++++- .../matter_idl/test_data_model_xml.py | 64 +++++++++++++++++++ 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py index 948d698a5d4a6e..3adb852b846334 100644 --- a/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py +++ b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py @@ -90,20 +90,36 @@ def NormalizeDataType(t: str) -> str: return _TYPE_REMAP.get(t.lower(), t.replace("-", "_")) +# Handle oddities in current data model XML schema for nicer diffs +_REF_NAME_MAPPING = { + "<>": "char_string", + "<>": "octet_string", + "<>": "vendor_id", + "<>": "endpoint_no", +} + + def ParseType(t: str) -> ParsedType: """Parse a data type entry. Specifically parses a name like "list[Foo Type]". """ + # very rough matcher ... is_list = False if t.startswith("list[") and t.endswith("]"): is_list = True t = t[5:-1] + elif t.startswith("<>[") and t.endswith("]"): + is_list = True + t = t[21:-1] if t.endswith(" Type"): t = t[:-5] + if t in _REF_NAME_MAPPING: + t = _REF_NAME_MAPPING[t] + return ParsedType(name=NormalizeDataType(t), is_list=is_list) @@ -140,9 +156,20 @@ def NormalizeName(name: str) -> str: return name -def FieldName(name: str) -> str: +def FieldName(input_name: str) -> str: """Normalized name with the first letter lowercase. """ - name = NormalizeName(name) + name = NormalizeName(input_name) + + # Some exception handling for nicer diffs + if name == "ID": + return "id" + + # If the name starts with a all-uppercase thing, keep it that + # way. This is typical for "NOC", "IPK", "CSR" and such + if len(input_name) > 1: + if input_name[0].isupper() and input_name[1].isupper(): + return name + return name[0].lower() + name[1:] diff --git a/scripts/py_matter_idl/matter_idl/test_data_model_xml.py b/scripts/py_matter_idl/matter_idl/test_data_model_xml.py index eea2308323abec..6117f35e9f1ad2 100755 --- a/scripts/py_matter_idl/matter_idl/test_data_model_xml.py +++ b/scripts/py_matter_idl/matter_idl/test_data_model_xml.py @@ -395,6 +395,70 @@ def testAttributes(self): self.assertIdlEqual(xml_idl, expected_idl) + def testXmlNameWorkarounds(self): + # Validate an attribute with a type list + # This is a manually-edited copy of an attribute test (not real data) + + xml_idl = XmlToIdl(''' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ''') + + expected_idl = IdlTextToIdl(''' + client cluster Test = 123 { + struct OutputInfoStruct { + char_string id = 0; + int8u items[] = 1; + endpoint_no endpoints[] = 2; + } + + readonly attribute OutputInfoStruct outputList[] = 0; + readonly attribute optional enum8 testConform = 1; + + readonly attribute attrib_id attributeList[] = 65531; + readonly attribute event_id eventList[] = 65530; + readonly attribute command_id acceptedCommandList[] = 65529; + readonly attribute command_id generatedCommandList[] = 65528; + readonly attribute bitmap32 featureMap = 65532; + readonly attribute int16u clusterRevision = 65533; + } + ''') + + self.assertIdlEqual(xml_idl, expected_idl) + def testComplexInput(self): # This parses a known copy of Switch.xml which happens to be fully # spec-conformant (so assuming it as a good input)