From b8deecdaf937629a9926acfaebcec503d9b7c762 Mon Sep 17 00:00:00 2001
From: Andrei Litvin <andy314@gmail.com>
Date: Fri, 27 Oct 2023 18:35:17 -0400
Subject: [PATCH] Add more lenient parsing (DM XML scraper workarounds) 
 (#30065)

* More lenient parsing: naming and types

* Even better logic

* Lenient parsing updates for more type logic

* Another constant

* Update the test

* Restyle

* Merge with master with better diffing
---
 .../data_model_xml/handlers/parsing.py        | 31 ++++++++-
 .../matter_idl/test_data_model_xml.py         | 64 +++++++++++++++++++
 2 files changed, 93 insertions(+), 2 deletions(-)
diff --git a/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py
index 948d698a5d4a6e..3adb852b846334 100644
--- a/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py
+++ b/scripts/py_matter_idl/matter_idl/data_model_xml/handlers/parsing.py
@@ -90,20 +90,36 @@ def NormalizeDataType(t: str) -> str:
     return _TYPE_REMAP.get(t.lower(), t.replace("-", "_"))
 
 
+# Handle oddities in current data model XML schema for nicer diffs
+_REF_NAME_MAPPING = {
+    "<<ref_DataTypeString>>": "char_string",
+    "<<ref_DataTypeOctstr>>": "octet_string",
+    "<<ref_DataTypeVendorId>>": "vendor_id",
+    "<<ref_DataTypeEndpointNumber>>": "endpoint_no",
+}
+
+
 def ParseType(t: str) -> ParsedType:
     """Parse a data type entry.
 
     Specifically parses a name like "list[Foo Type]".
     """
+
     # very rough matcher ...
     is_list = False
     if t.startswith("list[") and t.endswith("]"):
         is_list = True
         t = t[5:-1]
+    elif t.startswith("<<ref_DataTypeList>>[") and t.endswith("]"):
+        is_list = True
+        t = t[21:-1]
 
     if t.endswith(" Type"):
         t = t[:-5]
 
+    if t in _REF_NAME_MAPPING:
+        t = _REF_NAME_MAPPING[t]
+
     return ParsedType(name=NormalizeDataType(t), is_list=is_list)
 
 
@@ -140,9 +156,20 @@ def NormalizeName(name: str) -> str:
     return name
 
 
-def FieldName(name: str) -> str:
+def FieldName(input_name: str) -> str:
     """Normalized name with the first letter lowercase. """
-    name = NormalizeName(name)
+    name = NormalizeName(input_name)
+
+    # Special-case some names for nicer diffs
+    if name == "ID":
+        return "id"
+
+    # If the input name starts with two uppercase letters, keep the
+    # capitalization as-is. This is typical for "NOC", "IPK", "CSR" and such
+    if len(input_name) > 1:
+        if input_name[0].isupper() and input_name[1].isupper():
+            return name
+
     return name[0].lower() + name[1:]
 
 
diff --git a/scripts/py_matter_idl/matter_idl/test_data_model_xml.py b/scripts/py_matter_idl/matter_idl/test_data_model_xml.py
index eea2308323abec..6117f35e9f1ad2 100755
--- a/scripts/py_matter_idl/matter_idl/test_data_model_xml.py
+++ b/scripts/py_matter_idl/matter_idl/test_data_model_xml.py
@@ -395,6 +395,70 @@ def testAttributes(self):
 
         self.assertIdlEqual(xml_idl, expected_idl)
 
+    def testXmlNameWorkarounds(self):
+        # Validate workarounds for "<<ref_...>>" type names and the "ID" field name
+        # This is a manually-edited copy of an attribute test (not real data)
+
+        xml_idl = XmlToIdl('''
+            <cluster id="123" name="Test" revision="1">
+              <dataTypes>
+                <struct name="OutputInfoStruct">
+                  <field id="0" name="ID" type="&lt;&lt;ref_DataTypeString&gt;&gt;">
+                    <access read="true" write="true"/>
+                    <mandatoryConform/>
+                  </field>
+                  <field id="1" name="items" type="&lt;&lt;ref_DataTypeList&gt;&gt;[uint8]">
+                    <access read="true" write="true"/>
+                    <mandatoryConform/>
+                  </field>
+                  <field id="2" name="endpoints" type="&lt;&lt;ref_DataTypeList&gt;&gt;[&lt;&lt;ref_DataTypeEndpointNumber&gt;&gt; Type]">
+                    <access read="true" write="true"/>
+                    <mandatoryConform/>
+                  </field>
+                </struct>
+              </dataTypes>
+              <attributes>
+                <attribute id="0x0000" name="OutputList" type="list[OutputInfoStruct Type]">
+                  <access read="true" readPrivilege="view"/>
+                  <mandatoryConform/>
+                </attribute>
+                <attribute id="0x0001" name="TestConform" type="enum8">
+                  <access read="true" readPrivilege="view"/>
+                  <otherwiseConform>
+                    <mandatoryConform>
+                      <feature name="PRSCONST"/>
+                    </mandatoryConform>
+                    <optionalConform>
+                      <feature name="AUTO"/>
+                    </optionalConform>
+                  </otherwiseConform>
+                </attribute>
+              </attributes>
+            </cluster>
+        ''')
+
+        expected_idl = IdlTextToIdl('''
+            client cluster Test = 123 {
+               struct OutputInfoStruct {
+                  char_string id = 0;
+                  int8u items[] = 1;
+                  endpoint_no endpoints[] = 2;
+               }
+
+               readonly attribute OutputInfoStruct outputList[] = 0;
+               readonly attribute optional enum8 testConform = 1;
+
+               readonly attribute attrib_id attributeList[] = 65531;
+               readonly attribute event_id eventList[] = 65530;
+               readonly attribute command_id acceptedCommandList[] = 65529;
+               readonly attribute command_id generatedCommandList[] = 65528;
+               readonly attribute bitmap32 featureMap = 65532;
+               readonly attribute int16u clusterRevision = 65533;
+           }
+        ''')
+
+        self.assertIdlEqual(xml_idl, expected_idl)
+
     def testComplexInput(self):
         # This parses a known copy of Switch.xml which happens to be fully
         # spec-conformant (so assuming it as a good input)