Skip to content

Commit ebda402

Browse files
add Lark AST -> HCL2 reconstructor and LarkTree formatter; various other fixes and changes:
* preserve order of serialized attributes and blocks
* make RuleTransformer.__default_token__ differentiate between StringToken and StaticStringToken
* add separate ProviderFunctionCallRule class for more accurate reconstruction
1 parent 5932662 commit ebda402

File tree

12 files changed

+663
-123
lines changed

12 files changed

+663
-123
lines changed

hcl2/rule_transformer/deserializer.py

Lines changed: 82 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import json
2+
from abc import ABC, abstractmethod
3+
from dataclasses import dataclass
24
from functools import lru_cache
3-
from typing import Any, TextIO, List, Union
5+
from typing import Any, TextIO, List, Union, Optional
46

57
from regex import regex
68

@@ -31,7 +33,7 @@
3133
StringRule,
3234
InterpolationRule,
3335
StringPartRule,
34-
HeredocTemplateRule,
36+
HeredocTemplateRule,
3537
HeredocTrimTemplateRule,
3638
)
3739
from hcl2.rule_transformer.rules.tokens import (
@@ -51,34 +53,62 @@
5153
LBRACE,
5254
HEREDOC_TRIM_TEMPLATE,
5355
HEREDOC_TEMPLATE,
56+
COLON,
5457
)
58+
from hcl2.rule_transformer.rules.whitespace import NewLineOrCommentRule
5559
from hcl2.rule_transformer.transformer import RuleTransformer
56-
from hcl2.rule_transformer.utils import DeserializationOptions, HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN
60+
from hcl2.rule_transformer.utils import HEREDOC_TRIM_PATTERN, HEREDOC_PATTERN
5761

5862

59-
class Deserializer:
60-
def __init__(self, options=DeserializationOptions()):
61-
self.options = options
63+
@dataclass
64+
class DeserializerOptions:
65+
heredocs_to_strings: bool = False
66+
indent_length: int = 2
67+
object_elements_colon: bool = False
68+
object_elements_trailing_comma: bool = True
69+
70+
71+
class LarkElementTreeDeserializer(ABC):
72+
def __init__(self, options: DeserializerOptions = None):
73+
self.options = options or DeserializerOptions()
74+
75+
@abstractmethod
76+
def loads(self, value: str) -> LarkElement:
77+
raise NotImplementedError()
78+
79+
def load(self, file: TextIO) -> LarkElement:
80+
return self.loads(file.read())
81+
82+
83+
class BaseDeserializer(LarkElementTreeDeserializer):
84+
def __init__(self, options=None):
85+
super().__init__(options)
86+
self._current_line = 1
87+
self._last_new_line: Optional[NewLineOrCommentRule] = None
6288

6389
@property
6490
@lru_cache
6591
def _transformer(self) -> RuleTransformer:
6692
return RuleTransformer()
6793

6894
def load_python(self, value: Any) -> LarkElement:
69-
return StartRule([self._deserialize(value)])
95+
result = StartRule([self._deserialize(value)])
96+
return result
7097

7198
def loads(self, value: str) -> LarkElement:
7299
return self.load_python(json.loads(value))
73100

74-
def load(self, file: TextIO) -> LarkElement:
75-
return self.loads(file.read())
76-
77101
def _deserialize(self, value: Any) -> LarkElement:
78102
if isinstance(value, dict):
79103
if self._contains_block_marker(value):
80-
elements = self._deserialize_block_elements(value)
81-
return BodyRule(elements)
104+
105+
children = []
106+
107+
block_elements = self._deserialize_block_elements(value)
108+
for element in block_elements:
109+
children.append(element)
110+
111+
return BodyRule(children)
82112

83113
return self._deserialize_object(value)
84114

@@ -89,14 +119,13 @@ def _deserialize(self, value: Any) -> LarkElement:
89119

90120
def _deserialize_block_elements(self, value: dict) -> List[LarkRule]:
91121
children = []
92-
93122
for key, value in value.items():
94123
if self._is_block(value):
95124
# this value is a list of blocks, iterate over each block and deserialize them
96125
for block in value:
97126
children.append(self._deserialize_block(key, block))
98-
else:
99127

128+
else:
100129
# otherwise it's just an attribute
101130
if key != IS_BLOCK:
102131
children.append(self._deserialize_attribute(key, value))
@@ -106,28 +135,24 @@ def _deserialize_block_elements(self, value: dict) -> List[LarkRule]:
106135
def _deserialize_text(self, value: Any) -> LarkRule:
107136
try:
108137
int_val = int(value)
138+
if "." in str(value):
139+
return FloatLitRule([FloatLiteral(float(value))])
109140
return IntLitRule([IntLiteral(int_val)])
110141
except ValueError:
111142
pass
112143

113-
try:
114-
float_val = float(value)
115-
return FloatLitRule([FloatLiteral(float_val)])
116-
except ValueError:
117-
pass
118-
119144
if isinstance(value, str):
120145
if value.startswith('"') and value.endswith('"'):
121146
if not self.options.heredocs_to_strings and value.startswith('"<<-'):
122147
match = HEREDOC_TRIM_PATTERN.match(value[1:-1])
123148
if match:
124149
return self._deserialize_heredoc(value[1:-1], True)
125-
150+
126151
if not self.options.heredocs_to_strings and value.startswith('"<<'):
127152
match = HEREDOC_PATTERN.match(value[1:-1])
128153
if match:
129154
return self._deserialize_heredoc(value[1:-1], False)
130-
155+
131156
return self._deserialize_string(value)
132157

133158
if self._is_expression(value):
@@ -151,7 +176,6 @@ def _deserialize_string(self, value: str) -> StringRule:
151176
pattern = regex.compile(r"(\${1,2}\{(?:[^{}]|(?R))*\})")
152177
parts = [part for part in pattern.split(value) if part != ""]
153178

154-
155179
for part in parts:
156180
if part == '"':
157181
continue
@@ -181,10 +205,12 @@ def _deserialize_string_part(self, value: str) -> StringPartRule:
181205

182206
return StringPartRule([STRING_CHARS(value)])
183207

184-
def _deserialize_heredoc(self, value: str, trim: bool) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]:
208+
def _deserialize_heredoc(
209+
self, value: str, trim: bool
210+
) -> Union[HeredocTemplateRule, HeredocTrimTemplateRule]:
185211
if trim:
186212
return HeredocTrimTemplateRule([HEREDOC_TRIM_TEMPLATE(value)])
187-
return HeredocTemplateRule([HEREDOC_TEMPLATE(value)])
213+
return HeredocTemplateRule([HEREDOC_TEMPLATE(value)])
188214

189215
def _deserialize_expression(self, value: str) -> ExprTermRule:
190216
"""Deserialize an expression string into an ExprTermRule."""
@@ -200,7 +226,9 @@ def _deserialize_expression(self, value: str) -> ExprTermRule:
200226
# transform parsed tree into LarkElement tree
201227
rules_tree = self._transformer.transform(parsed_tree)
202228
# extract expression from the tree
203-
return rules_tree.body.children[0].expression
229+
result = rules_tree.body.children[0].expression
230+
231+
return result
204232

205233
def _deserialize_block(self, first_label: str, value: dict) -> BlockRule:
206234
"""Deserialize a block by extracting labels and body"""
@@ -220,14 +248,24 @@ def _deserialize_block(self, first_label: str, value: dict) -> BlockRule:
220248
break
221249

222250
return BlockRule(
223-
[*[self._deserialize(label) for label in labels], self._deserialize(body)]
251+
[
252+
*[self._deserialize(label) for label in labels],
253+
LBRACE(),
254+
self._deserialize(body),
255+
RBRACE(),
256+
]
224257
)
225258

226259
def _deserialize_attribute(self, name: str, value: Any) -> AttributeRule:
260+
expr_term = self._deserialize(value)
261+
262+
if not isinstance(expr_term, ExprTermRule):
263+
expr_term = ExprTermRule([expr_term])
264+
227265
children = [
228266
self._deserialize_identifier(name),
229267
EQ(),
230-
ExprTermRule([self._deserialize(value)]),
268+
expr_term,
231269
]
232270
return AttributeRule(children)
233271

@@ -247,11 +285,18 @@ def _deserialize_object(self, value: dict) -> ObjectRule:
247285
children = []
248286
for key, value in value.items():
249287
children.append(self._deserialize_object_elem(key, value))
288+
250289
return ObjectRule([LBRACE(), *children, RBRACE()])
251290

252291
def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule:
253292
if self._is_expression(key):
254-
key = ObjectElemKeyExpressionRule([self._deserialize_expression(key)])
293+
key = ObjectElemKeyExpressionRule(
294+
[
295+
child
296+
for child in self._deserialize_expression(key).children
297+
if child is not None
298+
]
299+
)
255300
elif "." in key:
256301
parts = key.split(".")
257302
children = []
@@ -262,13 +307,14 @@ def _deserialize_object_elem(self, key: str, value: Any) -> ObjectElemRule:
262307
else:
263308
key = self._deserialize_text(key)
264309

265-
return ObjectElemRule(
266-
[
267-
ObjectElemKeyRule([key]),
268-
EQ(),
269-
ExprTermRule([self._deserialize_text(value)]),
270-
]
271-
)
310+
result = [
311+
ObjectElemKeyRule([key]),
312+
COLON() if self.options.object_elements_colon else EQ(),
313+
ExprTermRule([self._deserialize(value)]),
314+
]
315+
if self.options.object_elements_trailing_comma:
316+
result.append(COMMA())
317+
return ObjectElemRule(result)
272318

273319
def _is_expression(self, value: str) -> bool:
274320
return value.startswith("${") and value.endswith("}")

0 commit comments

Comments
 (0)