From d61846702f54f87640d46f835bbc9bf7fd5aecac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Laurenz=20Altenm=C3=BCller?= Date: Mon, 5 Aug 2024 16:42:10 +0200 Subject: [PATCH] Quote strings that contain `:` --- docs/index.md | 1 + src/yamlfix/adapters.py | 86 ++++++++++++++++++++++++++++- tests/unit/test_services.py | 104 ++++++++++++++++++++++++++++++++++++ 3 files changed, 190 insertions(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 00aeb96..35d78ab 100644 --- a/docs/index.md +++ b/docs/index.md @@ -87,6 +87,7 @@ variable, use `fix_code`: - item - item ``` +- Quote strings that contain `:` to prevent some parsers from misinterpreting them as mappings. # Configuration diff --git a/src/yamlfix/adapters.py b/src/yamlfix/adapters.py index ac2d0d4..b29c680 100644 --- a/src/yamlfix/adapters.py +++ b/src/yamlfix/adapters.py @@ -4,8 +4,9 @@ import re from functools import partial from io import StringIO -from typing import Any, Callable, List, Match, Optional, Tuple +from typing import Any, Callable, List, Match, NamedTuple, Optional, Tuple +from ruyaml.comments import CommentedSeq from ruyaml.main import YAML from ruyaml.nodes import MappingNode, Node, ScalarNode, SequenceNode from ruyaml.representer import RoundTripRepresenter @@ -349,6 +350,7 @@ def fix(self, source_code: str) -> str: self._fix_truthy_strings, self._fix_jinja_variables, self._ruamel_yaml_fixer, + self._quote_strings_with_colons, self._restore_truthy_strings, self._restore_jinja_variables, self._restore_double_exclamations, @@ -795,3 +797,85 @@ def _restore_jinja_variables(source_code: str) -> str: fixed_source_lines.append(line) return "\n".join(fixed_source_lines) + + def _quote_strings_with_colons(self, source_code: str) -> str: + """Fix strings with colons to be quoted. + + Example: + volumes: [/root:/mapped] + becomes + volumes: ["/root:/mapped"] + + We do this by + 1. loading the yaml + 2. recursively scanning for strings that + * contain colons + * are not already quoted + * are not multi-line strings (see note). + 3. Adding quotes at the string start and end locations in the source_code. + + Note: Multi-line strings are not supported because ruyaml only provides the + start location of a scalar, but not the end location. For single-line strings + you can calculate the end location by adding the length of the string to the + start, but for strings broken over multiple lines this is not straightforward. + """ + log.debug("Fixing unquoted strings with colons...") + + class ToFix(NamedTuple): + """Where to insert quotes.""" + + line: int + start: int + end: int + + positions_to_quote = set() # type: set[ToFix] + lines = source_code.splitlines() + + def add(item: str, line: int, col: int) -> None: + is_quoted = lines[line][col] in ['"', "'"] + if ":" in item and not is_quoted: + to_fix = ToFix( + line=line, + start=col, + end=col + len(item), + ) + if to_fix.end <= len(lines[to_fix.line]): + positions_to_quote.add(to_fix) + else: + log.debug("Skipping %r which is multi-line", item) + + def check(value: CommentedSeq | dict) -> None: + if isinstance(value, CommentedSeq): + for i, item in enumerate(value): + if isinstance(item, str): + line, col = value.lc.item(i) + add(item, line, col) + else: + check(item) + elif isinstance(value, dict): + for key, item in value.items(): + if isinstance(item, str): + try: + line, col = value.lc.value(key) + except (KeyError, TypeError): + # May not be available if merged from an anchor + pass + else: + add(item, line, col) + else: + check(item) + + source_dicts = self.yaml.load_all(source_code) # type: Iterator[CommentedMap] + for source_dict in source_dicts: + check(source_dict) + + for to_fix in sorted(positions_to_quote, reverse=True): + lines[to_fix.line] = (self.config.quote_representation or "'").join( + [ + lines[to_fix.line][: to_fix.start], # noqa: E203: black disagrees + lines[to_fix.line][to_fix.start : to_fix.end], # noqa: E203 + lines[to_fix.line][to_fix.end :], # noqa: E203 + ] + ) + + return "\n".join(lines) diff --git a/tests/unit/test_services.py b/tests/unit/test_services.py index ca23f1b..f185c9f 100644 --- a/tests/unit/test_services.py +++ b/tests/unit/test_services.py @@ -497,6 +497,7 @@ def test_fix_code_functions_emit_debug_logs( "Fixing comments...", "Fixing top level lists...", "Fixing flow-style lists...", + "Fixing unquoted strings with colons...", } assert set(caplog.messages) == expected_logs for record in caplog.records: @@ -1034,3 +1035,106 @@ def test_fix_code_fix_whitelines( result = fix_code(source_code=source, config=config) assert result == desired_source + + @pytest.mark.parametrize( + ("source", "config", "desired_source"), + [ + ( + "volumes: [/root:/mapped, a:b, 'c:d']", + YamlfixConfig(sequence_style=YamlNodeStyle.FLOW_STYLE), + dedent( + """\ + --- + volumes: ['/root:/mapped', 'a:b', 'c:d'] + """ + ), + ), + ( + dedent( + """\ + volumes: + - /root:/mapped + - a:b + - 'c:d' + """ + ), + YamlfixConfig(sequence_style=YamlNodeStyle.BLOCK_STYLE), + dedent( + """\ + --- + volumes: + - '/root:/mapped' + - 'a:b' + - 'c:d' + """ + ), + ), + ( + dedent( + """\ + test: + - "this one:\ + is ok" + - fix this:one + - | + multiline strings: + are not supported yet + - >- + multiline strings: + are not supported yet + """ + ), + YamlfixConfig(sequence_style=YamlNodeStyle.BLOCK_STYLE), + dedent( + """\ + --- + test: + - 'this one:\ + is ok' + - 'fix this:one' + - | + multiline strings: + are not supported yet + - >- + multiline strings: + are not supported yet + """ + ), + ), + ( + dedent( + """\ + merge0: &anchor + host: host.docker.internal:host-gateway + merge1: + <<: *anchor + merge2: + <<: *anchor + """ + ), + None, + dedent( + """\ + --- + merge0: &anchor + host: 'host.docker.internal:host-gateway' + merge1: + <<: *anchor + merge2: + <<: *anchor + """ + ), + ), + ], + ) + def test_strings_with_colons_are_quoted( + self, source: str, config: Optional[YamlfixConfig], desired_source: str + ) -> None: + """ + Given: Code with a string containing `:` + When: fix_code is run + Then: The string is quoted and not turned into a mapping + """ + result = fix_code(source, config=config) + + assert result == desired_source