
Added handling for quoted braces and semicolons #49

Merged: 2 commits, merged on Aug 22, 2018
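
This change tags every lexer token with whether it came from inside quotes, so braces and semicolons that appear in quoted arguments (such as a log_format string containing a literal '}') no longer affect block nesting. A minimal sketch of the new token shape, using the public crossplane.lex API on the test config added by this PR:

# A minimal sketch (not part of the diff): crossplane.lex now yields
# (token, lineno, quoted) 3-tuples, so quoted '{', '}', and ';' tokens can be
# distinguished from structural ones.
import crossplane

for token, lineno, quoted in crossplane.lex('tests/configs/quoted-right-brace/nginx.conf'):
    if token in ('{', '}', ';') and quoted:
        print('line %d: %r is quoted and does not open or close a block' % (lineno, token))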
6 changes: 4 additions & 2 deletions crossplane/__main__.py
@@ -108,8 +108,10 @@ def build(filename, dirname=None, force=False, indent=4, tabs=False,

def lex(filename, out, indent=None, line_numbers=False):
payload = list(lex_file(filename))
if not line_numbers:
payload = [token for token, lineno in payload]
if line_numbers:
payload = [(token, lineno) for token, lineno, quoted in payload]
else:
payload = [token for token, lineno, quoted in payload]
_dump_payload(payload, out, indent=indent)


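For reference, a hedged sketch of the two payload shapes the lex subcommand builds above: the quoted flag is always dropped before dumping, and line_numbers only decides whether each entry keeps its line number.

# Sketch using the public crossplane.lex, which yields the same
# (token, lineno, quoted) 3-tuples the lex_file() call above consumes.
import crossplane

payload = list(crossplane.lex('nginx.conf'))
with_line_numbers = [(token, lineno) for token, lineno, quoted in payload]  # e.g. ('events', 1)
tokens_only = [token for token, lineno, quoted in payload]                  # e.g. 'events'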
21 changes: 11 additions & 10 deletions crossplane/ext/lua.py
@@ -34,18 +34,18 @@ def register_extension(self):
register_external_builder(directives=self.directives, builder=self.build)

@fix_pep_479
def lex(self, token_iterator, directive):
def lex(self, char_iterator, directive):
if directive == "set_by_lua_block":
# https://github.com/openresty/lua-nginx-module#set_by_lua_block
# The sole *_by_lua_block directive that has an arg
arg = ''
for char, line in token_iterator:
for char, line in char_iterator:
if char.isspace():
if arg:
yield (arg, line)
yield (arg, line, False)
break
while char.isspace():
char, line = next(token_iterator)
char, line = next(char_iterator)

arg += char

@@ -54,37 +54,38 @@ def lex(self, token_iterator, directive):

# check that Lua block starts correctly
while True:
char, line = next(token_iterator)
char, line = next(char_iterator)
if not char.isspace():
break

if char != "{":
reason = 'expected { to start Lua block'
raise LuaBlockParserSyntaxError(reason, filename=None, lineno=line)

depth += 1

# Grab everything in Lua block as a single token
# and watch for curly brace '{' in strings
for char, line in token_iterator:
for char, line in char_iterator:
if char == '{':
depth += 1
elif char == '}':
depth -= 1
elif char in ('"', "'"):
quote = char
token += quote
char, line = next(token_iterator)
char, line = next(char_iterator)
while char != quote:
token += quote if char == quote else char
char, line = next(token_iterator)
char, line = next(char_iterator)

if depth < 0:
reason = 'unexpected "}"'
raise LuaBlockParserSyntaxError(reason, filename=None, lineno=line)

if depth == 0:
yield (token, line)
yield (';', line)
yield (token, line, True) # True because this is treated like a string
yield (';', line, False)
raise StopIteration
token += char

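To make the new contract concrete, here is a hedged sketch (not part of the diff) of how a *_by_lua_block directive now lexes: the whole Lua body between the braces comes back as a single token marked quoted=True, followed by a synthetic ';' so the parser can terminate the directive, as the tests further down also show.

# Hypothetical usage of the private _lex_file_object helper on an in-memory
# config; this assumes the bundled Lua extension is registered when crossplane
# is imported, as the tests below suggest.
import io
from crossplane.lexer import _lex_file_object

conf = io.StringIO('http {\n    content_by_lua_block { ngx.say("hello") }\n}\n')
tokens = list(_lex_file_object(conf))
# tokens is roughly:
#   [('http', 1, False), ('{', 1, False), ('content_by_lua_block', 2, False),
#    (' ngx.say("hello") ', 2, True),   # the Lua body, treated like a quoted string
#    (';', 2, False), ('}', 3, False)]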
32 changes: 19 additions & 13 deletions crossplane/lexer.py
@@ -28,7 +28,11 @@ def _iterlinecount(iterable):

@fix_pep_479
def _lex_file_object(file_obj):
"""Generates token tuples from an nginx config file object"""
"""
Generates token tuples from an nginx config file object

Yields 3-tuples like (token, lineno, quoted)
"""
token = '' # the token buffer
token_line = 0 # the line the token starts on
next_token_is_directive = True
@@ -42,7 +46,7 @@ def lex(filename, out, indent=None, line_numbers=False):
if char.isspace():
# if token complete yield it and reset token buffer
if token:
yield (token, token_line)
yield (token, token_line, False)
if next_token_is_directive and token in EXTERNAL_LEXERS:
for custom_lexer_token in EXTERNAL_LEXERS[token](it, token):
yield custom_lexer_token
@@ -60,7 +64,7 @@ def lex(filename, out, indent=None, line_numbers=False):
while not char.endswith('\n'):
token = token + char
char, _ = next(it)
yield (token, line)
yield (token, line, False)
token = ''
continue

@@ -87,26 +91,28 @@ def lex(filename, out, indent=None, line_numbers=False):
token += quote if char == '\\' + quote else char
char, line = next(it)

yield (token, token_line)
yield (token, token_line, True) # True because this is in quotes

# handle quoted external directives
if next_token_is_directive and token in EXTERNAL_LEXERS:
for custom_lexer_token in EXTERNAL_LEXERS[token](it, token):
yield custom_lexer_token
next_token_is_directive = True
else:
next_token_is_directive = False
token = ''

token = ''
continue

# handle special characters that are treated like full tokens
if char in ('{', '}', ';'):
# if token complete yield it and reset token buffer
if token:
yield (token, token_line)
yield (token, token_line, False)
token = ''

# this character is a full token so yield it now
yield (char, line)
yield (char, line, False)
next_token_is_directive = True
continue

@@ -118,18 +124,18 @@ def _balance_braces(tokens, filename=None):
"""Raises syntax errors if braces aren't balanced"""
depth = 0

for token, line in tokens:
if token == '}':
for token, line, quoted in tokens:
if token == '}' and not quoted:
depth -= 1
elif token == '{':
elif token == '{' and not quoted:
depth += 1

# raise error if we ever have more right braces than left
if depth < 0:
reason = 'unexpected "}"'
raise NgxParserSyntaxError(reason, filename, line)
else:
yield token, line
yield (token, line, quoted)

# raise error if we have less right braces than left at EOF
if depth > 0:
@@ -142,8 +148,8 @@ def lex(filename):
with io.open(filename, mode='r', encoding='utf-8') as f:
it = _lex_file_object(f)
it = _balance_braces(it, filename)
for token, line in it:
yield token, line
for token, line, quoted in it:
yield (token, line, quoted)


def register_external_lexer(directives, lexer):
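A small, hypothetical usage example (not part of the diff) of the quoted flag in isolation: a quoted token is yielded with quoted=True even when its text is '{', '}', or ';', which is exactly what lets _balance_braces above skip it.

import io
from crossplane.lexer import _lex_file_object

# The quoted right brace comes back as an ordinary token flagged as quoted,
# so it no longer trips the brace balancer.
tokens = list(_lex_file_object(io.StringIO('log_format main "}";\n')))
# tokens is roughly:
#   [('log_format', 1, False), ('main', 1, False), ('}', 1, True), (';', 1, False)]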
20 changes: 10 additions & 10 deletions crossplane/parser.py
@@ -72,15 +72,15 @@ def _parse(parsing, tokens, ctx=(), consume=False):
parsed = []

# parse recursively by pulling from a flat stream of tokens
for token, lineno in tokens:
for token, lineno, quoted in tokens:
# we are parsing a block, so break if it's closing
if token == '}':
if token == '}' and not quoted:
break

# if we are consuming, then just continue until end of context
if consume:
# if we find a block inside this context, consume it too
if token == '{':
if token == '{' and not quoted:
_parse(parsing, tokens, consume=True)
continue

@@ -102,7 +102,7 @@ def _parse(parsing, tokens, ctx=(), consume=False):
}

# if token is comment
if directive.startswith('#'):
if directive.startswith('#') and not quoted:
if comments:
stmt['directive'] = '#'
stmt['comment'] = token[1:]
@@ -113,15 +113,15 @@ def _parse(parsing, tokens, ctx=(), consume=False):

# parse arguments by reading tokens
args = stmt['args']
token, __ = next(tokens) # disregard line numbers of args
while token not in ('{', ';', '}'):
token, __, quoted = next(tokens) # disregard line numbers of args
while token not in ('{', ';', '}') or quoted:
stmt['args'].append(token)
token, __ = next(tokens)
token, __, quoted = next(tokens)

# consume the directive if it is ignored and move on
if stmt['directive'] in ignore:
# if this directive was a block consume it too
if token == '{':
if token == '{' and not quoted:
_parse(parsing, tokens, consume=True)
continue

@@ -138,7 +138,7 @@ def _parse(parsing, tokens, ctx=(), consume=False):

# if it was a block but shouldn't have been then consume
if e.strerror.endswith(' is not terminated by ";"'):
if token != '}':
if token != '}' and not quoted:
_parse(parsing, tokens, consume=True)
else:
break
@@ -184,7 +184,7 @@ def _parse(parsing, tokens, ctx=(), consume=False):
stmt['includes'].append(index)

# if this statement terminated with '{' then it is a block
if token == '{':
if token == '{' and not quoted:
inner = enter_block_ctx(stmt, ctx) # get context for block
stmt['block'] = _parse(parsing, tokens, ctx=inner)

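With quoted braces now ignored by the parser as well, the new test config below parses as a single log_format statement instead of raising an unexpected-brace error. A hedged sketch, assuming the usual crossplane.parse payload layout:

import crossplane

payload = crossplane.parse('tests/configs/quoted-right-brace/nginx.conf')
http = payload['config'][0]['parsed'][1]   # the 'http' block ('events' is parsed[0])
log_format = http['block'][0]              # the single log_format directive
assert log_format['args'][0] == 'main'
assert log_format['args'][-1] == '}'       # the quoted right brace survives as an argument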
16 changes: 16 additions & 0 deletions tests/configs/quoted-right-brace/nginx.conf
@@ -0,0 +1,16 @@
events {}
http {
log_format main escape=json
'{ "@timestamp": "$time_iso8601", '
'"server_name": "$server_name", '
'"host": "$host", '
'"status": "$status", '
'"request": "$request", '
'"uri": "$uri", '
'"args": "$args", '
'"https": "$https", '
'"request_method": "$request_method", '
'"referer": "$http_referer", '
'"agent": "$http_user_agent"'
'}';
}
6 changes: 3 additions & 3 deletions tests/ext/test_lua.py
@@ -9,7 +9,7 @@ def test_lex_lua_block_simple():
dirname = os.path.join(tests_dir, 'configs', 'lua-block-simple')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('http', 1),
('{', 1),
('init_by_lua_block', 2),
@@ -75,7 +75,7 @@ def test_lex_lua_block_larger():
dirname = os.path.join(tests_dir, 'configs', 'lua-block-larger')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('http', 1),
('{', 1),
('content_by_lua_block', 2),
@@ -126,7 +126,7 @@ def test_lex_lua_block_tricky():
dirname = os.path.join(tests_dir, 'configs', 'lua-block-tricky')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('http', 1),
('{', 1),
('server', 2),
23 changes: 19 additions & 4 deletions tests/test_lex.py
@@ -9,7 +9,7 @@ def test_simple_config():
dirname = os.path.join(here, 'configs', 'simple')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('events', 1), ('{', 1), ('worker_connections', 2), ('1024', 2),
(';', 2), ('}', 3), ('http', 5), ('{', 5), ('server', 6), ('{', 6),
('listen', 7), ('127.0.0.1:8080', 7), (';', 7), ('server_name', 8),
@@ -23,7 +23,7 @@ def test_with_config_comments():
dirname = os.path.join(here, 'configs', 'with-comments')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('events', 1), ('{', 1), ('worker_connections', 2), ('1024', 2),
(';', 2), ('}', 3),('#comment', 4), ('http', 5), ('{', 5),
('server', 6), ('{', 6), ('listen', 7), ('127.0.0.1:8080', 7),
@@ -38,7 +38,7 @@ def test_messy_config():
dirname = os.path.join(here, 'configs', 'messy')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('user', 1), ('nobody', 1), (';', 1),
('# hello\\n\\\\n\\\\\\n worlddd \\#\\\\#\\\\\\# dfsf\\n \\\\n \\\\\\n ', 2),
('events', 3), ('{', 3), ('worker_connections', 3), ('2048', 3),
@@ -73,7 +73,22 @@ def test_quote_behavior():
dirname = os.path.join(here, 'configs', 'quote-behavior')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(token for token, line in tokens) == [
assert list(token for token, line, quoted in tokens) == [
'outer-quote', 'left', '-quote', 'right-"quote"', 'inner"-"quote', ';',
'', '', 'left-empty', 'right-empty""', 'inner""empty', 'right-empty-single"', ';',
]


def test_quoted_right_brace():
dirname = os.path.join(here, 'configs', 'quoted-right-brace')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(token for token, line, quoted in tokens) == [
'events', '{', '}', 'http', '{', 'log_format', 'main', 'escape=json',
'{ "@timestamp": "$time_iso8601", ', '"server_name": "$server_name", ',
'"host": "$host", ', '"status": "$status", ',
'"request": "$request", ', '"uri": "$uri", ', '"args": "$args", ',
'"https": "$https", ', '"request_method": "$request_method", ',
'"referer": "$http_referer", ', '"agent": "$http_user_agent"', '}',
';', '}'
]