
Added handling for quoted braces and semicolons #49

Merged: 2 commits, merged on Aug 22, 2018
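
This change tags every lexer token with whether it came from inside quotes, so braces and semicolons that appear in quoted arguments (such as a log_format string containing a literal '}') no longer affect block nesting. A minimal sketch of the new token shape, using the public crossplane.lex API on the test config added by this PR:

# A minimal sketch (not part of the diff): crossplane.lex now yields
# (token, lineno, quoted) 3-tuples, so quoted '{', '}', and ';' tokens can be
# distinguished from structural ones.
import crossplane

for token, lineno, quoted in crossplane.lex('tests/configs/quoted-right-brace/nginx.conf'):
    if token in ('{', '}', ';') and quoted:
        print('line %d: %r is quoted and does not open or close a block' % (lineno, token))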
6 changes: 4 additions & 2 deletions crossplane/__main__.py
@@ -108,8 +108,10 @@ def build(filename, dirname=None, force=False, indent=4, tabs=False,

def lex(filename, out, indent=None, line_numbers=False):
payload = list(lex_file(filename))
if not line_numbers:
payload = [token for token, lineno in payload]
if line_numbers:
payload = [(token, lineno) for token, lineno, quoted in payload]
else:
payload = [token for token, lineno, quoted in payload]
_dump_payload(payload, out, indent=indent)


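For reference, a hedged sketch of the two payload shapes the lex subcommand builds above: the quoted flag is always dropped before dumping, and line_numbers only decides whether each entry keeps its line number.

# Sketch using the public crossplane.lex, which yields the same
# (token, lineno, quoted) 3-tuples the lex_file() call above consumes.
import crossplane

payload = list(crossplane.lex('nginx.conf'))
with_line_numbers = [(token, lineno) for token, lineno, quoted in payload]  # e.g. ('events', 1)
tokens_only = [token for token, lineno, quoted in payload]                  # e.g. 'events'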
21 changes: 11 additions & 10 deletions crossplane/ext/lua.py
@@ -34,18 +34,18 @@ def register_extension(self):
register_external_builder(directives=self.directives, builder=self.build)

@fix_pep_479
def lex(self, token_iterator, directive):
def lex(self, char_iterator, directive):
if directive == "set_by_lua_block":
# https://github.com/openresty/lua-nginx-module#set_by_lua_block
# The sole *_by_lua_block directive that has an arg
arg = ''
for char, line in token_iterator:
for char, line in char_iterator:
if char.isspace():
if arg:
yield (arg, line)
yield (arg, line, False)
break
while char.isspace():
char, line = next(token_iterator)
char, line = next(char_iterator)

arg += char

@@ -54,37 +54,38 @@ def lex(self, token_iterator, directive):

# check that Lua block starts correctly
while True:
char, line = next(token_iterator)
char, line = next(char_iterator)
if not char.isspace():
break

if char != "{":
reason = 'expected { to start Lua block'
raise LuaBlockParserSyntaxError(reason, filename=None, lineno=line)

depth += 1

# Grab everything in Lua block as a single token
# and watch for curly brace '{' in strings
for char, line in token_iterator:
for char, line in char_iterator:
if char == '{':
depth += 1
elif char == '}':
depth -= 1
elif char in ('"', "'"):
quote = char
token += quote
char, line = next(token_iterator)
char, line = next(char_iterator)
while char != quote:
token += quote if char == quote else char
char, line = next(token_iterator)
char, line = next(char_iterator)

if depth < 0:
reason = 'unexpected "}"'
raise LuaBlockParserSyntaxError(reason, filename=None, lineno=line)

if depth == 0:
yield (token, line)
yield (';', line)
yield (token, line, True) # True because this is treated like a string
yield (';', line, False)
raise StopIteration
token += char

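To make the new contract concrete, here is a hedged sketch (not part of the diff) of how a *_by_lua_block directive now lexes: the whole Lua body between the braces comes back as a single token marked quoted=True, followed by a synthetic ';' so the parser can terminate the directive, as the tests further down also show.

# Hypothetical usage of the private _lex_file_object helper on an in-memory
# config; this assumes the bundled Lua extension is registered when crossplane
# is imported, as the tests below suggest.
import io
from crossplane.lexer import _lex_file_object

conf = io.StringIO('http {\n    content_by_lua_block { ngx.say("hello") }\n}\n')
tokens = list(_lex_file_object(conf))
# tokens is roughly:
#   [('http', 1, False), ('{', 1, False), ('content_by_lua_block', 2, False),
#    (' ngx.say("hello") ', 2, True),   # the Lua body, treated like a quoted string
#    (';', 2, False), ('}', 3, False)]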
32 changes: 19 additions & 13 deletions crossplane/lexer.py
@@ -28,7 +28,11 @@ def _iterlinecount(iterable):

@fix_pep_479
def _lex_file_object(file_obj):
"""Generates token tuples from an nginx config file object"""
"""
Generates token tuples from an nginx config file object

Yields 3-tuples like (token, lineno, quoted)
"""
token = '' # the token buffer
token_line = 0 # the line the token starts on
next_token_is_directive = True
@@ -42,7 +46,7 @@ def lex(filename, out, indent=None, line_numbers=False):
if char.isspace():
# if token complete yield it and reset token buffer
if token:
yield (token, token_line)
yield (token, token_line, False)
if next_token_is_directive and token in EXTERNAL_LEXERS:
for custom_lexer_token in EXTERNAL_LEXERS[token](it, token):
yield custom_lexer_token
@@ -60,7 +64,7 @@ def lex(filename, out, indent=None, line_numbers=False):
while not char.endswith('\n'):
token = token + char
char, _ = next(it)
yield (token, line)
yield (token, line, False)
token = ''
continue

@@ -87,26 +91,28 @@ def lex(filename, out, indent=None, line_numbers=False):
token += quote if char == '\\' + quote else char
char, line = next(it)

yield (token, token_line)
yield (token, token_line, True) # True because this is in quotes

# handle quoted external directives
if next_token_is_directive and token in EXTERNAL_LEXERS:
for custom_lexer_token in EXTERNAL_LEXERS[token](it, token):
yield custom_lexer_token
next_token_is_directive = True
else:
next_token_is_directive = False
token = ''

token = ''
continue

# handle special characters that are treated like full tokens
if char in ('{', '}', ';'):
# if token complete yield it and reset token buffer
if token:
yield (token, token_line)
yield (token, token_line, False)
token = ''

# this character is a full token so yield it now
yield (char, line)
yield (char, line, False)
next_token_is_directive = True
continue

@@ -118,18 +124,18 @@ def _balance_braces(tokens, filename=None):
"""Raises syntax errors if braces aren't balanced"""
depth = 0

for token, line in tokens:
if token == '}':
for token, line, quoted in tokens:
if token == '}' and not quoted:
depth -= 1
elif token == '{':
elif token == '{' and not quoted:
depth += 1

# raise error if we ever have more right braces than left
if depth < 0:
reason = 'unexpected "}"'
raise NgxParserSyntaxError(reason, filename, line)
else:
yield token, line
yield (token, line, quoted)

# raise error if we have less right braces than left at EOF
if depth > 0:
@@ -142,8 +148,8 @@ def lex(filename):
with io.open(filename, mode='r', encoding='utf-8') as f:
it = _lex_file_object(f)
it = _balance_braces(it, filename)
for token, line in it:
yield token, line
for token, line, quoted in it:
yield (token, line, quoted)


def register_external_lexer(directives, lexer):
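A small, hypothetical usage example (not part of the diff) of the quoted flag in isolation: a quoted token is yielded with quoted=True even when its text is '{', '}', or ';', which is exactly what lets _balance_braces above skip it.

import io
from crossplane.lexer import _lex_file_object

# The quoted right brace comes back as an ordinary token flagged as quoted,
# so it no longer trips the brace balancer.
tokens = list(_lex_file_object(io.StringIO('log_format main "}";\n')))
# tokens is roughly:
#   [('log_format', 1, False), ('main', 1, False), ('}', 1, True), (';', 1, False)]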
20 changes: 10 additions & 10 deletions crossplane/parser.py
@@ -72,15 +72,15 @@ def _parse(parsing, tokens, ctx=(), consume=False):
parsed = []

# parse recursively by pulling from a flat stream of tokens
for token, lineno in tokens:
for token, lineno, quoted in tokens:
# we are parsing a block, so break if it's closing
if token == '}':
if token == '}' and not quoted:
break

# if we are consuming, then just continue until end of context
if consume:
# if we find a block inside this context, consume it too
if token == '{':
if token == '{' and not quoted:
_parse(parsing, tokens, consume=True)
continue

@@ -102,7 +102,7 @@ def _parse(parsing, tokens, ctx=(), consume=False):
}

# if token is comment
if directive.startswith('#'):
if directive.startswith('#') and not quoted:
if comments:
stmt['directive'] = '#'
stmt['comment'] = token[1:]
@@ -113,15 +113,15 @@ def _parse(parsing, tokens, ctx=(), consume=False):

# parse arguments by reading tokens
args = stmt['args']
token, __ = next(tokens) # disregard line numbers of args
while token not in ('{', ';', '}'):
token, __, quoted = next(tokens) # disregard line numbers of args
while token not in ('{', ';', '}') or quoted:
stmt['args'].append(token)
token, __ = next(tokens)
token, __, quoted = next(tokens)

# consume the directive if it is ignored and move on
if stmt['directive'] in ignore:
# if this directive was a block consume it too
if token == '{':
if token == '{' and not quoted:
_parse(parsing, tokens, consume=True)
continue

@@ -138,7 +138,7 @@ def _parse(parsing, tokens, ctx=(), consume=False):

# if it was a block but shouldn't have been then consume
if e.strerror.endswith(' is not terminated by ";"'):
if token != '}':
if token != '}' and not quoted:
_parse(parsing, tokens, consume=True)
else:
break
@@ -184,7 +184,7 @@ def _parse(parsing, tokens, ctx=(), consume=False):
stmt['includes'].append(index)

# if this statement terminated with '{' then it is a block
if token == '{':
if token == '{' and not quoted:
inner = enter_block_ctx(stmt, ctx) # get context for block
stmt['block'] = _parse(parsing, tokens, ctx=inner)

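With quoted braces now ignored by the parser as well, the new test config below parses as a single log_format statement instead of raising an unexpected-brace error. A hedged sketch, assuming the usual crossplane.parse payload layout:

import crossplane

payload = crossplane.parse('tests/configs/quoted-right-brace/nginx.conf')
http = payload['config'][0]['parsed'][1]   # the 'http' block ('events' is parsed[0])
log_format = http['block'][0]              # the single log_format directive
assert log_format['args'][0] == 'main'
assert log_format['args'][-1] == '}'       # the quoted right brace survives as an argument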
16 changes: 16 additions & 0 deletions tests/configs/quoted-right-brace/nginx.conf
@@ -0,0 +1,16 @@
events {}
http {
log_format main escape=json
'{ "@timestamp": "$time_iso8601", '
'"server_name": "$server_name", '
'"host": "$host", '
'"status": "$status", '
'"request": "$request", '
'"uri": "$uri", '
'"args": "$args", '
'"https": "$https", '
'"request_method": "$request_method", '
'"referer": "$http_referer", '
'"agent": "$http_user_agent"'
'}';
}
6 changes: 3 additions & 3 deletions tests/ext/test_lua.py
@@ -9,7 +9,7 @@ def test_lex_lua_block_simple():
dirname = os.path.join(tests_dir, 'configs', 'lua-block-simple')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('http', 1),
('{', 1),
('init_by_lua_block', 2),
@@ -75,7 +75,7 @@ def test_lex_lua_block_larger():
dirname = os.path.join(tests_dir, 'configs', 'lua-block-larger')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('http', 1),
('{', 1),
('content_by_lua_block', 2),
@@ -126,7 +126,7 @@ def test_lex_lua_block_tricky():
dirname = os.path.join(tests_dir, 'configs', 'lua-block-tricky')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('http', 1),
('{', 1),
('server', 2),
23 changes: 19 additions & 4 deletions tests/test_lex.py
@@ -9,7 +9,7 @@ def test_simple_config():
dirname = os.path.join(here, 'configs', 'simple')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('events', 1), ('{', 1), ('worker_connections', 2), ('1024', 2),
(';', 2), ('}', 3), ('http', 5), ('{', 5), ('server', 6), ('{', 6),
('listen', 7), ('127.0.0.1:8080', 7), (';', 7), ('server_name', 8),
@@ -23,7 +23,7 @@ def test_with_config_comments():
dirname = os.path.join(here, 'configs', 'with-comments')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('events', 1), ('{', 1), ('worker_connections', 2), ('1024', 2),
(';', 2), ('}', 3),('#comment', 4), ('http', 5), ('{', 5),
('server', 6), ('{', 6), ('listen', 7), ('127.0.0.1:8080', 7),
@@ -38,7 +38,7 @@ def test_messy_config():
dirname = os.path.join(here, 'configs', 'messy')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(tokens) == [
assert list((token, line) for token, line, quoted in tokens) == [
('user', 1), ('nobody', 1), (';', 1),
('# hello\\n\\\\n\\\\\\n worlddd \\#\\\\#\\\\\\# dfsf\\n \\\\n \\\\\\n ', 2),
('events', 3), ('{', 3), ('worker_connections', 3), ('2048', 3),
@@ -73,7 +73,22 @@ def test_quote_behavior():
dirname = os.path.join(here, 'configs', 'quote-behavior')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(token for token, line in tokens) == [
assert list(token for token, line, quoted in tokens) == [
'outer-quote', 'left', '-quote', 'right-"quote"', 'inner"-"quote', ';',
'', '', 'left-empty', 'right-empty""', 'inner""empty', 'right-empty-single"', ';',
]


def test_quoted_right_brace():
dirname = os.path.join(here, 'configs', 'quoted-right-brace')
config = os.path.join(dirname, 'nginx.conf')
tokens = crossplane.lex(config)
assert list(token for token, line, quoted in tokens) == [
'events', '{', '}', 'http', '{', 'log_format', 'main', 'escape=json',
'{ "@timestamp": "$time_iso8601", ', '"server_name": "$server_name", ',
'"host": "$host", ', '"status": "$status", ',
'"request": "$request", ', '"uri": "$uri", ', '"args": "$args", ',
'"https": "$https", ', '"request_method": "$request_method", ',
'"referer": "$http_referer", ', '"agent": "$http_user_agent"', '}',
';', '}'
]