Open · wants to merge 1 commit into master
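
With this change, a compound (array) assignment no longer aborts the whole
parse when error recovery is enabled: the array word is kept as a single
'unimplemented' node and parsing continues past it. A minimal sketch of the
intended behavior, assuming the proceedonerror keyword is threaded through
the public bashlex.parse entry point the same way the test suite's parse
helper passes it:

    import bashlex
    from bashlex import errors

    s = "num1=2 arr=(1 2 3) num2=3"

    # with error recovery on, the array assignment becomes an
    # 'unimplemented' node and its neighbors still parse normally
    trees = bashlex.parse(s, proceedonerror=True)
    for part in trees[0].parts:
        print(part.kind, part.word)
    # per the new test: assignment, unimplemented, assignment

    # with recovery off, the opening '(' is rejected as before
    try:
        bashlex.parse(s, proceedonerror=False)
    except errors.ParsingError:
        pass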
1 change: 1 addition & 0 deletions bashlex/flags.py
@@ -52,4 +52,5 @@
     'ASSNGLOBAL', # word is a global assignment to declare (declare/typeset -g)
     'NOBRACE', # Don't perform brace expansion
     'ASSIGNINT', # word is an integer assignment to declare
+    'UNIMPLEMENTED', # word uses an unimplemented feature (e.g. arrays)
 ])
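
The new flag follows the same tag-then-check pattern as the existing word
flags: the tokenizer records flags on each word token as it reads it, and
the parser later inspects those flags to pick the node kind. A
self-contained toy illustration of that pattern (simplified classes, not
bashlex's actual flag machinery):

    # toy sketch: the tokenizer side tags the word,
    # the parser side checks the tag
    class Word:
        def __init__(self, value):
            self.value = value
            self.flags = set()

    w = Word('arr=(1 2 3)')
    w.flags.add('UNIMPLEMENTED')     # tokenizer: tag the word

    kind = 'assignment'
    if 'UNIMPLEMENTED' in w.flags:   # parser: downgrade the node kind
        kind = 'unimplemented'
    assert kind == 'unimplemented'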
3 changes: 3 additions & 0 deletions bashlex/parser.py
@@ -173,6 +173,8 @@ def p_simple_command_element(p):
     # change the word node to an assignment if necessary
     if p.slice[1].ttype == tokenizer.tokentype.ASSIGNMENT_WORD:
         p[0][0].kind = 'assignment'
+        if (p.slice[1].flags & flags.word.UNIMPLEMENTED):
+            p[0][0].kind = 'unimplemented'
 
 def p_redirection_list(p):
     '''redirection_list : redirection
@@ -720,6 +722,7 @@ def __init__(self, s, strictmode=True, expansionlimit=None, tokenizerargs=None,
         self.tok = tokenizer.tokenizer(s,
                                        parserstate=self.parserstate,
                                        strictmode=strictmode,
+                                       proceedonerror=proceedonerror,
                                        **tokenizerargs)
 
         self.redirstack = self.tok.redirstack
27 changes: 24 additions & 3 deletions bashlex/tokenizer.py
@@ -199,7 +199,8 @@ def nopos(self):
 
 class tokenizer(object):
     def __init__(self, s, parserstate, strictmode=True, eoftoken=None,
-                 lastreadtoken=None, tokenbeforethat=None, twotokensago=None):
+                 lastreadtoken=None, tokenbeforethat=None, twotokensago=None,
+                 proceedonerror=None):
         self._shell_eof_token = eoftoken
         self._shell_input_line = s
         self._added_newline = False
@@ -232,6 +233,7 @@ def __init__(self, s, parserstate, strictmode=True, eoftoken=None,
         self._positions = []
 
         self._strictmode = strictmode
+        self._proceedonerror = proceedonerror
 
         # hack: the tokenizer needs access to the stack of redirection
         # nodes when it reads heredocs. this instance is shared between
@@ -391,7 +393,7 @@ def _readtoken(self):
     def _readtokenword(self, c):
         d = {}
         d['all_digit_token'] = c.isdigit()
-        d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = False
+        d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = d['unimplemented'] = False
 
         tokenword = []
 
@@ -467,6 +469,19 @@ def handleshellexp():
 
         # bashlex/parse.y L4699 ARRAY_VARS
 
+        def handlecompoundassignment():
+            # note: this only finds the matching parenthesis, so parsing can proceed
+            handled = False
+            if self._proceedonerror:
+                ttok = self._parse_matched_pair(None, '(', ')')
+                if ttok:
+                    tokenword.append(c)
+                    tokenword.extend(ttok)
+                    d['compound_assignment'] = True
+                    d['unimplemented'] = True
+                    handled = True
+            return handled
+
         def handleescapedchar():
             tokenword.append(c)
             d['all_digit_token'] &= c.isdigit()
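
Note that handlecompoundassignment does not actually parse array syntax; it
reuses the tokenizer's _parse_matched_pair to skip ahead to the matching
parenthesis so the whole name=(...) word is consumed intact. A standalone
toy version of that scan, assuming the real method returns everything up to
and including the matching ')' (it also handles quoting and escapes, which
this sketch ignores):

    def matched_pair(s, i, open_, close):
        # scan s from index i until the parenthesis opened just before
        # i is balanced; return the consumed text, ')' included
        depth, start = 1, i
        while i < len(s):
            if s[i] == open_:
                depth += 1
            elif s[i] == close:
                depth -= 1
                if depth == 0:
                    return s[start:i + 1]
            i += 1
        raise ValueError('unmatched %r' % open_)

    assert matched_pair('1 2 3) rest', 0, '(', ')') == '1 2 3)'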
@@ -512,6 +527,8 @@ def handleescapedchar():
             elif _shellexp(c):
                 gotonext = not handleshellexp()
             # bashlex/parse.y L4699
+            elif c == '(' and handlecompoundassignment():
+                gotonext = True
             if not gotonext:
                 if _shellbreak(c):
                     self._ungetc(c)
@@ -573,14 +590,18 @@ def handleescapedchar():
             tokenword.flags.add(wordflags.HASDOLLAR)
         if d['quoted']:
             tokenword.flags.add(wordflags.QUOTED)
-        if d['compound_assignment'] and tokenword[-1] == ')':
+        if d['compound_assignment'] and tokenword.value[-1] == ')':
             tokenword.flags.add(wordflags.COMPASSIGN)
         if self._is_assignment(tokenword.value, bool(self._parserstate & parserflags.COMPASSIGN)):
             tokenword.flags.add(wordflags.ASSIGNMENT)
         if self._assignment_acceptable(self._last_read_token):
             tokenword.flags.add(wordflags.NOSPLIT)
         if self._parserstate & parserflags.COMPASSIGN:
             tokenword.flags.add(wordflags.NOGLOB)
+        if d['compound_assignment']:
+            tokenword.flags.add(wordflags.ASSIGNARRAY)
+        if d['unimplemented']:
+            tokenword.flags.add(wordflags.UNIMPLEMENTED)
 
         # bashlex/parse.y L4865
         if self._command_token_position(self._last_read_token):
15 changes: 13 additions & 2 deletions tests/test_parser.py
@@ -91,8 +91,8 @@ def patternnode(s, *parts):
 def functionnode(s, name, body, *parts):
     return ast.node(kind='function', name=name, body=body, parts=list(parts), s=s)
 
-def unimplementednode(s, *parts):
-    return ast.node(kind='unimplemented', parts=list(parts), s=s)
+def unimplementednode(s, *parts, **kwargs):
+    return ast.node(kind='unimplemented', parts=list(parts), s=s, **kwargs)
 
 class test_parser(unittest.TestCase):
 
@@ -1250,3 +1250,14 @@ def test_unimplemented(self):
                              proceedonerror=True)
         with self.assertRaises(NotImplementedError):
             parse(s, proceedonerror=False)
+
+    def test_array_assignment(self):
+        s = "num1=2 arr=(1 2 3) num2=3"
+        self.assertASTEquals(s,
+                             commandnode(s,
+                                 assignmentnode('num1=2', 'num1=2'),
+                                 unimplementednode('arr=(1 2 3)', word='arr=(1 2 3)'),
+                                 assignmentnode('num2=3', 'num2=3')),
+                             proceedonerror=True)
+        with self.assertRaises(errors.ParsingError):
+            parse(s, proceedonerror=False)