Skip to content

Fix parsing of integer literals with base prefix #106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions esp32_ulp/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,13 +219,13 @@ def fill(self, section, amount, fill_byte):
raise ValueError('fill in bss section not allowed')
if section is TEXT: # TODO: text section should be filled with NOPs
raise ValueError('fill/skip/align in text section not supported')
fill = int(fill_byte or 0).to_bytes(1, 'little') * amount
fill = int(self.opcodes.eval_arg(str(fill_byte or 0))).to_bytes(1, 'little') * amount
self.offsets[section] += len(fill)
if section is not BSS:
self.sections[section].append(fill)

def d_skip(self, amount, fill=None):
amount = int(amount)
amount = int(self.opcodes.eval_arg(amount))
self.fill(self.section, amount, fill)

d_space = d_skip
Expand All @@ -246,7 +246,7 @@ def d_global(self, symbol):
self.symbols.set_global(symbol)

def append_data(self, wordlen, args):
data = [int(arg).to_bytes(wordlen, 'little') for arg in args]
data = [int(self.opcodes.eval_arg(arg)).to_bytes(wordlen, 'little') for arg in args]
self.append_section(b''.join(data))

def d_byte(self, *args):
Expand Down
11 changes: 8 additions & 3 deletions esp32_ulp/opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN

from .soc import *
from .util import split_tokens, validate_expression
from .util import split_tokens, validate_expression, parse_int

# XXX dirty hack: use a global for the symbol table
symbols = None
Expand Down Expand Up @@ -285,7 +285,12 @@ def eval_arg(arg):
_, _, sym_value = symbols.get_sym(token)
parts.append(str(sym_value))
else:
parts.append(token)
try:
# attempt to parse, to convert numbers with base prefix correctly
int_token = parse_int(token)
parts.append(str(int_token))
except ValueError:
parts.append(token)
parts = "".join(parts)
if not validate_expression(parts):
raise ValueError('Unsupported expression: %s' % parts)
Expand All @@ -311,7 +316,7 @@ def arg_qualify(arg):
if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']:
return ARG(COND, arg_lower, arg)
try:
return ARG(IMM, int(arg), arg)
return ARG(IMM, parse_int(arg), arg)
except ValueError:
pass
try:
Expand Down
11 changes: 8 additions & 3 deletions esp32_ulp/opcodes_s2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from ucollections import namedtuple
from uctypes import struct, addressof, LITTLE_ENDIAN, UINT32, BFUINT32, BF_POS, BF_LEN

from .util import split_tokens, validate_expression
from .util import split_tokens, validate_expression, parse_int

# XXX dirty hack: use a global for the symbol table
symbols = None
Expand Down Expand Up @@ -301,7 +301,12 @@ def eval_arg(arg):
_, _, sym_value = symbols.get_sym(token)
parts.append(str(sym_value))
else:
parts.append(token)
try:
# attempt to parse, to convert numbers with base prefix correctly
int_token = parse_int(token)
parts.append(str(int_token))
except ValueError:
parts.append(token)
parts = "".join(parts)
if not validate_expression(parts):
raise ValueError('Unsupported expression: %s' % parts)
Expand All @@ -327,7 +332,7 @@ def arg_qualify(arg):
if arg_lower in ['--', 'eq', 'ov', 'lt', 'gt', 'ge', 'le']:
return ARG(COND, arg_lower, arg)
try:
return ARG(IMM, int(arg), arg)
return ARG(IMM, parse_int(arg), arg)
except ValueError:
pass
try:
Expand Down
12 changes: 12 additions & 0 deletions esp32_ulp/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,18 @@ def validate_expression(param):
return True


def parse_int(literal):
    """
    GNU as compatible parsing of string literals into integers

    GNU as treats a literal that starts with 0 and continues with further
    digits as octal (e.g. "010" is 8). Such literals, with an optional
    leading "-" or "+" sign, are parsed explicitly in base 8 here. Every
    other literal (decimal, 0x.. hex, 0b.. binary, 0o.. octal) is handed to
    int() with base 0, which selects the base from the prefix.
    See: https://sourceware.org/binutils/docs/as/Integers.html

    literal: the integer literal as a string
    Returns the parsed value as an int.
    Raises ValueError if the literal is not a valid integer (for example
    "0900", which has an octal prefix but a non-octal digit).
    """
    # An explicit "+" sign is recognised the same way as "-", so that
    # "+010" and "-010" are both treated as octal.
    has_zero_prefix = (
        literal.startswith("0")
        or literal.startswith("-0")
        or literal.startswith("+0")
    )
    # Stripping the sign and the leading zeros leaves only digits for a
    # bare-zero-prefixed literal. A base prefix such as "0x" leaves a letter
    # behind, and "0"/"-0"/"00" leave nothing, so those use base 0 below.
    if len(literal) >= 2 and has_zero_prefix and literal.lstrip("+-0").isdigit():
        return int(literal, 8)
    return int(literal, 0)


def file_exists(filename):
try:
os.stat(filename)
Expand Down
15 changes: 12 additions & 3 deletions tests/opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN
from esp32_ulp.opcodes import make_ins, make_ins_struct_def
from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND
from esp32_ulp.opcodes import get_reg, get_imm, get_cond, arg_qualify, parse_int, eval_arg, ARG, REG, IMM, SYM, COND
from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT
import esp32_ulp.opcodes as opcodes

Expand Down Expand Up @@ -46,6 +46,7 @@ def test_arg_qualify():
assert arg_qualify('-1') == ARG(IMM, -1, '-1')
assert arg_qualify('1') == ARG(IMM, 1, '1')
assert arg_qualify('0x20') == ARG(IMM, 32, '0x20')
assert arg_qualify('0100') == ARG(IMM, 64, '0100')
assert arg_qualify('0o100') == ARG(IMM, 64, '0o100')
assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000')
assert arg_qualify('eq') == ARG(COND, 'eq', 'eq')
Expand Down Expand Up @@ -96,6 +97,11 @@ def test_eval_arg():
assert eval_arg('const >> 1') == 21
assert eval_arg('(const|4)&0xf') == 0xe

assert eval_arg('0x7') == 7
assert eval_arg('010') == 8
assert eval_arg('-0x7') == -7 # negative
assert eval_arg('~0x7') == -8 # complement

assert_raises(ValueError, eval_arg, 'evil()')
assert_raises(ValueError, eval_arg, 'def cafe()')
assert_raises(ValueError, eval_arg, '1 ^ 2')
Expand All @@ -105,14 +111,17 @@ def test_eval_arg():
opcodes.symbols = None


def assert_raises(exception, func, *args):
def assert_raises(exception, func, *args, message=None):
try:
func(*args)
except exception:
except exception as e:
raised = True
actual_message = e.args[0]
else:
raised = False
assert raised
if message:
assert actual_message == message, '%s == %s' % (actual_message, message)


def test_reg_direct_ulp_addressing():
Expand Down
15 changes: 12 additions & 3 deletions tests/opcodes_s2.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from uctypes import UINT32, BFUINT32, BF_POS, BF_LEN
from esp32_ulp.opcodes_s2 import make_ins, make_ins_struct_def
from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, eval_arg, ARG, REG, IMM, SYM, COND
from esp32_ulp.opcodes_s2 import get_reg, get_imm, get_cond, arg_qualify, parse_int, eval_arg, ARG, REG, IMM, SYM, COND
from esp32_ulp.assemble import SymbolTable, ABS, REL, TEXT
import esp32_ulp.opcodes_s2 as opcodes

Expand Down Expand Up @@ -46,6 +46,7 @@ def test_arg_qualify():
assert arg_qualify('-1') == ARG(IMM, -1, '-1')
assert arg_qualify('1') == ARG(IMM, 1, '1')
assert arg_qualify('0x20') == ARG(IMM, 32, '0x20')
assert arg_qualify('0100') == ARG(IMM, 64, '0100')
assert arg_qualify('0o100') == ARG(IMM, 64, '0o100')
assert arg_qualify('0b1000') == ARG(IMM, 8, '0b1000')
assert arg_qualify('eq') == ARG(COND, 'eq', 'eq')
Expand Down Expand Up @@ -96,6 +97,11 @@ def test_eval_arg():
assert eval_arg('const >> 1') == 21
assert eval_arg('(const|4)&0xf') == 0xe

assert eval_arg('0x7') == 7
assert eval_arg('010') == 8
assert eval_arg('-0x7') == -7 # negative
assert eval_arg('~0x7') == -8 # complement

assert_raises(ValueError, eval_arg, 'evil()')
assert_raises(ValueError, eval_arg, 'def cafe()')
assert_raises(ValueError, eval_arg, '1 ^ 2')
Expand All @@ -105,14 +111,17 @@ def test_eval_arg():
opcodes.symbols = None


def assert_raises(exception, func, *args):
def assert_raises(exception, func, *args, message=None):
try:
func(*args)
except exception:
except exception as e:
raised = True
actual_message = e.args[0]
else:
raised = False
assert raised
if message:
assert actual_message == message, '%s == %s' % (actual_message, message)


def test_reg_direct_ulp_addressing():
Expand Down
45 changes: 44 additions & 1 deletion tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# SPDX-License-Identifier: MIT

import os
from esp32_ulp.util import split_tokens, validate_expression, file_exists
from esp32_ulp.util import split_tokens, validate_expression, parse_int, file_exists

tests = []

Expand All @@ -18,6 +18,19 @@ def test(param):
tests.append(param)


def assert_raises(exception, func, *args, message=None):
    """
    Assert that calling func(*args) raises the given exception.

    exception: exception class (or tuple of classes) expected to be raised
    func: callable to invoke
    args: positional arguments passed to func
    message: if given, the first argument of the raised exception must
             equal this value

    Raises AssertionError if the exception is not raised or if its message
    differs from the expected one.
    """
    try:
        func(*args)
    except exception as e:
        raised = True
        # Exceptions raised without arguments have an empty args tuple;
        # indexing it blindly would fail with an unrelated IndexError.
        actual_message = e.args[0] if e.args else None
    else:
        raised = False
    assert raised, 'expected %r to be raised' % (exception,)
    # Compare against None so that an expected empty message is still checked.
    if message is not None:
        assert actual_message == message, '%s == %s' % (actual_message, message)


@test
def test_split_tokens():
assert split_tokens("") == []
Expand Down Expand Up @@ -69,6 +82,36 @@ def test_validate_expression():
assert validate_expression('def CAFE()') is False


@test
def test_parse_int():
    # Checks parse_int for every supported literal form, with and without a
    # leading minus sign, plus literals that must be rejected.
    # decimal
    assert parse_int("0") == 0, "0 == 0"
    assert parse_int("5") == 5, "5 == 5"
    assert parse_int("-0") == 0, "-0 == 0"
    assert parse_int("-5") == -5, "-5 == -5"
    # hex
    assert parse_int("0x5") == 5, "0x5 == 5"
    assert parse_int("0x5a") == 90, "0x5a == 90"
    assert parse_int("-0x5a") == -90, "-0x5a == -90"
    # binary
    assert parse_int("0b1001") == 9, "0b1001 == 9"
    assert parse_int("-0b1001") == -9, "-0b1001 == -9"
    # octal: both the bare leading-zero form and the explicit 0o prefix
    assert parse_int("07") == 7, "07 == 7"
    assert parse_int("0100") == 64, "0100 == 64"
    assert parse_int("0o210") == 136, "0o210 == 136"
    assert parse_int("00000010") == 8, "00000010 == 8"
    assert parse_int("-07") == -7, "-07 == -7"
    assert parse_int("-0100") == -64, "-0100 == -64"
    assert parse_int("-0o210") == -136, "-0o210 == -136"
    assert parse_int("-00000010") == -8, "-00000010 == -8"
    # negative cases
    # NOTE(review): the expected messages are compared verbatim; presumably
    # they match the wording of the runtime these tests target - confirm
    # before running on a different Python implementation.
    assert_raises(ValueError, parse_int, '0b123', message="invalid syntax for integer with base 2: '123'")
    assert_raises(ValueError, parse_int, '0900', message="invalid syntax for integer with base 8: '0900'")
    assert_raises(ValueError, parse_int, '0o900', message="invalid syntax for integer with base 8: '900'")
    assert_raises(ValueError, parse_int, '0xg', message="invalid syntax for integer with base 16: 'g'")


@test
def test_file_exists():
testfile = '.testfile'
Expand Down