forked from micropython/micropython
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
tools: Add C middle-processor to make builtin tables proper hash tables.
- Loading branch information
Showing
1 changed file
with
262 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,262 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
This is a middle-processor for MicroPython source files. It takes the output | ||
of the C preprocessor, has the option to change it, then feeds this into the | ||
C compiler. | ||
It currently has the ability to reorder static hash tables so they are actually | ||
hashed, resulting in faster lookup times at runtime. | ||
To use, configure the Python variables below, and add the following line to the | ||
Makefile: | ||
CFLAGS += -no-integrated-cpp -B$(shell pwd)/../tools | ||
""" | ||
|
||
import sys | ||
import os | ||
import re | ||
|
||
################################################################################ | ||
# these are the configuration variables | ||
# TODO somehow make them externally configurable | ||
|
||
# this is the path to the true C compiler | ||
cc1_path = '/usr/lib/gcc/x86_64-unknown-linux-gnu/5.3.0/cc1' | ||
#cc1_path = '/usr/lib/gcc/arm-none-eabi/5.3.0/cc1' | ||
|
||
# this must be the same as MICROPY_QSTR_BYTES_IN_HASH | ||
bytes_in_qstr_hash = 2 | ||
|
||
# this must be 1 or more (can be a decimal) | ||
# larger uses more code size but yields faster lookups | ||
table_size_mult = 1 | ||
|
||
# these control output during processing | ||
print_stats = True | ||
print_debug = False | ||
|
||
# end configuration variables | ||
################################################################################ | ||
|
||
# precompile regexs | ||
re_preproc_line = re.compile(r'# [0-9]+ ') | ||
re_map_entry = re.compile(r'\{.+?\(MP_QSTR_([A-Za-z0-9_]+)\).+\},') | ||
re_mp_obj_dict_t = re.compile(r'(?P<head>(static )?const mp_obj_dict_t (?P<id>[a-z0-9_]+) = \{ \.base = \{&mp_type_dict\}, \.map = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$') | ||
re_mp_map_t = re.compile(r'(?P<head>(static )?const mp_map_t (?P<id>[a-z0-9_]+) = \{ \.all_keys_are_qstrs = 1, \.is_fixed = 1, \.is_ordered = )1(?P<tail>, \.used = .+ };)$') | ||
re_mp_rom_map_elem_t = re.compile(r'static const mp_rom_map_elem_t [a-z_0-9]+\[\] = {$') | ||
|
||
# this must match the equivalent function in qstr.c | ||
def compute_hash(qstr): | ||
hash = 5381 | ||
for char in qstr: | ||
hash = (hash * 33) ^ ord(char) | ||
# Make sure that valid hash is never zero, zero means "hash not computed" | ||
return (hash & ((1 << (8 * bytes_in_qstr_hash)) - 1)) or 1 | ||
|
||
# this algo must match the equivalent in map.c | ||
def hash_insert(map, key, value): | ||
hash = compute_hash(key) | ||
pos = hash % len(map) | ||
start_pos = pos | ||
if print_debug: | ||
print(' insert %s: start at %u/%u -- ' % (key, pos, len(map)), end='') | ||
while True: | ||
if map[pos] is None: | ||
# found empty slot, so key is not in table | ||
if print_debug: | ||
print('put at %u' % pos) | ||
map[pos] = (key, value) | ||
return | ||
else: | ||
# not yet found, keep searching | ||
if map[pos][0] == key: | ||
raise AssertionError("duplicate key '%s'" % (key,)) | ||
pos = (pos + 1) % len(map) | ||
assert pos != start_pos | ||
|
||
def hash_find(map, key): | ||
hash = compute_hash(key) | ||
pos = hash % len(map) | ||
start_pos = pos | ||
attempts = 0 | ||
while True: | ||
attempts += 1 | ||
if map[pos] is None: | ||
return attempts, None | ||
elif map[pos][0] == key: | ||
return attempts, map[pos][1] | ||
else: | ||
pos = (pos + 1) % len(map) | ||
if pos == start_pos: | ||
return attempts, None | ||
|
||
def process_map_table(file, line, output): | ||
output.append(line) | ||
|
||
# consume all lines that are entries of the table and concat them | ||
# (we do it this way because there can be multiple entries on one line) | ||
table_contents = [] | ||
while True: | ||
line = file.readline() | ||
if len(line) == 0: | ||
print('unexpected end of input') | ||
sys.exit(1) | ||
line = line.strip() | ||
if len(line) == 0: | ||
# empty line | ||
continue | ||
if re_preproc_line.match(line): | ||
# preprocessor line number comment | ||
continue | ||
if line == '};': | ||
# end of table (we assume it appears on a single line) | ||
break | ||
table_contents.append(line) | ||
|
||
# make combined string of entries | ||
entries_str = ''.join(table_contents) | ||
|
||
# split into individual entries | ||
entries = [] | ||
while entries_str: | ||
# look for single entry, by matching nested braces | ||
match = None | ||
if entries_str[0] == '{': | ||
nested_braces = 0 | ||
for i in range(len(entries_str)): | ||
if entries_str[i] == '{': | ||
nested_braces += 1 | ||
elif entries_str[i] == '}': | ||
nested_braces -= 1 | ||
if nested_braces == 0: | ||
match = re_map_entry.match(entries_str[:i + 2]) | ||
break | ||
|
||
if not match: | ||
print('unknown line in table:', entries_str) | ||
sys.exit(1) | ||
|
||
# extract single entry | ||
line = match.group(0) | ||
qstr = match.group(1) | ||
entries_str = entries_str[len(line):].lstrip() | ||
|
||
# add the qstr and the whole line to list of all entries | ||
entries.append((qstr, line)) | ||
|
||
# sort entries so hash table construction is deterministic | ||
entries.sort() | ||
|
||
# create hash table | ||
map = [None] * int(len(entries) * table_size_mult) | ||
for qstr, line in entries: | ||
# We assume that qstr does not have any escape sequences in it. | ||
# This is reasonably safe, since keys in a module or class dict | ||
# should be standard identifiers. | ||
# TODO verify this and raise an error if escape sequence found | ||
hash_insert(map, qstr, line) | ||
|
||
# compute statistics | ||
total_attempts = 0 | ||
for qstr, _ in entries: | ||
attempts, line = hash_find(map, qstr) | ||
assert line is not None | ||
if print_debug: | ||
print(' %s lookup took %u attempts' % (qstr, attempts)) | ||
total_attempts += attempts | ||
if len(entries): | ||
stats = len(map), len(entries) / len(map), total_attempts / len(entries) | ||
else: | ||
stats = 0, 0, 0 | ||
if print_debug: | ||
print(' table stats: size=%d, load=%.2f, avg_lookups=%.1f' % stats) | ||
|
||
# output hash table | ||
for row in map: | ||
if row is None: | ||
output.append('{ 0, 0 },\n') | ||
else: | ||
output.append(row[1] + '\n') | ||
output.append('};\n') | ||
|
||
# skip to next non-blank line | ||
while True: | ||
line = file.readline() | ||
if len(line) == 0: | ||
print('unexpected end of input') | ||
sys.exit(1) | ||
line = line.strip() | ||
if len(line) == 0: | ||
continue | ||
break | ||
|
||
# transform the is_ordered param from 1 to 0 | ||
match = re_mp_obj_dict_t.match(line) | ||
if match is None: | ||
match = re_mp_map_t.match(line) | ||
if match is None: | ||
print('expecting mp_obj_dict_t or mp_map_t definition') | ||
print(output[0]) | ||
print(line) | ||
sys.exit(1) | ||
line = match.group('head') + '0' + match.group('tail') + '\n' | ||
output.append(line) | ||
|
||
return (match.group('id'),) + stats | ||
|
||
def process_file(filename): | ||
output = [] | ||
file_changed = False | ||
with open(filename, 'rt') as f: | ||
while True: | ||
line = f.readline() | ||
if not line: | ||
break | ||
if re_mp_rom_map_elem_t.match(line): | ||
file_changed = True | ||
stats = process_map_table(f, line, output) | ||
if print_stats: | ||
print(' [%s: size=%d, load=%.2f, avg_lookups=%.1f]' % stats) | ||
else: | ||
output.append(line) | ||
|
||
if file_changed: | ||
if print_debug: | ||
print(' modifying static maps in', output[0].strip()) | ||
with open(filename, 'wt') as f: | ||
for line in output: | ||
f.write(line) | ||
|
||
def main(): | ||
# run actual C compiler | ||
# need to quote args that have special characters in them | ||
def quote(s): | ||
if s.find('<') != -1 or s.find('>') != -1: | ||
return "'" + s + "'" | ||
else: | ||
return s | ||
ret = os.system(cc1_path + ' ' + ' '.join(quote(s) for s in sys.argv[1:])) | ||
if ret != 0: | ||
ret = (ret & 0x7f) or 127 # make it in range 0-127, but non-zero | ||
sys.exit(ret) | ||
|
||
if sys.argv[1] == '-E': | ||
# CPP has been run, now do our processing stage | ||
for i, arg in enumerate(sys.argv): | ||
if arg == '-o': | ||
return process_file(sys.argv[i + 1]) | ||
|
||
print('%s: could not find "-o" option' % (sys.argv[0],)) | ||
sys.exit(1) | ||
elif sys.argv[1] == '-fpreprocessed': | ||
# compiler has been run, nothing more to do | ||
return | ||
else: | ||
# unknown processing stage | ||
print('%s: unknown first option "%s"' % (sys.argv[0], sys.argv[1])) | ||
sys.exit(1) | ||
|
||
if __name__ == '__main__': | ||
main() |