From bf1043179d6c7b63fa0b25cd4a5c960b2674ef56 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Fri, 11 Nov 2022 21:47:37 -0800 Subject: [PATCH] Refactor generate_cases.py. Tweak output a teensy bit. --- Python/generated_cases.c.h | 3 +- Tools/cases_generator/generate_cases.py | 519 +++++++++++++++--------- Tools/cases_generator/parser.py | 5 +- 3 files changed, 326 insertions(+), 201 deletions(-) diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 0236e3c77170c8..b68b93cf87ea47 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -1,4 +1,5 @@ // This file is generated by Tools/cases_generator/generate_cases.py +// from Python/bytecodes.c // Do not edit! TARGET(NOP) { @@ -3721,7 +3722,7 @@ TARGET(BINARY_OP) { PREDICTED(BINARY_OP); - assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1); + static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size"); PyObject *rhs = PEEK(1); PyObject *lhs = PEEK(2); _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 287b9de1d7da0f..2fbc8577b2a00f 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -1,52 +1,331 @@ -"""Generate the main interpreter switch.""" +"""Generate the main interpreter switch. -# Write the cases to generated_cases.c.h, which is #included in ceval.c. - -# TODO: Reuse C generation framework from deepfreeze.py? +Reads the instruction definitions from bytecodes.c. +Writes the cases to generated_cases.c.h, which is #included in ceval.c. +""" import argparse import os import re import sys -from typing import TextIO, cast +import typing import parser -from parser import InstDef # TODO: Use parser.InstDef +DEFAULT_INPUT = "Python/bytecodes.c" +DEFAULT_OUTPUT = "Python/generated_cases.c.h" +BEGIN_MARKER = "// BEGIN BYTECODES //" +END_MARKER = "// END BYTECODES //" RE_PREDICTED = r"(?s)(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);" arg_parser = argparse.ArgumentParser() -arg_parser.add_argument("-i", "--input", type=str, default="Python/bytecodes.c") -arg_parser.add_argument("-o", "--output", type=str, default="Python/generated_cases.c.h") - - -def parse_cases( - src: str, filename: str|None = None -) -> tuple[list[InstDef], list[parser.Super], list[parser.Family]]: - psr = parser.Parser(src, filename=filename) - # Skip until BEGIN marker - while tkn := psr.next(raw=True): - if tkn.text == "// BEGIN BYTECODES //": - break - else: - raise psr.make_syntax_error(f"Couldn't find {text!r} in {psr.filename}") - instrs: list[InstDef] = [] - supers: list[parser.Super] = [] - families: list[parser.Family] = [] - # Parse until END marker - while not psr.eof() and psr.peek(raw=True).text != "// END BYTECODES //": - if inst := psr.inst_def(): - instrs.append(inst) - elif sup := psr.super_def(): - supers.append(sup) - elif fam := psr.family_def(): - families.append(fam) +arg_parser.add_argument("-i", "--input", type=str, default=DEFAULT_INPUT) +arg_parser.add_argument("-o", "--output", type=str, default=DEFAULT_OUTPUT) + + +class Analyzer: + """Parse input, analyze it, and write to output.""" + + filename: str + src: str + + def __init__(self, filename: str): + """Read the input file.""" + self.filename = filename + with open(filename) as f: + self.src = f.read() + + instrs: dict[str, parser.InstDef] + supers: dict[str, parser.Super] + families: dict[str, parser.Family] + + def parse(self) -> None: + """Parse the source text.""" + psr = 
parser.Parser(self.src, filename=self.filename)
+
+        # Skip until begin marker
+        while tkn := psr.next(raw=True):
+            if tkn.text == BEGIN_MARKER:
+                break
+        else:
+            raise psr.make_syntax_error(f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}")
+
+        # Parse until end marker
+        self.instrs = {}
+        self.supers = {}
+        self.families = {}
+        while (tkn := psr.peek(raw=True)) and tkn.text != END_MARKER:
+            if instr := psr.inst_def():
+                self.instrs[instr.name] = instr
+            elif super := psr.super_def():
+                self.supers[super.name] = super
+            elif family := psr.family_def():
+                self.families[family.name] = family
+            else:
+                raise psr.make_syntax_error("Unexpected token")
+
+        print(
+            f"Read {len(self.instrs)} instructions, "
+            f"{len(self.supers)} supers, "
+            f"and {len(self.families)} families from {self.filename}",
+            file=sys.stderr,
+        )
+
+    def analyze(self) -> None:
+        """Analyze the inputs.
+
+        Raises SystemExit if there is an error.
+        """
+        self.find_predictions()
+        self.compute_cache_offsets()
+        self.compute_stack_inputs()
+        self.compute_stack_outputs()
+        self.map_families()
+        errors = self.check_families()
+        if errors:
+            sys.exit(f"Found {errors} errors")
+
+    predictions: set[str] = set()
+
+    def find_predictions(self) -> None:
+        """Find the instructions that need PREDICTED() labels."""
+        self.predictions = set()
+        for instr in self.instrs.values():
+            for target in re.findall(RE_PREDICTED, instr.block.text):
+                self.predictions.add(target)
+
+    cache_offsets: dict[str, int]
+
+    def compute_cache_offsets(self) -> None:
+        """Compute the amount of cache space used per instruction."""
+        self.cache_offsets = {}
+        for instr in self.instrs.values():
+            cache_offset = 0
+            for effect in instr.inputs:
+                if isinstance(effect, parser.CacheEffect):
+                    cache_offset += effect.size
+            self.cache_offsets[instr.name] = cache_offset
+
+    stack_inputs: dict[str, int]
+
+    def compute_stack_inputs(self) -> None:
+        """Compute the number of stack items consumed per instruction."""
+        self.stack_inputs = {}
+        for instr in self.instrs.values():
+            stack_input = 0
+            for effect in instr.inputs:
+                if isinstance(effect, parser.StackEffect):
+                    stack_input += 1
+            self.stack_inputs[instr.name] = stack_input
+
+    stack_outputs: dict[str, int]
+
+    def compute_stack_outputs(self) -> None:
+        """Compute the number of stack items produced per instruction."""
+        self.stack_outputs = {}
+        for instr in self.instrs.values():
+            stack_output = len(instr.outputs)
+            self.stack_outputs[instr.name] = stack_output
+
+    family: dict[str, parser.Family]  # instruction name -> family
+
+    def map_families(self) -> None:
+        """Map instruction names back to their family, if they have one."""
+        self.family = {}
+        for family in self.families.values():
+            for member in family.members:
+                self.family[member] = family
+
+    def check_families(self) -> int:
+        """Check each family:
+
+        - Must have at least 2 members
+        - All members must be known instructions
+        - All members must have the same cache, input and output effects
+        """
+        errors = 0
+        for family in self.families.values():
+            if len(family.members) < 2:
+                print(f"Family {family.name!r} has insufficient members", file=sys.stderr)
+                errors += 1
+            members = [member for member in family.members if member in self.instrs]
+            if members != family.members:
+                unknown = set(family.members) - set(members)
+                print(f"Family {family.name!r} has unknown members: {unknown}", file=sys.stderr)
+                errors += 1
+            if len(members) < 2:
+                continue
+            head = members[0]
+            cache = self.cache_offsets[head]
+            
input = self.stack_inputs[head] + output = self.stack_outputs[head] + for member in members[1:]: + c = self.cache_offsets[member] + i = self.stack_inputs[member] + o = self.stack_outputs[member] + if (c, i, o) != (cache, input, output): + errors += 1 + print( + f"Family {family.name!r} has inconsistent " + f"(cache, inputs, outputs) effects:", + file=sys.stderr, + ) + print( + f" {family.members[0]} = {(cache, input, output)}; " + f"{member} = {(c, i, o)}", + file=sys.stderr, + ) + return errors + + indent: str = " " * 8 + + def write_instructions(self, filename: str) -> None: + """Write instructions to output file.""" + indent = self.indent + with open(filename, "w") as f: + # Write provenance header + f.write(f"// This file is generated by {os.path.relpath(__file__)}\n") + f.write(f"// from {os.path.relpath(self.filename)}\n") + f.write(f"// Do not edit!\n") + + # Write regular instructions + for name, instr in self.instrs.items(): + f.write(f"\n{indent}TARGET({name}) {{\n") + if name in self.predictions: + f.write(f"{indent} PREDICTED({name});\n") + self.write_instr(f, instr) + if not always_exits(instr.block): + f.write(f"{indent} DISPATCH();\n") + f.write(f"{indent}}}\n") + + # Write super-instructions + for name, sup in self.supers.items(): + components = [self.instrs[name] for name in sup.ops] + f.write(f"\n{indent}TARGET({sup.name}) {{\n") + for i, instr in enumerate(components): + if i > 0: + f.write(f"{indent} NEXTOPARG();\n") + f.write(f"{indent} next_instr++;\n") + f.write(f"{indent} {{\n") + self.write_instr(f, instr, dedent=-4) + f.write(f" {indent}}}\n") + f.write(f"{indent} DISPATCH();\n") + f.write(f"{indent}}}\n") + + print( + f"Wrote {len(self.instrs)} instructions and " + f"{len(self.supers)} super-instructions to {filename}", + file=sys.stderr, + ) + + def write_instr( + self, f: typing.TextIO, instr: parser.InstDef, dedent: int = 0 + ) -> None: + """Write one instruction, sans prologue and epilogue.""" + indent = self.indent + if dedent < 0: + indent += " " * -dedent # DO WE NEED THIS? 
+
+        # Get cache offset and maybe assert that it is correct
+        cache_offset = self.cache_offsets.get(instr.name, 0)
+        if family := self.family.get(instr.name):
+            if instr.name == family.members[0]:
+                if cache_size := family.size:
+                    f.write(
+                        f"{indent}    static_assert({cache_size} == "
+                        f'{cache_offset}, "incorrect cache size");\n'
+                    )
+
+        # Separate cache effects from input stack effects
+        cache = [
+            effect for effect in instr.inputs if isinstance(effect, parser.CacheEffect)
+        ]
+        stack = [
+            effect for effect in instr.inputs if isinstance(effect, parser.StackEffect)
+        ]
+
+        # Write cache effect variable declarations
+        for ceffect in cache:
+            if ceffect.name != "unused":
+                bits = ceffect.size * 16
+                f.write(
+                    f"{indent}    PyObject *{ceffect.name} = "
+                    f"read{bits}(next_instr + {cache_offset});\n"
+                )
+
+        # Write input stack effect variable declarations and initializations
+        for i, seffect in enumerate(reversed(stack), 1):
+            if seffect.name != "unused":
+                f.write(f"{indent}    PyObject *{seffect.name} = PEEK({i});\n")
+
+        # Write output stack effect variable declarations
+        for seffect in instr.outputs:
+            if seffect.name != "unused":
+                f.write(f"{indent}    PyObject *{seffect.name};\n")
+
+        self.write_instr_body(f, instr, dedent, len(stack))
+
+        # Skip the rest if the block always exits
+        if always_exits(instr.block):
+            return
+
+        # Write net stack growth/shrinkage
+        diff = len(instr.outputs) - len(stack)
+        if diff > 0:
+            f.write(f"{indent}    STACK_GROW({diff});\n")
+        elif diff < 0:
+            f.write(f"{indent}    STACK_SHRINK({-diff});\n")
+
+        # Write output stack effect assignments
+        input_names = [seffect.name for seffect in stack]
+        for i, output in enumerate(reversed(instr.outputs), 1):
+            if output.name not in input_names and output.name != "unused":
+                f.write(f"{indent}    POKE({i}, {output.name});\n")
+
+        # Write cache effect
+        if cache_offset:
+            f.write(f"{indent}    next_instr += {cache_offset};\n")
+
+    def write_instr_body(
+        self, f: typing.TextIO, instr: parser.InstDef, dedent: int, ninputs: int
+    ) -> None:
+        """Write the instruction body."""
+
+        # Get lines of text with proper dedent
+        blocklines = instr.block.to_text(dedent=dedent).splitlines(True)
+
+        # Remove blank lines from both ends
+        while blocklines and not blocklines[0].strip():
+            blocklines.pop(0)
+        while blocklines and not blocklines[-1].strip():
+            blocklines.pop()
+
+        # Remove leading '{' and trailing '}'
+        assert blocklines and blocklines[0].strip() == "{"
+        assert blocklines and blocklines[-1].strip() == "}"
+        blocklines.pop()
+        blocklines.pop(0)
+
+        # Remove trailing blank lines
+        while blocklines and not blocklines[-1].strip():
+            blocklines.pop()
+
+        # Write the body, substituting a goto for ERROR_IF()
+        for line in blocklines:
+            if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line):
+                space, cond, label = m.groups()
+                # ERROR_IF() must remove the inputs from the stack.
+                # The code block is responsible for DECREF()ing them.
+ if ninputs: + f.write(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n") + else: + f.write(f"{space}if ({cond}) goto {label};\n") + else: + f.write(line) def always_exits(block: parser.Block) -> bool: + """Determine whether a block always ends in a return/goto/etc.""" text = block.text lines = text.splitlines() while lines and not lines[-1].strip(): @@ -58,175 +337,21 @@ def always_exits(block: parser.Block) -> bool: return False line = lines.pop().rstrip() # Indent must match exactly (TODO: Do something better) - if line[:12] != " "*12: + if line[:12] != " " * 12: return False line = line[12:] - return line.startswith(("goto ", "return ", "DISPATCH", "GO_TO_", "Py_UNREACHABLE()")) - - -def find_cache_size(instr: InstDef, families: list[parser.Family]) -> str | None: - for family in families: - if instr.name == family.members[0]: - return family.size - - -def write_instr( - instr: InstDef, predictions: set[str], indent: str, f: TextIO, dedent: int = 0, cache_size: str | None = None -) -> int: - # Returns cache offset - if dedent < 0: - indent += " " * -dedent - # Separate stack inputs from cache inputs - input_names: set[str] = set() - stack: list[parser.StackEffect] = [] - cache: list[parser.CacheEffect] = [] - for input in instr.inputs: - if isinstance(input, parser.StackEffect): - stack.append(input) - input_names.add(input.name) - else: - assert isinstance(input, parser.CacheEffect), input - cache.append(input) - outputs = instr.outputs - cache_offset = 0 - for ceffect in cache: - if ceffect.name != "unused": - bits = ceffect.size * 16 - f.write(f"{indent} PyObject *{ceffect.name} = read{bits}(next_instr + {cache_offset});\n") - cache_offset += ceffect.size - if cache_size: - f.write(f"{indent} assert({cache_size} == {cache_offset});\n") - # TODO: Is it better to count forward or backward? - for i, effect in enumerate(reversed(stack), 1): - if effect.name != "unused": - f.write(f"{indent} PyObject *{effect.name} = PEEK({i});\n") - for output in instr.outputs: - if output.name not in input_names and output.name != "unused": - f.write(f"{indent} PyObject *{output.name};\n") - blocklines = instr.block.to_text(dedent=dedent).splitlines(True) - # Remove blank lines from ends - while blocklines and not blocklines[0].strip(): - blocklines.pop(0) - while blocklines and not blocklines[-1].strip(): - blocklines.pop() - # Remove leading '{' and trailing '}' - assert blocklines and blocklines[0].strip() == "{" - assert blocklines and blocklines[-1].strip() == "}" - blocklines.pop() - blocklines.pop(0) - # Remove trailing blank lines - while blocklines and not blocklines[-1].strip(): - blocklines.pop() - # Write the body - ninputs = len(stack) - for line in blocklines: - if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*$", line): - space, cond, label = m.groups() - # ERROR_IF() must remove the inputs from the stack. - # The code block is responsible for DECREF()ing them. 
- if ninputs: - f.write(f"{space}if ({cond}) goto pop_{ninputs}_{label};\n") - else: - f.write(f"{space}if ({cond}) goto {label};\n") - else: - f.write(line) - if always_exits(instr.block): - # None of the rest matters - return cache_offset - # Stack effect - noutputs = len(outputs) - diff = noutputs - ninputs - if diff > 0: - f.write(f"{indent} STACK_GROW({diff});\n") - elif diff < 0: - f.write(f"{indent} STACK_SHRINK({-diff});\n") - for i, output in enumerate(reversed(outputs), 1): - if output.name not in input_names and output.name != "unused": - f.write(f"{indent} POKE({i}, {output.name});\n") - # Cache effect - if cache_offset: - f.write(f"{indent} next_instr += {cache_offset};\n") - return cache_offset - - -def write_cases( - f: TextIO, instrs: list[InstDef], supers: list[parser.Super], families: list[parser.Family] -) -> dict[str, tuple[int, int, int]]: - predictions: set[str] = set() - for instr in instrs: - for target in re.findall(RE_PREDICTED, instr.block.text): - predictions.add(target) - indent = " " - f.write(f"// This file is generated by {os.path.relpath(__file__)}\n") - f.write(f"// Do not edit!\n") - instr_index: dict[str, InstDef] = {} - effects_table: dict[str, tuple[int, int, int]] = {} # name -> (ninputs, noutputs, cache_offset) - for instr in instrs: - instr_index[instr.name] = instr - f.write(f"\n{indent}TARGET({instr.name}) {{\n") - if instr.name in predictions: - f.write(f"{indent} PREDICTED({instr.name});\n") - cache_offset = write_instr( - instr, predictions, indent, f, - cache_size=find_cache_size(instr, families) - ) - effects_table[instr.name] = len(instr.inputs), len(instr.outputs), cache_offset - if not always_exits(instr.block): - f.write(f"{indent} DISPATCH();\n") - # Write trailing '}' - f.write(f"{indent}}}\n") - - for sup in supers: - components = [instr_index[name] for name in sup.ops] - f.write(f"\n{indent}TARGET({sup.name}) {{\n") - for i, instr in enumerate(components): - if i > 0: - f.write(f"{indent} NEXTOPARG();\n") - f.write(f"{indent} next_instr++;\n") - f.write(f"{indent} {{\n") - write_instr(instr, predictions, indent, f, dedent=-4) - f.write(f" {indent}}}\n") - f.write(f"{indent} DISPATCH();\n") - f.write(f"{indent}}}\n") - - return effects_table + return line.startswith( + ("goto ", "return ", "DISPATCH", "GO_TO_", "Py_UNREACHABLE()") + ) def main(): - args = arg_parser.parse_args() - with open(args.input) as f: - src = f.read() - instrs, supers, families = parse_cases(src, filename=args.input) - ninstrs = len(instrs) - nsupers = len(supers) - nfamilies = len(families) - print( - f"Read {ninstrs} instructions, {nsupers} supers, " - f"and {nfamilies} families from {args.input}", - file=sys.stderr, - ) - with open(args.output, "w") as f: - effects_table = write_cases(f, instrs, supers, families) - print( - f"Wrote {ninstrs + nsupers} instructions to {args.output}", - file=sys.stderr, - ) - # Check that families have consistent effects - errors = 0 - for family in families: - head = effects_table[family.members[0]] - for member in family.members: - if effects_table[member] != head: - errors += 1 - print( - f"Family {family.name!r} has inconsistent effects (inputs, outputs, cache units):", - file=sys.stderr, - ) - print( - f" {family.members[0]} = {head}; {member} = {effects_table[member]}", - ) - if errors: - sys.exit(1) + """Parse command line, parse input, analyze, write output.""" + args = arg_parser.parse_args() # Prints message and sys.exit(2) on error + a = Analyzer(args.input) # Raises OSError if file not found + a.parse() # Raises 
SyntaxError on failure
+    a.analyze()  # Prints messages and raises SystemExit on failure
+    a.write_instructions(args.output)  # Raises OSError if file can't be written
 
 
 if __name__ == "__main__":
     main()
diff --git a/Tools/cases_generator/parser.py b/Tools/cases_generator/parser.py
index 1f855312aeba9f..e4df69993edb1c 100644
--- a/Tools/cases_generator/parser.py
+++ b/Tools/cases_generator/parser.py
@@ -58,18 +58,17 @@ class Block(Node):
 
 @dataclass
 class Effect(Node):
-    pass
+    name: str
 
 
 @dataclass
 class StackEffect(Effect):
-    name: str
+    pass  # TODO: type, condition
 
 
 @dataclass
 class CacheEffect(Effect):
-    name: str
     size: int
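
For context, the DSL that parse() reads between the BEGIN/END markers of
bytecodes.c couples each instruction body to its declared stack and cache
effects. The definition below is an illustrative sketch only, not a verbatim
excerpt from bytecodes.c: the instruction names and body are invented, and
the exact grammar is whatever Tools/cases_generator/parser.py accepts.

    // Hypothetical instruction definition in the bytecodes.c style.
    // "unused/1" is a cache effect: one 16-bit inline cache entry
    // (compute_cache_offsets() sums these sizes per instruction).
    // "left, right -- res" declares two stack inputs and one output.
    inst(BINARY_OP_EXAMPLE, (unused/1, left, right -- res)) {
        res = PyNumber_Add(left, right);
        Py_DECREF(left);
        Py_DECREF(right);
        ERROR_IF(res == NULL, error);
    }

    // A super-instruction concatenates existing instructions; a family
    // groups an instruction with its specializations for check_families().
    super(EXAMPLE_SUPER) = BINARY_OP_EXAMPLE + BINARY_OP_EXAMPLE;
    family(example) = { BINARY_OP_EXAMPLE, BINARY_OP_EXAMPLE_2 };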
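
Given a definition like the sketch above, write_instr() and write_instr_body()
would emit roughly the following case (two stack inputs, one output, one cache
entry). ERROR_IF(res == NULL, error) turns into a goto to a pop_2_error label
because the two inputs are still on the stack at that point, and the net stack
effect (here STACK_SHRINK) is applied before the output is stored with POKE():

    TARGET(BINARY_OP_EXAMPLE) {
        PyObject *right = PEEK(1);
        PyObject *left = PEEK(2);
        PyObject *res;
        res = PyNumber_Add(left, right);
        Py_DECREF(left);
        Py_DECREF(right);
        if (res == NULL) goto pop_2_error;
        STACK_SHRINK(1);
        POKE(1, res);
        next_instr += 1;
        DISPATCH();
    }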
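
Super-instructions are emitted by splicing the component bodies together, with
NEXTOPARG() and next_instr++ between components so each one decodes its own
oparg; every body is wrapped in its own braces so the PEEK/POKE temporaries of
one component cannot collide with the next. Schematically:

    TARGET(EXAMPLE_SUPER) {
        {
            /* ... first component, as emitted by write_instr() ... */
        }
        NEXTOPARG();
        next_instr++;
        {
            /* ... second component ... */
        }
        DISPATCH();
    }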