9 changes: 9 additions & 0 deletions examples/array.lol
@@ -0,0 +1,9 @@
/* Demonstrate array syntax */
module io = import("stdio.h");

function main() -> i32 {
let array: Array[i32] = [0, 1, 2, 3];
io::printf("Array: [%d, %d, %d, %d]\n",
array[0], array[1], array[2], array[3]);
return 0;
}
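
For context, a minimal sketch of the nodes this example should produce under the new parser, using simplified stand-ins (Identifier and ItemAccess here are illustrative; the real LolParserItemAccess below also carries start and end positions):

from dataclasses import dataclass
from typing import List, Union

@dataclass(frozen=True)
class Identifier:
    name: str

@dataclass(frozen=True)
class ItemAccess:  # stand-in for LolParserItemAccess
    name: Identifier
    arguments: List["Expr"]

Expr = Union[Identifier, ItemAccess, int, list]

# `Array[i32]` in the annotation parses as an item access on `Array`:
type_expr = ItemAccess(Identifier("Array"), [Identifier("i32")])
# `[0, 1, 2, 3]` goes through parse_list and yields a list of expressions:
value_expr = [0, 1, 2, 3]
print(type_expr, value_expr)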
28 changes: 22 additions & 6 deletions src/compiler/analyzer/lol_analyzer.py
@@ -21,6 +21,8 @@
LolParserFunctionDefinition,
LolParserReturnStatement,
LolParserIfStatement,
LolParserItemAccess,
LolParserOperatorType,
)

################################################################################
@@ -374,7 +376,11 @@ def _parse_expression_recursively(
body_block: List[LolIRStatement],
) -> str:
if isinstance(x, LolParserOperatorExpression):
op_name: str = x.operator
op_name: str = {
LolParserOperatorType.BINARY_INFIX: "infix",
LolParserOperatorType.UNARY_PREFIX: "prefix",
LolParserOperatorType.UNARY_POSTFIX: "postfix",
}[x.type] + f"{x.operator}"
operands: List["LolAnalysisVariable"] = [
self._get_symbol(
module_symbol_table,
@@ -485,7 +491,10 @@ def _parse_statement(
if isinstance(x, LolParserVariableDefinition):
name = x.get_name_as_str()
ast_data_type = x.type
assert isinstance(ast_data_type, LolParserIdentifier)
assert isinstance(
ast_data_type, (LolParserIdentifier, LolParserItemAccess)
)

data_type = self._get_symbol(
module_symbol_table, ast_data_type.name
)
@@ -552,19 +561,26 @@ def add_to_module_symbol_table(self, name, symbol):

def add_builtin_types(self, caller_module: Optional["LolAnalysisModule"]):
if caller_module is None:
type_ = LolAnalysisBuiltinType("Type", {})
i32 = LolAnalysisBuiltinType("i32", {})
i32.ops["+"] = i32
i32.ops["-"] = i32
i32.ops["*"] = i32
i32.ops["/"] = i32
i32.ops["infix+"] = i32
i32.ops["infix-"] = i32
i32.ops["infix*"] = i32
i32.ops["infix/"] = i32
cstr = LolAnalysisBuiltinType("cstr", {})
void = LolAnalysisBuiltinType("void", {})
array_generic = LolAnalysisBuiltinType("Array", {})
array_generic.ops["postfix[]"] = type_
else:
# We want all of the built-in objects to be the very same objects,
# with even the pointers matching (so module_a's i32 is module_b's i32)
i32 = caller_module.module_symbol_table["i32"]
cstr = caller_module.module_symbol_table["cstr"]
void = caller_module.module_symbol_table["void"]
type_ = caller_module.module_symbol_table["Type"]
array_generic = caller_module.module_symbol_table["Array"]
self.add_to_module_symbol_table("Type", type_)
self.add_to_module_symbol_table("Array", array_generic)
self.add_to_module_symbol_table("i32", i32)
self.add_to_module_symbol_table("cstr", cstr)
self.add_to_module_symbol_table("void", void)
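To make the renamed operator keys concrete: an operator's symbol-table key is now its fixity prefix concatenated with its token, so `a + b` on i32 resolves through "infix+", while the type expression `Array[i32]` looks up "postfix[]" (whose result is a Type). A minimal sketch, using a hypothetical op_key helper rather than the analyzer's actual code path:

# Hypothetical mirror of the analyzer's op-key construction.
I32_OPS = {"infix+": "i32", "infix-": "i32", "infix*": "i32", "infix/": "i32"}
ARRAY_OPS = {"postfix[]": "Type"}

def op_key(fixity: str, operator: str) -> str:
    # The analyzer derives the fixity from LolParserOperatorType and
    # appends x.operator, yielding keys like "infix+" or "postfix[]".
    return fixity + operator

assert I32_OPS[op_key("infix", "+")] == "i32"        # a + b
assert ARRAY_OPS[op_key("postfix", "[]")] == "Type"  # Array[i32]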
78 changes: 58 additions & 20 deletions src/compiler/parser/lol_parser.py
@@ -114,6 +114,15 @@ def get_name_as_str(self):
return self.name.name


@frozen_dataclass
class LolParserItemAccess(LolParserGeneric):
name: LolParserIdentifier
arguments: List[LolParserExpression]

def get_name_as_str(self):
return self.name.name


@frozen_dataclass
class LolParserVariableDefinition(LolParserGeneric):
name: LolParserIdentifier
@@ -251,35 +260,66 @@ def parse_parenthetic_expression(
return ret

@staticmethod
def parse_func_call_args(
stream: TokenStream, func_identifier: LolParserIdentifier
) -> LolParserFunctionCall:
start_pos = func_identifier.start_position
eat_token(stream, LolTokenType.LPAREN)
args: List[LolParserValueExpression] = []
def parse_comma_separated_expressions(
stream: TokenStream, start_position: int, end_token_type: LolTokenType
) -> list[LolParserExpression]:
args: List[LolParserExpression] = []
token = stream.get_token()
# Check if empty set of arguments
if token.is_type(LolTokenType.RPAREN):
rp_tok = eat_token(stream, LolTokenType.RPAREN)
return LolParserFunctionCall(
start_pos, get_end(rp_tok), func_identifier, args
)
if token.is_type(end_token_type):
eat_token(stream, end_token_type)
return args
# At this point, we have at least one argument (or error)
while True:
expr = Parser.parse_value_expression(stream)
args.append(expr)
token = stream.get_token()
if token.is_type(LolTokenType.RPAREN):
eat_token(stream, LolTokenType.RPAREN)
if token.is_type(end_token_type):
eat_token(stream, end_token_type)
break
elif token.is_type(LolTokenType.COMMA):
eat_token(stream, LolTokenType.COMMA)
continue
else:
raise ValueError("Expected COMMA or RPAREN")
error_msg = f"Expected COMMA or {end_token_type.name}, got {token.token_type.name}"
LolError.print_error(
stream.path, start_position, start_position + 1, error_msg
)
raise ValueError(error_msg)
return args

@staticmethod
def parse_func_call_args(
stream: TokenStream, func_identifier: LolParserIdentifier
) -> LolParserFunctionCall:
start_pos = func_identifier.start_position
eat_token(stream, LolTokenType.LPAREN)
args = Parser.parse_comma_separated_expressions(
stream, start_pos, LolTokenType.RPAREN
)
end_pos = get_end(stream.get_token(offset=-1))
return LolParserFunctionCall(start_pos, end_pos, func_identifier, args)

@staticmethod
def parse_item_access_args(
stream: TokenStream, func_identifier: LolParserIdentifier
) -> LolParserItemAccess:
start_pos = func_identifier.start_position
eat_token(stream, LolTokenType.LSQB)
args = Parser.parse_comma_separated_expressions(
stream, start_pos, LolTokenType.RSQB
)
end_pos = get_end(stream.get_token(offset=-1))
return LolParserItemAccess(start_pos, end_pos, func_identifier, args)

@staticmethod
def parse_list(stream: TokenStream) -> list[LolParserExpression]:
start_pos = get_start(stream.get_token())
eat_token(stream, LolTokenType.LSQB)
return Parser.parse_comma_separated_expressions(
stream, start_pos, LolTokenType.RSQB
)

@staticmethod
def parse_identifier_with_namespace_separator(
stream: TokenStream, identifier_leaf: LolParserIdentifier
@@ -336,7 +376,7 @@ def parse_leading_identifier(
if token.is_type(LolTokenType.LPAREN):
return Parser.parse_func_call_args(stream, identifier_leaf)
elif token.is_type(LolTokenType.LSQB):
raise ValueError("accesses not supported yet... i.e. `x[100]`")
return Parser.parse_item_access_args(stream, identifier_leaf)
else:
return LolParserIdentifier(
identifier_leaf.start_position,
@@ -368,6 +408,8 @@ def parse_primary(stream: TokenStream) -> LolParserExpression:
return Parser.parse_literal(stream)
elif token.is_type(LolTokenType.LPAREN):
return Parser.parse_parenthetic_expression(stream)
elif token.is_type(LolTokenType.LSQB):
return Parser.parse_list(stream)
else:
error_msg = f"unrecognized primary {token}"
LolError.print_error(
@@ -474,11 +516,7 @@ def parse_expression(stream: TokenStream) -> LolParserExpression:

@staticmethod
def parse_type_expression(stream: TokenStream) -> LolParserTypeExpression:
# We only support single-token type expressions for now
ident = eat_token(stream, LolTokenType.IDENTIFIER)
return LolParserIdentifier(
get_start(ident), get_end(ident), ident.as_str()
)
return Parser.parse_expression(stream)

@staticmethod
def parse_value_expression(stream: TokenStream) -> LolParserValueExpression:
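Function calls, item accesses, and list literals now share one comma-separated loop and differ only in the closing delimiter. A toy, self-contained version of that loop, with single-character tokens standing in for the real TokenStream (names here are illustrative, not the compiler's API):

# Toy version of parse_comma_separated_expressions.
def parse_csv(tokens: list[str], end: str) -> list[str]:
    args: list[str] = []
    if tokens[0] == end:  # empty argument list, e.g. f()
        tokens.pop(0)
        return args
    while True:
        args.append(tokens.pop(0))  # stand-in for parse_value_expression
        tok = tokens.pop(0)
        if tok == end:
            break
        if tok != ",":
            raise ValueError(f"expected ',' or '{end}', got '{tok}'")
    return args

print(parse_csv(list("a,b,c)"), ")"))  # f(a, b, c)  -> ['a', 'b', 'c']
print(parse_csv(list("i]"), "]"))      # x[i]        -> ['i']
print(parse_csv(list("]"), "]"))       # empty list  -> []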
85 changes: 85 additions & 0 deletions src/lib/hash_tree.py
@@ -0,0 +1,85 @@
#!/usr/bin/python3
""" @brief Cryptographically hash a file tree. """

import hashlib
import os
from pathlib import Path
from warnings import warn


def track_definite_cycle(path_set: set, p: Path):
p = p.absolute()
if p in path_set:
raise ValueError(f"cycle involving {p}")
path_set.add(p)


def track_potential_cycle(path_set: set, x: Path):
x = x.resolve()
if x in path_set:
warn(f"{x} already in set; may have encountered a cycle!")
path_set.add(x)


def get_canonical_file_tree(top: Path) -> list[Path]:
"""
@brief Get a file tree sorted by the string names.
@note I don't fully resolve paths in the tree because I want to
use the paths as the user sees them rather than the resolved
symbolic links. This means cycle detection on the unresolved
paths won't work: without resolving, a repeated directory never
shows up as a repeated path; the path just grows longer with
each pass through a symlink.
@todo Sort the files more intelligently, e.g. in a Merkle Tree.
"""
# Tree without resolving the symlinks.
tree = set()
# Tree of real, absolute paths.
real_tree = set()
# NOTE Python 3.12 introduces 'Path.walk()', which would be
# cleaner to use. Unfortunately, I am on Python 3.11 (Debian
# Bookworm), so I'm stuck with os.walk().
for root, dirs, files in os.walk(top):
for d in dirs:
path = Path(root) / d
track_definite_cycle(tree, path)
# This is redundant with track_definite_cycle's add, but I
# like to be explicit.
tree.add(path.absolute())
track_potential_cycle(real_tree, path)
for f in files:
path = Path(root) / f
# This is redundant with track_definite_cycle's add, but I
# like to be explicit.
track_definite_cycle(tree, path)
tree.add(path.absolute())
track_potential_cycle(real_tree, path)
tree = sorted(tree)
return tree
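
# A tiny illustration of the @note above (hypothetical symlink
# `top/link -> top`): the unresolved paths a walker would keep
# yielding never repeat, they only grow, so only resolved paths
# can reveal the cycle.
_p = Path("top/link/link")
_q = _p / "link"
assert _p != _q  # unresolved: never a repeat, just a longer path
# With the symlink in place, _p.resolve() and _q.resolve() would
# collapse to the same directory, which is exactly the repeat that
# track_potential_cycle's warning catches.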


def sha256_file_tree(
top: Path, tree: list[Path], hash_path: bool = False
) -> str:
top = top.absolute()
m = hashlib.sha256()
for f in tree:
assert f.exists()
rel_path = f.relative_to(top)
if f.is_file():
if hash_path:
m.update(str(rel_path).encode())
m.update(f.read_bytes())
continue
if f.is_dir():
if hash_path:
m.update(str(rel_path).encode())
continue
return m.hexdigest()


if __name__ == "__main__":
top = Path("../compiler")
tree = get_canonical_file_tree(top)
digest = sha256_file_tree(top, tree, False)
print("Without hashing paths:", digest)
digest = sha256_file_tree(top, tree, True)
print("With hashing paths:", digest)