9 changes: 9 additions & 0 deletions examples/array.lol
@@ -0,0 +1,9 @@
/* Demonstrate array syntax */
module io = import("stdio.h");

function main() -> i32 {
let array: Array[i32] = [0, 1, 2, 3];
io::printf("Array: [%d, %d, %d, %d]\n",
array[0], array[1], array[2], array[3]);
return 0;
}
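
For context, a minimal sketch of the nodes this example should produce under the new parser, using simplified stand-ins (Identifier and ItemAccess here are illustrative; the real LolParserItemAccess below also carries start and end positions):

from dataclasses import dataclass
from typing import List, Union

@dataclass(frozen=True)
class Identifier:
    name: str

@dataclass(frozen=True)
class ItemAccess:  # stand-in for LolParserItemAccess
    name: Identifier
    arguments: List["Expr"]

Expr = Union[Identifier, ItemAccess, int, list]

# `Array[i32]` in the annotation parses as an item access on `Array`:
type_expr = ItemAccess(Identifier("Array"), [Identifier("i32")])
# `[0, 1, 2, 3]` goes through parse_list and yields a list of expressions:
value_expr = [0, 1, 2, 3]
print(type_expr, value_expr)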
28 changes: 22 additions & 6 deletions src/compiler/analyzer/lol_analyzer.py
@@ -21,6 +21,8 @@
LolParserFunctionDefinition,
LolParserReturnStatement,
LolParserIfStatement,
LolParserItemAccess,
LolParserOperatorType,
)

################################################################################
@@ -374,7 +376,11 @@ def _parse_expression_recursively(
body_block: List[LolIRStatement],
) -> str:
if isinstance(x, LolParserOperatorExpression):
op_name: str = x.operator
op_name: str = {
LolParserOperatorType.BINARY_INFIX: "infix",
LolParserOperatorType.UNARY_PREFIX: "prefix",
LolParserOperatorType.UNARY_POSTFIX: "postfix",
}[x.type] + f"{x.operator}"
operands: List["LolAnalysisVariable"] = [
self._get_symbol(
module_symbol_table,
@@ -485,7 +491,10 @@ def _parse_statement(
if isinstance(x, LolParserVariableDefinition):
name = x.get_name_as_str()
ast_data_type = x.type
assert isinstance(ast_data_type, LolParserIdentifier)
assert isinstance(
ast_data_type, (LolParserIdentifier, LolParserItemAccess)
)

data_type = self._get_symbol(
module_symbol_table, ast_data_type.name
)
@@ -552,19 +561,26 @@ def add_to_module_symbol_table(self, name, symbol):

def add_builtin_types(self, caller_module: Optional["LolAnalysisModule"]):
if caller_module is None:
type_ = LolAnalysisBuiltinType("Type", {})
i32 = LolAnalysisBuiltinType("i32", {})
i32.ops["+"] = i32
i32.ops["-"] = i32
i32.ops["*"] = i32
i32.ops["/"] = i32
i32.ops["infix+"] = i32
i32.ops["infix-"] = i32
i32.ops["infix*"] = i32
i32.ops["infix/"] = i32
cstr = LolAnalysisBuiltinType("cstr", {})
void = LolAnalysisBuiltinType("void", {})
array_generic = LolAnalysisBuiltinType("Array", {})
array_generic.ops["postfix[]"] = type_
else:
# We want all of the built-in objects to be the very same objects,
# with even the pointers matching (so module_a's i32 is module_b's i32)
i32 = caller_module.module_symbol_table["i32"]
cstr = caller_module.module_symbol_table["cstr"]
void = caller_module.module_symbol_table["void"]
type_ = caller_module.module_symbol_table["Type"]
array_generic = caller_module.module_symbol_table["Array"]
self.add_to_module_symbol_table("Type", type_)
self.add_to_module_symbol_table("Array", array_generic)
self.add_to_module_symbol_table("i32", i32)
self.add_to_module_symbol_table("cstr", cstr)
self.add_to_module_symbol_table("void", void)
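To make the renamed operator keys concrete: an operator's symbol-table key is now its fixity prefix concatenated with its token, so `a + b` on i32 resolves through "infix+", while the type expression `Array[i32]` looks up "postfix[]" (whose result is a Type). A minimal sketch, using a hypothetical op_key helper rather than the analyzer's actual code path:

# Hypothetical mirror of the analyzer's op-key construction.
I32_OPS = {"infix+": "i32", "infix-": "i32", "infix*": "i32", "infix/": "i32"}
ARRAY_OPS = {"postfix[]": "Type"}

def op_key(fixity: str, operator: str) -> str:
    # The analyzer derives the fixity from LolParserOperatorType and
    # appends x.operator, yielding keys like "infix+" or "postfix[]".
    return fixity + operator

assert I32_OPS[op_key("infix", "+")] == "i32"        # a + b
assert ARRAY_OPS[op_key("postfix", "[]")] == "Type"  # Array[i32]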
78 changes: 58 additions & 20 deletions src/compiler/parser/lol_parser.py
@@ -114,6 +114,15 @@ def get_name_as_str(self):
return self.name.name


@frozen_dataclass
class LolParserItemAccess(LolParserGeneric):
name: LolParserIdentifier
arguments: List[LolParserExpression]

def get_name_as_str(self):
return self.name.name


@frozen_dataclass
class LolParserVariableDefinition(LolParserGeneric):
name: LolParserIdentifier
@@ -251,35 +260,66 @@ def parse_parenthetic_expression(
return ret

@staticmethod
def parse_func_call_args(
stream: TokenStream, func_identifier: LolParserIdentifier
) -> LolParserFunctionCall:
start_pos = func_identifier.start_position
eat_token(stream, LolTokenType.LPAREN)
args: List[LolParserValueExpression] = []
def parse_comma_separated_expressions(
stream: TokenStream, start_position: int, end_token_type: LolTokenType
) -> list[LolParserExpression]:
args: List[LolParserExpression] = []
token = stream.get_token()
# Check if empty set of arguments
if token.is_type(LolTokenType.RPAREN):
rp_tok = eat_token(stream, LolTokenType.RPAREN)
return LolParserFunctionCall(
start_pos, get_end(rp_tok), func_identifier, args
)
if token.is_type(end_token_type):
eat_token(stream, end_token_type)
return args
# At this point, we have at least one argument (or error)
while True:
expr = Parser.parse_value_expression(stream)
args.append(expr)
token = stream.get_token()
if token.is_type(LolTokenType.RPAREN):
eat_token(stream, LolTokenType.RPAREN)
if token.is_type(end_token_type):
eat_token(stream, end_token_type)
break
elif token.is_type(LolTokenType.COMMA):
eat_token(stream, LolTokenType.COMMA)
continue
else:
raise ValueError("Expected COMMA or RPAREN")
error_msg = f"Expected COMMA or {end_token_type.name}, got {token.token_type.name}"
LolError.print_error(
stream.path, start_position, start_position + 1, error_msg
)
raise ValueError(error_msg)
return args

@staticmethod
def parse_func_call_args(
stream: TokenStream, func_identifier: LolParserIdentifier
) -> LolParserFunctionCall:
start_pos = func_identifier.start_position
eat_token(stream, LolTokenType.LPAREN)
args = Parser.parse_comma_separated_expressions(
stream, start_pos, LolTokenType.RPAREN
)
end_pos = get_end(stream.get_token(offset=-1))
return LolParserFunctionCall(start_pos, end_pos, func_identifier, args)

@staticmethod
def parse_item_access_args(
stream: TokenStream, func_identifier: LolParserIdentifier
) -> LolParserItemAccess:
start_pos = func_identifier.start_position
eat_token(stream, LolTokenType.LSQB)
args = Parser.parse_comma_separated_expressions(
stream, start_pos, LolTokenType.RSQB
)
end_pos = get_end(stream.get_token(offset=-1))
return LolParserItemAccess(start_pos, end_pos, func_identifier, args)

@staticmethod
def parse_list(stream: TokenStream) -> list[LolParserExpression]:
start_pos = get_start(stream.get_token())
eat_token(stream, LolTokenType.LSQB)
return Parser.parse_comma_separated_expressions(
stream, start_pos, LolTokenType.RSQB
)

@staticmethod
def parse_identifier_with_namespace_separator(
stream: TokenStream, identifier_leaf: LolParserIdentifier
@@ -336,7 +376,7 @@ def parse_leading_identifier(
if token.is_type(LolTokenType.LPAREN):
return Parser.parse_func_call_args(stream, identifier_leaf)
elif token.is_type(LolTokenType.LSQB):
raise ValueError("accesses not supported yet... i.e. `x[100]`")
return Parser.parse_item_access_args(stream, identifier_leaf)
else:
return LolParserIdentifier(
identifier_leaf.start_position,
@@ -368,6 +408,8 @@ def parse_primary(stream: TokenStream) -> LolParserExpression:
return Parser.parse_literal(stream)
elif token.is_type(LolTokenType.LPAREN):
return Parser.parse_parenthetic_expression(stream)
elif token.is_type(LolTokenType.LSQB):
return Parser.parse_list(stream)
else:
error_msg = f"unrecognized primary {token}"
LolError.print_error(
@@ -474,11 +516,7 @@ def parse_expression(stream: TokenStream) -> LolParserExpression:

@staticmethod
def parse_type_expression(stream: TokenStream) -> LolParserTypeExpression:
# We only support single-token type expressions for now
ident = eat_token(stream, LolTokenType.IDENTIFIER)
return LolParserIdentifier(
get_start(ident), get_end(ident), ident.as_str()
)
return Parser.parse_expression(stream)

@staticmethod
def parse_value_expression(stream: TokenStream) -> LolParserValueExpression:
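Function calls, item accesses, and list literals now share one comma-separated loop and differ only in the closing delimiter. A toy, self-contained version of that loop, with single-character tokens standing in for the real TokenStream (names here are illustrative, not the compiler's API):

# Toy version of parse_comma_separated_expressions.
def parse_csv(tokens: list[str], end: str) -> list[str]:
    args: list[str] = []
    if tokens[0] == end:  # empty argument list, e.g. f()
        tokens.pop(0)
        return args
    while True:
        args.append(tokens.pop(0))  # stand-in for parse_value_expression
        tok = tokens.pop(0)
        if tok == end:
            break
        if tok != ",":
            raise ValueError(f"expected ',' or '{end}', got '{tok}'")
    return args

print(parse_csv(list("a,b,c)"), ")"))  # f(a, b, c)  -> ['a', 'b', 'c']
print(parse_csv(list("i]"), "]"))      # x[i]        -> ['i']
print(parse_csv(list("]"), "]"))       # empty list  -> []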
85 changes: 85 additions & 0 deletions src/lib/hash_tree.py
@@ -0,0 +1,85 @@
#!/usr/bin/python3
""" @brief Cryptographically hash a file tree. """

import hashlib
import os
from pathlib import Path
from warnings import warn


def track_definite_cycle(path_set: set, p: Path):
p = p.absolute()
if p in path_set:
raise ValueError(f"cycle involving {p}")
path_set.add(p)


def track_potential_cycle(path_set: set, x: Path):
x = x.resolve()
if x in path_set:
warn(f"{x} already in set; may have encountered a cycle!")
path_set.add(x)


def get_canonical_file_tree(top: Path) -> list[Path]:
"""
@brief Get a file tree sorted by the string names.
@note I don't fully resolve paths in the tree because I want to
use the paths as the user sees them rather than the resolved
symbolic links. This means cycle detection on the unresolved
paths won't work: without resolving, a repeated directory never
shows up as a repeated path; the path just grows longer with
each pass through a symlink.
@todo Sort the files more intelligently, e.g. in a Merkle Tree.
"""
# Tree without resolving the symlinks.
tree = set()
# Tree of real, absolute paths.
real_tree = set()
# NOTE Python 3.12 introduces 'Path.walk()', which would be
# cleaner to use. Unfortunately, I am on Python 3.11 (Debian
# Bookworm), so I'm stuck with os.walk().
for root, dirs, files in os.walk(top):
for d in dirs:
path = Path(root) / d
track_definite_cycle(tree, path)
# This is redundant with track_definite_cycle's add, but I
# like to be explicit.
tree.add(path.absolute())
track_potential_cycle(real_tree, path)
for f in files:
path = Path(root) / f
# This is redundant with track_definite_cycle's add, but I
# like to be explicit.
track_definite_cycle(tree, path)
tree.add(path.absolute())
track_potential_cycle(real_tree, path)
tree = sorted(tree)
return tree
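
# A tiny illustration of the @note above (hypothetical symlink
# `top/link -> top`): the unresolved paths a walker would keep
# yielding never repeat, they only grow, so only resolved paths
# can reveal the cycle.
_p = Path("top/link/link")
_q = _p / "link"
assert _p != _q  # unresolved: never a repeat, just a longer path
# With the symlink in place, _p.resolve() and _q.resolve() would
# collapse to the same directory, which is exactly the repeat that
# track_potential_cycle's warning catches.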


def sha256_file_tree(
top: Path, tree: list[Path], hash_path: bool = False
) -> str:
top = top.absolute()
m = hashlib.sha256()
for f in tree:
assert f.exists()
rel_path = f.relative_to(top)
if f.is_file():
if hash_path:
m.update(str(rel_path).encode())
m.update(f.read_bytes())
continue
if f.is_dir():
if hash_path:
m.update(str(rel_path).encode())
continue
return m.hexdigest()


if __name__ == "__main__":
top = Path("../compiler")
tree = get_canonical_file_tree(top)
digest = sha256_file_tree(top, tree, False)
print("Without hashing paths:", digest)
digest = sha256_file_tree(top, tree, True)
print("With hashing paths:", digest)