Bumped tree-sitter to v0.20.8.

Strum355 · Dec 17, 2023 · 55cfc70 · 55cfc70
1 parent cd3cb7e
commit 55cfc70
Show file tree

Hide file tree

Showing 12 changed files with 1,134 additions and 857 deletions.
diff --git a/api.h b/api.h
@@ -381,6 +381,13 @@ TSNode ts_tree_root_node_with_offset(
  */
 const TSLanguage *ts_tree_language(const TSTree *);
 
+/**
+ * Get the array of included ranges that was used to parse the syntax tree.
+ *
+ * The returned pointer must be freed by the caller.
+ */
+TSRange *ts_tree_included_ranges(const TSTree *, uint32_t *length);
+
 /**
  * Edit the syntax tree to keep it in sync with source code that has been
  * edited.
@@ -413,7 +420,7 @@ TSRange *ts_tree_get_changed_ranges(
 /**
  * Write a DOT graph describing the syntax tree to the given file.
  */
-void ts_tree_print_dot_graph(const TSTree *, FILE *);
+void ts_tree_print_dot_graph(const TSTree *, int file_descriptor);
 
 /******************/
 /* Section - Node */
@@ -743,15 +750,26 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern(
   uint32_t *length
 );
 
-bool ts_query_is_pattern_rooted(
-  const TSQuery *self,
-  uint32_t pattern_index
-);
+/**
+ * Check if the given pattern in the query has a single root node.
+ */
+bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index);
 
-bool ts_query_is_pattern_guaranteed_at_step(
-  const TSQuery *self,
-  uint32_t byte_offset
-);
+/**
+ * Check if the given pattern in the query is 'non-local'.
+ *
+ * A non-local pattern has multiple root nodes and can match within a
+ * repeating sequence of nodes, as specified by the grammar. Non-local
+ * patterns disable certain optimizations that would otherwise be possible
+ * when executing a query on a specific range of a syntax tree.
+ */
+bool ts_query_is_pattern_non_local(const TSQuery *self, uint32_t pattern_index);
+
+/**
+ * Check if a given pattern is guaranteed to match once a given step is reached.
+ * The step is specified by its byte offset in the query's source code.
+ */
+bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_offset);
 
 /**
  * Get the name and length of one of the query's captures, or one of the

diff --git a/array.h b/array.h
@@ -170,10 +170,10 @@ static inline void array__swap(VoidArray *self, VoidArray *other) {
   *self = swap;
 }
 
-static inline void array__grow(VoidArray *self, size_t count, size_t element_size) {
-  size_t new_size = self->size + count;
+static inline void array__grow(VoidArray *self, uint32_t count, size_t element_size) {
+  uint32_t new_size = self->size + count;
   if (new_size > self->capacity) {
-    size_t new_capacity = self->capacity * 2;
+    uint32_t new_capacity = self->capacity * 2;
     if (new_capacity < 8) new_capacity = 8;
     if (new_capacity < new_size) new_capacity = new_size;
     array__reserve(self, element_size, new_capacity);

diff --git a/clock.h b/clock.h
@@ -1,6 +1,7 @@
 #ifndef TREE_SITTER_CLOCK_H_
 #define TREE_SITTER_CLOCK_H_
 
+#include <stdbool.h>
 #include <stdint.h>
 
 typedef uint64_t TSDuration;
@@ -82,6 +83,10 @@ static inline TSClock clock_after(TSClock base, TSDuration duration) {
   TSClock result = base;
   result.tv_sec += duration / 1000000;
   result.tv_nsec += (duration % 1000000) * 1000;
+  if (result.tv_nsec >= 1000000000) {
+    result.tv_nsec -= 1000000000;
+    ++(result.tv_sec);
+  }
   return result;
 }
 

diff --git a/lexer.c b/lexer.c
@@ -104,13 +104,16 @@ static void ts_lexer__get_lookahead(Lexer *self) {
 
 static void ts_lexer_goto(Lexer *self, Length position) {
   self->current_position = position;
-  bool found_included_range = false;
 
   // Move to the first valid position at or after the given position.
+  bool found_included_range = false;
   for (unsigned i = 0; i < self->included_range_count; i++) {
     TSRange *included_range = &self->included_ranges[i];
-    if (included_range->end_byte > position.bytes) {
-      if (included_range->start_byte >= position.bytes) {
+    if (
+      included_range->end_byte > self->current_position.bytes &&
+      included_range->end_byte > included_range->start_byte
+    ) {
+      if (included_range->start_byte >= self->current_position.bytes) {
         self->current_position = (Length) {
           .bytes = included_range->start_byte,
           .extent = included_range->start_point,
@@ -127,8 +130,8 @@ static void ts_lexer_goto(Lexer *self, Length position) {
     // If the current position is outside of the current chunk of text,
     // then clear out the current chunk of text.
     if (self->chunk && (
-      position.bytes < self->chunk_start ||
-      position.bytes >= self->chunk_start + self->chunk_size
+      self->current_position.bytes < self->chunk_start ||
+      self->current_position.bytes >= self->chunk_start + self->chunk_size
     )) {
       ts_lexer__clear_chunk(self);
     }
@@ -164,27 +167,31 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) {
     }
   }
 
-  const TSRange *current_range = NULL;
-  if (self->current_included_range_index < self->included_range_count) {
-    current_range = &self->included_ranges[self->current_included_range_index];
-    if (self->current_position.bytes == current_range->end_byte) {
-      self->current_included_range_index++;
-      if (self->current_included_range_index < self->included_range_count) {
-        current_range++;
-        self->current_position = (Length) {
-          current_range->start_byte,
-          current_range->start_point,
-        };
-      } else {
-        current_range = NULL;
-      }
+  const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
+  while (
+    self->current_position.bytes >= current_range->end_byte ||
+    current_range->end_byte == current_range->start_byte
+  ) {
+    self->current_included_range_index++;
+    if (self->current_included_range_index < self->included_range_count) {
+      current_range++;
+      self->current_position = (Length) {
+        current_range->start_byte,
+        current_range->start_point,
+      };
+    } else {
+      current_range = NULL;
+      break;
     }
   }
 
   if (skip) self->token_start_position = self->current_position;
 
   if (current_range) {
-    if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
+    if (
+      self->current_position.bytes < self->chunk_start ||
+      self->current_position.bytes >= self->chunk_start + self->chunk_size
+    ) {
       ts_lexer__get_chunk(self);
     }
     ts_lexer__get_lookahead(self);
@@ -339,6 +346,13 @@ void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
     ts_lexer__mark_end(&self->data);
   }
 
+  // If the token ended at an included range boundary, then its end position
+  // will have been reset to the end of the preceding range. Reset the start
+  // position to match.
+  if (self->token_end_position.bytes < self->token_start_position.bytes) {
+    self->token_start_position = self->token_end_position;
+  }
+
   uint32_t current_lookahead_end_byte = self->current_position.bytes + 1;
 
   // In order to determine that a byte sequence is invalid UTF8 or UTF16,

diff --git a/parser.c b/parser.c
@@ -447,8 +447,14 @@ static Subtree ts_parser__lex(
         // avoid infinite loops which could otherwise occur, because the lexer is
         // looking for any possible token, instead of looking for the specific set of
         // tokens that are valid in some parse state.
+        //
+        // Note that the token end position may be *before* the original position
+        // of the lexer because of the way that tokens are positioned at included
+        // range boundaries: when a token is terminated at the start of an included
+        // range, it is marked as ending at the *end* of the preceding included
+        // range.
         if (
-          self->lexer.token_end_position.bytes == current_position.bytes &&
+          self->lexer.token_end_position.bytes <= current_position.bytes &&
           (error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) &&
           !external_scanner_state_changed
         ) {
@@ -525,10 +531,6 @@ static Subtree ts_parser__lex(
       self->language
     );
   } else {
-    if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) {
-      self->lexer.token_start_position = self->lexer.token_end_position;
-    }
-
     bool is_keyword = false;
     TSSymbol symbol = self->lexer.data.result_symbol;
     Length padding = length_sub(self->lexer.token_start_position, start_position);
@@ -605,7 +607,7 @@ static Subtree ts_parser__get_cached_token(
 
 static void ts_parser__set_cached_token(
   TSParser *self,
-  size_t byte_index,
+  uint32_t byte_index,
   Subtree last_external_token,
   Subtree token
 ) {
@@ -1461,7 +1463,9 @@ static bool ts_parser__advance(
       ((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
        (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
     ) {
-      ts_subtree_release(&self->tree_pool, lookahead);
+      if (lookahead.ptr) {
+        ts_subtree_release(&self->tree_pool, lookahead);
+      }
       return false;
     }
 
@@ -1937,8 +1941,16 @@ TSTree *ts_parser_parse(
       }
     }
 
+    // After advancing each version of the stack, re-sort the versions by their cost,
+    // removing any versions that are no longer worth pursuing.
     unsigned min_error_cost = ts_parser__condense_stack(self);
+
+    // If there's already a finished parse tree that's better than any in-progress version,
+    // then terminate parsing. Clear the parse stack to remove any extra references to subtrees
+    // within the finished tree, ensuring that these subtrees can be safely mutated in-place
+    // for rebalancing.
     if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) {
+      ts_stack_clear(self->stack);
       break;
     }