Skip to content

Commit

Permalink
Bumped tree-sitter to v0.20.8.
Browse files Browse the repository at this point in the history
  • Loading branch information
micksmix committed Dec 17, 2023
1 parent cd3cb7e commit 55cfc70
Show file tree
Hide file tree
Showing 12 changed files with 1,134 additions and 857 deletions.
36 changes: 27 additions & 9 deletions api.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,13 @@ TSNode ts_tree_root_node_with_offset(
*/
const TSLanguage *ts_tree_language(const TSTree *);

/**
* Get the array of included ranges that was used to parse the syntax tree.
*
* The returned pointer must be freed by the caller.
*/
TSRange *ts_tree_included_ranges(const TSTree *, uint32_t *length);

/**
* Edit the syntax tree to keep it in sync with source code that has been
* edited.
Expand Down Expand Up @@ -413,7 +420,7 @@ TSRange *ts_tree_get_changed_ranges(
/**
* Write a DOT graph describing the syntax tree to the given file.
*/
void ts_tree_print_dot_graph(const TSTree *, FILE *);
void ts_tree_print_dot_graph(const TSTree *, int file_descriptor);

/******************/
/* Section - Node */
Expand Down Expand Up @@ -743,15 +750,26 @@ const TSQueryPredicateStep *ts_query_predicates_for_pattern(
uint32_t *length
);

bool ts_query_is_pattern_rooted(
const TSQuery *self,
uint32_t pattern_index
);
/**
* Check if the given pattern in the query has a single root node.
*/
bool ts_query_is_pattern_rooted(const TSQuery *self, uint32_t pattern_index);

bool ts_query_is_pattern_guaranteed_at_step(
const TSQuery *self,
uint32_t byte_offset
);
/**
* Check if the given pattern in the query is 'non-local'.
*
* A non-local pattern has multiple root nodes and can match within a
* repeating sequence of nodes, as specified by the grammar. Non-local
* patterns disable certain optimizations that would otherwise be possible
* when executing a query on a specific range of a syntax tree.
*/
bool ts_query_is_pattern_non_local(const TSQuery *self, uint32_t pattern_index);

/**
* Check if a given pattern is guaranteed to match once a given step is reached.
* The step is specified by its byte offset in the query's source code.
*/
bool ts_query_is_pattern_guaranteed_at_step(const TSQuery *self, uint32_t byte_offset);

/**
* Get the name and length of one of the query's captures, or one of the
Expand Down
6 changes: 3 additions & 3 deletions array.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,10 @@ static inline void array__swap(VoidArray *self, VoidArray *other) {
*self = swap;
}

static inline void array__grow(VoidArray *self, size_t count, size_t element_size) {
size_t new_size = self->size + count;
static inline void array__grow(VoidArray *self, uint32_t count, size_t element_size) {
uint32_t new_size = self->size + count;
if (new_size > self->capacity) {
size_t new_capacity = self->capacity * 2;
uint32_t new_capacity = self->capacity * 2;
if (new_capacity < 8) new_capacity = 8;
if (new_capacity < new_size) new_capacity = new_size;
array__reserve(self, element_size, new_capacity);
Expand Down
5 changes: 5 additions & 0 deletions clock.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#ifndef TREE_SITTER_CLOCK_H_
#define TREE_SITTER_CLOCK_H_

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t TSDuration;
Expand Down Expand Up @@ -82,6 +83,10 @@ static inline TSClock clock_after(TSClock base, TSDuration duration) {
TSClock result = base;
result.tv_sec += duration / 1000000;
result.tv_nsec += (duration % 1000000) * 1000;
if (result.tv_nsec >= 1000000000) {
result.tv_nsec -= 1000000000;
++(result.tv_sec);
}
return result;
}

Expand Down
54 changes: 34 additions & 20 deletions lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,16 @@ static void ts_lexer__get_lookahead(Lexer *self) {

static void ts_lexer_goto(Lexer *self, Length position) {
self->current_position = position;
bool found_included_range = false;

// Move to the first valid position at or after the given position.
bool found_included_range = false;
for (unsigned i = 0; i < self->included_range_count; i++) {
TSRange *included_range = &self->included_ranges[i];
if (included_range->end_byte > position.bytes) {
if (included_range->start_byte >= position.bytes) {
if (
included_range->end_byte > self->current_position.bytes &&
included_range->end_byte > included_range->start_byte
) {
if (included_range->start_byte >= self->current_position.bytes) {
self->current_position = (Length) {
.bytes = included_range->start_byte,
.extent = included_range->start_point,
Expand All @@ -127,8 +130,8 @@ static void ts_lexer_goto(Lexer *self, Length position) {
// If the current position is outside of the current chunk of text,
// then clear out the current chunk of text.
if (self->chunk && (
position.bytes < self->chunk_start ||
position.bytes >= self->chunk_start + self->chunk_size
self->current_position.bytes < self->chunk_start ||
self->current_position.bytes >= self->chunk_start + self->chunk_size
)) {
ts_lexer__clear_chunk(self);
}
Expand Down Expand Up @@ -164,27 +167,31 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) {
}
}

const TSRange *current_range = NULL;
if (self->current_included_range_index < self->included_range_count) {
current_range = &self->included_ranges[self->current_included_range_index];
if (self->current_position.bytes == current_range->end_byte) {
self->current_included_range_index++;
if (self->current_included_range_index < self->included_range_count) {
current_range++;
self->current_position = (Length) {
current_range->start_byte,
current_range->start_point,
};
} else {
current_range = NULL;
}
const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
while (
self->current_position.bytes >= current_range->end_byte ||
current_range->end_byte == current_range->start_byte
) {
self->current_included_range_index++;
if (self->current_included_range_index < self->included_range_count) {
current_range++;
self->current_position = (Length) {
current_range->start_byte,
current_range->start_point,
};
} else {
current_range = NULL;
break;
}
}

if (skip) self->token_start_position = self->current_position;

if (current_range) {
if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
if (
self->current_position.bytes < self->chunk_start ||
self->current_position.bytes >= self->chunk_start + self->chunk_size
) {
ts_lexer__get_chunk(self);
}
ts_lexer__get_lookahead(self);
Expand Down Expand Up @@ -339,6 +346,13 @@ void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
ts_lexer__mark_end(&self->data);
}

// If the token ended at an included range boundary, then its end position
// will have been reset to the end of the preceding range. Reset the start
// position to match.
if (self->token_end_position.bytes < self->token_start_position.bytes) {
self->token_start_position = self->token_end_position;
}

uint32_t current_lookahead_end_byte = self->current_position.bytes + 1;

// In order to determine that a byte sequence is invalid UTF8 or UTF16,
Expand Down
26 changes: 19 additions & 7 deletions parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,14 @@ static Subtree ts_parser__lex(
// avoid infinite loops which could otherwise occur, because the lexer is
// looking for any possible token, instead of looking for the specific set of
// tokens that are valid in some parse state.
//
// Note that it's possible that the token end position may be *before* the
// original position of the lexer because of the way that tokens are positioned
// at included range boundaries: when a token is terminated at the start of
// an included range, it is marked as ending at the *end* of the preceding
// included range.
if (
self->lexer.token_end_position.bytes == current_position.bytes &&
self->lexer.token_end_position.bytes <= current_position.bytes &&
(error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) &&
!external_scanner_state_changed
) {
Expand Down Expand Up @@ -525,10 +531,6 @@ static Subtree ts_parser__lex(
self->language
);
} else {
if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) {
self->lexer.token_start_position = self->lexer.token_end_position;
}

bool is_keyword = false;
TSSymbol symbol = self->lexer.data.result_symbol;
Length padding = length_sub(self->lexer.token_start_position, start_position);
Expand Down Expand Up @@ -605,7 +607,7 @@ static Subtree ts_parser__get_cached_token(

static void ts_parser__set_cached_token(
TSParser *self,
size_t byte_index,
uint32_t byte_index,
Subtree last_external_token,
Subtree token
) {
Expand Down Expand Up @@ -1461,7 +1463,9 @@ static bool ts_parser__advance(
((self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
(!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)))
) {
ts_subtree_release(&self->tree_pool, lookahead);
if (lookahead.ptr) {
ts_subtree_release(&self->tree_pool, lookahead);
}
return false;
}

Expand Down Expand Up @@ -1937,8 +1941,16 @@ TSTree *ts_parser_parse(
}
}

// After advancing each version of the stack, re-sort the versions by their cost,
// removing any versions that are no longer worth pursuing.
unsigned min_error_cost = ts_parser__condense_stack(self);

// If there's already a finished parse tree that's better than any in-progress version,
// then terminate parsing. Clear the parse stack to remove any extra references to subtrees
// within the finished tree, ensuring that these subtrees can be safely mutated in-place
// for rebalancing.
if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) {
ts_stack_clear(self->stack);
break;
}

Expand Down
Loading

0 comments on commit 55cfc70

Please sign in to comment.