diff --git a/alloc.h b/alloc.h index 6e22a0ab..dd487ca2 100644 --- a/alloc.h +++ b/alloc.h @@ -9,7 +9,7 @@ extern "C" { #include #include -#if defined(TREE_SITTER_TEST) +#if defined(TREE_SITTER_ALLOCATION_TRACKING) void *ts_record_malloc(size_t); void *ts_record_calloc(size_t, size_t); diff --git a/api.h b/api.h index caa05f52..f02789ee 100644 --- a/api.h +++ b/api.h @@ -21,13 +21,13 @@ extern "C" { * The Tree-sitter library is generally backwards-compatible with languages * generated using older CLI versions, but is not forwards-compatible. */ -#define TREE_SITTER_LANGUAGE_VERSION 12 +#define TREE_SITTER_LANGUAGE_VERSION 13 /** * The earliest ABI version that is supported by the current version of the * library. */ -#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9 +#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 13 /*******************/ /* Section - Types */ @@ -487,6 +487,12 @@ TSNode ts_node_parent(TSNode); */ TSNode ts_node_child(TSNode, uint32_t); +/** + * Get the field name for node's child at the given index, where zero represents + * the first child. Returns NULL, if no field is found. + */ +const char *ts_node_field_name_for_child(TSNode, uint32_t); + /** * Get the node's number of children. */ @@ -645,12 +651,13 @@ bool ts_tree_cursor_goto_first_child(TSTreeCursor *); /** * Move the cursor to the first child of its current node that extends beyond - * the given byte offset. + * the given byte offset or point. * * This returns the index of the child node if one was found, and returns -1 * if no such child was found. 
*/ int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t); +int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *, TSPoint); TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *); @@ -791,6 +798,21 @@ void ts_query_cursor_delete(TSQueryCursor *); */ void ts_query_cursor_exec(TSQueryCursor *, const TSQuery *, TSNode); +/** + * Manage the maximum number of in-progress matches allowed by this query + * cursor. + * + * Query cursors have an optional maximum capacity for storing lists of + * in-progress captures. If this capacity is exceeded, then the + * earliest-starting match will silently be dropped to make room for further + * matches. This maximum capacity is optional — by default, query cursors allow + * any number of pending matches, dynamically allocating new space for them as + * needed as the query is executed. + */ +bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *); +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *); +void ts_query_cursor_set_match_limit(TSQueryCursor *, uint32_t); + /** * Set the range of bytes or (row, column) positions in which the query * will be executed. 
diff --git a/language.c b/language.c index 9ccf2bc3..d1319e6a 100644 --- a/language.c +++ b/language.c @@ -12,11 +12,7 @@ uint32_t ts_language_version(const TSLanguage *self) { } uint32_t ts_language_field_count(const TSLanguage *self) { - if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS) { - return self->field_count; - } else { - return 0; - } + return self->field_count; } void ts_language_table_entry( @@ -57,11 +53,7 @@ TSSymbol ts_language_public_symbol( TSSymbol symbol ) { if (symbol == ts_builtin_sym_error) return symbol; - if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) { - return self->public_symbol_map[symbol]; - } else { - return symbol; - } + return self->public_symbol_map[symbol]; } const char *ts_language_symbol_name( @@ -92,11 +84,7 @@ TSSymbol ts_language_symbol_for_name( if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue; const char *symbol_name = self->symbol_names[i]; if (!strncmp(symbol_name, string, length) && !symbol_name[length]) { - if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) { - return self->public_symbol_map[i]; - } else { - return i; - } + return self->public_symbol_map[i]; } } return 0; @@ -107,7 +95,7 @@ TSSymbolType ts_language_symbol_type( TSSymbol symbol ) { TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol); - if (metadata.named) { + if (metadata.named && metadata.visible) { return TSSymbolTypeRegular; } else if (metadata.visible) { return TSSymbolTypeAnonymous; diff --git a/language.h b/language.h index 984bd7ed..2afec9aa 100644 --- a/language.h +++ b/language.h @@ -9,11 +9,6 @@ extern "C" { #include "parser.h" #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) -#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING 11 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11 -#define TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT 12 -#define 
TREE_SITTER_LANGUAGE_VERSION_WITH_ALIAS_MAP 12 typedef struct { const TSParseAction *actions; @@ -59,16 +54,6 @@ static inline const TSParseAction *ts_language_actions( return entry.actions; } -static inline bool ts_language_has_actions( - const TSLanguage *self, - TSStateId state, - TSSymbol symbol -) { - TableEntry entry; - ts_language_table_entry(self, state, symbol, &entry); - return entry.action_count > 0; -} - static inline bool ts_language_has_reduce_action( const TSLanguage *self, TSStateId state, @@ -91,10 +76,7 @@ static inline uint16_t ts_language_lookup( TSStateId state, TSSymbol symbol ) { - if ( - self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES && - state >= self->large_state_count - ) { + if (state >= self->large_state_count) { uint32_t index = self->small_parse_table_map[state - self->large_state_count]; const uint16_t *data = &self->small_parse_table[index]; uint16_t group_count = *(data++); @@ -111,6 +93,14 @@ static inline uint16_t ts_language_lookup( } } +static inline bool ts_language_has_actions( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol +) { + return ts_language_lookup(self, state, symbol) != 0; +} + // Iterate over all of the symbols that are valid in the given state. // // For 'large' parse states, this just requires iterating through @@ -121,9 +111,7 @@ static inline LookaheadIterator ts_language_lookaheads( const TSLanguage *self, TSStateId state ) { - bool is_small_state = - self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES && - state >= self->large_state_count; + bool is_small_state = state >= self->large_state_count; const uint16_t *data; const uint16_t *group_end = NULL; uint16_t group_count = 0; @@ -203,7 +191,7 @@ static inline TSStateId ts_language_next_state( if (count > 0) { TSParseAction action = actions[count - 1]; if (action.type == TSParseActionTypeShift) { - return action.params.shift.extra ? state : action.params.shift.state; + return action.shift.extra ? 
state : action.shift.state; } } return 0; @@ -248,7 +236,7 @@ static inline void ts_language_field_map( const TSFieldMapEntry **start, const TSFieldMapEntry **end ) { - if (self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS || self->field_count == 0) { + if (self->field_count == 0) { *start = NULL; *end = NULL; return; @@ -268,8 +256,6 @@ static inline void ts_language_aliases_for_symbol( *start = &self->public_symbol_map[original_symbol]; *end = *start + 1; - if (self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_ALIAS_MAP) return; - unsigned i = 0; for (;;) { TSSymbol symbol = self->alias_map[i++]; diff --git a/lexer.c b/lexer.c index 08e90a8c..5d1965ad 100644 --- a/lexer.c +++ b/lexer.c @@ -102,6 +102,56 @@ static void ts_lexer__get_lookahead(Lexer *self) { } } +static void ts_lexer_goto(Lexer *self, Length position) { + self->current_position = position; + bool found_included_range = false; + + // Move to the first valid position at or after the given position. + for (unsigned i = 0; i < self->included_range_count; i++) { + TSRange *included_range = &self->included_ranges[i]; + if (included_range->end_byte > position.bytes) { + if (included_range->start_byte > position.bytes) { + self->current_position = (Length) { + .bytes = included_range->start_byte, + .extent = included_range->start_point, + }; + } + + self->current_included_range_index = i; + found_included_range = true; + break; + } + } + + if (found_included_range) { + // If the current position is outside of the current chunk of text, + // then clear out the current chunk of text. + if (self->chunk && ( + position.bytes < self->chunk_start || + position.bytes >= self->chunk_start + self->chunk_size + )) { + ts_lexer__clear_chunk(self); + } + + self->lookahead_size = 0; + self->data.lookahead = '\0'; + } + + // If the given position is beyond any of included ranges, move to the EOF + // state - past the end of the included ranges. 
+ else { + self->current_included_range_index = self->included_range_count; + TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1]; + self->current_position = (Length) { + .bytes = last_included_range->end_byte, + .extent = last_included_range->end_point, + }; + ts_lexer__clear_chunk(self); + self->lookahead_size = 1; + self->data.lookahead = '\0'; + } +} + // Advance to the next character in the source code, retrieving a new // chunk of source code if needed. static void ts_lexer__advance(TSLexer *_self, bool skip) { @@ -183,22 +233,8 @@ static void ts_lexer__mark_end(TSLexer *_self) { static uint32_t ts_lexer__get_column(TSLexer *_self) { Lexer *self = (Lexer *)_self; - uint32_t goal_byte = self->current_position.bytes; - - self->current_position.bytes -= self->current_position.extent.column; - self->current_position.extent.column = 0; - - if (self->current_position.bytes < self->chunk_start) { - ts_lexer__get_chunk(self); - } - - uint32_t result = 0; - while (self->current_position.bytes < goal_byte) { - ts_lexer__advance(&self->data, false); - result++; - } - - return result; + self->did_get_column = true; + return self->current_position.extent.column; } // Is the lexer at a boundary between two disjoint included ranges of @@ -247,56 +283,6 @@ void ts_lexer_delete(Lexer *self) { ts_free(self->included_ranges); } -static void ts_lexer_goto(Lexer *self, Length position) { - self->current_position = position; - bool found_included_range = false; - - // Move to the first valid position at or after the given position. 
- for (unsigned i = 0; i < self->included_range_count; i++) { - TSRange *included_range = &self->included_ranges[i]; - if (included_range->end_byte > position.bytes) { - if (included_range->start_byte > position.bytes) { - self->current_position = (Length) { - .bytes = included_range->start_byte, - .extent = included_range->start_point, - }; - } - - self->current_included_range_index = i; - found_included_range = true; - break; - } - } - - if (found_included_range) { - // If the current position is outside of the current chunk of text, - // then clear out the current chunk of text. - if (self->chunk && ( - position.bytes < self->chunk_start || - position.bytes >= self->chunk_start + self->chunk_size - )) { - ts_lexer__clear_chunk(self); - } - - self->lookahead_size = 0; - self->data.lookahead = '\0'; - } - - // If the given position is beyond any of included ranges, move to the EOF - // state - past the end of the included ranges. - else { - self->current_included_range_index = self->included_range_count; - TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1]; - self->current_position = (Length) { - .bytes = last_included_range->end_byte, - .extent = last_included_range->end_point, - }; - ts_lexer__clear_chunk(self); - self->lookahead_size = 1; - self->data.lookahead = '\0'; - } -} - void ts_lexer_set_input(Lexer *self, TSInput input) { self->input = input; ts_lexer__clear_chunk(self); @@ -315,6 +301,7 @@ void ts_lexer_start(Lexer *self) { self->token_start_position = self->current_position; self->token_end_position = LENGTH_UNDEFINED; self->data.result_symbol = 0; + self->did_get_column = false; if (!ts_lexer__eof(&self->data)) { if (!self->chunk_size) ts_lexer__get_chunk(self); if (!self->lookahead_size) ts_lexer__get_lookahead(self); diff --git a/lexer.h b/lexer.h index 053c7565..f79f6aa6 100644 --- a/lexer.h +++ b/lexer.h @@ -17,16 +17,17 @@ typedef struct { Length token_end_position; TSRange *included_ranges; - size_t 
included_range_count; - size_t current_included_range_index; - const char *chunk; + TSInput input; + TSLogger logger; + + uint32_t included_range_count; + uint32_t current_included_range_index; uint32_t chunk_start; uint32_t chunk_size; uint32_t lookahead_size; + bool did_get_column; - TSInput input; - TSLogger logger; char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE]; } Lexer; diff --git a/node.c b/node.c index 9ce0f0b3..156b9a18 100644 --- a/node.c +++ b/node.c @@ -150,9 +150,6 @@ static inline TSNode ts_node__child( while (ts_node_child_iterator_next(&iterator, &child)) { if (ts_node__is_relevant(child, include_anonymous)) { if (index == child_index) { - if (ts_node__is_relevant(self, true)) { - ts_tree_set_cached_parent(self.tree, &child, &self); - } return child; } index++; @@ -355,7 +352,6 @@ static inline TSNode ts_node__descendant_for_byte_range( node = child; if (ts_node__is_relevant(node, include_anonymous)) { - ts_tree_set_cached_parent(self.tree, &child, &last_visible_node); last_visible_node = node; } did_descend = true; @@ -395,7 +391,6 @@ static inline TSNode ts_node__descendant_for_point_range( node = child; if (ts_node__is_relevant(node, include_anonymous)) { - ts_tree_set_cached_parent(self.tree, &child, &last_visible_node); last_visible_node = node; } did_descend = true; @@ -464,10 +459,7 @@ bool ts_node_has_error(TSNode self) { } TSNode ts_node_parent(TSNode self) { - TSNode node = ts_tree_get_cached_parent(self.tree, &self); - if (node.id) return node; - - node = ts_tree_root_node(self.tree); + TSNode node = ts_tree_root_node(self.tree); uint32_t end_byte = ts_node_end_byte(self); if (node.id == self.id) return ts_node__null(); @@ -486,7 +478,6 @@ TSNode ts_node_parent(TSNode self) { if (iterator.position.bytes >= end_byte) { node = child; if (ts_node__is_relevant(child, true)) { - ts_tree_set_cached_parent(self.tree, &node, &last_visible_node); last_visible_node = node; } did_descend = true; @@ -561,17 +552,40 @@ TSNode 
ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { return child; } - // If the field refers to a hidden node, return its first visible - // child. - else { + // If the field refers to a hidden node with visible children, + // return the first visible child. + else if (ts_node_child_count(child) > 0 ) { return ts_node_child(child, 0); } + + // Otherwise, continue searching subsequent children. + else { + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } } } return ts_node__null(); } +const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { + const TSFieldMapEntry *field_map_start = NULL, *field_map_end = NULL; + ts_language_field_map( + self.tree->language, + ts_node__subtree(self).ptr->production_id, + &field_map_start, + &field_map_end + ); + + for (const TSFieldMapEntry *i = field_map_start; i < field_map_end; i++) { + if (i->child_index == child_index) { + return self.tree->language->field_names[i->field_id]; + } + } + return NULL; +} + TSNode ts_node_child_by_field_name( TSNode self, const char *name, diff --git a/parser.c b/parser.c index e8955682..186fa540 100644 --- a/parser.c +++ b/parser.c @@ -25,6 +25,36 @@ ts_parser__log(self); \ } +#define LOG_LOOKAHEAD(symbol_name, size) \ + if (self->lexer.logger.log || self->dot_graph_file) { \ + char *buf = self->lexer.debug_buffer; \ + const char *symbol = symbol_name; \ + int off = sprintf(buf, "lexed_lookahead sym:"); \ + for ( \ + int i = 0; \ + symbol[i] != '\0' \ + && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; \ + i++ \ + ) { \ + switch (symbol[i]) { \ + case '\t': buf[off++] = '\\'; buf[off++] = 't'; break; \ + case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break; \ + case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break; \ + case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break; \ + case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break; \ + case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break; \ + default: buf[off++] = symbol[i]; break; \ + } \ + } 
\ + snprintf( \ + buf + off, \ + TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off, \ + ", size:%u", \ + size \ + ); \ + ts_parser__log(self); \ + } + #define LOG_STACK() \ if (self->dot_graph_file) { \ ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \ @@ -373,6 +403,7 @@ static Subtree ts_parser__lex( bool found_external_token = false; bool error_mode = parse_state == ERROR_STATE; bool skipped_error = false; + bool called_get_column = false; int32_t first_error_character = 0; Length error_start_position = length_zero(); Length error_end_position = length_zero(); @@ -415,6 +446,7 @@ static Subtree ts_parser__lex( (!error_mode && ts_stack_has_advanced_since_error(self->stack, version)) )) { found_external_token = true; + called_get_column = self->lexer.did_get_column; break; } @@ -477,11 +509,9 @@ static Subtree ts_parser__lex( self->language ); - LOG( - "lexed_lookahead sym:%s, size:%u, character:'%c'", + LOG_LOOKAHEAD( SYM_NAME(ts_subtree_symbol(result)), - ts_subtree_total_size(result).bytes, - first_error_character + ts_subtree_total_size(result).bytes ); } else { if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) { @@ -518,6 +548,7 @@ static Subtree ts_parser__lex( lookahead_bytes, parse_state, found_external_token, + called_get_column, is_keyword, self->language ); @@ -534,8 +565,7 @@ static Subtree ts_parser__lex( ); } - LOG( - "lexed_lookahead sym:%s, size:%u", + LOG_LOOKAHEAD( SYM_NAME(ts_subtree_symbol(result)), ts_subtree_total_size(result).bytes ); @@ -985,15 +1015,15 @@ static bool ts_parser__do_all_potential_reductions( switch (action.type) { case TSParseActionTypeShift: case TSParseActionTypeRecover: - if (!action.params.shift.extra && !action.params.shift.repetition) has_shift_action = true; + if (!action.shift.extra && !action.shift.repetition) has_shift_action = true; break; case TSParseActionTypeReduce: - if (action.params.reduce.child_count > 0) + if (action.reduce.child_count > 0) 
ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){ - .symbol = action.params.reduce.symbol, - .count = action.params.reduce.child_count, - .dynamic_precedence = action.params.reduce.dynamic_precedence, - .production_id = action.params.reduce.production_id, + .symbol = action.reduce.symbol, + .count = action.reduce.child_count, + .dynamic_precedence = action.reduce.dynamic_precedence, + .production_id = action.reduce.production_id, }); break; default: @@ -1284,7 +1314,7 @@ static void ts_parser__recover( // be counted in error cost calculations. unsigned n; const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n); - if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].params.shift.extra) { + if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) { MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead); ts_subtree_set_extra(&mutable_lookahead); lookahead = ts_subtree_from_mut(mutable_lookahead); @@ -1414,17 +1444,13 @@ static bool ts_parser__advance( switch (action.type) { case TSParseActionTypeShift: { - if (action.params.shift.repetition) break; + if (action.shift.repetition) break; TSStateId next_state; - if (action.params.shift.extra) { - - // TODO: remove when TREE_SITTER_LANGUAGE_VERSION 9 is out. 
- if (state == ERROR_STATE) continue; - + if (action.shift.extra) { next_state = state; LOG("shift_extra"); } else { - next_state = action.params.shift.state; + next_state = action.shift.state; LOG("shift state:%u", next_state); } @@ -1433,7 +1459,7 @@ static bool ts_parser__advance( next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead)); } - ts_parser__shift(self, version, next_state, lookahead, action.params.shift.extra); + ts_parser__shift(self, version, next_state, lookahead, action.shift.extra); if (did_reuse) reusable_node_advance(&self->reusable_node); return true; } @@ -1441,10 +1467,10 @@ static bool ts_parser__advance( case TSParseActionTypeReduce: { bool is_fragile = table_entry.action_count > 1; bool end_of_non_terminal_extra = lookahead.ptr == NULL; - LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.reduce.symbol), action.params.reduce.child_count); + LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count); StackVersion reduction_version = ts_parser__reduce( - self, version, action.params.reduce.symbol, action.params.reduce.child_count, - action.params.reduce.dynamic_precedence, action.params.reduce.production_id, + self, version, action.reduce.symbol, action.reduce.child_count, + action.reduce.dynamic_precedence, action.reduce.production_id, is_fragile, end_of_non_terminal_extra ); if (reduction_version != STACK_VERSION_NONE) { diff --git a/parser.h b/parser.h index c5a788ff..cbbc7b4e 100644 --- a/parser.h +++ b/parser.h @@ -13,6 +13,8 @@ extern "C" { #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 +typedef uint16_t TSStateId; + #ifndef TREE_SITTER_API_H_ typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; @@ -30,12 +32,10 @@ typedef struct { uint16_t length; } TSFieldMapSlice; -typedef uint16_t TSStateId; - typedef struct { - bool visible : 1; - bool named : 1; - bool supertype: 1; + bool visible; + bool named; + bool supertype; } 
TSSymbolMetadata; typedef struct TSLexer TSLexer; @@ -57,21 +57,21 @@ typedef enum { TSParseActionTypeRecover, } TSParseActionType; -typedef struct { - union { - struct { - TSStateId state; - bool extra : 1; - bool repetition : 1; - } shift; - struct { - TSSymbol symbol; - int16_t dynamic_precedence; - uint8_t child_count; - uint8_t production_id; - } reduce; - } params; - TSParseActionType type : 4; +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; } TSParseAction; typedef struct { @@ -83,7 +83,7 @@ typedef union { TSParseAction action; struct { uint8_t count; - bool reusable : 1; + bool reusable; } entry; } TSParseActionEntry; @@ -93,13 +93,24 @@ struct TSLanguage { uint32_t alias_count; uint32_t token_count; uint32_t external_token_count; - const char **symbol_names; - const TSSymbolMetadata *symbol_metadata; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; const TSParseActionEntry *parse_actions; - const TSLexMode *lex_modes; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; const TSSymbol *alias_sequences; - uint16_t max_alias_sequence_length; + const TSLexMode *lex_modes; bool (*lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId); TSSymbol keyword_capture_token; @@ -112,16 +123,6 @@ struct TSLanguage { unsigned (*serialize)(void *, char *); void (*deserialize)(void *, const char *, unsigned); } external_scanner; - 
uint32_t field_count; - const TSFieldMapSlice *field_map_slices; - const TSFieldMapEntry *field_map_entries; - const char **field_names; - uint32_t large_state_count; - const uint16_t *small_parse_table; - const uint32_t *small_parse_table_map; - const TSSymbol *public_symbol_map; - const uint16_t *alias_map; - uint32_t state_count; }; /* @@ -170,66 +171,50 @@ struct TSLanguage { #define ACTIONS(id) id -#define SHIFT(state_value) \ - { \ - { \ - .params = { \ - .shift = { \ - .state = state_value \ - } \ - }, \ - .type = TSParseActionTypeShift \ - } \ - } +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value \ + } \ + }} #define SHIFT_REPEAT(state_value) \ - { \ - { \ - .params = { \ - .shift = { \ - .state = state_value, \ - .repetition = true \ - } \ - }, \ - .type = TSParseActionTypeShift \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = state_value, \ + .repetition = true \ } \ - } - -#define RECOVER() \ - { \ - { .type = TSParseActionTypeRecover } \ - } + }} #define SHIFT_EXTRA() \ - { \ - { \ - .params = { \ - .shift = { \ - .extra = true \ - } \ - }, \ - .type = TSParseActionTypeShift \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ } \ - } + }} #define REDUCE(symbol_val, child_count_val, ...) 
\ - { \ - { \ - .params = { \ - .reduce = { \ - .symbol = symbol_val, \ - .child_count = child_count_val, \ - __VA_ARGS__ \ - }, \ - }, \ - .type = TSParseActionTypeReduce \ - } \ - } - -#define ACCEPT_INPUT() \ - { \ - { .type = TSParseActionTypeAccept } \ - } + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_val, \ + .child_count = child_count_val, \ + __VA_ARGS__ \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} #ifdef __cplusplus } diff --git a/point.h b/point.h index 5581c7ec..57039000 100644 --- a/point.h +++ b/point.h @@ -33,6 +33,10 @@ static inline bool point_lt(TSPoint a, TSPoint b) { return (a.row < b.row) || (a.row == b.row && a.column < b.column); } +static inline bool point_gt(TSPoint a, TSPoint b) { + return (a.row > b.row) || (a.row == b.row && a.column > b.column); +} + static inline bool point_eq(TSPoint a, TSPoint b) { return a.row == b.row && a.column == b.column; } diff --git a/query.c b/query.c index 68894262..bae2e4df 100644 --- a/query.c +++ b/query.c @@ -1,7 +1,6 @@ #include "api.h" #include "./alloc.h" #include "./array.h" -#include "./bits.h" #include "./language.h" #include "./point.h" #include "./tree_cursor.h" @@ -12,10 +11,10 @@ // #define LOG(...) fprintf(stderr, __VA_ARGS__) #define LOG(...) -#define MAX_CAPTURE_LIST_COUNT 32 #define MAX_STEP_CAPTURE_COUNT 3 #define MAX_STATE_PREDECESSOR_COUNT 100 #define MAX_ANALYSIS_STATE_DEPTH 12 +#define MAX_NEGATED_FIELD_COUNT 8 /* * Stream - A sequence of unicode characters derived from a UTF8 string. @@ -31,9 +30,9 @@ typedef struct { /* * QueryStep - A step in the process of matching a query. Each node within - * a query S-expression maps to one of these steps. An entire pattern is - * represented as a sequence of these steps. Fields: - * + * a query S-expression corresponds to one of these steps. 
An entire pattern + * is represented as a sequence of these steps. The basic properties of a + * node are represented by these fields: * - `symbol` - The grammar symbol to match. A zero value represents the * wildcard symbol, '_'. * - `field` - The field name to match. A zero value means that a field name @@ -42,16 +41,38 @@ typedef struct { * associated with this node in the pattern, terminated by a `NONE` value. * - `depth` - The depth where this node occurs in the pattern. The root node * of the pattern has depth zero. - * - `alternative_index` - The index of a different query step that serves as - * an alternative to this step. + * + * For simple patterns, steps are matched in sequential order. But in order to + * handle alternative/repeated/optional sub-patterns, query steps are not always + * structured as a linear sequence; they sometimes need to split and merge. This + * is done using the following fields: + * - `alternative_index` - The index of a different query step that serves as + * an alternative to this step. A `NONE` value represents no alternative. + * When a query state reaches a step with an alternative index, the state + * is duplicated, with one copy remaining at the original step, and one copy + * moving to the alternative step. The alternative may have its own alternative + * step, so this splitting is an iterative process. + * - `is_dead_end` - Indication that this state cannot be passed directly, and + * exists only in order to redirect to an alternative index, with no splitting. + * - `is_pass_through` - Indication that state has no matching logic of its own, + * and exists only to split a state. One copy of the state advances immediately + * to the next step, and one moves to the alternative step. 
+ * + * Steps have some additional fields in order to handle the `.` (or "anchor") operator, + * which forbids additional child nodes: + * - `is_immediate` - Indication that the node matching this step cannot be preceded + * by other sibling nodes that weren't specified in the pattern. + * - `is_last_child` - Indicates that the node matching this step cannot have any + * subsequent named siblings. */ typedef struct { TSSymbol symbol; TSSymbol supertype_symbol; TSFieldId field; uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; - uint16_t alternative_index; uint16_t depth; + uint16_t alternative_index; + uint16_t negated_field_list_id; bool contains_captures: 1; bool is_immediate: 1; bool is_last_child: 1; @@ -81,16 +102,20 @@ typedef struct { } SymbolTable; /* - * PatternEntry - Information about the starting point for matching a - * particular pattern, consisting of the index of the pattern within the query, - * and the index of the patter's first step in the shared `steps` array. These - * entries are stored in a 'pattern map' - a sorted array that makes it - * possible to efficiently lookup patterns based on the symbol for their first - * step. + * PatternEntry - Information about the starting point for matching a particular + * pattern. These entries are stored in a 'pattern map' - a sorted array that + * makes it possible to efficiently lookup patterns based on the symbol for their + * first step. The entry consists of the following fields: + * - `pattern_index` - the index of the pattern within the query + * - `step_index` - the index of the pattern's first step in the shared `steps` array + * - `is_rooted` - whether or not the pattern has a single root node. This property + * affects decisions about whether or not to start the pattern for nodes outside + * of a QueryCursor's range restriction. 
*/ typedef struct { uint16_t step_index; uint16_t pattern_index; + bool is_rooted; } PatternEntry; typedef struct { @@ -126,14 +151,14 @@ typedef struct { * other states that have the same captures as this state, but are at * different steps in their pattern. This means that in order to obey the * 'longest-match' rule, this state should not be returned as a match until - * it is clear that there can be no longer match. + * it is clear that there can be no other alternative match with more captures. */ typedef struct { uint32_t id; + uint32_t capture_list_id; uint16_t start_depth; uint16_t step_index; uint16_t pattern_index; - uint16_t capture_list_id; uint16_t consumed_capture_count: 12; bool seeking_immediate_match: 1; bool has_in_progress_alternatives: 1; @@ -144,15 +169,23 @@ typedef struct { typedef Array(TSQueryCapture) CaptureList; /* - * CaptureListPool - A collection of *lists* of captures. Each QueryState - * needs to maintain its own list of captures. To avoid repeated allocations, - * the reuses a fixed set of capture lists, and keeps track of which ones - * are currently in use. + * CaptureListPool - A collection of *lists* of captures. Each query state needs + * to maintain its own list of captures. To avoid repeated allocations, this struct + * maintains a fixed set of capture lists, and keeps track of which ones are + * currently in use by a query state. */ typedef struct { - CaptureList list[MAX_CAPTURE_LIST_COUNT]; + Array(CaptureList) list; CaptureList empty_list; - uint32_t usage_map; + // The maximum number of capture lists that we are allowed to allocate. We + // never allow `list` to allocate more entries than this, dropping pending + // matches if needed to stay under the limit. + uint32_t max_capture_list_count; + // The number of capture lists allocated in `list` that are not currently in + // use. We reuse those existing-but-unused capture lists before trying to + // allocate any new ones. 
We use an invalid value (UINT32_MAX) for a capture + // list's length to indicate that it's not in use. + uint32_t free_capture_list_count; } CaptureListPool; /* @@ -196,6 +229,8 @@ typedef struct { /* * StatePredecessorMap - A map that stores the predecessors of each parse state. + * This is used during query analysis to determine which parse states can lead + * to which reduce actions. */ typedef struct { TSStateId *contents; @@ -214,10 +249,10 @@ struct TSQuery { Array(TSQueryPredicateStep) predicate_steps; Array(QueryPattern) patterns; Array(StepOffset) step_offsets; + Array(TSFieldId) negated_fields; Array(char) string_buffer; const TSLanguage *language; uint16_t wildcard_root_pattern_count; - TSSymbol *symbol_map; }; /* @@ -232,11 +267,12 @@ struct TSQueryCursor { uint32_t depth; uint32_t start_byte; uint32_t end_byte; - uint32_t next_state_id; TSPoint start_point; TSPoint end_point; + uint32_t next_state_id; bool ascending; bool halted; + bool did_exceed_match_limit; }; static const TSQueryError PARENT_DONE = -1; @@ -331,54 +367,72 @@ static uint32_t stream_offset(Stream *self) { static CaptureListPool capture_list_pool_new(void) { return (CaptureListPool) { + .list = array_new(), .empty_list = array_new(), - .usage_map = UINT32_MAX, + .max_capture_list_count = UINT32_MAX, + .free_capture_list_count = 0, }; } static void capture_list_pool_reset(CaptureListPool *self) { - self->usage_map = UINT32_MAX; - for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) { - array_clear(&self->list[i]); + for (uint16_t i = 0; i < self->list.size; i++) { + // This invalid size means that the list is not in use. 
+ self->list.contents[i].size = UINT32_MAX; } + self->free_capture_list_count = self->list.size; } static void capture_list_pool_delete(CaptureListPool *self) { - for (unsigned i = 0; i < MAX_CAPTURE_LIST_COUNT; i++) { - array_delete(&self->list[i]); + for (uint16_t i = 0; i < self->list.size; i++) { + array_delete(&self->list.contents[i]); } + array_delete(&self->list); } static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) { - if (id >= MAX_CAPTURE_LIST_COUNT) return &self->empty_list; - return &self->list[id]; + if (id >= self->list.size) return &self->empty_list; + return &self->list.contents[id]; } static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) { - assert(id < MAX_CAPTURE_LIST_COUNT); - return &self->list[id]; + assert(id < self->list.size); + return &self->list.contents[id]; } static bool capture_list_pool_is_empty(const CaptureListPool *self) { - return self->usage_map == 0; + // The capture list pool is empty if all allocated lists are in use, and we + // have reached the maximum allowed number of allocated lists. + return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count; } static uint16_t capture_list_pool_acquire(CaptureListPool *self) { - // In the usage_map bitmask, ones represent free lists, and zeros represent - // lists that are in use. A free list id can quickly be found by counting - // the leading zeros in the usage map. An id of zero corresponds to the - // highest-order bit in the bitmask. - uint16_t id = count_leading_zeros(self->usage_map); - if (id >= MAX_CAPTURE_LIST_COUNT) return NONE; - self->usage_map &= ~bitmask_for_index(id); - array_clear(&self->list[id]); - return id; + // First see if any already allocated capture list is currently unused. 
+ if (self->free_capture_list_count > 0) { + for (uint16_t i = 0; i < self->list.size; i++) { + if (self->list.contents[i].size == UINT32_MAX) { + array_clear(&self->list.contents[i]); + self->free_capture_list_count--; + return i; + } + } + } + + // Otherwise allocate and initialize a new capture list, as long as that + // doesn't put us over the requested maximum. + uint32_t i = self->list.size; + if (i >= self->max_capture_list_count) { + return NONE; + } + CaptureList list; + array_init(&list); + array_push(&self->list, list); + return i; } static void capture_list_pool_release(CaptureListPool *self, uint16_t id) { - if (id >= MAX_CAPTURE_LIST_COUNT) return; - array_clear(&self->list[id]); - self->usage_map |= bitmask_for_index(id); + if (id >= self->list.size) return; + self->list.contents[id].size = UINT32_MAX; + self->free_capture_list_count++; } /************** @@ -455,6 +509,7 @@ static QueryStep query_step__new( .field = 0, .capture_ids = {NONE, NONE, NONE}, .alternative_index = NONE, + .negated_field_list_id = 0, .contains_captures = false, .is_last_child = false, .is_pass_through = false, @@ -664,8 +719,7 @@ static inline bool ts_query__pattern_map_search( static inline void ts_query__pattern_map_insert( TSQuery *self, TSSymbol symbol, - uint32_t start_step_index, - uint32_t pattern_index + PatternEntry new_entry ) { uint32_t index; ts_query__pattern_map_search(self, symbol, &index); @@ -678,7 +732,7 @@ static inline void ts_query__pattern_map_insert( PatternEntry *entry = &self->pattern_map.contents[index]; if ( self->steps.contents[entry->step_index].symbol == symbol && - entry->pattern_index < pattern_index + entry->pattern_index < new_entry.pattern_index ) { index++; } else { @@ -686,10 +740,7 @@ static inline void ts_query__pattern_map_insert( } } - array_insert(&self->pattern_map, index, ((PatternEntry) { - .step_index = start_step_index, - .pattern_index = pattern_index, - })); + array_insert(&self->pattern_map, index, new_entry); } static bool 
ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { @@ -755,7 +806,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { const TSSymbol *aliases, *aliases_end; ts_language_aliases_for_symbol( self->language, - action->params.reduce.symbol, + action->reduce.symbol, &aliases, &aliases_end ); @@ -772,20 +823,22 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) { array_push(&subgraph->nodes, ((AnalysisSubgraphNode) { .state = state, - .production_id = action->params.reduce.production_id, - .child_index = action->params.reduce.child_count, + .production_id = action->reduce.production_id, + .child_index = action->reduce.child_count, .done = true, })); } } } - } else if (action->type == TSParseActionTypeShift && !action->params.shift.extra) { - TSStateId next_state = action->params.shift.state; + } else if (action->type == TSParseActionTypeShift && !action->shift.extra) { + TSStateId next_state = action->shift.state; state_predecessor_map_add(&predecessor_map, next_state, state); } } - } else if (lookahead_iterator.next_state != 0 && lookahead_iterator.next_state != state) { - state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); + } else if (lookahead_iterator.next_state != 0) { + if (lookahead_iterator.next_state != state) { + state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state); + } const TSSymbol *aliases, *aliases_end; ts_language_aliases_for_symbol( self->language, @@ -880,7 +933,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // For each non-terminal pattern, determine if the pattern can successfully match, // and identify all of the possible children within the pattern where matching could fail. 
- bool result = true; + bool all_patterns_are_valid = true; AnalysisStateSet states = array_new(); AnalysisStateSet next_states = array_new(); AnalysisStateSet deeper_states = array_new(); @@ -901,7 +954,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &i, &exists); assert(exists); *error_offset = self->step_offsets.contents[i].byte_offset; - result = false; + all_patterns_are_valid = false; break; } @@ -959,6 +1012,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } #endif + // If no further progress can be made within the current recursion depth limit, then + // bump the depth limit by one, and continue to process the states that exceeded the + // limit. But only allow this if progress has been made since the last time the depth + // limit was increased. if (states.size == 0) { if (deeper_states.size > 0 && final_step_indices.size > prev_final_step_count) { #ifdef DEBUG_ANALYZE_QUERY @@ -1019,12 +1076,12 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { TSStateId next_parse_state; if (lookahead_iterator.action_count) { const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1]; - if (action->type == TSParseActionTypeShift && !action->params.shift.extra) { - next_parse_state = action->params.shift.state; + if (action->type == TSParseActionTypeShift) { + next_parse_state = action->shift.extra ? parse_state : action->shift.state; } else { continue; } - } else if (lookahead_iterator.next_state != 0 && lookahead_iterator.next_state != parse_state) { + } else if (lookahead_iterator.next_state != 0) { next_parse_state = lookahead_iterator.next_state; } else { continue; @@ -1064,6 +1121,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } } + // Create a new state that has advanced past this hypothetical subtree.
AnalysisState next_state = *state; analysis_state__top(&next_state)->child_index++; analysis_state__top(&next_state)->parse_state = successor.state; @@ -1127,11 +1185,23 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { next_step->depth <= parent_depth + 1 ) break; } + } else if (next_parse_state == parse_state) { + continue; } for (;;) { - // If this state can make further progress, then add it to the states for the next iteration. - // Otherwise, record the fact that matching can fail at this step of the pattern. + // Skip pass-through states. Although these states have alternatives, they are only + // used to implement repetitions, and query analysis does not need to process + // repetitions in order to determine whether steps are possible and definite. + if (next_step->is_pass_through) { + next_state.step_index++; + next_step++; + continue; + } + + // If the pattern is finished or hypothetical parent node is complete, then + // record that matching can terminate at this step of the pattern. Otherwise, + // add this state to the list of states to process on the next iteration. if (!next_step->is_dead_end) { bool did_finish_pattern = self->steps.contents[next_state.step_index].depth != parent_depth + 1; if (did_finish_pattern) can_finish_pattern = true; @@ -1142,8 +1212,10 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { } } - // If the state has advanced to a step with an alternative step, then add another state at - // that alternative step to the next iteration. + // If the state has advanced to a step with an alternative step, then add another state + // at that alternative step. This process is simpler than the process of actually matching a + // pattern during query execution, because for the purposes of query analysis, there is no + // need to process repetitions.
if ( does_match && next_step->alternative_index != NONE && @@ -1193,14 +1265,14 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { // If this pattern cannot match, store the pattern index so that it can be // returned to the caller. - if (result && !can_finish_pattern && !did_exceed_max_depth) { + if (all_patterns_are_valid && !can_finish_pattern && !did_exceed_max_depth) { assert(final_step_indices.size > 0); uint16_t impossible_step_index = *array_back(&final_step_indices); uint32_t i, exists; array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &i, &exists); - assert(exists); + if (i >= self->step_offsets.size) i = self->step_offsets.size - 1; *error_offset = self->step_offsets.contents[i].byte_offset; - result = false; + all_patterns_are_valid = false; break; } } @@ -1313,7 +1385,7 @@ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) { array_delete(&predicate_capture_ids); state_predecessor_map_delete(&predecessor_map); - return result; + return all_patterns_are_valid; } static void ts_query__finalize_steps(TSQuery *self) { @@ -1333,6 +1405,58 @@ static void ts_query__finalize_steps(TSQuery *self) { } } +static void ts_query__add_negated_fields( + TSQuery *self, + uint16_t step_index, + TSFieldId *field_ids, + uint16_t field_count +) { + QueryStep *step = &self->steps.contents[step_index]; + + // The negated field array stores a list of field lists, separated by zeros. + // Try to find the start index of an existing list that matches this new list. + bool failed_match = false; + unsigned match_count = 0; + unsigned start_i = 0; + for (unsigned i = 0; i < self->negated_fields.size; i++) { + TSFieldId existing_field_id = self->negated_fields.contents[i]; + + // At each zero value, terminate the match attempt. If we've exactly + // matched the new field list, then reuse this index. Otherwise, + // start over the matching process. 
+ if (existing_field_id == 0) { + if (match_count == field_count) { + step->negated_field_list_id = start_i; + return; + } else { + start_i = i + 1; + match_count = 0; + failed_match = false; + } + } + + // If the existing list matches our new list so far, then advance + // to the next element of the new list. + else if ( + match_count < field_count && + existing_field_id == field_ids[match_count] && + !failed_match + ) { + match_count++; + } + + // Otherwise, this existing list has failed to match. + else { + match_count = 0; + failed_match = true; + } + } + + step->negated_field_list_id = self->negated_fields.size; + array_extend(&self->negated_fields, field_count, field_ids); + array_push(&self->negated_fields, 0); +} + static TSQueryError ts_query__parse_string_literal( TSQuery *self, Stream *stream @@ -1535,6 +1659,7 @@ static TSQueryError ts_query__parse_pattern( stream_advance(stream); break; } else if (e) { + if (e == PARENT_DONE) e = TSQueryErrorSyntax; array_delete(&branch_step_indices); return e; } @@ -1681,14 +1806,47 @@ static TSQueryError ts_query__parse_pattern( // Parse the child patterns bool child_is_immediate = false; - uint16_t child_start_step_index = self->steps.size; + uint16_t last_child_step_index = 0; + uint16_t negated_field_count = 0; + TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT]; for (;;) { + // Parse a negated field assertion + if (stream->next == '!') { + stream_advance(stream); + stream_skip_whitespace(stream); + if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax; + const char *field_name = stream->input; + stream_scan_identifier(stream); + uint32_t length = stream->input - field_name; + stream_skip_whitespace(stream); + + TSFieldId field_id = ts_language_field_id_for_name( + self->language, + field_name, + length + ); + if (!field_id) { + stream->input = field_name; + return TSQueryErrorField; + } + + // Keep the field ids sorted. 
+ if (negated_field_count < MAX_NEGATED_FIELD_COUNT) { + negated_field_ids[negated_field_count] = field_id; + negated_field_count++; + } + + continue; + } + + // Parse a sibling anchor if (stream->next == '.') { child_is_immediate = true; stream_advance(stream); stream_skip_whitespace(stream); } + uint16_t step_index = self->steps.size; TSQueryError e = ts_query__parse_pattern( self, stream, @@ -1697,14 +1855,28 @@ static TSQueryError ts_query__parse_pattern( ); if (e == PARENT_DONE && stream->next == ')') { if (child_is_immediate) { - self->steps.contents[child_start_step_index].is_last_child = true; + if (last_child_step_index == 0) { + return TSQueryErrorSyntax; + } + self->steps.contents[last_child_step_index].is_last_child = true; } + + if (negated_field_count) { + ts_query__add_negated_fields( + self, + starting_step_index, + negated_field_ids, + negated_field_count + ); + } + stream_advance(stream); break; } else if (e) { return e; } + last_child_step_index = step_index; child_is_immediate = false; } } @@ -1806,8 +1978,6 @@ static TSQueryError ts_query__parse_pattern( // Parse suffixes modifiers for this pattern for (;;) { - QueryStep *step = &self->steps.contents[starting_step_index]; - // Parse the one-or-more operator. 
if (stream->next == '+') { stream_advance(stream); @@ -1831,6 +2001,7 @@ static TSQueryError ts_query__parse_pattern( repeat_step.alternative_is_immediate = true; array_push(&self->steps, repeat_step); + QueryStep *step = &self->steps.contents[starting_step_index]; while (step->alternative_index != NONE) { step = &self->steps.contents[step->alternative_index]; } @@ -1842,6 +2013,7 @@ static TSQueryError ts_query__parse_pattern( stream_advance(stream); stream_skip_whitespace(stream); + QueryStep *step = &self->steps.contents[starting_step_index]; while (step->alternative_index != NONE) { step = &self->steps.contents[step->alternative_index]; } @@ -1866,6 +2038,7 @@ static TSQueryError ts_query__parse_pattern( uint32_t step_index = starting_step_index; for (;;) { + QueryStep *step = &self->steps.contents[step_index]; query_step__add_capture(step, capture_id); if ( step->alternative_index != NONE && @@ -1896,33 +2069,6 @@ TSQuery *ts_query_new( uint32_t *error_offset, TSQueryError *error_type ) { - TSSymbol *symbol_map; - if (ts_language_version(language) >= TREE_SITTER_LANGUAGE_VERSION_WITH_SYMBOL_DEDUPING) { - symbol_map = NULL; - } else { - // Work around the fact that multiple symbols can currently be - // associated with the same name, due to "simple aliases". - // In the next language ABI version, this map will be contained - // in the language's `public_symbol_map` field. 
- uint32_t symbol_count = ts_language_symbol_count(language); - symbol_map = ts_malloc(sizeof(TSSymbol) * symbol_count); - for (unsigned i = 0; i < symbol_count; i++) { - const char *name = ts_language_symbol_name(language, i); - const TSSymbolType symbol_type = ts_language_symbol_type(language, i); - - symbol_map[i] = i; - - for (unsigned j = 0; j < i; j++) { - if (ts_language_symbol_type(language, j) == symbol_type) { - if (!strcmp(name, ts_language_symbol_name(language, j))) { - symbol_map[i] = j; - break; - } - } - } - } - } - TSQuery *self = ts_malloc(sizeof(TSQuery)); *self = (TSQuery) { .steps = array_new(), @@ -1933,11 +2079,13 @@ TSQuery *ts_query_new( .patterns = array_new(), .step_offsets = array_new(), .string_buffer = array_new(), - .symbol_map = symbol_map, + .negated_fields = array_new(), .wildcard_root_pattern_count = 0, .language = language, }; + array_push(&self->negated_fields, 0); + // Parse all of the S-expressions in the given string. Stream stream = stream_new(source, source_len); stream_skip_whitespace(&stream); @@ -1984,7 +2132,24 @@ TSQuery *ts_query_new( } } - ts_query__pattern_map_insert(self, step->symbol, start_step_index, pattern_index); + // Determine whether the pattern has a single root node. This affects + // decisions about whether or not to start matching the pattern when + // a query cursor has a range restriction. 
+ bool is_rooted = true; + uint32_t start_depth = step->depth; + for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) { + QueryStep *step = &self->steps.contents[step_index]; + if (step->depth == start_depth) { + is_rooted = false; + break; + } + } + + ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) { + .step_index = start_step_index, + .pattern_index = pattern_index, + .is_rooted = is_rooted + }); if (step->symbol == WILDCARD_SYMBOL) { self->wildcard_root_pattern_count++; } @@ -2003,12 +2168,10 @@ TSQuery *ts_query_new( } } - if (self->language->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_STATE_COUNT) { - if (!ts_query__analyze_patterns(self, error_offset)) { - *error_type = TSQueryErrorStructure; - ts_query_delete(self); - return NULL; - } + if (!ts_query__analyze_patterns(self, error_offset)) { + *error_type = TSQueryErrorStructure; + ts_query_delete(self); + return NULL; } ts_query__finalize_steps(self); @@ -2024,9 +2187,9 @@ void ts_query_delete(TSQuery *self) { array_delete(&self->patterns); array_delete(&self->step_offsets); array_delete(&self->string_buffer); + array_delete(&self->negated_fields); symbol_table_delete(&self->captures); symbol_table_delete(&self->predicate_values); - ts_free(self->symbol_map); ts_free(self); } } @@ -2135,6 +2298,7 @@ void ts_query_disable_pattern( TSQueryCursor *ts_query_cursor_new(void) { TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor)); *self = (TSQueryCursor) { + .did_exceed_match_limit = false, .ascending = false, .halted = false, .states = array_new(), @@ -2158,6 +2322,18 @@ void ts_query_cursor_delete(TSQueryCursor *self) { ts_free(self); } +bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) { + return self->did_exceed_match_limit; +} + +uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) { + return self->capture_list_pool.max_capture_list_count; +} + +void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) 
{ + self->capture_list_pool.max_capture_list_count = limit; +} + void ts_query_cursor_exec( TSQueryCursor *self, const TSQuery *query, @@ -2172,6 +2348,7 @@ void ts_query_cursor_exec( self->ascending = false; self->halted = false; self->query = query; + self->did_exceed_match_limit = false; } void ts_query_cursor_set_byte_range( @@ -2180,7 +2357,6 @@ void ts_query_cursor_set_byte_range( uint32_t end_byte ) { if (end_byte == 0) { - start_byte = 0; end_byte = UINT32_MAX; } self->start_byte = start_byte; @@ -2193,7 +2369,6 @@ void ts_query_cursor_set_point_range( TSPoint end_point ) { if (end_point.row == 0 && end_point.column == 0) { - start_point = POINT_ZERO; end_point = POINT_MAX; } self->start_point = start_point; @@ -2214,31 +2389,44 @@ static bool ts_query_cursor__first_in_progress_capture( *byte_offset = UINT32_MAX; *pattern_index = UINT32_MAX; for (unsigned i = 0; i < self->states.size; i++) { - const QueryState *state = &self->states.contents[i]; + QueryState *state = &self->states.contents[i]; if (state->dead) continue; + const CaptureList *captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); - if (captures->size > state->consumed_capture_count) { - uint32_t capture_byte = ts_node_start_byte(captures->contents[state->consumed_capture_count].node); - if ( - !result || - capture_byte < *byte_offset || - (capture_byte == *byte_offset && state->pattern_index < *pattern_index) - ) { - QueryStep *step = &self->query->steps.contents[state->step_index]; - if (is_definite) { - *is_definite = step->is_definite; - } else if (step->is_definite) { - continue; - } + if (state->consumed_capture_count >= captures->size) { + continue; + } + + TSNode node = captures->contents[state->consumed_capture_count].node; + if ( + ts_node_end_byte(node) <= self->start_byte || + point_lte(ts_node_end_point(node), self->start_point) + ) { + state->consumed_capture_count++; + i--; + continue; + } - result = true; - *state_index = i; - *byte_offset = 
capture_byte; - *pattern_index = state->pattern_index; + uint32_t node_start_byte = ts_node_start_byte(node); + if ( + !result || + node_start_byte < *byte_offset || + (node_start_byte == *byte_offset && state->pattern_index < *pattern_index) + ) { + QueryStep *step = &self->query->steps.contents[state->step_index]; + if (is_definite) { + *is_definite = step->is_definite; + } else if (step->is_definite) { + continue; } + + result = true; + *state_index = i; + *byte_offset = node_start_byte; + *pattern_index = state->pattern_index; } } return result; @@ -2391,6 +2579,7 @@ static CaptureList *ts_query_cursor__prepare_to_capture( // state has captured the earliest node in the document, and steal its // capture list. if (state->capture_list_id == NONE) { + self->did_exceed_match_limit = true; uint32_t state_index, byte_offset, pattern_index; if ( ts_query_cursor__first_in_progress_capture( @@ -2512,7 +2701,7 @@ static inline bool ts_query_cursor__advance( } else if (ts_tree_cursor_goto_parent(&self->cursor)) { self->depth--; } else { - LOG("halt at root"); + LOG("halt at root\n"); self->halted = true; } @@ -2560,34 +2749,11 @@ static inline bool ts_query_cursor__advance( // Enter a new node. else { - // If this node is before the selected range, then avoid descending into it. - TSNode node = ts_tree_cursor_current_node(&self->cursor); - if ( - ts_node_end_byte(node) <= self->start_byte || - point_lte(ts_node_end_point(node), self->start_point) - ) { - if (!ts_tree_cursor_goto_next_sibling(&self->cursor)) { - self->ascending = true; - } - continue; - } - - // If this node is after the selected range, then stop walking. - if ( - self->end_byte <= ts_node_start_byte(node) || - point_lte(self->end_point, ts_node_start_point(node)) - ) { - LOG("halt at end of range"); - self->halted = true; - continue; - } - // Get the properties of the current node. 
+ TSNode node = ts_tree_cursor_current_node(&self->cursor); + TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor); TSSymbol symbol = ts_node_symbol(node); bool is_named = ts_node_is_named(node); - if (symbol != ts_builtin_sym_error && self->query->symbol_map) { - symbol = self->query->symbol_map[symbol]; - } bool has_later_siblings; bool has_later_named_siblings; bool can_have_later_siblings_with_this_field; @@ -2612,28 +2778,51 @@ static inline bool ts_query_cursor__advance( self->finished_states.size ); - // Add new states for any patterns whose root node is a wildcard. + bool node_intersects_range = ( + ts_node_end_byte(node) > self->start_byte && + ts_node_start_byte(node) < self->end_byte && + point_gt(ts_node_end_point(node), self->start_point) && + point_lt(ts_node_start_point(node), self->end_point) + ); + + bool parent_intersects_range = ts_node_is_null(parent_node) || ( + ts_node_end_byte(parent_node) > self->start_byte && + ts_node_start_byte(parent_node) < self->end_byte && + point_gt(ts_node_end_point(parent_node), self->start_point) && + point_lt(ts_node_start_point(parent_node), self->end_point) + ); + + // Add new states for any patterns whose root node is a wildcard. for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) { PatternEntry *pattern = &self->query->pattern_map.contents[i]; - QueryStep *step = &self->query->steps.contents[pattern->step_index]; // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. 
- if (step->field && field_id != step->field) continue; - if (step->supertype_symbol && !supertype_count) continue; - ts_query_cursor__add_state(self, pattern); + QueryStep *step = &self->query->steps.contents[pattern->step_index]; + if ( + (node_intersects_range || (!pattern->is_rooted && parent_intersects_range)) && + (!step->field || field_id == step->field) && + (!step->supertype_symbol || supertype_count > 0) + ) { + ts_query_cursor__add_state(self, pattern); + } } // Add new states for any patterns whose root node matches this node. unsigned i; if (ts_query__pattern_map_search(self->query, symbol, &i)) { PatternEntry *pattern = &self->query->pattern_map.contents[i]; + QueryStep *step = &self->query->steps.contents[pattern->step_index]; do { // If this node matches the first step of the pattern, then add a new // state at the start of this pattern. - if (step->field && field_id != step->field) continue; - ts_query_cursor__add_state(self, pattern); + if ( + (node_intersects_range || (!pattern->is_rooted && parent_intersects_range)) && + (!step->field || field_id == step->field) + ) { + ts_query_cursor__add_state(self, pattern); + } // Advance to the next pattern whose root node matches this node. i++; @@ -2688,6 +2877,22 @@ static inline bool ts_query_cursor__advance( } } + if (step->negated_field_list_id) { + TSFieldId *negated_field_ids = &self->query->negated_fields.contents[step->negated_field_list_id]; + for (;;) { + TSFieldId negated_field_id = *negated_field_ids; + if (negated_field_id) { + negated_field_ids++; + if (ts_node_child_by_field_id(node, negated_field_id).id) { + node_does_match = false; + break; + } + } else { + break; + } + } + } + // Remove states immediately if it is ever clear that they cannot match. if (!node_does_match) { if (!later_sibling_can_match) { @@ -2711,7 +2916,7 @@ static inline bool ts_query_cursor__advance( // parent, then this query state cannot simply be updated in place. 
It must be // split into two states: one that matches this node, and one which skips over // this node, to preserve the possibility of matching later siblings. - if (later_sibling_can_match && step->contains_captures) { + if (later_sibling_can_match && (step->contains_captures || !step->is_definite)) { if (ts_query_cursor__copy_state(self, &state)) { LOG( " split state for capture. pattern:%u, step:%u\n", @@ -2910,8 +3115,32 @@ static inline bool ts_query_cursor__advance( } } - // Continue descending if possible. - if (ts_tree_cursor_goto_first_child(&self->cursor)) { + // When the current node ends prior to the desired start offset, + // only descend for the purpose of continuing in-progress matches. + bool should_descend = node_intersects_range; + if (!should_descend) { + for (unsigned i = 0; i < self->states.size; i++) { + QueryState *state = &self->states.contents[i];; + QueryStep *next_step = &self->query->steps.contents[state->step_index]; + if ( + next_step->depth != PATTERN_DONE_MARKER && + state->start_depth + next_step->depth > self->depth + ) { + should_descend = true; + break; + } + } + } + + if (!should_descend) { + LOG( + " not descending. 
node end byte: %u, start byte: %u\n", + ts_node_end_byte(node), + self->start_byte + ); + } + + if (should_descend && ts_tree_cursor_goto_first_child(&self->cursor)) { self->depth++; } else { self->ascending = true; @@ -2988,35 +3217,43 @@ bool ts_query_cursor_next_capture( QueryState *first_finished_state = NULL; uint32_t first_finished_capture_byte = first_unfinished_capture_byte; uint32_t first_finished_pattern_index = first_unfinished_pattern_index; - for (unsigned i = 0; i < self->finished_states.size; i++) { + for (unsigned i = 0; i < self->finished_states.size;) { QueryState *state = &self->finished_states.contents[i]; const CaptureList *captures = capture_list_pool_get( &self->capture_list_pool, state->capture_list_id ); - if (captures->size > state->consumed_capture_count) { - uint32_t capture_byte = ts_node_start_byte( - captures->contents[state->consumed_capture_count].node - ); - if ( - capture_byte < first_finished_capture_byte || - ( - capture_byte == first_finished_capture_byte && - state->pattern_index < first_finished_pattern_index - ) - ) { - first_finished_state = state; - first_finished_capture_byte = capture_byte; - first_finished_pattern_index = state->pattern_index; - } - } else { + + // Remove states whose captures are all consumed. + if (state->consumed_capture_count >= captures->size) { capture_list_pool_release( &self->capture_list_pool, state->capture_list_id ); array_erase(&self->finished_states, i); - i--; + continue; + } + + // Skip captures that precede the cursor's start byte. 
+ TSNode node = captures->contents[state->consumed_capture_count].node; + if (ts_node_end_byte(node) <= self->start_byte) { + state->consumed_capture_count++; + continue; + } + + uint32_t node_start_byte = ts_node_start_byte(node); + if ( + node_start_byte < first_finished_capture_byte || + ( + node_start_byte == first_finished_capture_byte && + state->pattern_index < first_finished_pattern_index + ) + ) { + first_finished_state = state; + first_finished_capture_byte = node_start_byte; + first_finished_pattern_index = state->pattern_index; } + i++; } // If there is finished capture that is clearly before any unfinished diff --git a/stack.c b/stack.c index cc728b05..935aed50 100644 --- a/stack.c +++ b/stack.c @@ -737,7 +737,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) if (head->status == StackStatusHalted) continue; fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i); - fprintf(f, "node_head_%u -> node_%p [", i, head->node); + fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node); if (head->status == StackStatusPaused) { fprintf(f, "color=red "); @@ -782,7 +782,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) if (!node) continue; all_iterators_done = false; - fprintf(f, "node_%p [", node); + fprintf(f, "node_%p [", (void *)node); if (node->state == ERROR_STATE) { fprintf(f, "label=\"?\""); } else if ( @@ -807,7 +807,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) for (int j = 0; j < node->link_count; j++) { StackLink link = node->links[j]; - fprintf(f, "node_%p -> node_%p [", node, link.node); + fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node); if (link.is_pending) fprintf(f, "style=dashed "); if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray "); diff --git a/subtree.c b/subtree.c index e90dc9d7..5634f46d 100644 --- a/subtree.c +++ b/subtree.c @@ -166,7 +166,8 @@ static inline bool 
ts_subtree_can_inline(Length padding, Length size, uint32_t l Subtree ts_subtree_new_leaf( SubtreePool *pool, TSSymbol symbol, Length padding, Length size, - uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens, + uint32_t lookahead_bytes, TSStateId parse_state, + bool has_external_tokens, bool depends_on_column, bool is_keyword, const TSLanguage *language ) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); @@ -213,6 +214,7 @@ Subtree ts_subtree_new_leaf( .fragile_right = false, .has_changes = false, .has_external_tokens = has_external_tokens, + .depends_on_column = depends_on_column, .is_missing = false, .is_keyword = is_keyword, {{.first_leaf = {.symbol = 0, .parse_state = 0}}} @@ -245,7 +247,7 @@ Subtree ts_subtree_new_error( ) { Subtree result = ts_subtree_new_leaf( pool, ts_builtin_sym_error, padding, size, bytes_scanned, - parse_state, false, false, language + parse_state, false, false, false, language ); SubtreeHeapData *data = (SubtreeHeapData *)result.ptr; data->fragile_left = true; @@ -378,6 +380,7 @@ void ts_subtree_summarize_children( self.ptr->repeat_depth = 0; self.ptr->node_count = 1; self.ptr->has_external_tokens = false; + self.ptr->depends_on_column = false; self.ptr->dynamic_precedence = 0; uint32_t structural_index = 0; @@ -388,6 +391,13 @@ void ts_subtree_summarize_children( for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = children[i]; + if ( + self.ptr->size.extent.row == 0 && + ts_subtree_depends_on_column(child) + ) { + self.ptr->depends_on_column = true; + } + if (i == 0) { self.ptr->padding = ts_subtree_padding(child); self.ptr->size = ts_subtree_size(child); @@ -545,7 +555,7 @@ Subtree ts_subtree_new_missing_leaf( ) { Subtree result = ts_subtree_new_leaf( pool, symbol, padding, length_zero(), 0, - 0, false, false, language + 0, false, false, false, language ); if (result.data.is_inline) { result.data.is_missing = true; @@ -670,6 +680,7 @@ Subtree ts_subtree_edit(Subtree 
self, const TSInputEdit *edit, SubtreePool *pool Edit edit = entry.edit; bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes; bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes; + bool invalidate_first_row = ts_subtree_depends_on_column(*entry.tree); Length size = ts_subtree_size(*entry.tree); Length padding = ts_subtree_padding(*entry.tree); @@ -733,6 +744,7 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool data->fragile_right = false; data->has_changes = false; data->has_external_tokens = false; + data->depends_on_column = false; data->is_missing = result.data.is_missing; data->is_keyword = result.data.is_keyword; result.ptr = data; @@ -755,9 +767,18 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool // If this child ends before the edit, it is not affected. if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue; - // If this child starts after the edit, then we're done processing children. - if (child_left.bytes > edit.old_end.bytes || - (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break; + // Keep editing child nodes until a node is reached that starts after the edit. + // Also, if this node's validity depends on its column position, then continue + // invaliditing child nodes until reaching a line break. + if (( + (child_left.bytes > edit.old_end.bytes) || + (child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0) + ) && ( + !invalidate_first_row || + child_left.extent.row > entry.tree->ptr->padding.extent.row + )) { + break; + } // Transform edit into the child's coordinate space. Edit child_edit = { @@ -775,8 +796,10 @@ Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool // Interpret all inserted text as applying to the *first* child that touches the edit. 
// Subsequent children are only never have any text inserted into them; they are only // shrunk to compensate for the edit. - if (child_right.bytes > edit.start.bytes || - (child_right.bytes == edit.start.bytes && is_pure_insertion)) { + if ( + child_right.bytes > edit.start.bytes || + (child_right.bytes == edit.start.bytes && is_pure_insertion) + ) { edit.new_end = edit.start; } @@ -969,7 +992,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, TSSymbol subtree_symbol = ts_subtree_symbol(*self); TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol; uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); - fprintf(f, "tree_%p [label=\"", self); + fprintf(f, "tree_%p [label=\"", (void *)self); ts_subtree__write_dot_string(f, ts_language_symbol_name(language, symbol)); fprintf(f, "\""); @@ -981,12 +1004,14 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, "state: %d\n" "error-cost: %u\n" "has-changes: %u\n" + "depends-on-column: %u\n" "repeat-depth: %u\n" "lookahead-bytes: %u", start_offset, end_offset, ts_subtree_parse_state(*self), ts_subtree_error_cost(*self), ts_subtree_has_changes(*self), + ts_subtree_depends_on_column(*self), ts_subtree_repeat_depth(*self), ts_subtree_lookahead_bytes(*self) ); @@ -1009,7 +1034,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, child_info_offset++; } ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f); - fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i); + fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i); child_start_offset += ts_subtree_total_bytes(*child); } } diff --git a/subtree.h b/subtree.h index 899d592f..600d1e97 100644 --- a/subtree.h +++ b/subtree.h @@ -78,6 +78,7 @@ typedef struct { bool fragile_right : 1; bool has_changes : 1; bool has_external_tokens : 1; + bool depends_on_column: 1; bool is_missing : 1; bool is_keyword : 1; @@ 
-138,7 +139,7 @@ void ts_subtree_pool_delete(SubtreePool *); Subtree ts_subtree_new_leaf( SubtreePool *, TSSymbol, Length, Length, uint32_t, - TSStateId, bool, bool, const TSLanguage * + TSStateId, bool, bool, bool, const TSLanguage * ); Subtree ts_subtree_new_error( SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage * @@ -284,6 +285,10 @@ static inline bool ts_subtree_has_external_tokens(Subtree self) { return self.data.is_inline ? false : self.ptr->has_external_tokens; } +static inline bool ts_subtree_depends_on_column(Subtree self) { + return self.data.is_inline ? false : self.ptr->depends_on_column; +} + static inline bool ts_subtree_is_fragile(Subtree self) { return self.data.is_inline ? false : (self.ptr->fragile_left || self.ptr->fragile_right); } diff --git a/tree.c b/tree.c index 047c7ff3..99c628e5 100644 --- a/tree.c +++ b/tree.c @@ -5,8 +5,6 @@ #include "./tree_cursor.h" #include "./tree.h" -static const unsigned PARENT_CACHE_CAPACITY = 32; - TSTree *ts_tree_new( Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count @@ -14,9 +12,6 @@ TSTree *ts_tree_new( TSTree *result = ts_malloc(sizeof(TSTree)); result->root = root; result->language = language; - result->parent_cache = NULL; - result->parent_cache_start = 0; - result->parent_cache_size = 0; result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange)); memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange)); result->included_range_count = included_range_count; @@ -35,7 +30,6 @@ void ts_tree_delete(TSTree *self) { ts_subtree_release(&pool, self->root); ts_subtree_pool_delete(&pool); ts_free(self->included_ranges); - if (self->parent_cache) ts_free(self->parent_cache); ts_free(self); } @@ -78,8 +72,6 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) { SubtreePool pool = ts_subtree_pool_new(0); self->root = ts_subtree_edit(self->root, edit, &pool); - self->parent_cache_start 
= 0; - self->parent_cache_size = 0; ts_subtree_pool_delete(&pool); } @@ -111,38 +103,3 @@ TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uin void ts_tree_print_dot_graph(const TSTree *self, FILE *file) { ts_subtree_print_dot_graph(self->root, self->language, file); } - -TSNode ts_tree_get_cached_parent(const TSTree *self, const TSNode *node) { - for (uint32_t i = 0; i < self->parent_cache_size; i++) { - uint32_t index = (self->parent_cache_start + i) % PARENT_CACHE_CAPACITY; - ParentCacheEntry *entry = &self->parent_cache[index]; - if (entry->child == node->id) { - return ts_node_new(self, entry->parent, entry->position, entry->alias_symbol); - } - } - return ts_node_new(NULL, NULL, length_zero(), 0); -} - -void ts_tree_set_cached_parent(const TSTree *_self, const TSNode *node, const TSNode *parent) { - TSTree *self = (TSTree *)_self; - if (!self->parent_cache) { - self->parent_cache = ts_calloc(PARENT_CACHE_CAPACITY, sizeof(ParentCacheEntry)); - } - - uint32_t index = (self->parent_cache_start + self->parent_cache_size) % PARENT_CACHE_CAPACITY; - self->parent_cache[index] = (ParentCacheEntry) { - .child = node->id, - .parent = (const Subtree *)parent->id, - .position = { - parent->context[0], - {parent->context[1], parent->context[2]} - }, - .alias_symbol = parent->context[3], - }; - - if (self->parent_cache_size == PARENT_CACHE_CAPACITY) { - self->parent_cache_start++; - } else { - self->parent_cache_size++; - } -} diff --git a/tree.h b/tree.h index 92a7e641..0334b824 100644 --- a/tree.h +++ b/tree.h @@ -15,17 +15,12 @@ typedef struct { struct TSTree { Subtree root; const TSLanguage *language; - ParentCacheEntry *parent_cache; - uint32_t parent_cache_start; - uint32_t parent_cache_size; TSRange *included_ranges; unsigned included_range_count; }; TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned); TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol); -TSNode 
ts_tree_get_cached_parent(const TSTree *, const TSNode *); -void ts_tree_set_cached_parent(const TSTree *, const TSNode *, const TSNode *); #ifdef __cplusplus } diff --git a/tree_cursor.c b/tree_cursor.c index b9856240..31c21d04 100644 --- a/tree_cursor.c +++ b/tree_cursor.c @@ -159,10 +159,43 @@ int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *_self, uint32_t g } } while (did_descend); - if (self->stack.size > initial_size && - ts_tree_cursor_goto_next_sibling((TSTreeCursor *)self)) { - return visible_child_index; - } + self->stack.size = initial_size; + return -1; +} + +int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *_self, TSPoint goal_point) { + TreeCursor *self = (TreeCursor *)_self; + uint32_t initial_size = self->stack.size; + uint32_t visible_child_index = 0; + + bool did_descend; + do { + did_descend = false; + + bool visible; + TreeCursorEntry entry; + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { + TSPoint end_point = point_add(entry.position.extent, ts_subtree_size(*entry.subtree).extent); + bool at_goal = point_gt(end_point, goal_point); + uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree); + if (at_goal) { + if (visible) { + array_push(&self->stack, entry); + return visible_child_index; + } + if (visible_child_count > 0) { + array_push(&self->stack, entry); + did_descend = true; + break; + } + } else if (visible) { + visible_child_index++; + } else { + visible_child_index += visible_child_count; + } + } + } while (did_descend); self->stack.size = initial_size; return -1; @@ -353,14 +386,12 @@ void ts_tree_cursor_current_status( // Determine if the current node can have later siblings with the same field name. 
if (*field_id) { for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { - if (i->field_id == *field_id) { - if ( - i->child_index > entry->structural_child_index || - (i->child_index == entry->structural_child_index && *has_later_named_siblings) - ) { - *can_have_later_siblings_with_this_field = true; - break; - } + if ( + i->field_id == *field_id && + i->child_index > entry->structural_child_index + ) { + *can_have_later_siblings_with_this_field = true; + break; } } } @@ -448,6 +479,7 @@ TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) { TSTreeCursor res = {NULL, NULL, {0, 0}}; TreeCursor *copy = (TreeCursor *)&res; copy->tree = cursor->tree; + array_init(©->stack); array_push_all(©->stack, &cursor->stack); return res; } diff --git a/vendor.sh b/vendor.sh index 384cc675..f1f918c6 100755 --- a/vendor.sh +++ b/vendor.sh @@ -4,7 +4,7 @@ set -e -sitter_version=0.17.3 +sitter_version=v0.20.0 grammars=( "bash;v0.16.1;parser.c;scanner.cc" "c-sharp;v0.16.1;parser.c;scanner.c"