Skip to content

Commit 3b19d67

Browse files
committed
SA28-035: Don't store source locations in token data
Instead, compute them lazily from offset data. This creates a slight breakage in the generated slocs for tokens with negative spans. Since there was not much you could do with the end sloc of a token with a negative span range before, this is considered a minor breakage.
1 parent 416f8b8 commit 3b19d67

19 files changed

+294
-94
lines changed

langkit/support/langkit_support-token_data_handlers.adb

Lines changed: 140 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ package body Langkit_Support.Token_Data_Handlers is
4141
with package Element_Vectors is new Langkit_Support.Vectors (<>);
4242

4343
with function Compare
44-
(K : Key_Type; E_Index : Positive; E : Element_Vectors.Element_Type)
44+
(K : Key_Type;
45+
E_Index : Positive;
46+
E : Element_Vectors.Element_Type)
4547
return Relative_Position is <>;
4648
-- Tell where K is with respect to E (E_Index is the index of E in the
4749
-- vector).
@@ -117,19 +119,22 @@ package body Langkit_Support.Token_Data_Handlers is
117119
-- Initialize --
118120
----------------
119121

120-
procedure Initialize (TDH : out Token_Data_Handler; Symbols : Symbol_Table)
121-
is
122+
procedure Initialize
123+
(TDH : out Token_Data_Handler;
124+
Symbols : Symbol_Table;
125+
Tab_Stop : Positive := Default_Tab_Stop) is
122126
begin
123-
TDH := (Source_Buffer => null,
124-
Source_First => <>,
125-
Source_Last => <>,
126-
Filename => <>,
127-
Charset => <>,
128-
Tokens => <>,
129-
Symbols => Symbols,
130-
Tokens_To_Trivias => <>,
131-
Trivias => <>,
132-
Lines_Starts => <>);
127+
TDH := (Source_Buffer => null,
128+
Source_First => <>,
129+
Source_Last => <>,
130+
Filename => <>,
131+
Charset => <>,
132+
Tokens => <>,
133+
Symbols => Symbols,
134+
Tokens_To_Trivias => <>,
135+
Trivias => <>,
136+
Lines_Starts => <>,
137+
Tab_Stop => Tab_Stop);
133138
end Initialize;
134139

135140
-----------
@@ -217,7 +222,8 @@ package body Langkit_Support.Token_Data_Handlers is
217222
Symbols => No_Symbol_Table,
218223
Tokens_To_Trivias => <>,
219224
Trivias => <>,
220-
Lines_Starts => <>);
225+
Lines_Starts => <>,
226+
Tab_Stop => <>);
221227
end Move;
222228

223229
--------------------------
@@ -537,11 +543,12 @@ package body Langkit_Support.Token_Data_Handlers is
537543
declare
538544
Triv_Index : constant Natural := Natural (Key_Trivia);
539545
Tok_Index : constant Natural := Element_Index - 1;
540-
Key_Start_Sloc : constant Source_Location := Start_Sloc
541-
(TDH.Trivias.Get (Triv_Index).T.Sloc_Range);
546+
Key_Start_Sloc : constant Source_Location := Sloc_Start
547+
(TDH, TDH.Trivias.Get (Triv_Index).T);
542548
begin
543549
return Compare
544-
(TDH.Tokens.Get (Tok_Index).Sloc_Range, Key_Start_Sloc);
550+
(Sloc_Range (TDH, TDH.Tokens.Get (Tok_Index)),
551+
Key_Start_Sloc);
545552
end;
546553
end if;
547554

@@ -582,13 +589,13 @@ package body Langkit_Support.Token_Data_Handlers is
582589
(Sloc : Source_Location;
583590
Dummy_Index : Positive;
584591
Token : Stored_Token_Data) return Relative_Position
585-
is (Compare (Token.Sloc_Range, Sloc));
592+
is (Compare (Sloc_Range (TDH, Token), Sloc));
586593

587594
function Compare
588595
(Sloc : Source_Location;
589596
Dummy_Index : Positive;
590597
Trivia : Trivia_Node) return Relative_Position
591-
is (Compare (Trivia.T.Sloc_Range, Sloc));
598+
is (Compare (Sloc_Range (TDH, Trivia.T), Sloc));
592599

593600
function Token_Floor is new Floor
594601
(Key_Type => Source_Location,
@@ -617,7 +624,7 @@ package body Langkit_Support.Token_Data_Handlers is
617624

618625
declare
619626
function SS (Token : Stored_Token_Data) return Source_Location is
620-
(Start_Sloc (Token.Sloc_Range));
627+
(Sloc_Start (TDH, Token));
621628

622629
Tok_Sloc : constant Source_Location := SS (TDH.Tokens.Get (Token));
623630
Triv_Sloc : constant Source_Location :=
@@ -681,16 +688,121 @@ package body Langkit_Support.Token_Data_Handlers is
681688
begin
682689
-- Return slice from...
683690
return
684-
TDH.Source_Buffer (
685-
-- The first character in the requested line
686-
TDH.Lines_Starts.Get (Line_Number)
691+
TDH.Source_Buffer
692+
(
693+
-- The first character in the requested line
694+
TDH.Lines_Starts.Get (Line_Number)
695+
696+
..
687697

688-
..
698+
-- The character before the LF that precedes the first character of
699+
-- the next line.
700+
TDH.Lines_Starts.Get (Line_Number + 1) - 2
701+
);
689702

690-
-- The character before the LF that precedes the first character of
691-
-- the next line.
692-
TDH.Lines_Starts.Get (Line_Number + 1) - 2
693-
);
694703
end Get_Line;
695704

705+
--------------
706+
-- Get_Sloc --
707+
--------------
708+
709+
function Get_Sloc
710+
(TDH : Token_Data_Handler; Index : Natural) return Source_Location
711+
is
712+
function Compare
713+
(Sought : Positive;
714+
Dummy_Index : Positive;
715+
Line_Start : Positive) return Relative_Position
716+
is
717+
(if Sought > Line_Start then After
718+
elsif Sought = Line_Start then Inside
719+
else Before);
720+
721+
function Get_Line_Index is new Floor (Positive, Index_Vectors);
722+
-- Return the index of the line that contains the character at the
723+
-- given offset, looking it up in a ``TDH.Lines_Starts`` vector.
724+
725+
Column : Natural := 0;
726+
-- 0 based column number
727+
728+
Tab_Stop : Positive renames TDH.Tab_Stop;
729+
begin
730+
-- Allow 0 as an offset because it's a common value when the text buffer
731+
-- is empty: in that case just return a null source location.
732+
if Index = 0 then
733+
return No_Source_Location;
734+
end if;
735+
736+
declare
737+
Line_Index : constant Positive :=
738+
Get_Line_Index (Index, TDH.Lines_Starts);
739+
Line_Offset : constant Positive := TDH.Lines_Starts.Get (Line_Index);
740+
begin
741+
-- Allow a sloc pointing just past the buffer end: EOF (hence the + 1)
742+
if Index > TDH.Source_Buffer'Last + 1 then
743+
raise Constraint_Error with "out of bound access";
744+
end if;
745+
746+
-- Make horizontal tabulations jump to the next multiple of Tab_Stop, as
747+
-- usually implemented in code editors.
748+
for I in Line_Offset .. Natural'Min (Index, TDH.Source_Last) - 1 loop
749+
if TDH.Source_Buffer (I) = Chars.HT then
750+
Column := (Column + Tab_Stop) / Tab_Stop * Tab_Stop;
751+
else
752+
Column := Column + 1;
753+
end if;
754+
end loop;
755+
756+
return Source_Location'
757+
(Line => Line_Number (Line_Index),
758+
Column =>
759+
Column_Number
760+
(Natural'Max (Column + 1, Index - Line_Offset + 1)));
761+
end;
762+
end Get_Sloc;
763+
764+
----------------
765+
-- Sloc_Start --
766+
----------------
767+
768+
function Sloc_Start
769+
(TDH : Token_Data_Handler;
770+
Token : Stored_Token_Data) return Source_Location is
771+
begin
772+
return Get_Sloc (TDH, Token.Source_First);
773+
end Sloc_Start;
774+
775+
--------------
776+
-- Sloc_End --
777+
--------------
778+
779+
function Sloc_End
780+
(TDH : Token_Data_Handler;
781+
Token : Stored_Token_Data) return Source_Location is
782+
begin
783+
return Get_Sloc
784+
(TDH,
785+
(if Token.Source_Last < Token.Source_First
786+
-- This is a special case for when the range is negative: in that
787+
-- case we want to propagate that behavior to the sloc itself,
788+
-- because negative ranges are used for tokens that have no
789+
-- "width", and shouldn't be lookup-able, like the termination
790+
-- token.
791+
then Token.Source_Last
792+
-- For regular cases, we want the sloc_end to be one column after
793+
-- the end of the token.
794+
else Token.Source_Last + 1));
795+
end Sloc_End;
796+
797+
----------------
798+
-- Sloc_Range --
799+
----------------
800+
801+
function Sloc_Range
802+
(TDH : Token_Data_Handler;
803+
Token : Stored_Token_Data) return Source_Location_Range is
804+
begin
805+
return Make_Range (Sloc_Start (TDH, Token), Sloc_End (TDH, Token));
806+
end Sloc_Range;
807+
696808
end Langkit_Support.Token_Data_Handlers;

langkit/support/langkit_support-token_data_handlers.ads

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,6 @@ package Langkit_Support.Token_Data_Handlers is
4747
-- this is either null or the symbolization of the token text.
4848
--
4949
-- For instance: null for keywords but actual text for identifiers.
50-
51-
Sloc_Range : Source_Location_Range;
52-
-- Source location range for this token. Note that the end bound is
53-
-- exclusive.
5450
end record;
5551
-- Holder for per-token data to be stored in the token data handler
5652

@@ -161,9 +157,16 @@ package Langkit_Support.Token_Data_Handlers is
161157
-- multiple Token_Data_Handlers.
162158

163159
Lines_Starts : Index_Vectors.Vector;
164-
-- Table keeping count of line starts and line endings
160+
-- Table keeping count of line starts and line endings. The index of the
161+
-- starting character for line N is at the Nth position in the vector.
162+
163+
Tab_Stop : Positive;
165164
end record;
166165

166+
Default_Tab_Stop : constant Positive := 8;
167+
-- Value that will be used for the default tab stop if none is passed
168+
-- during the initialization of a ``Token_Data_Handler``.
169+
167170
type Token_Data_Handler_Access is access all Token_Data_Handler;
168171

169172
function Initialized (TDH : Token_Data_Handler) return Boolean;
@@ -173,10 +176,15 @@ package Langkit_Support.Token_Data_Handlers is
173176
with Pre => Initialized (TDH);
174177
-- Return whether TDH was used to lex some input source
175178

176-
procedure Initialize (TDH : out Token_Data_Handler; Symbols : Symbol_Table)
179+
procedure Initialize
180+
(TDH : out Token_Data_Handler;
181+
Symbols : Symbol_Table;
182+
Tab_Stop : Positive := Default_Tab_Stop)
177183
with Pre => Symbols /= No_Symbol_Table,
178184
Post => Initialized (TDH) and then not Has_Source_Buffer (TDH);
179-
-- Create a token data handler that is associated with Symbols
185+
-- Create a token data handler that is associated with the ``Symbols``
186+
-- symbol table, and that uses ``Tab_Stop`` as the tabulation stop when
187+
-- computing column numbers.
180188

181189
procedure Reset
182190
(TDH : in out Token_Data_Handler;
@@ -285,4 +293,32 @@ package Langkit_Support.Token_Data_Handlers is
285293
(TDH : Token_Data_Handler; Line_Number : Positive) return Text_Type;
286294
-- Get the source text of line at index ``Line_Number``
287295

296+
function Get_Sloc
297+
(TDH : Token_Data_Handler; Index : Natural) return Source_Location;
298+
-- Return the sloc for given ``Index`` in ``TDH``. If:
299+
--
300+
-- - ``Index`` is ``0``, return ``No_Source_Location``.
301+
--
302+
-- - ``Index`` is in range ``1 .. TDH.Source_Buffer'Last + 1``, return a
303+
-- corresponding sloc (``TDH.Source_Buffer'Last + 1`` being the EOF
304+
-- sloc).
305+
--
306+
-- - ``Index`` is bigger than ``TDH.Source_Buffer'Last + 1``: raise a
307+
-- ``Constraint_Error``.
308+
309+
function Sloc_Start
310+
(TDH : Token_Data_Handler;
311+
Token : Stored_Token_Data) return Source_Location;
312+
-- Get the starting sloc for given ``Token`` in ``TDH``
313+
314+
function Sloc_End
315+
(TDH : Token_Data_Handler;
316+
Token : Stored_Token_Data) return Source_Location;
317+
-- Get the end sloc for given ``Token`` in ``TDH``
318+
319+
function Sloc_Range
320+
(TDH : Token_Data_Handler;
321+
Token : Stored_Token_Data) return Source_Location_Range;
322+
-- Get the sloc range for given ``Token`` in ``TDH``
323+
288324
end Langkit_Support.Token_Data_Handlers;

langkit/templates/parsers/opt_code_ada.mako

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ if ${subparser.pos_var} = No_Token_Index then
5151
## Emit a diagnostic informing the user that the sub parser has not
5252
## succeeded.
5353
Append (Parser.Diagnostics,
54-
Get_Token (Parser.TDH.all, ${parser.start_pos}).Sloc_Range,
54+
Sloc_Range (Parser.TDH.all,
55+
Get_Token (Parser.TDH.all, ${parser.start_pos})),
5556
To_Text ("Missing '${subparser.error_repr}'"));
5657
% endif
5758

langkit/templates/parsers/pkg_main_body_ada.mako

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,13 +174,13 @@ package body ${ada_lib_name}.Parsers is
174174
Get_Token (Parser.TDH.all, Parser.Last_Fail.Pos);
175175
D : constant Diagnostic :=
176176
(if Parser.Last_Fail.Kind = Token_Fail then
177-
Create (Last_Token.Sloc_Range, To_Text
177+
Create (Sloc_Range (Parser.TDH.all, Last_Token), To_Text
178178
("Expected "
179179
& Token_Error_Image (Parser.Last_Fail.Expected_Token_Id)
180180
& ", got "
181181
& Token_Error_Image (Parser.Last_Fail.Found_Token_Id)))
182182
else
183-
Create (Last_Token.Sloc_Range,
183+
Create (Sloc_Range (Parser.TDH.all, Last_Token),
184184
To_Text (Parser.Last_Fail.Custom_Message.all)));
185185
begin
186186
Parser.Diagnostics.Append (D);
@@ -210,7 +210,7 @@ package body ${ada_lib_name}.Parsers is
210210
begin
211211
Append
212212
(Parser.Diagnostics,
213-
First_Garbage_Token.Sloc_Range,
213+
Sloc_Range (Parser.TDH.all, First_Garbage_Token),
214214
To_Text
215215
("End of input expected, got """
216216
& Token_Kind_Name

langkit/templates/parsers/skip_code_ada.mako

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ ${parser.dest_node_parser.generate_code()}
2121
${parser.dest_node_parser.res_var}.Token_End_Index := ${parser.start_pos};
2222

2323
Append (Parser.Diagnostics,
24-
Get_Token (Parser.TDH.all, ${parser.start_pos}).Sloc_Range,
24+
Sloc_Range (Parser.TDH.all,
25+
Get_Token (Parser.TDH.all, ${parser.start_pos})),
2526
To_Text ("Skipped token ")
2627
& Common.Text
2728
(Wrap_Token_Reference

langkit/templates/parsers/transform_code_ada.mako

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ if ${parser.pos_var} /= No_Token_Index then
6464
${parser.parser.progress_var if is_row(parser.parser) else 1};
6565

6666
Append (Parser.Diagnostics,
67-
Get_Token (Parser.TDH.all, ${parser.start_pos}).Sloc_Range,
67+
Sloc_Range (Parser.TDH.all,
68+
Get_Token (Parser.TDH.all, ${parser.start_pos})),
6869
To_Text ("Cannot parse <${parser.name}>"));
6970

7071
Add_Last_Fail_Diagnostic (Parser);

langkit/templates/pkg_common_body_ada.mako

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ package body ${ada_lib_name}.Common is
376376
Source_Buffer => Text_Cst_Access (TDH.Source_Buffer),
377377
Source_First => Raw_Data.Source_First,
378378
Source_Last => Raw_Data.Source_Last,
379-
Sloc_Range => Raw_Data.Sloc_Range);
379+
Sloc_Range => Sloc_Range (TDH, Raw_Data));
380380
end Convert;
381381

382382
--------------------------

langkit/templates/pkg_debug_body_ada.mako

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ package body ${ada_lib_name}.Debug is
5454
Put (" " & Image (Langkit_Support
5555
.Token_Data_Handlers.Text (TDH.all, D),
5656
With_Quotes => True));
57-
Put_Line (" [" & Image (D.Sloc_Range) & "]");
57+
Put_Line (" [" & Image (Sloc_Range (TDH.all, D)) & "]");
5858
end;
5959
end if;
6060
end PTok;

langkit/templates/pkg_debug_spec_ada.mako

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ use Langkit_Support.Token_Data_Handlers;
1111

1212
with Langkit_Support.Symbols; use Langkit_Support.Symbols;
1313

14-
with ${ada_lib_name}.Common;
1514
with ${ada_lib_name}.Implementation;
1615
use ${ada_lib_name}.Implementation;
1716

0 commit comments

Comments
 (0)