-
Notifications
You must be signed in to change notification settings - Fork 740
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This commit adds a lexer for Ada as described in the Consolidated Ada 2012 Language Reference Manual.
- Loading branch information
Showing
4 changed files
with
481 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
with Ada.Directories; | ||
with Ada.Direct_IO; | ||
with Ada.Text_IO; | ||
|
||
procedure Extra_IO.Read_File (Name : String) is | ||
|
||
package Dirs renames Ada.Directories; | ||
package Text_IO renames Ada.Text_IO; | ||
|
||
-- Get the size of the file for a new string. | ||
Size : Natural := Natural (Dirs.Size (Name)); | ||
subtype File_String is String (1 .. Size); | ||
|
||
-- Instantiate Direct_IO for our file type. | ||
package FIO is new Ada.Direct_IO (File_String); | ||
|
||
File : FIO.File_Type; | ||
Contents : File_String; | ||
|
||
begin | ||
FIO.Open (File, FIO.In_File, Name); | ||
FIO.Read (File, Contents); | ||
FIO.Close (File); | ||
|
||
Text_IO.Put (Contents); | ||
end Extra_IO.Read_File; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
# -*- coding: utf-8 -*- # | ||
# frozen_string_literal: true | ||
|
||
module Rouge | ||
module Lexers | ||
class Ada < RegexLexer | ||
tag 'ada' | ||
filenames '*.ada', '*.ads', '*.adb', '*.gpr' | ||
mimetypes 'text/x-ada' | ||
|
||
title 'Ada' | ||
desc 'The Ada 2012 programming language' | ||
|
||
# Ada identifiers are Unicode with underscores only allowed as separators. | ||
ID = /\b[[:alpha:]](?:\p{Pc}?[[:alnum:]])*\b/ | ||
|
||
# Numerals can also contain underscores. | ||
NUM = /\d(_?\d)*/ | ||
XNUM = /\h(_?\h)*/ | ||
EXP = /(E[-+]?#{NUM})?/i | ||
|
||
# Return a hash mapping lower-case identifiers to token classes. | ||
def self.idents | ||
@idents ||= Hash.new(Name).tap do |h| | ||
%w( | ||
abort abstract accept access aliased all array at begin body | ||
case constant declare delay delta digits do else elsif end | ||
exception exit for generic goto if in interface is limited | ||
loop new null of others out overriding pragma private | ||
protected raise range record renames requeue return reverse | ||
select separate some synchronized tagged task terminate then | ||
until use when while with | ||
).each {|w| h[w] = Keyword} | ||
|
||
%w(abs and mod not or rem xor).each {|w| h[w] = Operator::Word} | ||
|
||
%w( | ||
entry function package procedure subtype type | ||
).each {|w| h[w] = Keyword::Declaration} | ||
|
||
%w( | ||
boolean character constraint_error duration float integer | ||
natural positive long_float long_integer long_long_float | ||
long_long_integer program_error short_float short_integer | ||
short_short_integer storage_error string tasking_error | ||
wide_character wide_string wide_wide_character | ||
wide_wide_string | ||
).each {|w| h[w] = Name::Builtin} | ||
end | ||
end | ||
|
||
state :whitespace do | ||
rule %r{\s+}m, Text | ||
rule %r{--.*$}, Comment::Single | ||
end | ||
|
||
state :dquote_string do | ||
rule %r{[^"\n]+}, Literal::String::Double | ||
rule %r{""}, Literal::String::Escape | ||
rule %r{"}, Literal::String::Double, :pop! | ||
rule %r{\n}, Error, :pop! | ||
end | ||
|
||
state :attr do | ||
mixin :whitespace | ||
rule ID, Name::Attribute, :pop! | ||
rule %r{}, Text, :pop! | ||
end | ||
|
||
# Handle a dotted name immediately following a declaration keyword. | ||
state :decl_name do | ||
mixin :whitespace | ||
rule %r{body\b}i, Keyword::Declaration # package body Foo.Bar is... | ||
rule %r{(#{ID})(\.)} do | ||
groups Name::Namespace, Punctuation | ||
end | ||
# function "<=" (Left, Right: Type) is ... | ||
rule %r{#{ID}|"(and|or|xor|/?=|<=?|>=?|\+|–|&\|/|mod|rem|\*?\*|abs|not)"}, | ||
Name::Function, :pop! | ||
rule %r{}, Text, :pop! | ||
end | ||
|
||
# Handle a sequence of library unit names: with Ada.Foo, Ada.Bar; | ||
# | ||
# There's a chance we entered this state mistakenly since 'with' | ||
# has multiple other uses in Ada (none of which are likely to | ||
# appear at the beginning of a line). Try to bail as soon as | ||
# possible if we see something suspicious like keywords. | ||
# | ||
# See ada_spec.rb for some examples. | ||
state :libunit_name do | ||
mixin :whitespace | ||
|
||
rule ID do |m| | ||
t = self.class.idents[m[0].downcase] | ||
if t <= Name | ||
# Convert all kinds of Name to namespaces in this context. | ||
token Name::Namespace | ||
else | ||
# Yikes, we're not supposed to get a keyword in a library unit name! | ||
# We probably entered this state by mistake, so try to fix it. | ||
token t | ||
if t == Keyword::Declaration | ||
goto :decl_name | ||
else | ||
pop! | ||
end | ||
end | ||
end | ||
|
||
rule %r{[.,]}, Punctuation | ||
rule %r{}, Text, :pop! | ||
end | ||
|
||
state :root do | ||
mixin :whitespace | ||
|
||
# String literals. | ||
rule %r{'.'}, Literal::String::Char | ||
rule %r{"[^"\n]*}, Literal::String::Double, :dquote_string | ||
|
||
# Real literals. | ||
rule %r{#{NUM}\.#{NUM}#{EXP}}, Literal::Number::Float | ||
rule %r{#{NUM}##{XNUM}\.#{XNUM}##{EXP}}, Literal::Number::Float | ||
|
||
# Integer literals. | ||
rule %r{2#[01](_?[01])*##{EXP}}, Literal::Number::Bin | ||
rule %r{8#[0-7](_?[0-7])*##{EXP}}, Literal::Number::Oct | ||
rule %r{16##{XNUM}*##{EXP}}, Literal::Number::Hex | ||
rule %r{#{NUM}##{XNUM}##{EXP}}, Literal::Number::Integer | ||
rule %r{#{NUM}#\w+#}, Error | ||
rule %r{#{NUM}#{EXP}}, Literal::Number::Integer | ||
|
||
# Special constructs. | ||
rule %r{'}, Punctuation, :attr | ||
rule %r{<<#{ID}>>}, Name::Label | ||
|
||
# Context clauses are tricky because the 'with' keyword is used | ||
# for many purposes. Detect at beginning of the line only. | ||
rule %r{^(?:(limited)(\s+))?(?:(private)(\s+))?(with)\b}i do | ||
groups Keyword::Namespace, Text, Keyword::Namespace, Text, Keyword::Namespace | ||
push :libunit_name | ||
end | ||
|
||
# Operators and punctuation characters. | ||
rule %r{[+*/&<=>|]|-|=>|\.\.|\*\*|[:></]=|<<|>>|<>}, Operator | ||
rule %r{[.,:;()]}, Punctuation | ||
|
||
rule ID do |m| | ||
t = self.class.idents[m[0].downcase] | ||
token t | ||
if t == Keyword::Declaration | ||
push :decl_name | ||
end | ||
end | ||
|
||
# Flag word-like things that don't match the ID pattern. | ||
rule %r{\b(\p{Pc}|[[alpha]])\p{Word}*}, Error | ||
end | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
# -*- coding: utf-8 -*- # | ||
# frozen_string_literal: true | ||
|
||
describe Rouge::Lexers::Ada do | ||
let(:subject) { Rouge::Lexers::Ada.new } | ||
|
||
describe 'guessing' do | ||
include Support::Guessing | ||
|
||
it 'guesses by filename' do | ||
assert_guess :filename => 'foo.ada' | ||
assert_guess :filename => 'foo.adb' | ||
assert_guess :filename => 'foo.ads' | ||
end | ||
|
||
it 'guesses by mimetype' do | ||
assert_guess :mimetype => 'text/x-ada' | ||
end | ||
end | ||
|
||
describe 'lexing' do | ||
include Support::Lexing | ||
|
||
it 'classifies identifiers' do | ||
assert_tokens_equal 'constant Boolean := A and B', | ||
['Keyword', 'constant'], | ||
['Text', ' '], | ||
['Name.Builtin', 'Boolean'], | ||
['Text', ' '], | ||
['Operator', ':='], | ||
['Text', ' '], | ||
['Name', 'A'], | ||
['Text', ' '], | ||
['Operator.Word', 'and'], | ||
['Text', ' '], | ||
['Name', 'B'] | ||
end | ||
|
||
it 'accepts Unicode identifiers' do | ||
assert_tokens_equal '東京', ['Name', '東京'] | ||
assert_tokens_equal '0東京', ['Error', '0'], ['Name', '東京'] | ||
assert_tokens_equal '東京0', ['Name', '東京0'] | ||
end | ||
|
||
it 'rejects identifiers with double or trailing underscores' do | ||
assert_tokens_equal '_ab', ['Error', '_ab'] | ||
assert_tokens_equal 'a__b', ['Error', 'a__b'] | ||
assert_tokens_equal 'a_b', ['Name', 'a_b'] | ||
assert_tokens_equal 'ab_', ['Error', 'ab_'] | ||
end | ||
|
||
it 'understands other connecting punctuation' do | ||
assert_tokens_equal 'a﹏b', ['Name', 'a﹏b'] | ||
assert_tokens_equal '﹏ab', ['Error', '﹏ab'] | ||
assert_tokens_equal 'a﹏﹏b', ['Error', 'a﹏﹏b'] | ||
assert_tokens_equal 'ab﹏', ['Error', 'ab﹏'] | ||
end | ||
|
||
it 'classifies based number literals' do | ||
assert_tokens_equal '2#0001_1110#', ['Literal.Number.Bin', '2#0001_1110#'] | ||
assert_tokens_equal '2#0001__1110#', ['Error', '2#0001__1110#'] | ||
assert_tokens_equal '8#1234_0000#', ['Literal.Number.Oct', '8#1234_0000#'] | ||
assert_tokens_equal '16#abc_BBB_12#', ['Literal.Number.Hex', '16#abc_BBB_12#'] | ||
assert_tokens_equal '4#1230000#e+5', ['Literal.Number.Integer', '4#1230000#e+5'] | ||
assert_tokens_equal '2#0001_1110#e3', ['Literal.Number.Bin', '2#0001_1110#e3'] | ||
|
||
assert_tokens_equal '16#abc_BBB.12#', ['Literal.Number.Float', '16#abc_BBB.12#'] | ||
end | ||
|
||
it 'recognizes exponents in integers and reals' do | ||
assert_tokens_equal '1e6', ['Literal.Number.Integer', '1e6'] | ||
assert_tokens_equal '123_456', ['Literal.Number.Integer', '123_456'] | ||
assert_tokens_equal '3.14159_26', ['Literal.Number.Float', '3.14159_26'] | ||
assert_tokens_equal '3.141_592e-20', ['Literal.Number.Float', '3.141_592e-20'] | ||
end | ||
|
||
it 'highlights escape sequences inside doubly quoted strings' do | ||
assert_tokens_equal '"Archimedes said ""Εύρηκα"""', | ||
['Literal.String.Double', '"Archimedes said '], | ||
['Literal.String.Escape', '""'], | ||
['Literal.String.Double', 'Εύρηκα'], | ||
['Literal.String.Escape', '""'], | ||
['Literal.String.Double', '"'] | ||
end | ||
|
||
it 'marks function names in declarations' do | ||
assert_tokens_equal 'Entry Foo IS', | ||
['Keyword.Declaration', 'Entry'], | ||
['Text', ' '], | ||
['Name.Function', 'Foo'], | ||
['Text', ' '], | ||
['Keyword', 'IS'] | ||
|
||
assert_tokens_equal 'package body Ada.Foo IS', | ||
['Keyword.Declaration', 'package'], | ||
['Text', ' '], | ||
['Keyword.Declaration', 'body'], | ||
['Text', ' '], | ||
['Name.Namespace', 'Ada'], | ||
['Punctuation', '.'], | ||
['Name.Function', 'Foo'], | ||
['Text', ' '], | ||
['Keyword', 'IS'] | ||
end | ||
|
||
it 'allows both names and builtin names in context clauses' do | ||
assert_tokens_equal 'limited with Math.Integer;', | ||
['Keyword.Namespace', 'limited'], | ||
['Text', ' '], | ||
['Keyword.Namespace', 'with'], | ||
['Text', ' '], | ||
['Name.Namespace', 'Math'], | ||
['Punctuation', '.'], | ||
['Name.Namespace', 'Integer'], | ||
['Punctuation', ';'] | ||
end | ||
|
||
it 'recovers quickly after mistakenly entering :libunit_name' do | ||
# A `with` keyword at the beginning of a line is 99.9% sure to be | ||
# a context clause, but there are things that could be mistaken if | ||
# they are indented strangely: | ||
# | ||
# generic | ||
# with function Random return Integer is <>; | ||
# procedure Foo; | ||
# | ||
# If that `with` is not indented, we should recover immediately: | ||
assert_tokens_equal 'with function Random', | ||
['Keyword.Namespace', 'with'], | ||
['Text', ' '], | ||
['Keyword.Declaration', 'function'], | ||
['Text', ' '], | ||
['Name.Function', 'Random'] | ||
|
||
# Another case that's even less likely to be at BOL: | ||
# | ||
# type Painted_Point is new Point with | ||
# record | ||
# Paint : Color := White; | ||
# end record; | ||
# | ||
# type Addition is new Binary_Operation with null record; | ||
assert_tokens_equal 'with record Paint', | ||
['Keyword.Namespace', 'with'], | ||
['Text', ' '], | ||
['Keyword', 'record'], | ||
['Text', ' '], | ||
['Name', 'Paint'] | ||
assert_tokens_equal 'with null record;', | ||
['Keyword.Namespace', 'with'], | ||
['Text', ' '], | ||
['Keyword', 'null'], | ||
['Text', ' '], | ||
['Keyword', 'record'], | ||
['Punctuation', ';'] | ||
|
||
# Finally: raise Runtime_Error with "NO!" | ||
assert_tokens_equal 'with "NO!"', | ||
['Keyword.Namespace', 'with'], | ||
['Text', ' '], | ||
['Literal.String.Double', '"NO!"'] | ||
end | ||
end | ||
end |
Oops, something went wrong.