Skip to content

Commit

Permalink
Add Ada lexer (#1255)
Browse files Browse the repository at this point in the history
This commit adds a lexer for Ada as described in the Consolidated Ada
2012 Language Reference Manual.
  • Loading branch information
stoklund authored and pyrmont committed Jul 11, 2019
1 parent 4adddf2 commit 7295682
Show file tree
Hide file tree
Showing 4 changed files with 481 additions and 0 deletions.
26 changes: 26 additions & 0 deletions lib/rouge/demos/ada
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
with Ada.Directories;
with Ada.Direct_IO;
with Ada.Text_IO;

procedure Extra_IO.Read_File (Name : String) is

package Dirs renames Ada.Directories;
package Text_IO renames Ada.Text_IO;

-- Get the size of the file for a new string.
Size : Natural := Natural (Dirs.Size (Name));
subtype File_String is String (1 .. Size);

-- Instantiate Direct_IO for our file type.
package FIO is new Ada.Direct_IO (File_String);

File : FIO.File_Type;
Contents : File_String;

begin
FIO.Open (File, FIO.In_File, Name);
FIO.Read (File, Contents);
FIO.Close (File);

Text_IO.Put (Contents);
end Extra_IO.Read_File;
162 changes: 162 additions & 0 deletions lib/rouge/lexers/ada.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
module Lexers
class Ada < RegexLexer
tag 'ada'
filenames '*.ada', '*.ads', '*.adb', '*.gpr'
mimetypes 'text/x-ada'

title 'Ada'
desc 'The Ada 2012 programming language'

# Ada identifiers are Unicode with underscores only allowed as separators.
ID = /\b[[:alpha:]](?:\p{Pc}?[[:alnum:]])*\b/

# Numerals can also contain underscores.
NUM = /\d(_?\d)*/
XNUM = /\h(_?\h)*/
EXP = /(E[-+]?#{NUM})?/i

# Return a hash mapping lower-case identifiers to token classes.
def self.idents
@idents ||= Hash.new(Name).tap do |h|
%w(
abort abstract accept access aliased all array at begin body
case constant declare delay delta digits do else elsif end
exception exit for generic goto if in interface is limited
loop new null of others out overriding pragma private
protected raise range record renames requeue return reverse
select separate some synchronized tagged task terminate then
until use when while with
).each {|w| h[w] = Keyword}

%w(abs and mod not or rem xor).each {|w| h[w] = Operator::Word}

%w(
entry function package procedure subtype type
).each {|w| h[w] = Keyword::Declaration}

%w(
boolean character constraint_error duration float integer
natural positive long_float long_integer long_long_float
long_long_integer program_error short_float short_integer
short_short_integer storage_error string tasking_error
wide_character wide_string wide_wide_character
wide_wide_string
).each {|w| h[w] = Name::Builtin}
end
end

state :whitespace do
rule %r{\s+}m, Text
rule %r{--.*$}, Comment::Single
end

state :dquote_string do
rule %r{[^"\n]+}, Literal::String::Double
rule %r{""}, Literal::String::Escape
rule %r{"}, Literal::String::Double, :pop!
rule %r{\n}, Error, :pop!
end

state :attr do
mixin :whitespace
rule ID, Name::Attribute, :pop!
rule %r{}, Text, :pop!
end

# Handle a dotted name immediately following a declaration keyword.
state :decl_name do
mixin :whitespace
rule %r{body\b}i, Keyword::Declaration # package body Foo.Bar is...
rule %r{(#{ID})(\.)} do
groups Name::Namespace, Punctuation
end
# function "<=" (Left, Right: Type) is ...
rule %r{#{ID}|"(and|or|xor|/?=|<=?|>=?|\+|–|&\|/|mod|rem|\*?\*|abs|not)"},
Name::Function, :pop!
rule %r{}, Text, :pop!
end

# Handle a sequence of library unit names: with Ada.Foo, Ada.Bar;
#
# There's a chance we entered this state mistakenly since 'with'
# has multiple other uses in Ada (none of which are likely to
# appear at the beginning of a line). Try to bail as soon as
# possible if we see something suspicious like keywords.
#
# See ada_spec.rb for some examples.
state :libunit_name do
mixin :whitespace

rule ID do |m|
t = self.class.idents[m[0].downcase]
if t <= Name
# Convert all kinds of Name to namespaces in this context.
token Name::Namespace
else
# Yikes, we're not supposed to get a keyword in a library unit name!
# We probably entered this state by mistake, so try to fix it.
token t
if t == Keyword::Declaration
goto :decl_name
else
pop!
end
end
end

rule %r{[.,]}, Punctuation
rule %r{}, Text, :pop!
end

state :root do
mixin :whitespace

# String literals.
rule %r{'.'}, Literal::String::Char
rule %r{"[^"\n]*}, Literal::String::Double, :dquote_string

# Real literals.
rule %r{#{NUM}\.#{NUM}#{EXP}}, Literal::Number::Float
rule %r{#{NUM}##{XNUM}\.#{XNUM}##{EXP}}, Literal::Number::Float

# Integer literals.
rule %r{2#[01](_?[01])*##{EXP}}, Literal::Number::Bin
rule %r{8#[0-7](_?[0-7])*##{EXP}}, Literal::Number::Oct
rule %r{16##{XNUM}*##{EXP}}, Literal::Number::Hex
rule %r{#{NUM}##{XNUM}##{EXP}}, Literal::Number::Integer
rule %r{#{NUM}#\w+#}, Error
rule %r{#{NUM}#{EXP}}, Literal::Number::Integer

# Special constructs.
rule %r{'}, Punctuation, :attr
rule %r{<<#{ID}>>}, Name::Label

# Context clauses are tricky because the 'with' keyword is used
# for many purposes. Detect at beginning of the line only.
rule %r{^(?:(limited)(\s+))?(?:(private)(\s+))?(with)\b}i do
groups Keyword::Namespace, Text, Keyword::Namespace, Text, Keyword::Namespace
push :libunit_name
end

# Operators and punctuation characters.
rule %r{[+*/&<=>|]|-|=>|\.\.|\*\*|[:></]=|<<|>>|<>}, Operator
rule %r{[.,:;()]}, Punctuation

rule ID do |m|
t = self.class.idents[m[0].downcase]
token t
if t == Keyword::Declaration
push :decl_name
end
end

# Flag word-like things that don't match the ID pattern.
rule %r{\b(\p{Pc}|[[alpha]])\p{Word}*}, Error
end
end
end
end
164 changes: 164 additions & 0 deletions spec/lexers/ada_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

describe Rouge::Lexers::Ada do
let(:subject) { Rouge::Lexers::Ada.new }

describe 'guessing' do
include Support::Guessing

it 'guesses by filename' do
assert_guess :filename => 'foo.ada'
assert_guess :filename => 'foo.adb'
assert_guess :filename => 'foo.ads'
end

it 'guesses by mimetype' do
assert_guess :mimetype => 'text/x-ada'
end
end

describe 'lexing' do
include Support::Lexing

it 'classifies identifiers' do
assert_tokens_equal 'constant Boolean := A and B',
['Keyword', 'constant'],
['Text', ' '],
['Name.Builtin', 'Boolean'],
['Text', ' '],
['Operator', ':='],
['Text', ' '],
['Name', 'A'],
['Text', ' '],
['Operator.Word', 'and'],
['Text', ' '],
['Name', 'B']
end

it 'accepts Unicode identifiers' do
assert_tokens_equal '東京', ['Name', '東京']
assert_tokens_equal '0東京', ['Error', '0'], ['Name', '東京']
assert_tokens_equal '東京0', ['Name', '東京0']
end

it 'rejects identifiers with double or trailing underscores' do
assert_tokens_equal '_ab', ['Error', '_ab']
assert_tokens_equal 'a__b', ['Error', 'a__b']
assert_tokens_equal 'a_b', ['Name', 'a_b']
assert_tokens_equal 'ab_', ['Error', 'ab_']
end

it 'understands other connecting punctuation' do
assert_tokens_equal 'a﹏b', ['Name', 'a﹏b']
assert_tokens_equal '﹏ab', ['Error', '﹏ab']
assert_tokens_equal 'a﹏﹏b', ['Error', 'a﹏﹏b']
assert_tokens_equal 'ab﹏', ['Error', 'ab﹏']
end

it 'classifies based number literals' do
assert_tokens_equal '2#0001_1110#', ['Literal.Number.Bin', '2#0001_1110#']
assert_tokens_equal '2#0001__1110#', ['Error', '2#0001__1110#']
assert_tokens_equal '8#1234_0000#', ['Literal.Number.Oct', '8#1234_0000#']
assert_tokens_equal '16#abc_BBB_12#', ['Literal.Number.Hex', '16#abc_BBB_12#']
assert_tokens_equal '4#1230000#e+5', ['Literal.Number.Integer', '4#1230000#e+5']
assert_tokens_equal '2#0001_1110#e3', ['Literal.Number.Bin', '2#0001_1110#e3']

assert_tokens_equal '16#abc_BBB.12#', ['Literal.Number.Float', '16#abc_BBB.12#']
end

it 'recognizes exponents in integers and reals' do
assert_tokens_equal '1e6', ['Literal.Number.Integer', '1e6']
assert_tokens_equal '123_456', ['Literal.Number.Integer', '123_456']
assert_tokens_equal '3.14159_26', ['Literal.Number.Float', '3.14159_26']
assert_tokens_equal '3.141_592e-20', ['Literal.Number.Float', '3.141_592e-20']
end

it 'highlights escape sequences inside doubly quoted strings' do
assert_tokens_equal '"Archimedes said ""Εύρηκα"""',
['Literal.String.Double', '"Archimedes said '],
['Literal.String.Escape', '""'],
['Literal.String.Double', 'Εύρηκα'],
['Literal.String.Escape', '""'],
['Literal.String.Double', '"']
end

it 'marks function names in declarations' do
assert_tokens_equal 'Entry Foo IS',
['Keyword.Declaration', 'Entry'],
['Text', ' '],
['Name.Function', 'Foo'],
['Text', ' '],
['Keyword', 'IS']

assert_tokens_equal 'package body Ada.Foo IS',
['Keyword.Declaration', 'package'],
['Text', ' '],
['Keyword.Declaration', 'body'],
['Text', ' '],
['Name.Namespace', 'Ada'],
['Punctuation', '.'],
['Name.Function', 'Foo'],
['Text', ' '],
['Keyword', 'IS']
end

it 'allows both names and builtin names in context clauses' do
assert_tokens_equal 'limited with Math.Integer;',
['Keyword.Namespace', 'limited'],
['Text', ' '],
['Keyword.Namespace', 'with'],
['Text', ' '],
['Name.Namespace', 'Math'],
['Punctuation', '.'],
['Name.Namespace', 'Integer'],
['Punctuation', ';']
end

it 'recovers quickly after mistakenly entering :libunit_name' do
# A `with` keyword at the beginning of a line is 99.9% sure to be
# a context clause, but there are things that could be mistaken if
# they are indented strangely:
#
# generic
# with function Random return Integer is <>;
# procedure Foo;
#
# If that `with` is not indented, we should recover immediately:
assert_tokens_equal 'with function Random',
['Keyword.Namespace', 'with'],
['Text', ' '],
['Keyword.Declaration', 'function'],
['Text', ' '],
['Name.Function', 'Random']

# Another case that's even less likely to be at BOL:
#
# type Painted_Point is new Point with
# record
# Paint : Color := White;
# end record;
#
# type Addition is new Binary_Operation with null record;
assert_tokens_equal 'with record Paint',
['Keyword.Namespace', 'with'],
['Text', ' '],
['Keyword', 'record'],
['Text', ' '],
['Name', 'Paint']
assert_tokens_equal 'with null record;',
['Keyword.Namespace', 'with'],
['Text', ' '],
['Keyword', 'null'],
['Text', ' '],
['Keyword', 'record'],
['Punctuation', ';']

# Finally: raise Runtime_Error with "NO!"
assert_tokens_equal 'with "NO!"',
['Keyword.Namespace', 'with'],
['Text', ' '],
['Literal.String.Double', '"NO!"']
end
end
end
Loading

0 comments on commit 7295682

Please sign in to comment.