Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a lexer for Ada 2012. #1255

Merged
merged 2 commits into from
Jul 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions lib/rouge/demos/ada
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
with Ada.Directories;
with Ada.Direct_IO;
with Ada.Text_IO;

procedure Extra_IO.Read_File (Name : String) is

package Dirs renames Ada.Directories;
package Text_IO renames Ada.Text_IO;

-- Get the size of the file for a new string.
Size : Natural := Natural (Dirs.Size (Name));
subtype File_String is String (1 .. Size);

-- Instantiate Direct_IO for our file type.
package FIO is new Ada.Direct_IO (File_String);

File : FIO.File_Type;
Contents : File_String;

begin
FIO.Open (File, FIO.In_File, Name);
FIO.Read (File, Contents);
FIO.Close (File);

Text_IO.Put (Contents);
end Extra_IO.Read_File;
162 changes: 162 additions & 0 deletions lib/rouge/lexers/ada.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
module Lexers
class Ada < RegexLexer
tag 'ada'
filenames '*.ada', '*.ads', '*.adb', '*.gpr'
mimetypes 'text/x-ada'

title 'Ada'
desc 'The Ada 2012 programming language'

# Ada identifiers are Unicode with underscores only allowed as separators.
ID = /\b[[:alpha:]](?:\p{Pc}?[[:alnum:]])*\b/

# Numerals can also contain underscores.
NUM = /\d(_?\d)*/
XNUM = /\h(_?\h)*/
EXP = /(E[-+]?#{NUM})?/i

# Return a hash mapping lower-case identifiers to token classes.
def self.idents
@idents ||= Hash.new(Name).tap do |h|
%w(
abort abstract accept access aliased all array at begin body
case constant declare delay delta digits do else elsif end
exception exit for generic goto if in interface is limited
loop new null of others out overriding pragma private
protected raise range record renames requeue return reverse
select separate some synchronized tagged task terminate then
until use when while with
).each {|w| h[w] = Keyword}

%w(abs and mod not or rem xor).each {|w| h[w] = Operator::Word}

%w(
entry function package procedure subtype type
).each {|w| h[w] = Keyword::Declaration}

%w(
boolean character constraint_error duration float integer
natural positive long_float long_integer long_long_float
long_long_integer program_error short_float short_integer
short_short_integer storage_error string tasking_error
wide_character wide_string wide_wide_character
wide_wide_string
).each {|w| h[w] = Name::Builtin}
end
end

state :whitespace do
rule %r{\s+}m, Text
rule %r{--.*$}, Comment::Single
end

state :dquote_string do
rule %r{[^"\n]+}, Literal::String::Double
rule %r{""}, Literal::String::Escape
rule %r{"}, Literal::String::Double, :pop!
rule %r{\n}, Error, :pop!
pyrmont marked this conversation as resolved.
Show resolved Hide resolved
end

state :attr do
mixin :whitespace
rule ID, Name::Attribute, :pop!
rule %r{}, Text, :pop!
end

# Handle a dotted name immediately following a declaration keyword.
state :decl_name do
mixin :whitespace
rule %r{body\b}i, Keyword::Declaration # package body Foo.Bar is...
rule %r{(#{ID})(\.)} do
groups Name::Namespace, Punctuation
end
# function "<=" (Left, Right: Type) is ...
rule %r{#{ID}|"(and|or|xor|/?=|<=?|>=?|\+|–|&\|/|mod|rem|\*?\*|abs|not)"},
Name::Function, :pop!
rule %r{}, Text, :pop!
end

# Handle a sequence of library unit names: with Ada.Foo, Ada.Bar;
#
# There's a chance we entered this state mistakenly since 'with'
# has multiple other uses in Ada (none of which are likely to
# appear at the beginning of a line). Try to bail as soon as
# possible if we see something suspicious like keywords.
#
# See ada_spec.rb for some examples.
state :libunit_name do
mixin :whitespace

rule ID do |m|
t = self.class.idents[m[0].downcase]
if t <= Name
# Convert all kinds of Name to namespaces in this context.
token Name::Namespace
else
# Yikes, we're not supposed to get a keyword in a library unit name!
# We probably entered this state by mistake, so try to fix it.
token t
if t == Keyword::Declaration
goto :decl_name
else
pop!
end
end
end

rule %r{[.,]}, Punctuation
rule %r{}, Text, :pop!
end

state :root do
mixin :whitespace

# String literals.
rule %r{'.'}, Literal::String::Char
rule %r{"[^"\n]*}, Literal::String::Double, :dquote_string

# Real literals.
rule %r{#{NUM}\.#{NUM}#{EXP}}, Literal::Number::Float
rule %r{#{NUM}##{XNUM}\.#{XNUM}##{EXP}}, Literal::Number::Float

# Integer literals.
rule %r{2#[01](_?[01])*##{EXP}}, Literal::Number::Bin
rule %r{8#[0-7](_?[0-7])*##{EXP}}, Literal::Number::Oct
rule %r{16##{XNUM}*##{EXP}}, Literal::Number::Hex
rule %r{#{NUM}##{XNUM}##{EXP}}, Literal::Number::Integer
rule %r{#{NUM}#\w+#}, Error
rule %r{#{NUM}#{EXP}}, Literal::Number::Integer

# Special constructs.
rule %r{'}, Punctuation, :attr
rule %r{<<#{ID}>>}, Name::Label

# Context clauses are tricky because the 'with' keyword is used
# for many purposes. Detect at beginning of the line only.
rule %r{^(?:(limited)(\s+))?(?:(private)(\s+))?(with)\b}i do
groups Keyword::Namespace, Text, Keyword::Namespace, Text, Keyword::Namespace
push :libunit_name
end

# Operators and punctuation characters.
rule %r{[+*/&<=>|]|-|=>|\.\.|\*\*|[:></]=|<<|>>|<>}, Operator
rule %r{[.,:;()]}, Punctuation

rule ID do |m|
t = self.class.idents[m[0].downcase]
token t
if t == Keyword::Declaration
push :decl_name
end
end

# Flag word-like things that don't match the ID pattern.
rule %r{\b(\p{Pc}|[[alpha]])\p{Word}*}, Error
end
end
end
end
164 changes: 164 additions & 0 deletions spec/lexers/ada_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

describe Rouge::Lexers::Ada do
let(:subject) { Rouge::Lexers::Ada.new }

describe 'guessing' do
include Support::Guessing

it 'guesses by filename' do
assert_guess :filename => 'foo.ada'
assert_guess :filename => 'foo.adb'
assert_guess :filename => 'foo.ads'
end

it 'guesses by mimetype' do
assert_guess :mimetype => 'text/x-ada'
end
end

describe 'lexing' do
include Support::Lexing

it 'classifies identifiers' do
assert_tokens_equal 'constant Boolean := A and B',
['Keyword', 'constant'],
['Text', ' '],
['Name.Builtin', 'Boolean'],
['Text', ' '],
['Operator', ':='],
['Text', ' '],
['Name', 'A'],
['Text', ' '],
['Operator.Word', 'and'],
['Text', ' '],
['Name', 'B']
end

it 'accepts Unicode identifiers' do
assert_tokens_equal '東京', ['Name', '東京']
assert_tokens_equal '0東京', ['Error', '0'], ['Name', '東京']
assert_tokens_equal '東京0', ['Name', '東京0']
end

it 'rejects identifiers with double or trailing underscores' do
assert_tokens_equal '_ab', ['Error', '_ab']
assert_tokens_equal 'a__b', ['Error', 'a__b']
assert_tokens_equal 'a_b', ['Name', 'a_b']
assert_tokens_equal 'ab_', ['Error', 'ab_']
end

it 'understands other connecting punctuation' do
assert_tokens_equal 'a﹏b', ['Name', 'a﹏b']
assert_tokens_equal '﹏ab', ['Error', '﹏ab']
assert_tokens_equal 'a﹏﹏b', ['Error', 'a﹏﹏b']
assert_tokens_equal 'ab﹏', ['Error', 'ab﹏']
end

it 'classifies based number literals' do
assert_tokens_equal '2#0001_1110#', ['Literal.Number.Bin', '2#0001_1110#']
assert_tokens_equal '2#0001__1110#', ['Error', '2#0001__1110#']
assert_tokens_equal '8#1234_0000#', ['Literal.Number.Oct', '8#1234_0000#']
assert_tokens_equal '16#abc_BBB_12#', ['Literal.Number.Hex', '16#abc_BBB_12#']
assert_tokens_equal '4#1230000#e+5', ['Literal.Number.Integer', '4#1230000#e+5']
assert_tokens_equal '2#0001_1110#e3', ['Literal.Number.Bin', '2#0001_1110#e3']

assert_tokens_equal '16#abc_BBB.12#', ['Literal.Number.Float', '16#abc_BBB.12#']
end

it 'recognizes exponents in integers and reals' do
assert_tokens_equal '1e6', ['Literal.Number.Integer', '1e6']
assert_tokens_equal '123_456', ['Literal.Number.Integer', '123_456']
assert_tokens_equal '3.14159_26', ['Literal.Number.Float', '3.14159_26']
assert_tokens_equal '3.141_592e-20', ['Literal.Number.Float', '3.141_592e-20']
end

it 'highlights escape sequences inside doubly quoted strings' do
assert_tokens_equal '"Archimedes said ""Εύρηκα"""',
['Literal.String.Double', '"Archimedes said '],
['Literal.String.Escape', '""'],
['Literal.String.Double', 'Εύρηκα'],
['Literal.String.Escape', '""'],
['Literal.String.Double', '"']
end

it 'marks function names in declarations' do
assert_tokens_equal 'Entry Foo IS',
['Keyword.Declaration', 'Entry'],
['Text', ' '],
['Name.Function', 'Foo'],
['Text', ' '],
['Keyword', 'IS']

assert_tokens_equal 'package body Ada.Foo IS',
['Keyword.Declaration', 'package'],
['Text', ' '],
['Keyword.Declaration', 'body'],
['Text', ' '],
['Name.Namespace', 'Ada'],
['Punctuation', '.'],
['Name.Function', 'Foo'],
['Text', ' '],
['Keyword', 'IS']
end

it 'allows both names and builtin names in context clauses' do
assert_tokens_equal 'limited with Math.Integer;',
['Keyword.Namespace', 'limited'],
['Text', ' '],
['Keyword.Namespace', 'with'],
['Text', ' '],
['Name.Namespace', 'Math'],
['Punctuation', '.'],
['Name.Namespace', 'Integer'],
['Punctuation', ';']
end

it 'recovers quickly after mistakenly entering :libunit_name' do
# A `with` keyword at the beginning of a line is 99.9% sure to be
# a context clause, but there are things that could be mistaken if
# they are indented strangely:
#
# generic
# with function Random return Integer is <>;
# procedure Foo;
#
# If that `with` is not indented, we should recover immediately:
assert_tokens_equal 'with function Random',
['Keyword.Namespace', 'with'],
['Text', ' '],
['Keyword.Declaration', 'function'],
['Text', ' '],
['Name.Function', 'Random']

# Another case that's even less likely to be at BOL:
#
# type Painted_Point is new Point with
# record
# Paint : Color := White;
# end record;
#
# type Addition is new Binary_Operation with null record;
assert_tokens_equal 'with record Paint',
['Keyword.Namespace', 'with'],
['Text', ' '],
['Keyword', 'record'],
['Text', ' '],
['Name', 'Paint']
assert_tokens_equal 'with null record;',
['Keyword.Namespace', 'with'],
['Text', ' '],
['Keyword', 'null'],
['Text', ' '],
['Keyword', 'record'],
['Punctuation', ';']

# Finally: raise Runtime_Error with "NO!"
assert_tokens_equal 'with "NO!"',
['Keyword.Namespace', 'with'],
['Text', ' '],
['Literal.String.Double', '"NO!"']
end
end
end
Loading