Commit

Migrated Hare lexer.
orbitalquark committed Aug 28, 2023
1 parent e88bbcf commit f426182
Showing 1 changed file with 45 additions and 41 deletions.
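Editor's note: the change ports the lexer from Scintillua's legacy token-based API to the current tag-based one. token()/word_match{} rules become lex:tag()/lex:word_match() rules, the explicit whitespace rule is dropped, and the word lists move into lex:set_word_list() calls at the end of the file. A condensed sketch of the target shape, for orientation only; it mirrors the structure in the diff below and assumes it is loaded by Scintillua the way the real lexers are (word list abbreviated, most rules omitted):

local lexer = lexer         -- was: local lexer = require('lexer')
local lex = lexer.new(...)  -- was: lexer.new('hare'); the loader now supplies the name via '...'

-- Rules reference a named word list instead of inlining it with word_match{}.
lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)))

-- The words themselves are supplied separately, at the end of the file.
lex:set_word_list(lexer.KEYWORD, {'as', 'break', 'case', 'const'}) -- abbreviated

return lex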
86 changes: 45 additions & 41 deletions lexers/hare.lua
@@ -1,77 +1,81 @@
-- Copyright 2021-2023 Mitchell. See LICENSE.
-- Hare LPeg lexer
-- https://harelang.org
-- Contributed by Qiu

-local lexer = require('lexer')
-local token, word_match = lexer.token, lexer.word_match
+local lexer = lexer
local P, R, S = lpeg.P, lpeg.R, lpeg.S

-local lex = lexer.new('hare')
-
--- Whitespace.
-lex:add_rule('whitespace', token(lexer.WHITESPACE, lexer.space^1))
+local lex = lexer.new(...)

-- Keywords.
-lex:add_rule('keyword', token(lexer.KEYWORD, word_match{
-'as', 'break', 'case', 'const', 'continue', 'def', 'defer', 'else', 'export', 'false', 'fn',
-'for', 'if', 'is', 'let', 'match', 'null', 'nullable', 'return', 'static', 'struct', 'switch',
-'true', 'type', 'use', 'yield'
-}))
+lex:add_rule('keyword', lex:tag(lexer.KEYWORD, lex:word_match(lexer.KEYWORD)))

+-- Types.
+lex:add_rule('type', lex:tag(lexer.TYPE, lex:word_match(lexer.TYPE)))
+
-- Functions.
-local size_builtin = 'size' * #(lexer.space^0 * '(')
-lex:add_rule('function', token(lexer.FUNCTION, word_match{
-'abort', 'align', 'alloc', 'append', 'assert', 'cap', 'delete', 'free', 'insert', 'len', 'offset',
-'vaarg', 'vaend', 'vastart'
-} + size_builtin))
+local builtin_func = lex:tag(lexer.FUNCTION_BUILTIN,
+lex:word_match(lexer.FUNCTION_BUILTIN) + 'size' * #(lexer.space^0 * '('))
+local func = lex:tag(lexer.FUNCTION, lexer.word * ('::' * lexer.word)^0 *
+#(lexer.space^0 * '('))
+lex:add_rule('function', builtin_func + func)

--- Types.
-lex:add_rule('type', token(lexer.TYPE, word_match{
-'bool', 'enum', 'f32', 'f64', 'i16', 'i32', 'i64', 'i8', 'int', 'rune', 'size', 'str', 'u16',
-'u32', 'u64', 'u8', 'uint', 'uintptr', 'union', 'valist', 'void'
-}))
+-- Constants.
+lex:add_rule('constant', lex:tag(lexer.CONSTANT_BUILTIN, lex:word_match(lexer.CONSTANT_BUILTIN)))

-- Identifiers.
-lex:add_rule('identifier', token(lexer.IDENTIFIER, lexer.word))
+lex:add_rule('identifier', lex:tag(lexer.IDENTIFIER, lexer.word))

-- Strings.
local sq_str = lexer.range("'", true)
local dq_str = lexer.range('"')
-local raw_str = lexer.range('`')
-lex:add_rule('string', token(lexer.STRING, sq_str + dq_str + raw_str))
+local raw_str = lexer.range('`', false, false)
+lex:add_rule('string', lex:tag(lexer.STRING, sq_str + dq_str + raw_str))

-- Comments.
-lex:add_rule('comment', token(lexer.COMMENT, lexer.to_eol('//')))
+lex:add_rule('comment', lex:tag(lexer.COMMENT, lexer.to_eol('//')))

-- Numbers.
-local integer_suffix = word_match{
-"i", "u", "z", "i8", "i16", "i32", "i64", "u8", "u16", "u32", "u64"
-}
-local float_suffix = word_match{"f32", "f64"}
-local suffix = integer_suffix + float_suffix
-
local bin_num = '0b' * R('01')^1 * -lexer.xdigit
local oct_num = '0o' * R('07')^1 * -lexer.xdigit
local hex_num = '0x' * lexer.xdigit^1
-local integer_literal = S('+-')^-1 *
-((hex_num + oct_num + bin_num) * integer_suffix^-1 + lexer.dec_num * suffix^-1)
-local float_literal = lexer.float * float_suffix^-1
-lex:add_rule('number', token(lexer.NUMBER, integer_literal + float_literal))
+local int_suffix = lexer.word_match('i u z i8 i16 i32 i64 u8 u16 u32 u64')
+local float_suffix = lexer.word_match('f32 f64')
+local suffix = int_suffix + float_suffix
+local integer = S('+-')^-1 *
+((hex_num + oct_num + bin_num) * int_suffix^-1 + lexer.dec_num * suffix^-1)
+local float = lexer.float * float_suffix^-1
+lex:add_rule('number', lex:tag(lexer.NUMBER, integer + float))

-- Error assertions
-lex:add_rule('error_assert', token('error_assert', lpeg.B(')') * P('!')))
-lex:add_style('error_assert', lexer.styles.error)
+lex:add_rule('error_assert', lex:tag(lexer.ERROR .. '.assert', lpeg.B(')') * P('!')))

-- Operators.
-lex:add_rule('operator', token(lexer.OPERATOR, S('+-/*%^!=&|?~:;,.()[]{}<>')))
+lex:add_rule('operator', lex:tag(lexer.OPERATOR, S('+-/*%^!=&|?~:;,.()[]{}<>')))

-- Attributes.
-lex:add_rule('attribute', token(lexer.ANNOTATION, '@' * lexer.word))
+lex:add_rule('attribute', lex:tag(lexer.ANNOTATION, '@' * lexer.word))

-- Fold points.
lex:add_fold_point(lexer.OPERATOR, '{', '}')

+-- Word lists.
+lex:set_word_list(lexer.KEYWORD, {
+'as', 'break', 'case', 'const', 'continue', 'def', 'defer', 'else', 'export', 'fn', 'for', 'if',
+'is', 'let', 'match', 'return', 'static', 'switch', 'type', 'use', 'yield', '_'
+})
+
+lex:set_word_list(lexer.TYPE, {
+'bool', 'enum', 'f32', 'f64', 'i16', 'i32', 'i64', 'i8', 'int', 'opaque', 'never', 'nullable',
+'rune', 'size', 'str', 'struct', 'u16', 'u32', 'u64', 'u8', 'uint', 'uintptr', 'union', 'valist'
+})
+
+lex:set_word_list(lexer.FUNCTION_BUILTIN, {
+'abort', 'align', 'alloc', 'append', 'assert', 'delete', 'free', 'insert', 'len', 'offset',
+'vaarg', 'vaend', 'vastart'
+})
+
+lex:set_word_list(lexer.CONSTANT_BUILTIN, 'false null true void')
+
+lexer.property['scintillua.comment'] = '//'

return lex
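
Editor's note: for a quick standalone check of the migrated lexer, Scintillua's lexer module can also be driven from plain Lua. A minimal sketch, assuming a local Scintillua checkout, a Lua with LPeg installed, and that lexer.load() and lex:lex() behave as in the project's own tests; the path and the printed output format are assumptions, not part of this commit:

-- Hypothetical path; point it at the lexers/ directory of a Scintillua checkout.
package.path = '/path/to/scintillua/lexers/?.lua;' .. package.path
local lexer = require('lexer')

local lex = lexer.load('hare') -- loads the migrated lexers/hare.lua

local code = [[
export fn main() void = {
	let x: int = 0x2a; // forty-two
};
]]

-- lex() is expected to return a flat list alternating tag names and positions;
-- printing it shows which rule claimed each piece of the input.
local tags = lex:lex(code)
for i = 1, #tags, 2 do print(tags[i], tags[i + 1]) end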
