-
Notifications
You must be signed in to change notification settings - Fork 740
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This commit adds a lexer for ARM assembly code. This is for the syntax used by ARM DS-5, Keil, RealView, ADS, SDT, objasm, asasm and aasm toolchains and is **not** the same as that shared by GNU AS and Clang's integrated assembler.
- Loading branch information
Showing
4 changed files
with
259 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
GET common.s | ||
|
||
RetVal * 0x123 :SHL: 4 | ||
|
||
AREA |Area$$Name|, CODE, READONLY | ||
|
||
MyFunction ROUT ; This is a comment | ||
ASSERT RetVal <> 0 | ||
1 MOVW r0, #RetVal | ||
BX lr | ||
|
||
END |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
# -*- coding: utf-8 -*- # | ||
# frozen_string_literal: true | ||
|
||
module Rouge
  module Lexers
    # Lexer for the Arm assembly syntax used by the ARM DS-5, Keil, RealView,
    # ADS, SDT, objasm, asasm and aasm toolchains. This is *not* the syntax
    # shared by GNU AS and Clang's integrated assembler.
    #
    # The syntax is column-sensitive, which the states below mirror:
    #
    #   :root     - start of a line; a word in the first column is a label
    #   :command  - after the first run of whitespace; directive, opcode or
    #               macro name
    #   :args     - after the second run of whitespace; operands/expressions
    #   :filespec - remainder of the line after a file directive (GET, ...)
    #
    # :singlequoted / :doublequoted handle literals and return to :args.
    class ArmAsm < RegexLexer
      title "ArmAsm"
      desc "Arm assembly syntax"
      tag 'armasm'
      filenames '*.s'

      # C-preprocessor keywords recognised after a leading '#'.
      #
      # @return [Array<String>] memoized list of regex fragments
      def self.preproc_keyword
        @preproc_keyword ||= %w(
          define elif else endif error if ifdef ifndef include line pragma undef warning
        )
      end

      # Directives whose operand is a file name; they switch to :filespec.
      #
      # @return [Array<String>] memoized list of regex fragments
      def self.file_directive
        @file_directive ||= %w(
          BIN GET INCBIN INCLUDE LNK
        )
      end

      # All other directives, including the punctuation aliases
      # ([ ] | ! # * = % & ^). Entries are regex fragments, not plain words.
      #
      # @return [Array<String>] memoized list of regex fragments
      def self.general_directive
        @general_directive ||= %w(
          ALIAS ALIGN AOF AOUT AREA ARM ASSERT ATTR CN CODE16 CODE32 COMMON CP
          DATA DCB DCD DCDO DCDU DCFD DCFDU DCFH DCFHU DCFS DCFSU DCI DCI.N DCI.W
          DCQ DCQU DCW DCWU DN ELIF ELSE END ENDFUNC ENDIF ENDP ENTRY EQU EXPORT
          EXPORTAS EXTERN FIELD FILL FN FRAME FUNCTION GBLA GBLL GBLS GLOBAL IF
          IMPORT INFO KEEP LCLA LCLL LCLS LEADR LEAF LTORG MACRO MAP MEND MEXIT
          NOFP OPT ORG PRESERVE8 PROC QN RELOC REQUIRE REQUIRE8 RLIST RN ROUT
          SETA SETL SETS SN SPACE STRONG SUBT THUMB THUMBX TTL WEND WHILE
          \[ \] [|!#*=%&^]
        )
      end

      # Shift names and condition codes that may appear as operands, in
      # all-uppercase and all-lowercase spellings.
      #
      # @return [Array<String>] memoized list of regex fragments
      def self.shift_or_condition
        @shift_or_condition ||= %w(
          ASR LSL LSR ROR RRX AL CC CS EQ GE GT HI HS LE LO LS LT MI NE PL VC VS
          asr lsl lsr ror rrx al cc cs eq ge gt hi hs le lo ls lt mi ne pl vc vs
        )
      end

      # Names of built-in variables, written in source as {NAME}.
      #
      # @return [Array<String>] memoized list of regex fragments
      def self.builtin
        @builtin ||= %w(
          ARCHITECTURE AREANAME ARMASM_VERSION CODESIZE COMMANDLINE CONFIG CPU
          ENDIAN FALSE FPIC FPU INPUTFILE INTER LINENUM LINENUMUP LINENUMUPPER
          OBJASM_VERSION OPT PC PCSTOREOFFSET REENTRANT ROPI RWPI TRUE VAR
        )
      end

      # Names of colon-delimited operators, written in source as :NAME:.
      # Entries are regex fragments; the TARGET_* families match any suffix.
      #
      # @return [Array<String>] memoized list of regex fragments
      def self.operator
        # TARGET_FPU_ takes a full [0-9A-Z_]+ suffix like its TARGET_ARCH_ and
        # TARGET_FEATURE_ siblings; without the quantifier only a single
        # letter after the prefix could match.
        @operator ||= %w(
          AND BASE CC CC_ENCODING CHR DEF EOR FATTR FEXEC FLOAD FSIZE INDEX LAND
          LEFT LEN LEOR LNOT LOR LOWERCASE MOD NOT OR RCONST REVERSE_CC RIGHT ROL
          ROR SHL SHR STR TARGET_ARCH_[0-9A-Z_]+ TARGET_FEATURE_[0-9A-Z_]+
          TARGET_FPU_[0-9A-Z_]+ TARGET_PROFILE_[ARM] UAL UPPERCASE
        )
      end

      # Start of line: anything in the first column is a label (or a
      # preprocessor line); the first run of whitespace enters :command.
      state :root do
        rule %r/\n/, Text
        rule %r/^[ \t]*#[ \t]*(?:(?:#{ArmAsm.preproc_keyword.join('|')})(?:[ \t].*)?)?\n/, Comment::Preproc
        rule %r/[ \t]+/, Text, :command
        rule %r/;.*/, Comment
        rule %r/\$[a-z_]\w*\.?/i, Name::Namespace # variable substitution or macro argument
        rule %r/\w+|\|[^|\n]+\|/, Name::Label
      end

      # Second field of a line: directive, instruction mnemonic or macro name.
      state :command do
        rule %r/\n/, Text, :pop!
        # Whitespace after the command field starts the operand field.
        rule %r/[ \t]+/ do |m|
          token Text
          goto :args
        end
        rule %r/;.*/, Comment, :pop!
        # File directives take a raw file name rather than an expression.
        rule %r/(?:#{ArmAsm.file_directive.join('|')})\b/ do |m|
          token Keyword
          goto :filespec
        end
        rule %r/(?:#{ArmAsm.general_directive.join('|')})(?=[; \t\n])/, Keyword
        rule %r/(?:[A-Z][\dA-Z]*|[a-z][\da-z]*)(?:\.[NWnw])?(?:\.[DFIPSUdfipsu]?(?:8|16|32|64)?){,3}\b/, Name::Builtin # rather than attempt to list all opcodes, rely on all-uppercase or all-lowercase rule
        rule %r/[a-z_]\w*|\|[^|\n]+\|/i, Name::Function # probably a macro name
        rule %r/\$[a-z]\w*\.?/i, Name::Namespace
      end

      # Third field of a line: operands and expressions.
      state :args do
        rule %r/\n/, Text, :pop!
        rule %r/[ \t]+/, Text
        rule %r/;.*/, Comment, :pop!
        rule %r/(?:#{ArmAsm.shift_or_condition.join('|')})\b/, Name::Builtin
        rule %r/[a-z_]\w*|\|[^|\n]+\|/i, Name::Variable # various types of symbol
        rule %r/%[bf]?[at]?\d+(?:[a-z_]\w*)?/i, Name::Label
        # The digit runs of the Hex and Integer rules are atomic so that a
        # failed lookahead rejects the whole literal instead of backtracking
        # by one digit (which used to split "25E-1" into Integer "2" and
        # Float "5E-1"). A following '.' also defers to the Float rules.
        rule %r/(?:&|0x)(?>\h+)(?![.p])/i, Literal::Number::Hex
        rule %r/(?:&|0x)[.\h]+(?:p[-+]?\d+)?/i, Literal::Number::Float
        rule %r/0f_\h{8}|0d_\h{16}/i, Literal::Number::Float
        rule %r/(?>2_[01]+|3_[0-2]+|4_[0-3]+|5_[0-4]+|6_[0-5]+|7_[0-6]+|8_[0-7]+|9_[0-8]+|\d+)(?![.e])/i, Literal::Number::Integer
        # The unprefixed alternative must contain a digit; otherwise a lone
        # '.' (current location, as in "B .+8") would be lexed as a Float and
        # never reach the Name::Constant rule below.
        rule %r/(?:2_[.01]+|3_[.0-2]+|4_[.0-3]+|5_[.0-4]+|6_[.0-5]+|7_[.0-6]+|8_[.0-7]+|9_[.0-8]+|(?=\.?\d)[.\d]+)(?:e[-+]?\d+)?/i, Literal::Number::Float
        rule %r/[@:](?=[ \t]*(?:8|16|32|64|128|256)[^\d])/, Operator
        rule %r/[.@]|\{(?:#{ArmAsm.builtin.join('|')})\}/, Name::Constant
        rule %r/[-!#%&()*+,\/<=>?^{|}]|\[|\]|!=|&&|\/=|<<|<=|<>|==|><|>=|>>|\|\||:(?:#{ArmAsm.operator.join('|')}):/, Operator
        rule %r/\$[a-z]\w*\.?/i, Name::Namespace
        rule %r/'/ do |m|
          token Literal::String::Char
          goto :singlequoted
        end
        rule %r/"/ do |m|
          token Literal::String::Double
          goto :doublequoted
        end
      end

      # Inside '...'; "$$" is an escaped dollar, "$name" a substitution.
      state :singlequoted do
        rule %r/\n/, Text, :pop!
        rule %r/\$\$/, Literal::String::Char
        rule %r/\$[a-z]\w*\.?/i, Name::Namespace
        rule %r/'/ do |m|
          token Literal::String::Char
          goto :args
        end
        rule %r/[^$'\n]+/, Literal::String::Char
      end

      # Inside "..."; "$$" is an escaped dollar, "$name" a substitution.
      state :doublequoted do
        rule %r/\n/, Text, :pop!
        rule %r/\$\$/, Literal::String::Double
        rule %r/\$[a-z]\w*\.?/i, Name::Namespace
        rule %r/"/ do |m|
          token Literal::String::Double
          goto :args
        end
        rule %r/[^$"\n]+/, Literal::String::Double
      end

      # Remainder of the line after a file directive, taken as a file name.
      state :filespec do
        rule %r/\n/, Text, :pop!
        rule %r/\$\$/, Literal::String::Other
        rule %r/\$[a-z]\w*\.?/i, Name::Namespace
        rule %r/[^$\n]+/, Literal::String::Other
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# -*- coding: utf-8 -*- # | ||
# frozen_string_literal: true | ||
|
||
describe Rouge::Lexers::ArmAsm do
  # Lexer instance that the guessing assertions are checked against.
  let(:subject) { Rouge::Lexers::ArmAsm.new }

  describe 'guessing' do
    include Support::Guessing

    # A file name ending in ".s" should be resolved to this lexer.
    it 'guesses by filename' do
      assert_guess(filename: 'foo.s')
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
#ifndef Version | ||
#include "Version.h" | ||
#endif | ||
|
||
; it shouldn't have any problem with apostrophes in comments | ||
; or "quotation marks" or more ;s | ||
|
||
GET otherfile.s | ||
|
||
^ 0,r12 ; typical data structure layout | ||
offset1 # 4 | ||
# 4 ; sometimes no label assigned | ||
offset2 # 0 | ||
size * :INDEX:@ | ||
|
||
IF ?offset1 = 4 | ||
! 1, "This is an assembly-time assert" | ||
ELIF -1<>:NOT:0 | ||
! 1, "This is another assert" | ||
ELSE | ||
! 0, "But this is a warning", 1 | ||
ENDIF | ||
|
||
my_r0 RN 0 ; non-standard register name symbols | ||
|
||
sixteen * 4 * 4 | ||
|
||
GBLS VBar | ||
VBar SETS "|" | ||
|
||
MACRO | ||
$label MyMacro$suffix $arg1 = default, $arg2 | ||
LCLS thing | ||
[ "$arg2"="" | ||
thing SETS "wibble ; this isn't a comment" | ||
| | ||
thing SETS $arg2 | ||
] | ||
LCLL boolean | ||
boolean SETL {TRUE} | ||
EXPORT $arg1 | ||
$arg1 | ||
$thing MOV pc, #0 | ||
MEND | ||
|
||
|
||
ORG 0 | ||
|
||
ARM | ||
|
||
AREA |Area$$Name|, CODE, READONLY | ||
|
||
MyFunc ROUT | ||
|
||
just_a_label | ||
label_and ; comment | ||
LDR my_r0, =just_a_statement | ||
label AND r0, r1, r2 ; and a comment | ||
MOV r0, r1, r2, LSL r3 | ||
MOV.W r0, r1, r2, ROR #1 | ||
ADDS.N r0, r1 | ||
IT MI | ||
MOVMI r0, r1, RRX | ||
UND #0x10-&10 | ||
PUSH {r0,r2-r4,ip,lr} | ||
LDF F0, =25E-1 | ||
VMOVEQ.F64 d0, #-.7e1 | ||
|
||
Symbols_may_contain_d161ts_and_underscores | ||
but_must_start_with_a_letter | ||
and_are_case_SenSitIve | ||
01 ; this, by contrast, is a local label | ||
|
||
LDR r0, here | ||
B .+8 | ||
here DATA | ||
DCD 1 | ||
BNE %BT01 | ||
|
||
DCB 1,2,3,'A',';','"' ; bytes | ||
= "This is a string with embedded $$ dollar and "" double quote characters", 0 | ||
= "$VBar.not_part_of_variable_name", 0 | ||
DCD -1 ; words | ||
& -2,:INDEX:offset1 ; more words | ||
% 16 | ||
same_as SPACE 16 | ||
or FILL 16, 0 | ||
INCBIN include.bin | ||
|
||
END |