Skip to content

Commit

Permalink
Add ARM assembly lexer (#1057)
Browse files Browse the repository at this point in the history
This commit adds a lexer for ARM assembly code. This is for the syntax 
used by ARM DS-5, Keil, RealView, ADS, SDT, objasm, asasm and aasm 
toolchains and is **not** the same as that shared by GNU AS and Clang's 
integrated assembler.
  • Loading branch information
bavison authored and pyrmont committed Jul 30, 2019
1 parent b898149 commit b4fe9f1
Show file tree
Hide file tree
Showing 4 changed files with 259 additions and 0 deletions.
12 changes: 12 additions & 0 deletions lib/rouge/demos/armasm
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
GET common.s

RetVal * 0x123 :SHL: 4

AREA |Area$$Name|, CODE, READONLY

MyFunction ROUT ; This is a comment
ASSERT RetVal <> 0
1 MOVW r0, #RetVal
BX lr

END
143 changes: 143 additions & 0 deletions lib/rouge/lexers/armasm.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
module Lexers
class ArmAsm < RegexLexer
# Lexer metadata: display title, short description, lexer tag and the
# filename glob this lexer claims (the accompanying spec asserts that
# 'foo.s' is guessed as ArmAsm).
#
# Per the commit that introduced this lexer, the syntax covered is the one
# used by the ARM DS-5, Keil, RealView, ADS, SDT, objasm, asasm and aasm
# toolchains -- it is NOT the syntax shared by GNU AS and Clang's
# integrated assembler.
title "ArmAsm"
desc "Arm assembly syntax"
tag 'armasm'
filenames '*.s'

# Keywords recognised after the leading `#` of a preprocessor-style line.
#
# The list is built on first use and memoised on the receiver; the :root
# state joins it with `|` to form a regexp alternation.
#
# @return [Array<String>] keyword names, in declaration order
def self.preproc_keyword
  return @preproc_keyword if @preproc_keyword

  @preproc_keyword = %w(
    define elif else endif error
    if ifdef ifndef include line
    pragma undef warning
  )
end

# Directives whose operand is a file specification. The :command state
# switches to :filespec after matching one of these.
#
# @return [Array<String>] memoised directive names, in declaration order
def self.file_directive
  @file_directive ||= %w(BIN GET INCBIN INCLUDE LNK)
end

# Assembler directives other than the file directives.
#
# Every entry is a regexp *fragment*, not a plain word: the :command state
# joins the list with `|` inside a regexp. That is why the bracket
# directives are written `\[` / `\]`, the single-character symbolic
# directives are a character class, and the dot in `DCI.N` / `DCI.W` is
# escaped (an unescaped `.` would match any character, e.g. `DCIxN`).
#
# @return [Array<String>] memoised regexp fragments, in declaration order
def self.general_directive
  @general_directive ||= %w(
    ALIAS ALIGN AOF AOUT AREA ARM ASSERT ATTR CN CODE16 CODE32 COMMON CP
    DATA DCB DCD DCDO DCDU DCFD DCFDU DCFH DCFHU DCFS DCFSU DCI DCI\.N DCI\.W
    DCQ DCQU DCW DCWU DN ELIF ELSE END ENDFUNC ENDIF ENDP ENTRY EQU EXPORT
    EXPORTAS EXTERN FIELD FILL FN FRAME FUNCTION GBLA GBLL GBLS GLOBAL IF
    IMPORT INFO KEEP LCLA LCLL LCLS LEADR LEAF LTORG MACRO MAP MEND MEXIT
    NOFP OPT ORG PRESERVE8 PROC QN RELOC REQUIRE REQUIRE8 RLIST RN ROUT
    SETA SETL SETS SN SPACE STRONG SUBT THUMB THUMBX TTL WEND WHILE
    \[ \] [|!#*=%&^]
  )
end

# Shift operators and condition codes, accepted in all-uppercase or
# all-lowercase spelling (mixed case is deliberately not listed).
#
# @return [Array<String>] memoised names: the uppercase spellings followed
#   by the same names in lowercase
def self.shift_or_condition
  return @shift_or_condition if @shift_or_condition

  uppercase = %w(
    ASR LSL LSR ROR RRX
    AL CC CS EQ GE GT HI HS LE LO LS LT MI NE PL VC VS
  )
  @shift_or_condition = uppercase + uppercase.map(&:downcase)
end

# Names of built-in variables. The :args state only recognises them when
# wrapped in braces, e.g. `{TRUE}`.
#
# @return [Array<String>] memoised names, in declaration order
def self.builtin
  return @builtin if @builtin

  @builtin = %w(
    ARCHITECTURE AREANAME ARMASM_VERSION CODESIZE COMMANDLINE CONFIG
    CPU ENDIAN FALSE FPIC FPU INPUTFILE INTER
    LINENUM LINENUMUP LINENUMUPPER
    OBJASM_VERSION OPT PC PCSTOREOFFSET
    REENTRANT ROPI RWPI TRUE VAR
  )
end

# Names that may appear between colons (`:NAME:`) in an expression.
#
# Entries are regexp fragments: the :args state joins them with `|` inside
# `:(?:...):`. The TARGET_* entries are therefore patterns; each one that
# covers a family of names carries a `+` quantifier so the whole name is
# matched. TARGET_PROFILE_ takes exactly one of A, R or M.
#
# @return [Array<String>] memoised regexp fragments, in declaration order
def self.operator
  @operator ||= %w(
    AND BASE CC CC_ENCODING CHR DEF EOR FATTR FEXEC FLOAD FSIZE INDEX LAND
    LEFT LEN LEOR LNOT LOR LOWERCASE MOD NOT OR RCONST REVERSE_CC RIGHT ROL
    ROR SHL SHR STR TARGET_ARCH_[0-9A-Z_]+ TARGET_FEATURE_[0-9A-Z_]+
    TARGET_FPU_[0-9A-Z_]+ TARGET_PROFILE_[ARM] UAL UPPERCASE
  )
end

# Initial state: whatever appears before the first horizontal whitespace
# on a line (newlines, preprocessor lines, comments, `$` substitutions and
# labels). Rule order is significant.
state :root do
  preproc_alternation = ArmAsm.preproc_keyword.join('|')

  rule(%r/\n/, Text)

  # A `#` line, optionally with one of the known preprocessor keywords and
  # its arguments, is consumed whole (newline included).
  rule(%r/^[ \t]*#[ \t]*(?:(?:#{preproc_alternation})(?:[ \t].*)?)?\n/, Comment::Preproc)

  # Horizontal whitespace ends the label field and pushes :command.
  rule(%r/[ \t]+/, Text, :command)

  rule(%r/;.*/, Comment)

  # variable substitution or macro argument
  rule(%r/\$[a-z_]\w*\.?/i, Name::Namespace)

  # Plain word, or a name delimited by vertical bars.
  rule(%r/\w+|\|[^|\n]+\|/, Name::Label)
end

# Directive / opcode / macro-name field, entered from :root after leading
# whitespace. A newline or a comment pops back to :root; whitespace after
# the command switches to :args. Rule order is significant.
state :command do
  rule %r/\n/, Text, :pop!

  rule %r/[ \t]+/ do
    token Text
    goto :args
  end

  rule %r/;.*/, Comment, :pop!

  # File directives take a file specification rather than ordinary operands.
  rule %r/(?:#{ArmAsm.file_directive.join('|')})\b/ do
    token Keyword
    goto :filespec
  end

  # A directive must be followed by a comment, whitespace or newline -- or
  # by the end of input, so that a final directive (typically END) in a file
  # without a trailing newline is still a Keyword instead of falling through
  # to the opcode rule below.
  rule %r/(?:#{ArmAsm.general_directive.join('|')})(?=[; \t\n]|\z)/, Keyword

  # rather than attempt to list all opcodes, rely on all-uppercase or
  # all-lowercase rule
  rule %r/(?:[A-Z][\dA-Z]*|[a-z][\da-z]*)(?:\.[NWnw])?(?:\.[DFIPSUdfipsu]?(?:8|16|32|64)?){,3}\b/, Name::Builtin

  # probably a macro name
  rule %r/[a-z_]\w*|\|[^|\n]+\|/i, Name::Function

  rule %r/\$[a-z]\w*\.?/i, Name::Namespace
end

# Operand field. A newline or comment pops back to :root; a quote switches
# to the matching string state, which returns here on the closing quote.
# Rule order is significant: earlier rules shadow later ones.
state :args do
  rule %r/\n/, Text, :pop!
  rule %r/[ \t]+/, Text
  rule %r/;.*/, Comment, :pop!
  rule %r/(?:#{ArmAsm.shift_or_condition.join('|')})\b/, Name::Builtin

  # various types of symbol
  rule %r/[a-z_]\w*|\|[^|\n]+\|/i, Name::Variable

  rule %r/%[bf]?[at]?\d+(?:[a-z_]\w*)?/i, Name::Label

  # Hex integer. The digits sit in an atomic group so that a failed
  # lookahead cannot be satisfied by giving back digits (previously
  # `0x12p3` lexed as hex `0x1`). A following `.` or `p` means the literal
  # is a hex float and is left for the next rule.
  rule %r/(?:&|0x)(?>\h+)(?![.p])/i, Literal::Number::Hex
  rule %r/(?:&|0x)[.\h]+(?:p[-+]?\d+)?/i, Literal::Number::Float
  rule %r/0f_\h{8}|0d_\h{16}/i, Literal::Number::Float

  # Integer, optionally with a `base_` prefix. Atomic for the same reason
  # as the hex rule (previously `25E-1` lexed as integer `2` followed by
  # float `5E-1`); a following `.` or exponent hands the literal to the
  # float rule so that `1.0` is a single token.
  rule %r/(?>2_[01]+|3_[0-2]+|4_[0-3]+|5_[0-4]+|6_[0-5]+|7_[0-6]+|8_[0-7]+|9_[0-8]+|\d+)(?![.e])/i, Literal::Number::Integer

  # Float. The leading lookahead requires a digit at (or right after a
  # leading dot of) the start, so a lone `.` is not taken as a number and
  # reaches the Name::Constant rule below (e.g. `B .+8`).
  rule %r/(?=\.?\d)(?:2_[.01]+|3_[.0-2]+|4_[.0-3]+|5_[.0-4]+|6_[.0-5]+|7_[.0-6]+|8_[.0-7]+|9_[.0-8]+|[.\d]+)(?:e[-+]?\d+)?/i, Literal::Number::Float

  # `@` or `:` directly before 8/16/32/64/128/256 is lexed as an operator.
  rule %r/[@:](?=[ \t]*(?:8|16|32|64|128|256)[^\d])/, Operator

  # `.`, `@`, or a built-in variable in braces.
  rule %r/[.@]|\{(?:#{ArmAsm.builtin.join('|')})\}/, Name::Constant

  rule %r/[-!#%&()*+,\/<=>?^{|}]|\[|\]|!=|&&|\/=|<<|<=|<>|==|><|>=|>>|\|\||:(?:#{ArmAsm.operator.join('|')}):/, Operator
  rule %r/\$[a-z]\w*\.?/i, Name::Namespace

  rule %r/'/ do
    token Literal::String::Char
    goto :singlequoted
  end

  rule %r/"/ do
    token Literal::String::Double
    goto :doublequoted
  end
end

# Inside a '...' literal. `$$` is literal text, `$name` is a substitution,
# the closing quote returns to :args, and a newline pops the state.
state :singlequoted do
  rule(%r/\n/, Text, :pop!)
  rule(%r/\$\$/, Literal::String::Char)
  rule(%r/\$[a-z]\w*\.?/i, Name::Namespace)
  rule(%r/'/) do
    token Literal::String::Char
    goto :args
  end
  rule(%r/[^$'\n]+/, Literal::String::Char)
end

# Inside a "..." literal. `$$` is literal text, `$name` is a substitution,
# the closing quote returns to :args, and a newline pops the state.
state :doublequoted do
  rule(%r/\n/, Text, :pop!)
  rule(%r/\$\$/, Literal::String::Double)
  rule(%r/\$[a-z]\w*\.?/i, Name::Namespace)
  rule(%r/"/) do
    token Literal::String::Double
    goto :args
  end
  rule(%r/[^$"\n]+/, Literal::String::Double)
end

# Operand of a file directive: everything up to the end of the line is
# lexed as the file specification, with `$$` and `$name` handled as in the
# string states. A newline pops the state.
state :filespec do
  rule(%r/\n/, Text, :pop!)
  rule(%r/\$\$/, Literal::String::Other)
  rule(%r/\$[a-z]\w*\.?/i, Name::Namespace)
  rule(%r/[^$\n]+/, Literal::String::Other)
end
end
end
end
14 changes: 14 additions & 0 deletions spec/lexers/armasm_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

# Spec for the ArmAsm lexer: currently only checks lexer guessing.
describe Rouge::Lexers::ArmAsm do
  let(:subject) do
    Rouge::Lexers::ArmAsm.new
  end

  describe 'guessing' do
    include Support::Guessing

    # A file named like 'foo.s' must be guessed as ArmAsm.
    it 'guesses by filename' do
      assert_guess(filename: 'foo.s')
    end
  end
end
90 changes: 90 additions & 0 deletions spec/visual/samples/armasm
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#ifndef Version
#include "Version.h"
#endif

; it shouldn't have any problem with apostrophes in comments
; or "quotation marks" or more ;s

GET otherfile.s

^ 0,r12 ; typical data structure layout
offset1 # 4
# 4 ; sometimes no label assigned
offset2 # 0
size * :INDEX:@

IF ?offset1 = 4
! 1, "This is an assembly-time assert"
ELIF -1<>:NOT:0
! 1, "This is another assert"
ELSE
! 0, "But this is a warning", 1
ENDIF

my_r0 RN 0 ; non-standard register name symbols

sixteen * 4 * 4

GBLS VBar
VBar SETS "|"

MACRO
$label MyMacro$suffix $arg1 = default, $arg2
LCLS thing
[ "$arg2"=""
thing SETS "wibble ; this isn't a comment"
|
thing SETS $arg2
]
LCLL boolean
boolean SETL {TRUE}
EXPORT $arg1
$arg1
$thing MOV pc, #0
MEND


ORG 0

ARM

AREA |Area$$Name|, CODE, READONLY

MyFunc ROUT

just_a_label
label_and ; comment
LDR my_r0, =just_a_statement
label AND r0, r1, r2 ; and a comment
MOV r0, r1, r2, LSL r3
MOV.W r0, r1, r2, ROR #1
ADDS.N r0, r1
IT MI
MOVMI r0, r1, RRX
UND #0x10-&10
PUSH {r0,r2-r4,ip,lr}
LDF F0, =25E-1
VMOVEQ.F64 d0, #-.7e1

Symbols_may_contain_d161ts_and_underscores
but_must_start_with_a_letter
and_are_case_SenSitIve
01 ; this, by contrast, is a local label

LDR r0, here
B .+8
here DATA
DCD 1
BNE %BT01

DCB 1,2,3,'A',';','"' ; bytes
= "This is a string with embedded $$ dollar and "" double quote characters", 0
= "$VBar.not_part_of_variable_name", 0
DCD -1 ; words
& -2,:INDEX:offset1 ; more words
% 16
same_as SPACE 16
or FILL 16, 0
INCBIN include.bin

END

0 comments on commit b4fe9f1

Please sign in to comment.