diff --git a/lib/rouge/demos/armasm b/lib/rouge/demos/armasm
new file mode 100644
index 0000000000..b8bdf654b9
--- /dev/null
+++ b/lib/rouge/demos/armasm
@@ -0,0 +1,12 @@
+        GET common.s
+
+RetVal * 0x123 :SHL: 4
+
+        AREA |Area$$Name|, CODE, READONLY
+
+MyFunction ROUT ; This is a comment
+        ASSERT RetVal <> 0
+1 MOVW r0, #RetVal
+        BX lr
+
+        END
diff --git a/lib/rouge/lexers/armasm.rb b/lib/rouge/lexers/armasm.rb
new file mode 100644
index 0000000000..464a64215f
--- /dev/null
+++ b/lib/rouge/lexers/armasm.rb
@@ -0,0 +1,165 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+module Rouge
+  module Lexers
+    # Lexer for Arm assembly source laid out as: label in column one,
+    # blanks, a directive/opcode/macro name, blanks, then its arguments.
+    class ArmAsm < RegexLexer
+      title "ArmAsm"
+      desc "Arm assembly syntax"
+      tag 'armasm'
+      filenames '*.s'
+
+      # Keywords accepted after the `#` of a preprocessor line.
+      def self.preproc_keyword
+        @preproc_keyword ||= %w(
+          define elif else endif error if ifdef ifndef include line pragma undef warning
+        )
+      end
+
+      # Directives whose operand is lexed by the :filespec state.
+      def self.file_directive
+        @file_directive ||= %w(
+          BIN GET INCBIN INCLUDE LNK
+        )
+      end
+
+      # Remaining directives. Each entry is a regexp fragment (they are
+      # joined with `|`), so literal metacharacters must be escaped.
+      def self.general_directive
+        @general_directive ||= %w(
+          ALIAS ALIGN AOF AOUT AREA ARM ASSERT ATTR CN CODE16 CODE32 COMMON CP
+          DATA DCB DCD DCDO DCDU DCFD DCFDU DCFH DCFHU DCFS DCFSU DCI DCI\.N DCI\.W
+          DCQ DCQU DCW DCWU DN ELIF ELSE END ENDFUNC ENDIF ENDP ENTRY EQU EXPORT
+          EXPORTAS EXTERN FIELD FILL FN FRAME FUNCTION GBLA GBLL GBLS GLOBAL IF
+          IMPORT INFO KEEP LCLA LCLL LCLS LEADR LEAF LTORG MACRO MAP MEND MEXIT
+          NOFP OPT ORG PRESERVE8 PROC QN RELOC REQUIRE REQUIRE8 RLIST RN ROUT
+          SETA SETL SETS SN SPACE STRONG SUBT THUMB THUMBX TTL WEND WHILE
+          \[ \] [|!#*=%&^]
+        )
+      end
+
+      # Shift operators and condition codes, in upper and lower case.
+      def self.shift_or_condition
+        @shift_or_condition ||= %w(
+          ASR LSL LSR ROR RRX AL CC CS EQ GE GT HI HS LE LO LS LT MI NE PL VC VS
+          asr lsl lsr ror rrx al cc cs eq ge gt hi hs le lo ls lt mi ne pl vc vs
+        )
+      end
+
+      # Names highlighted as constants when written in braces, e.g. {TRUE}.
+      def self.builtin
+        @builtin ||= %w(
+          ARCHITECTURE AREANAME ARMASM_VERSION CODESIZE COMMANDLINE CONFIG CPU
+          ENDIAN FALSE FPIC FPU INPUTFILE INTER LINENUM LINENUMUP LINENUMUPPER
+          OBJASM_VERSION OPT PC PCSTOREOFFSET REENTRANT ROPI RWPI TRUE VAR
+        )
+      end
+
+      # Names highlighted as operators when written between colons, e.g.
+      # :SHL:. Each entry is a regexp fragment.
+      def self.operator
+        @operator ||= %w(
+          AND BASE CC CC_ENCODING CHR DEF EOR FATTR FEXEC FLOAD FSIZE INDEX LAND
+          LEFT LEN LEOR LNOT LOR LOWERCASE MOD NOT OR RCONST REVERSE_CC RIGHT ROL
+          ROR SHL SHR STR TARGET_ARCH_[0-9A-Z_]+ TARGET_FEATURE_[0-9A-Z_]+
+          TARGET_FPU_[0-9A-Z_]+ TARGET_PROFILE_[ARM] UAL UPPERCASE
+        )
+      end
+
+      # Start of a line: text before the first blanks is a label, and the
+      # blanks themselves enter :command.
+      state :root do
+        rule %r/\n/, Text
+        rule %r/^[ \t]*#[ \t]*(?:(?:#{ArmAsm.preproc_keyword.join('|')})(?:[ \t].*)?)?\n/, Comment::Preproc
+        rule %r/[ \t]+/, Text, :command
+        rule %r/;.*/, Comment
+        rule %r/\$[a-z_]\w*\.?/i, Name::Namespace # variable substitution or macro argument
+        rule %r/\w+|\|[^|\n]+\|/, Name::Label
+      end
+
+      # Directive, opcode or macro name. Blanks after it continue in :args;
+      # a file directive continues in :filespec.
+      state :command do
+        rule %r/\n/, Text, :pop!
+        rule %r/[ \t]+/ do
+          token Text
+          goto :args
+        end
+        rule %r/;.*/, Comment, :pop!
+        rule %r/(?:#{ArmAsm.file_directive.join('|')})\b/ do
+          token Keyword
+          goto :filespec
+        end
+        rule %r/(?:#{ArmAsm.general_directive.join('|')})(?=[; \t\n])/, Keyword
+        rule %r/(?:[A-Z][\dA-Z]*|[a-z][\da-z]*)(?:\.[NWnw])?(?:\.[DFIPSUdfipsu]?(?:8|16|32|64)?){,3}\b/, Name::Builtin # rather than attempt to list all opcodes, rely on all-uppercase or all-lowercase rule
+        rule %r/[a-z_]\w*|\|[^|\n]+\|/i, Name::Function # probably a macro name
+        rule %r/\$[a-z]\w*\.?/i, Name::Namespace
+      end
+
+      # Operands, up to the end of the line or a comment.
+      state :args do
+        rule %r/\n/, Text, :pop!
+        rule %r/[ \t]+/, Text
+        rule %r/;.*/, Comment, :pop!
+        rule %r/(?:#{ArmAsm.shift_or_condition.join('|')})\b/, Name::Builtin
+        rule %r/[a-z_]\w*|\|[^|\n]+\|/i, Name::Variable # various types of symbol
+        rule %r/%[bf]?[at]?\d+(?:[a-z_]\w*)?/i, Name::Label
+        # The negative lookaheads on the integer-type rules also reject
+        # digits and `.`, so a literal such as 25E-1 or 0x1.8p3 cannot be
+        # cut short by backtracking and falls through to the float rule.
+        rule %r/(?:&|0x)\h+(?![.\hp])/i, Literal::Number::Hex
+        rule %r/(?:&|0x)[.\h]+(?:p[-+]?\d+)?/i, Literal::Number::Float
+        rule %r/0f_\h{8}|0d_\h{16}/i, Literal::Number::Float
+        rule %r/(?:2_[01]+|3_[0-2]+|4_[0-3]+|5_[0-4]+|6_[0-5]+|7_[0-6]+|8_[0-7]+|9_[0-8]+|\d+)(?![.\de])/i, Literal::Number::Integer
+        # Require a digit so that a lone `.` reaches the Name::Constant rule.
+        rule %r/(?:2_[.01]+|3_[.0-2]+|4_[.0-3]+|5_[.0-4]+|6_[.0-5]+|7_[.0-6]+|8_[.0-7]+|9_[.0-8]+|(?=\.?\d)[.\d]+)(?:e[-+]?\d+)?/i, Literal::Number::Float
+        rule %r/[@:](?=[ \t]*(?:8|16|32|64|128|256)[^\d])/, Operator
+        rule %r/[.@]|\{(?:#{ArmAsm.builtin.join('|')})\}/, Name::Constant
+        rule %r/[-!#%&()*+,\/<=>?^{|}]|\[|\]|!=|&&|\/=|<<|<=|<>|==|><|>=|>>|\|\||:(?:#{ArmAsm.operator.join('|')}):/, Operator
+        rule %r/\$[a-z]\w*\.?/i, Name::Namespace
+        rule %r/'/ do
+          token Literal::String::Char
+          goto :singlequoted
+        end
+        rule %r/"/ do
+          token Literal::String::Double
+          goto :doublequoted
+        end
+      end
+
+      # Inside '...'; returns to :args at the closing quote.
+      state :singlequoted do
+        rule %r/\n/, Text, :pop!
+        rule %r/\$\$/, Literal::String::Char
+        rule %r/\$[a-z]\w*\.?/i, Name::Namespace
+        rule %r/'/ do
+          token Literal::String::Char
+          goto :args
+        end
+        rule %r/[^$'\n]+/, Literal::String::Char
+      end
+
+      # Inside "..."; returns to :args at the closing quote.
+      state :doublequoted do
+        rule %r/\n/, Text, :pop!
+        rule %r/\$\$/, Literal::String::Double
+        rule %r/\$[a-z]\w*\.?/i, Name::Namespace
+        rule %r/"/ do
+          token Literal::String::Double
+          goto :args
+        end
+        rule %r/[^$"\n]+/, Literal::String::Double
+      end
+
+      # Operand of a file directive; runs to the end of the line.
+      state :filespec do
+        rule %r/\n/, Text, :pop!
+        rule %r/\$\$/, Literal::String::Other
+        rule %r/\$[a-z]\w*\.?/i, Name::Namespace
+        rule %r/[^$\n]+/, Literal::String::Other
+      end
+    end
+  end
+end
diff --git a/spec/lexers/armasm_spec.rb b/spec/lexers/armasm_spec.rb
new file mode 100644
index 0000000000..684a5bf242
--- /dev/null
+++ b/spec/lexers/armasm_spec.rb
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+describe Rouge::Lexers::ArmAsm do
+  let(:subject) { Rouge::Lexers::ArmAsm.new }
+
+  describe 'guessing' do
+    include Support::Guessing
+
+    it 'guesses by filename' do
+      assert_guess :filename => 'foo.s'
+    end
+  end
+end
diff --git a/spec/visual/samples/armasm b/spec/visual/samples/armasm
new file mode 100644
index 0000000000..d80c58ecfb
--- /dev/null
+++ b/spec/visual/samples/armasm
@@ -0,0 +1,90 @@
+#ifndef Version
+  #include "Version.h"
+#endif
+
+; it shouldn't have any problem with apostrophes in comments
+; or "quotation marks" or more ;s
+
+        GET otherfile.s
+
+        ^ 0,r12 ; typical data structure layout
+offset1 # 4
+        # 4 ; sometimes no label assigned
+offset2 # 0
+size * :INDEX:@
+
+        IF ?offset1 = 4
+        ! 1, "This is an assembly-time assert"
+        ELIF -1<>:NOT:0
+        ! 1, "This is another assert"
+        ELSE
+        ! 0, "But this is a warning", 1
+        ENDIF
+
+my_r0 RN 0 ; non-standard register name symbols
+
+sixteen * 4 * 4
+
+        GBLS VBar
+VBar SETS "|"
+
+        MACRO
+$label MyMacro$suffix $arg1 = default, $arg2
+        LCLS thing
+        [ "$arg2"=""
+thing SETS "wibble ; this isn't a comment"
+        |
+thing SETS $arg2
+        ]
+        LCLL boolean
+boolean SETL {TRUE}
+        EXPORT $arg1
+$arg1
+$thing MOV pc, #0
+        MEND
+
+
+        ORG 0
+
+        ARM
+
+        AREA |Area$$Name|, CODE, READONLY
+
+MyFunc ROUT
+
+just_a_label
+label_and ; comment
+        LDR my_r0, =just_a_statement
+label AND r0, r1, r2 ; and a comment
+        MOV r0, r1, r2, LSL r3
+        MOV.W r0, r1, r2, ROR #1
+        ADDS.N r0, r1
+        IT MI
+        MOVMI r0, r1, RRX
+        UND #0x10-&10
+        PUSH {r0,r2-r4,ip,lr}
+        LDF F0, =25E-1
+        VMOVEQ.F64 d0, #-.7e1
+
+Symbols_may_contain_d161ts_and_underscores
+but_must_start_with_a_letter
+and_are_case_SenSitIve
+01 ; this, by contrast, is a local label
+
+        LDR r0, here
+        B .+8
+here DATA
+        DCD 1
+        BNE %BT01
+
+        DCB 1,2,3,'A',';','"' ; bytes
+        = "This is a string with embedded $$ dollar and "" double quote characters", 0
+        = "$VBar.not_part_of_variable_name", 0
+        DCD -1 ; words
+        & -2,:INDEX:offset1 ; more words
+        % 16
+same_as SPACE 16
+or FILL 16, 0
+        INCBIN include.bin
+
+        END