3636 :symbol
3737 :keyword
3838 :string
39+ :regex
3940 :character )
4041 " Types of tokens that represent leaf nodes in the AST." )
4142
4445 :rbrace )
4546 " Types of tokens that mark the end of a non-atomic form." )
4647
48+ (defvar parseclj-lex--prefix-tokens '(:quote
49+ :backquote
50+ :unquote
51+ :unquote-splice
52+ :discard
53+ :tag
54+ :reader-conditional
55+ :reader-conditional-splice
56+ :var
57+ :deref
58+ :map-prefix )
59+ " Tokens that modify the form that follows." )
60+
61+ (defvar parseclj-lex--prefix-2-tokens '(:metadata )
62+ " Tokens that modify the two forms that follow." )
63+
4764; ; Token interface
4865
4966(defun parseclj-lex-token (type form pos &rest attributes )
@@ -81,6 +98,11 @@ A token is an association list with :token-type as its first key."
8198 (and (consp token)
8299 (cdr (assq :token-type token))))
83100
101+ (defun parseclj-lex-token-form (token )
102+ " Get the form of TOKEN."
103+ (and (consp token)
104+ (cdr (assq :form token))))
105+
84106(defun parseclj-lex-leaf-token-p (token )
85107 " Return t if the given AST TOKEN is a leaf node."
86108 (member (parseclj-lex-token-type token) parseclj-lex--leaf-tokens))
@@ -89,6 +111,9 @@ A token is an association list with :token-type as its first key."
89111 " Return t if the given ast TOKEN is a closing token."
90112 (member (parseclj-lex-token-type token) parseclj-lex--closing-tokens))
91113
114+ (defun parseclj-lex-error-p (token )
115+ " Return t if the TOKEN represents a lexing error token."
116+ (eq (parseclj-lex-token-type token) :lex-error ))
92117
93118; ; Elisp values from tokens
94119
@@ -177,18 +202,32 @@ S goes through three transformations:
177202 (<= (char-after (point )) ?9 ))
178203 (right-char )))
179204
205+ (defun parseclj-lex-skip-hex ()
206+ " Skip all consecutive hex digits after point."
207+ (while (and (char-after (point ))
208+ (or (<= ?0 (char-after (point )) ?9 )
209+ (<= ?a (char-after (point )) ?f )
210+ (<= ?A (char-after (point )) ?F )))
211+ (right-char )))
212+
180213(defun parseclj-lex-skip-number ()
181214 " Skip a number at point."
182215 ; ; [\+\-]?\d+\.\d+
183- (when (member (char-after (point )) '(?+ ?- ))
184- (right-char ))
216+ (if (and (eq ?0 (char-after (point )))
217+ (eq ?x (char-after (1+ (point )))))
218+ (progn
219+ (right-char 2 )
220+ (parseclj-lex-skip-hex))
221+ (progn
222+ (when (member (char-after (point )) '(?+ ?- ))
223+ (right-char ))
185224
186- (parseclj-lex-skip-digits)
225+ (parseclj-lex-skip-digits)
187226
188- (when (eq (char-after (point )) ?. )
189- (right-char ))
227+ (when (eq (char-after (point )) ?. )
228+ (right-char ))
190229
191- (parseclj-lex-skip-digits))
230+ (parseclj-lex-skip-digits)) ))
192231
193232(defun parseclj-lex-number ()
194233 " Consume a number and return a `:number' token representing it."
@@ -270,22 +309,39 @@ are returned as their own lex tokens."
270309 ((equal sym " false" ) (parseclj-lex-token :false " false" pos))
271310 (t (parseclj-lex-token :symbol sym pos))))))
272311
273- (defun parseclj-lex-string ()
274- " Return a lex token representing a string.
275- If EOF is reached without finding a closing double quote, a :lex-error
276- token is returned."
312+ (defun parseclj-lex-string* ()
313+ " Helper for string/regex lexing.
314+ Returns either the string, or an error token"
277315 (let ((pos (point )))
278316 (right-char )
279317 (while (not (or (equal (char-after (point )) ?\" ) (parseclj-lex-at-eof-p)))
280318 (if (equal (char-after (point )) ?\\ )
281319 (right-char 2 )
282320 (right-char )))
283- (if (equal (char-after (point )) ?\" )
284- (progn
285- (right-char )
286- (parseclj-lex-token :string (buffer-substring-no-properties pos (point )) pos))
321+ (when (equal (char-after (point )) ?\" )
322+ (right-char )
323+ (buffer-substring-no-properties pos (point )))))
324+
325+ (defun parseclj-lex-string ()
326+ " Return a lex token representing a string.
327+ If EOF is reached without finding a closing double quote, a :lex-error
328+ token is returned."
329+ (let ((pos (point ))
330+ (str (parseclj-lex-string*)))
331+ (if str
332+ (parseclj-lex-token :string str pos)
287333 (parseclj-lex-error-token pos :invalid-string ))))
288334
335+ (defun parseclj-lex-regex ()
336+ " Return a lex token representing a regular expression.
337+ If EOF is reached without finding a closing double quote, a :lex-error
338+ token is returned."
339+ (let ((pos (1- (point )))
340+ (str (parseclj-lex-string*)))
341+ (if str
342+ (parseclj-lex-token :regex (concat " #" str) pos)
343+ (parseclj-lex-error-token pos :invalid-regex ))))
344+
289345(defun parseclj-lex-lookahead (n )
290346 " Return a lookahead string of N characters after point."
291347 (buffer-substring-no-properties (point ) (min (+ (point ) n) (point-max ))))
@@ -351,6 +407,16 @@ See `parseclj-lex-symbol', `parseclj-lex-symbol-start-p'."
351407 (right-char ))
352408 (parseclj-lex-token :comment (buffer-substring-no-properties pos (point )) pos)))
353409
410+ (defun parseclj-lex-map-prefix ()
411+ " Return a lex token representing a map prefix."
412+ (let ((pos (1- (point ))))
413+ (right-char )
414+ (when (equal (char-after (point )) ?: )
415+ (right-char ))
416+ (while (parseclj-lex-symbol-rest-p (char-after (point )))
417+ (right-char ))
418+ (parseclj-lex-token :map-prefix (buffer-substring-no-properties pos (point )) pos)))
419+
354420(defun parseclj-lex-next ()
355421 " Consume characters at point and return the next lexical token.
356422
@@ -387,6 +453,22 @@ See `parseclj-lex-token'."
387453 (right-char )
388454 (parseclj-lex-token :rbrace " }" pos))
389455
456+ ((equal char ?' )
457+ (right-char )
458+ (parseclj-lex-token :quote " '" pos))
459+
460+ ((equal char ?` )
461+ (right-char )
462+ (parseclj-lex-token :backquote " `" pos))
463+
464+ ((equal char ?~ )
465+ (right-char )
466+ (if (eq ?@ (char-after (point )))
467+ (progn
468+ (right-char )
469+ (parseclj-lex-token :unquote-splice " ~@" pos))
470+ (parseclj-lex-token :unquote " ~" pos)))
471+
390472 ((parseclj-lex-at-number-p)
391473 (parseclj-lex-number))
392474
@@ -405,6 +487,14 @@ See `parseclj-lex-token'."
405487 ((equal char ?\; )
406488 (parseclj-lex-comment))
407489
490+ ((equal char ?^ )
491+ (right-char )
492+ (parseclj-lex-token :metadata " ^" pos))
493+
494+ ((equal char ?@ )
495+ (right-char )
496+ (parseclj-lex-token :deref " @" pos))
497+
408498 ((equal char ?# )
409499 (right-char )
410500 (let ((char (char-after (point ))))
@@ -415,6 +505,23 @@ See `parseclj-lex-token'."
415505 ((equal char ?_ )
416506 (right-char )
417507 (parseclj-lex-token :discard " #_" pos))
508+ ((equal char ?\( )
509+ (right-char )
510+ (parseclj-lex-token :lambda " #(" pos))
511+ ((equal char ?' )
512+ (right-char )
513+ (parseclj-lex-token :var " #'" pos))
514+ ((equal char ?\" )
515+ (parseclj-lex-regex))
516+ ((equal char ?: )
517+ (parseclj-lex-map-prefix))
518+ ((equal char ?\? )
519+ (right-char )
520+ (if (eq ?@ (char-after (point )))
521+ (progn
522+ (right-char )
523+ (parseclj-lex-token :reader-conditional-splice " #?@" pos))
524+ (parseclj-lex-token :reader-conditional " #?" pos)))
418525 ((parseclj-lex-symbol-start-p char t )
419526 (right-char )
420527 (parseclj-lex-token :tag (concat " #" (parseclj-lex-get-symbol-at-point (1+ pos))) pos))
0 commit comments