Skip to content

Commit 034c5df

Browse files
JeffBezanson authored and staticfloat committed
disallow unbalanced bidirectional formatting in strings and comments (#42918)
(cherry picked from commit 2cfebad)
1 parent efa5645 commit 034c5df

File tree

2 files changed

+79
-41
lines changed

2 files changed

+79
-41
lines changed

src/julia-parser.scm

Lines changed: 61 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -219,13 +219,6 @@
219219

220220
(define (newline? c) (eqv? c #\newline))
221221

222-
(define (skip-to-eol port)
223-
(let ((c (peek-char port)))
224-
(cond ((eof-object? c) c)
225-
((eqv? c #\newline) c)
226-
(else (read-char port)
227-
(skip-to-eol port)))))
228-
229222
(define (op-or-sufchar? c) (or (op-suffix-char? c) (opchar? c)))
230223

231224
(define (read-operator port c0 (postfix? #f))
@@ -486,33 +479,56 @@
486479
(pair? (cadr t)) (eq? (car (cadr t)) 'core)
487480
(memq (cadadr t) '(@int128_str @uint128_str @big_str))))
488481

482+
;; Initial bidi-formatting state: a pair (embedding-count . isolate-count),
;; both zero, meaning no explicit directional formatting is currently open.
(define (make-bidi-state)
  (cons 0 0))
483+
484+
;; Return the bidi state that results from seeing character `c` in state `st`.
;; `st` is a pair (embeds . isolates):
;;   - embedding/override initiators bump the first counter, PDF lowers it;
;;   - isolate initiators bump the second counter, PDI lowers it;
;;   - a newline resets both counters to zero;
;;   - every other character leaves the state untouched.
;; The input pair is never mutated; a new pair (or `st` itself) is returned.
(define (update-bidi-state st c)
  (let ((embeds   (car st))
        (isolates (cdr st)))
    (cond ((memv c '(#\U202A #\U202B #\U202D #\U202E))   ;; LRE RLE LRO RLO
           (cons (+ embeds 1) isolates))
          ((memv c '(#\U2066 #\U2067 #\U2068))           ;; LRI RLI FSI
           (cons embeds (+ isolates 1)))
          ((eqv? c #\U202C)                              ;; PDF
           (cons (- embeds 1) isolates))
          ((eqv? c #\U2069)                              ;; PDI
           (cons embeds (- isolates 1)))
          ((eqv? c #\newline)
           '(0 . 0))
          (else st))))
492+
493+
;; True when both counters of the bidi state `st` are zero, i.e. every
;; embedding/override and every isolate that was counted has been closed.
(define (bidi-state-terminated? st)
  (and (= (car st) 0)
       (= (cdr st) 0)))
494+
495+
;; Consume characters from `port` up to, but not including, the next newline.
;; Returns the character that stopped the scan: the (still unread) newline,
;; or the EOF object if the input ended first.
(define (skip-line-comment port)
  (let scan ((next (peek-char port)))
    (if (or (eof-object? next)
            (eqv? next #\newline))
        next
        (begin (read-char port)
               (scan (peek-char port))))))
501+
502+
;; Skip the remainder of a (possibly nested) multi-line comment.
;;   port  - input port positioned just after an opening `#=`
;;   count - current nesting depth of `#= ... =#` (1 for the outermost comment)
;;   bds   - bidi-formatting state accumulated so far (see update-bidi-state)
;; Signals an "incomplete:" error if EOF is reached before the comment closes,
;; and an error if the outermost comment closes while bidirectional formatting
;; characters are still unbalanced.
(define (skip-multiline-comment port count bds)
  (let ((c (read-char port)))
    (cond ((eof-object? c)
           (error "incomplete: unterminated multi-line comment #= ... =#")) ; NOTE: changing this may affect code in base/client.jl
          ((eqv? c #\=)
           (if (eqv? (peek-char port) #\#)
               (begin
                 (read-char port)       ; consume the # of a closing =#
                 (if (> count 1)
                     (skip-multiline-comment port (- count 1) bds)
                     (if (not (bidi-state-terminated? bds))
                         (error "unbalanced bidirectional formatting in comment"))))
               ;; The character after `=` was only peeked, not consumed; it is
               ;; read (and folded into the bidi state) by the next iteration.
               ;; Updating the state with it here as well would count a bidi
               ;; control that directly follows `=` twice.
               (skip-multiline-comment port count bds)))
          ((eqv? c #\#)
           (skip-multiline-comment port
                                   (if (eqv? (peek-char port) #\=)
                                       (begin (read-char port) ; nested opening #=
                                              (+ count 1))
                                       count)
                                   bds))
          (else
           (skip-multiline-comment port count (update-bidi-state bds c))))))
524+
489525
;; skip to end of comment, starting at #: either #...<eol> or #= .... =#.
490526
(define (skip-comment port)
491-
(define (skip-multiline-comment port count)
492-
(let ((c (read-char port)))
493-
(if (eof-object? c)
494-
(error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl
495-
(begin (if (eqv? c #\=)
496-
(let ((c (peek-char port)))
497-
(if (eqv? c #\#)
498-
(begin
499-
(read-char port)
500-
(if (> count 1)
501-
(skip-multiline-comment port (- count 1))))
502-
(skip-multiline-comment port count)))
503-
(if (eqv? c #\#)
504-
(skip-multiline-comment port
505-
(if (eqv? (peek-char port) #\=)
506-
(begin (read-char port)
507-
(+ count 1))
508-
count))
509-
(skip-multiline-comment port count)))))))
510-
511527
(read-char port) ; read # that was already peeked
512528
(if (eqv? (peek-char port) #\=)
513529
(begin (read-char port) ; read initial =
514-
(skip-multiline-comment port 1))
515-
(skip-to-eol port)))
530+
(skip-multiline-comment port 1 (make-bidi-state)))
531+
(skip-line-comment port)))
516532

517533
(define (skip-ws-and-comments port)
518534
(skip-ws port #t)
@@ -2221,24 +2237,28 @@
22212237
(let loop ((c (read-char p))
22222238
(b (open-output-string))
22232239
(e ())
2224-
(quotes 0))
2240+
(quotes 0)
2241+
(bds (make-bidi-state)))
22252242
(cond
22262243
((eqv? c delim)
22272244
(if (< quotes n)
2228-
(loop (read-char p) b e (+ quotes 1))
2229-
(reverse (cons (io.tostring! b) e))))
2245+
(loop (read-char p) b e (+ quotes 1) bds)
2246+
(begin
2247+
(if (not (bidi-state-terminated? bds))
2248+
(error "unbalanced bidirectional formatting in string literal"))
2249+
(reverse (cons (io.tostring! b) e)))))
22302250

22312251
((= quotes 1)
22322252
(if (not raw) (write-char #\\ b))
22332253
(write-char delim b)
2234-
(loop c b e 0))
2254+
(loop c b e 0 (update-bidi-state bds c)))
22352255

22362256
((= quotes 2)
22372257
(if (not raw) (write-char #\\ b))
22382258
(write-char delim b)
22392259
(if (not raw) (write-char #\\ b))
22402260
(write-char delim b)
2241-
(loop c b e 0))
2261+
(loop c b e 0 (update-bidi-state bds c)))
22422262

22432263
((eqv? c #\\)
22442264
(if raw
@@ -2251,16 +2271,16 @@
22512271
(io.write b (string.rep "\\" (div count 2)))
22522272
(if (odd? count)
22532273
(begin (write-char delim b)
2254-
(loop (read-char p) b e 0))
2255-
(loop nxch b e 0)))
2274+
(loop (read-char p) b e 0 bds))
2275+
(loop nxch b e 0 bds)))
22562276
(else
22572277
(io.write b (string.rep "\\" count))
22582278
(write-char nxch b)
2259-
(loop (read-char p) b e 0))))
2279+
(loop (read-char p) b e 0 (update-bidi-state bds nxch)))))
22602280
(let ((nxch (not-eof-for delim (read-char p))))
22612281
(write-char #\\ b)
22622282
(write-char nxch b)
2263-
(loop (read-char p) b e 0))))
2283+
(loop (read-char p) b e 0 (update-bidi-state bds nxch)))))
22642284

22652285
((and (eqv? c #\$) (not raw))
22662286
(let* ((ex (parse-interpolate s))
@@ -2270,19 +2290,19 @@
22702290
(loop (read-char p)
22712291
(open-output-string)
22722292
(list* ex (io.tostring! b) e)
2273-
0)))
2293+
0 bds)))
22742294

22752295
; convert literal \r and \r\n in strings to \n (issue #11988)
22762296
((eqv? c #\return) ; \r
22772297
(begin
22782298
(if (eqv? (peek-char p) #\linefeed) ; \r\n
22792299
(read-char p))
22802300
(write-char #\newline b)
2281-
(loop (read-char p) b e 0)))
2301+
(loop (read-char p) b e 0 bds)))
22822302

22832303
(else
22842304
(write-char (not-eof-for delim c) b)
2285-
(loop (read-char p) b e 0)))))
2305+
(loop (read-char p) b e 0 (update-bidi-state bds c))))))
22862306

22872307
(define (not-eof-1 c)
22882308
(if (eof-object? c)

test/syntax.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2719,3 +2719,21 @@ macro m42220()
27192719
end
27202720
@test @m42220()() isa Vector{Float64}
27212721
@test @m42220()(Bool) isa Vector{Bool}
2722+
2723+
# A string literal that opens an override (U+202E) and an isolate (U+2066)
# without closing them before the end of the literal must fail to parse.
@test_throws ParseError Meta.parse("""
2724+
function checkUserAccess(u::User)
2725+
if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066"
2726+
return true
2727+
end
2728+
return false
2729+
end
2730+
""")
2731+
2732+
# A multi-line comment that still has an override (U+202E) and an isolate
# (U+2066) open when it is closed by `=#` must fail to parse.
@test_throws ParseError Meta.parse("""
2733+
function checkUserAccess(u::User)
2734+
#=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =#
2735+
return true
2736+
#= end admin only \u202e \u2066end\u2069 \u2066=#
2737+
return false
2738+
end
2739+
""")

0 commit comments

Comments
 (0)