Skip to content

Commit 9132a40

Browse files
JeffBezanson authored and KristofferC committed
disallow unbalanced bidirectional formatting in strings and comments (#42918)
(cherry picked from commit 2cfebad)
1 parent 8597a4b commit 9132a40

File tree

3 files changed

+82
-42
lines changed

3 files changed

+82
-42
lines changed

NEWS.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ Language changes
4141
* `Iterators.peel(itr)` now returns `nothing` when `itr` is empty instead of throwing a `BoundsError` ([#39607]).
4242
* Multiple successive semicolons in an array expression were previously ignored (e.g., `[1 ;; 2] == [1 ; 2]`).
4343
This syntax is now used to separate dimensions (see **New language features**).
44+
* Unbalanced Unicode bidirectional formatting directives are now disallowed within strings and comments,
45+
to mitigate the ["trojan source"](https://www.trojansource.codes) vulnerability ([#42918]).
4446

4547
Compiler/Runtime improvements
4648
-----------------------------

src/julia-parser.scm

Lines changed: 62 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -221,13 +221,6 @@
221221

222222
(define (newline? c) (eqv? c #\newline))
223223

224-
(define (skip-to-eol port)
225-
(let ((c (peek-char port)))
226-
(cond ((eof-object? c) c)
227-
((eqv? c #\newline) c)
228-
(else (read-char port)
229-
(skip-to-eol port)))))
230-
231224
(define (op-or-sufchar? c) (or (op-suffix-char? c) (opchar? c)))
232225

233226
(define (read-operator port c0 (postfix? #f))
@@ -495,33 +488,56 @@
495488
(pair? (cadr t)) (eq? (car (cadr t)) 'core)
496489
(memq (cadadr t) '(@int128_str @uint128_str @big_str))))
497490

491+
(define (make-bidi-state) '(0 . 0))
492+
493+
(define (update-bidi-state st c)
494+
(case c
495+
((#\U202A #\U202B #\U202D #\U202E) (cons (+ (car st) 1) (cdr st))) ;; LRE RLE LRO RLO
496+
((#\U2066 #\U2067 #\U2068) (cons (car st) (+ (cdr st) 1))) ;; LRI RLI FSI
497+
((#\U202C) (cons (- (car st) 1) (cdr st))) ;; PDF
498+
((#\U2069) (cons (car st) (- (cdr st) 1))) ;; PDI
499+
((#\newline) '(0 . 0))
500+
(else st)))
501+
502+
(define (bidi-state-terminated? st) (equal? st '(0 . 0)))
503+
504+
(define (skip-line-comment port)
505+
(let ((c (peek-char port)))
506+
(cond ((eof-object? c) c)
507+
((eqv? c #\newline) c)
508+
(else (read-char port)
509+
(skip-line-comment port)))))
510+
511+
(define (skip-multiline-comment port count bds)
512+
(let ((c (read-char port)))
513+
(if (eof-object? c)
514+
(error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl
515+
(if (eqv? c #\=)
516+
(let ((c (peek-char port)))
517+
(if (eqv? c #\#)
518+
(begin
519+
(read-char port)
520+
(if (> count 1)
521+
(skip-multiline-comment port (- count 1) bds)
522+
(if (not (bidi-state-terminated? bds))
523+
(error "unbalanced bidirectional formatting in comment"))))
524+
(skip-multiline-comment port count (update-bidi-state bds c))))
525+
(if (eqv? c #\#)
526+
(skip-multiline-comment port
527+
(if (eqv? (peek-char port) #\=)
528+
(begin (read-char port)
529+
(+ count 1))
530+
count)
531+
bds)
532+
(skip-multiline-comment port count (update-bidi-state bds c)))))))
533+
498534
;; skip to end of comment, starting at #: either #...<eol> or #= .... =#.
499535
(define (skip-comment port)
500-
(define (skip-multiline-comment port count)
501-
(let ((c (read-char port)))
502-
(if (eof-object? c)
503-
(error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl
504-
(begin (if (eqv? c #\=)
505-
(let ((c (peek-char port)))
506-
(if (eqv? c #\#)
507-
(begin
508-
(read-char port)
509-
(if (> count 1)
510-
(skip-multiline-comment port (- count 1))))
511-
(skip-multiline-comment port count)))
512-
(if (eqv? c #\#)
513-
(skip-multiline-comment port
514-
(if (eqv? (peek-char port) #\=)
515-
(begin (read-char port)
516-
(+ count 1))
517-
count))
518-
(skip-multiline-comment port count)))))))
519-
520536
(read-char port) ; read # that was already peeked
521537
(if (eqv? (peek-char port) #\=)
522538
(begin (read-char port) ; read initial =
523-
(skip-multiline-comment port 1))
524-
(skip-to-eol port)))
539+
(skip-multiline-comment port 1 (make-bidi-state)))
540+
(skip-line-comment port)))
525541

526542
(define (skip-ws-and-comments port)
527543
(skip-ws port #t)
@@ -2336,24 +2352,28 @@
23362352
(let loop ((c (read-char p))
23372353
(b (open-output-string))
23382354
(e ())
2339-
(quotes 0))
2355+
(quotes 0)
2356+
(bds (make-bidi-state)))
23402357
(cond
23412358
((eqv? c delim)
23422359
(if (< quotes n)
2343-
(loop (read-char p) b e (+ quotes 1))
2344-
(reverse (cons (io.tostring! b) e))))
2360+
(loop (read-char p) b e (+ quotes 1) bds)
2361+
(begin
2362+
(if (not (bidi-state-terminated? bds))
2363+
(error "unbalanced bidirectional formatting in string literal"))
2364+
(reverse (cons (io.tostring! b) e)))))
23452365

23462366
((= quotes 1)
23472367
(if (not raw) (write-char #\\ b))
23482368
(write-char delim b)
2349-
(loop c b e 0))
2369+
(loop c b e 0 (update-bidi-state bds c)))
23502370

23512371
((= quotes 2)
23522372
(if (not raw) (write-char #\\ b))
23532373
(write-char delim b)
23542374
(if (not raw) (write-char #\\ b))
23552375
(write-char delim b)
2356-
(loop c b e 0))
2376+
(loop c b e 0 (update-bidi-state bds c)))
23572377

23582378
((eqv? c #\\)
23592379
(if raw
@@ -2366,19 +2386,19 @@
23662386
(io.write b (string.rep "\\" (div count 2)))
23672387
(if (odd? count)
23682388
(begin (write-char delim b)
2369-
(loop (read-char p) b e 0))
2370-
(loop nxch b e 0)))
2389+
(loop (read-char p) b e 0 bds))
2390+
(loop nxch b e 0 bds)))
23712391
(else
23722392
(io.write b (string.rep "\\" count))
23732393
(write-char nxch b)
2374-
(loop (read-char p) b e 0))))
2394+
(loop (read-char p) b e 0 (update-bidi-state bds nxch)))))
23752395
(let ((nxch (not-eof-for delim (read-char p))))
23762396
(write-char #\\ b)
23772397
(if (eqv? nxch #\return)
2378-
(loop nxch b e 0)
2398+
(loop nxch b e 0 bds)
23792399
(begin
23802400
(write-char nxch b)
2381-
(loop (read-char p) b e 0))))))
2401+
(loop (read-char p) b e 0 (update-bidi-state bds nxch)))))))
23822402

23832403
((and (eqv? c #\$) (not raw))
23842404
(let* ((ex (parse-interpolate s))
@@ -2388,19 +2408,19 @@
23882408
(loop (read-char p)
23892409
(open-output-string)
23902410
(list* ex (io.tostring! b) e)
2391-
0)))
2411+
0 bds)))
23922412

23932413
; convert literal \r and \r\n in strings to \n (issue #11988)
23942414
((eqv? c #\return) ; \r
23952415
(begin
23962416
(if (eqv? (peek-char p) #\linefeed) ; \r\n
23972417
(read-char p))
23982418
(write-char #\newline b)
2399-
(loop (read-char p) b e 0)))
2419+
(loop (read-char p) b e 0 bds)))
24002420

24012421
(else
24022422
(write-char (not-eof-for delim c) b)
2403-
(loop (read-char p) b e 0)))))
2423+
(loop (read-char p) b e 0 (update-bidi-state bds c))))))
24042424

24052425
(define (not-eof-1 c)
24062426
(if (eof-object? c)

test/syntax.jl

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2964,3 +2964,21 @@ macro m42220()
29642964
end
29652965
@test @m42220()() isa Vector{Float64}
29662966
@test @m42220()(Bool) isa Vector{Bool}
2967+
2968+
@test_throws ParseError Meta.parse("""
2969+
function checkUserAccess(u::User)
2970+
if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066"
2971+
return true
2972+
end
2973+
return false
2974+
end
2975+
""")
2976+
2977+
@test_throws ParseError Meta.parse("""
2978+
function checkUserAccess(u::User)
2979+
#=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =#
2980+
return true
2981+
#= end admin only \u202e \u2066end\u2069 \u2066=#
2982+
return false
2983+
end
2984+
""")

0 commit comments

Comments
 (0)