Skip to content

Commit d425f86

Browse files
johanneswilmgitaarikakx
authored
Improved javascript template string expression extracting (#939)
Co-authored-by: Rik <gitaarik@posteo.net> Co-authored-by: Aarni Koskela <akx@iki.fi>
1 parent 82c41cc commit d425f86

File tree

3 files changed

+94
-8
lines changed

3 files changed

+94
-8
lines changed

babel/messages/extract.py

Lines changed: 53 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@
1616
:license: BSD, see LICENSE for more details.
1717
"""
1818
import ast
19+
import io
1920
import os
20-
from os.path import relpath
2121
import sys
22+
from os.path import relpath
2223
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
2324

2425
from babel.util import parse_encoding, parse_future_flags, pathmatch
@@ -532,7 +533,7 @@ def _parse_python_string(value, encoding, future_flags):
532533
return None
533534

534535

535-
def extract_javascript(fileobj, keywords, comment_tags, options):
536+
def extract_javascript(fileobj, keywords, comment_tags, options, lineno=1):
536537
"""Extract messages from JavaScript source code.
537538
538539
:param fileobj: the seekable, file-like object the messages should be
@@ -544,7 +545,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
544545
:param options: a dictionary of additional options (optional)
545546
Supported options are:
546547
* `jsx` -- set to false to disable JSX/E4X support.
547-
* `template_string` -- set to false to disable ES6 template string support.
548+
* `template_string` -- if `True`, supports gettext(`key`)
549+
* `parse_template_string` -- if `True` will parse the
550+
contents of javascript
551+
template strings.
552+
:param lineno: line number offset (for parsing embedded fragments)
548553
"""
549554
from babel.messages.jslexer import Token, tokenize, unquote_string
550555
funcname = message_lineno = None
@@ -556,12 +561,12 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
556561
last_token = None
557562
call_stack = -1
558563
dotted = any('.' in kw for kw in keywords)
559-
560564
for token in tokenize(
561565
fileobj.read().decode(encoding),
562566
jsx=options.get("jsx", True),
563567
template_string=options.get("template_string", True),
564-
dotted=dotted
568+
dotted=dotted,
569+
lineno=lineno
565570
):
566571
if ( # Turn keyword`foo` expressions into keyword("foo") calls:
567572
funcname and # have a keyword...
@@ -573,7 +578,11 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
573578
call_stack = 0
574579
token = Token('operator', ')', token.lineno)
575580

576-
if token.type == 'operator' and token.value == '(':
581+
if options.get('parse_template_string') and not funcname and token.type == 'template_string':
582+
for item in parse_template_string(token.value, keywords, comment_tags, options, token.lineno):
583+
yield item
584+
585+
elif token.type == 'operator' and token.value == '(':
577586
if funcname:
578587
message_lineno = token.lineno
579588
call_stack += 1
@@ -665,3 +674,41 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
665674
funcname = token.value
666675

667676
last_token = token
677+
678+
679+
def parse_template_string(template_string, keywords, comment_tags, options, lineno=1):
    """Extract messages from the ``${...}`` expressions of a JavaScript template string.

    The template string is scanned character by character.  Every top-level
    ``${...}`` expression found is handed to `extract_javascript` as if it
    were a small standalone source file, and whatever that call yields is
    yielded from here unchanged.

    :param template_string: the template string to be parsed, including its
                            surrounding backticks (the first and last
                            character are dropped before scanning)
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :param lineno: line number on which the template string starts (optional)
    """
    from babel.messages.jslexer import line_re

    level = 0             # brace depth; 0 means we are in literal template text
    inside_str = False    # quote character of the string being scanned, else False
    escaped = False       # True when the previous character was an unescaped backslash
    prev_character = None
    literal_text = []     # literal text seen since the last expression ended
    expression_chars = [] # characters of the expression currently being read

    for character in template_string[1:-1]:
        # Record the character first: the opening "${" therefore lands in the
        # literal text and the closing "}" in the expression (dropped below).
        (expression_chars if level else literal_text).append(character)

        # A backslash-escaped character is never structural: it can neither
        # open/close a string nor start/end an expression.
        if escaped:
            escaped = False
            prev_character = None
            continue
        if character == '\\':
            escaped = True
            prev_character = None
            continue

        if inside_str:
            # Inside a quoted string (or nested template) of an expression:
            # everything is opaque until the matching unescaped quote.
            # NOTE(review): a nested template whose own expressions contain
            # backticks is still not handled; that needs a context stack.
            if character == inside_str:
                inside_str = False
        elif level and character in ('"', "'", '`'):
            # Quotes only delimit strings inside expressions; in the literal
            # part of the template an apostrophe is just text.
            inside_str = character
        elif character == '{' and (level or prev_character == '$'):
            if not level:
                # Entering an expression: account for the line breaks in the
                # literal text that preceded it.
                lineno += len(line_re.findall(''.join(literal_text)))
                del literal_text[:]
            # Inside an expression plain braces (object literals, blocks)
            # nest as well, so they must be balanced too.
            level += 1
        elif level and character == '}':
            level -= 1
            if level == 0:
                expression_chars.pop()  # drop the closing "}"
                expression_contents = ''.join(expression_chars)
                del expression_chars[:]
                # NOTE(review): this encodes with the default codec (UTF-8)
                # while extract_javascript decodes with its own `encoding`;
                # confirm these agree when a non-UTF-8 encoding is configured.
                fake_file_obj = io.BytesIO(expression_contents.encode())
                for item in extract_javascript(fake_file_obj, keywords, comment_tags, options, lineno):
                    yield item
                lineno += len(line_re.findall(expression_contents))

        prev_character = character

babel/messages/jslexer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -151,17 +151,17 @@ def unquote_string(string):
151151
return u''.join(result)
152152

153153

154-
def tokenize(source, jsx=True, dotted=True, template_string=True):
154+
def tokenize(source, jsx=True, dotted=True, template_string=True, lineno=1):
155155
"""
156156
Tokenize JavaScript/JSX source. Returns a generator of tokens.
157157
158158
:param jsx: Enable (limited) JSX parsing.
159159
:param dotted: Read dotted names as single name token.
160160
:param template_string: Support ES6 template strings
161+
:param lineno: starting line number (optional)
161162
"""
162163
may_divide = False
163164
pos = 0
164-
lineno = 1
165165
end = len(source)
166166
rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)
167167

tests/messages/test_js_extract.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,3 +150,42 @@ def test_template_string_tag_usage():
150150
)
151151

152152
assert messages == [(1, 'Tag template, wow', [], None)]
153+
154+
155+
def test_inside_template_string():
    """A gettext call inside a ``${...}`` expression is extracted when
    the ``parse_template_string`` option is enabled."""
    source = BytesIO(b"const msg = `${gettext('Hello')} ${user.name}`")
    keywords = {"gettext": None}
    options = {'parse_template_string': True}

    extracted = list(extract.extract('javascript', source, keywords, [], options))

    expected = [(1, 'Hello', [], None)]
    assert extracted == expected
162+
163+
164+
def test_inside_template_string_with_linebreaks():
    """Messages inside multi-line ``${...}`` expressions are reported with
    the line number of the gettext call itself."""
    source = BytesIO(b"""\
const userName = gettext('Username')
const msg = `${
gettext('Hello')
} ${userName} ${
gettext('Are you having a nice day?')
}`
const msg2 = `${
gettext('Howdy')
} ${userName} ${
gettext('Are you doing ok?')
}`
""")
    keywords = {"gettext": None}
    options = {'parse_template_string': True}

    extracted = list(extract.extract('javascript', source, keywords, [], options))

    expected = [
        (1, 'Username', [], None),
        (3, 'Hello', [], None),
        (5, 'Are you having a nice day?', [], None),
        (8, 'Howdy', [], None),
        (10, 'Are you doing ok?', [], None),
    ]
    assert extracted == expected
183+
184+
185+
def test_inside_nested_template_string():
    """Messages are extracted from template strings nested inside the
    expressions of another template string."""
    source = BytesIO(b"const msg = `${gettext('Greetings!')} ${ evening ? `${user.name}: ${gettext('This is a lovely evening.')}` : `${gettext('The day is really nice!')} ${user.name}`}`")
    keywords = {"gettext": None}
    options = {'parse_template_string': True}

    extracted = list(extract.extract('javascript', source, keywords, [], options))

    expected = [
        (1, 'Greetings!', [], None),
        (1, 'This is a lovely evening.', [], None),
        (1, 'The day is really nice!', [], None),
    ]
    assert extracted == expected

0 commit comments

Comments
 (0)