Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use generic string states in Python lexer #1477

Merged
merged 4 commits into from
Apr 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 65 additions & 55 deletions lib/rouge/lexers/python.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ def self.exceptions

identifier = /[a-z_][a-z0-9_]*/i
dotted_identifier = /[a-z_.][a-z0-9_.]*/i

# Returns the string register for this lexer, creating it on first use.
#
# The register holds the prefix and delimiter of every string literal that
# has been opened, so the string states can look them up later.
#
# @return [StringRegister] the memoized register
def current_string
  return @string_register if @string_register

  @string_register = StringRegister.new
end

state :root do
rule %r/\n+/m, Text
rule %r/^(:)(\s*)([ru]{,2}""".*?""")/mi do
Expand Down Expand Up @@ -109,14 +114,11 @@ def self.exceptions

# TODO: not in python 3
rule %r/`.*?`/, Str::Backtick
rule %r/(?:r|ur|ru)"""/i, Str, :raw_tdqs
rule %r/(?:r|ur|ru)'''/i, Str, :raw_tsqs
rule %r/(?:r|ur|ru)"/i, Str, :raw_dqs
rule %r/(?:r|ur|ru)'/i, Str, :raw_sqs
rule %r/u?"""/i, Str, :tdqs
rule %r/u?'''/i, Str, :tsqs
rule %r/u?"/i, Str, :dqs
rule %r/u?'/i, Str, :sqs
# Opening of any string literal: an optional prefix of up to two of the
# letters r, f, b, u (matched case-insensitively) followed by a quote
# delimiter (''' or """ or a single ' or ").
# The lower-cased prefix and the delimiter are recorded in the string
# register before entering :generic_string, whose rules consult them.
rule %r/([rfbu]{0,2})('''|"""|['"])/i do |m|
token Str
current_string.register type: m[1].downcase, delim: m[2]
push :generic_string
end

rule %r/@#{dotted_identifier}/i, Name::Decorator

Expand Down Expand Up @@ -172,26 +174,39 @@ def self.exceptions
mixin :raise
end

# Printf-style format specifier inside a string: `%`, an optional
# parenthesised mapping key, flag characters, and optional width and
# precision. The match is tagged as interpolation.
state :strings do
rule %r/%(\([a-z0-9_]+\))?[-#0 +]*([0-9]+|[*])?(\.([0-9]+|[*]))?/i, Str::Interpol
end
state :generic_string do
rule %r/[^'"\\{]+/, Str
rule %r/{{/, Str

# Plain content of a double-quoted string: runs containing no backslash,
# double quote, percent sign or newline. Also pulls in the :strings rules.
state :strings_double do
rule %r/[^\\"%\n]+/, Str
mixin :strings
end
# A quote sequence inside a string is always emitted as string text; it
# additionally closes the string when it equals the delimiter recorded for
# the string currently on top of the register.
rule %r/'''|"""|['"]/ do |m|
  token Str
  closing = current_string.delim?(m[0])
  current_string.remove if closing
  pop! if closing
end

# Plain content of a single-quoted string: runs containing no backslash,
# single quote, percent sign or newline. Also pulls in the :strings rules.
state :strings_single do
rule %r/[^\\'%\n]+/, Str
mixin :strings
end
# A backslash is emitted as plain string text when the current string has
# an "r" prefix and as Str::Interpol otherwise. Either way the characters
# that follow are handed to :generic_escape.
rule %r/\\/ do |_m|
  token(current_string.type?("r") ? Str : Str::Interpol)
  push :generic_escape
end

state :nl do
rule %r/\n/, Str
# An opening brace starts an interpolation only when the current string has
# an "f" prefix; in every other string it is ordinary string text.
rule %r/{/ do |_m|
  interpolating = current_string.type?("f")
  token(interpolating ? Str::Interpol : Str)
  push :generic_interpol if interpolating
end
end

state :escape do
rule %r(\\
state :generic_escape do
rule %r(
( [\\abfnrtv"']
| \n
| N{[a-zA-Z][a-zA-Z ]+[a-zA-Z]}
Expand All @@ -200,48 +215,43 @@ def self.exceptions
| x[a-fA-F0-9]{2}
| [0-7]{1,3}
)
)x, Str::Escape
end

state :raw_escape do
rule %r/\\./, Str
)x do
if current_string.type? "r"
token Str
else
token Str::Escape
end
pop!
end
end

state :dqs do
rule %r/"/, Str, :pop!
mixin :escape
mixin :strings_double
# Contents of a brace-delimited interpolation inside a string.
state :generic_interpol do
  # Text up to the next brace is passed to `recurse`.
  rule(%r/[^{}]+/) { |m| recurse m[0] }
  # A nested opening brace stacks another level of this state; each closing
  # brace unwinds one level.
  rule %r/{/, Str::Interpol, :generic_interpol
  rule %r/}/, Str::Interpol, :pop!
end

# Body of a single-quoted string: a single quote ends the string; otherwise
# the :escape rules and then the :strings_single rules apply.
state :sqs do
rule %r/'/, Str, :pop!
mixin :escape
mixin :strings_single
end
class StringRegister < Array
# Reports whether +delim+ is the delimiter of the most recently registered
# string.
#
# @param delim [String] quote sequence to compare against
# @return [Boolean] true when the newest entry's delimiter equals +delim+
def delim?(delim)
  current = last
  current[1] == delim
end

# Body of a triple-double-quoted string: three double quotes end the string,
# while a lone double quote is ordinary string text. The :escape,
# :strings_double and :nl rules are mixed in after those two.
state :tdqs do
rule %r/"""/, Str, :pop!
rule %r/"/, Str
mixin :escape
mixin :strings_double
mixin :nl
end
# Records a newly opened string as the newest entry of the register.
#
# @param type [String] the string's prefix letters
# @param delim [String] the quote sequence that opened the string
# @return [StringRegister] the register itself
def register(type: "u", delim: "'")
  self << [type, delim]
end

# Body of a triple-single-quoted string: three single quotes end the string,
# while a lone single quote is ordinary string text. The :escape,
# :strings_single and :nl rules are mixed in after those two.
state :tsqs do
rule %r/'''/, Str, :pop!
rule %r/'/, Str
mixin :escape
mixin :strings_single
mixin :nl
end
# Drops the most recently registered string from the register.
#
# @return [Array, nil] the removed [type, delim] pair, or nil when the
#   register is empty
def remove
  pop
end

%w(tdqs tsqs dqs sqs).each do |qtype|
state :"raw_#{qtype}" do
mixin :raw_escape
mixin :"#{qtype}"
# Reports whether the prefix of the most recently registered string contains
# +type+.
#
# @param type [String] prefix letter to look for, e.g. "r" or "f"
# @return [Boolean] true when the newest entry's prefix includes +type+
def type?(type)
  prefix = last[0]
  prefix.include?(type)
end
end

private_constant :StringRegister
end
end
end
6 changes: 5 additions & 1 deletion spec/visual/samples/python
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,13 @@ float_literals = [
0_00E+2_34_5, 0.e+1, 00_1.E-0_2, -100.e+20, 1e01,
0_00E+2_34_5j, 0.e+1J, 00_1.E-0_2j, -100.e+20J 1e01j
]
floats = (19.0, 19.)

# PEP 465
a = b @ c
x @= y

floats = (19.0, 19.)
# PEP 498
f'{hello} world {int(x) + 1}'
f'{{ {4*10} }}'
f'result: {value:{width}.{precision}}'