2
2
"""Classes and methods for parsing regexes into NFAs."""
3
3
4
4
from collections import deque
5
- from itertools import chain , count , product , zip_longest
5
+ from itertools import chain , count , product , repeat , zip_longest
6
6
7
7
from automata .base .utils import get_renaming_function
8
8
from automata .regex .lexer import Lexer
17
17
class NFARegexBuilder:
    """Builder class designed for speed in parsing regular expressions into NFAs."""

    # _state_name_counter is a per-builder iterator (shared via the lexer that
    # created the tokens) rather than a class-level itertools.count, so that
    # independent parses do not interfere with each other's state numbering.
    __slots__ = (
        '_transitions',
        '_initial_state',
        '_final_states',
        '_state_name_counter',
    )

    def __init__(self, *, transitions, initial_state, final_states, counter):
        """
        Initialize new builder class.

        Args:
            transitions: dict mapping each state name to a dict mapping input
                symbols ('' for epsilon) to sets of end states.
            initial_state: name of the initial state.
            final_states: set of accepting state names.
            counter: shared iterator yielding fresh, unused state names.
        """
        self._transitions = transitions
        self._initial_state = initial_state
        self._final_states = final_states
        self._state_name_counter = counter
31
31
32
32
@classmethod
def from_string_literal(cls, literal, counter):
    """
    Initialize this builder accepting only the given string literal.

    One state is drawn from ``counter`` per character of ``literal``, plus
    one final state; state k moves to state k+1 on the k-th character.

    Args:
        literal: the string the resulting NFA must accept exactly.
        counter: shared iterator yielding fresh state names; assumed to
            yield consecutive integers (the +1 arithmetic below relies on
            this).
    """
    # One state per character, each transitioning on that character.
    transitions = {
        next(counter): {symbol: set()}
        for symbol in literal
    }

    # Link each state to its successor; consecutive names come from counter.
    for start_state, path in transitions.items():
        for end_states in path.values():
            end_states.add(start_state + 1)

    # The state reached after the last character is the sole final state.
    final_state = next(counter)
    transitions[final_state] = {}

    return cls(
        transitions=transitions,
        initial_state=min(transitions.keys()),
        final_states={final_state},
        counter=counter
    )
55
56
56
57
@classmethod
def wildcard(cls, input_symbols, counter):
    """
    Initialize this builder for a wildcard with the given input symbols.

    The NFA has two states: the initial state moves to the final state on
    every symbol in ``input_symbols``.

    Args:
        input_symbols: iterable of symbols the wildcard matches.
        counter: shared iterator yielding fresh state names.
    """
    initial_state = next(counter)
    final_state = next(counter)

    transitions = {
        initial_state: {symbol: {final_state} for symbol in input_symbols},
        final_state: {}
    }

    return cls(
        transitions=transitions,
        initial_state=initial_state,
        final_states={final_state},
        counter=counter
    )
75
77
76
78
def union (self , other ):
@@ -79,7 +81,7 @@ def union(self, other):
79
81
"""
80
82
self ._transitions .update (other ._transitions )
81
83
82
- new_initial_state = self .__get_next_state_name ( )
84
+ new_initial_state = next ( self ._state_name_counter )
83
85
84
86
# Add epsilon transitions from new start state to old ones
85
87
self ._transitions [new_initial_state ] = {
@@ -129,9 +131,9 @@ def intersection(self, other):
129
131
if epsilon_transitions_a is not None :
130
132
state_dict = new_transitions .setdefault (curr_state_name , {})
131
133
state_dict .setdefault ('' , set ()).update (
132
- map (get_state_name , product (epsilon_transitions_a , [ q_b ] ))
134
+ map (get_state_name , zip (epsilon_transitions_a , repeat ( q_b ) ))
133
135
)
134
- next_states_iterables .append (product (epsilon_transitions_a , [ q_b ] ))
136
+ next_states_iterables .append (zip (epsilon_transitions_a , repeat ( q_b ) ))
135
137
136
138
# Get transition dict for states in other
137
139
transitions_b = other ._transitions .get (q_b , {})
@@ -140,9 +142,9 @@ def intersection(self, other):
140
142
if epsilon_transitions_b is not None :
141
143
state_dict = new_transitions .setdefault (curr_state_name , {})
142
144
state_dict .setdefault ('' , set ()).update (
143
- map (get_state_name , product ([ q_a ] , epsilon_transitions_b ))
145
+ map (get_state_name , zip ( repeat ( q_a ) , epsilon_transitions_b ))
144
146
)
145
- next_states_iterables .append (product ([ q_a ] , epsilon_transitions_b ))
147
+ next_states_iterables .append (zip ( repeat ( q_a ) , epsilon_transitions_b ))
146
148
147
149
# Add all transitions moving over same input symbols
148
150
for symbol in new_input_symbols :
@@ -190,7 +192,7 @@ def kleene_plus(self):
190
192
"""
191
193
Apply the kleene plus operation to the NFA represented by this builder
192
194
"""
193
- new_initial_state = self .__get_next_state_name ( )
195
+ new_initial_state = next ( self ._state_name_counter )
194
196
195
197
self ._transitions [new_initial_state ] = {
196
198
'' : {self ._initial_state }
@@ -205,7 +207,7 @@ def option(self):
205
207
"""
206
208
Apply the option operation to the NFA represented by this builder
207
209
"""
208
- new_initial_state = self .__get_next_state_name ( )
210
+ new_initial_state = next ( self ._state_name_counter )
209
211
210
212
self ._transitions [new_initial_state ] = {
211
213
'' : {self ._initial_state }
@@ -232,21 +234,17 @@ def shuffle_product(self, other):
232
234
233
235
for symbol , end_states in transitions_a .items ():
234
236
state_dict .setdefault (symbol , set ()).update (
235
- map (get_state_name , product (end_states , [ q_b ] ))
237
+ map (get_state_name , zip (end_states , repeat ( q_b ) ))
236
238
)
237
239
238
240
for symbol , end_states in transitions_b .items ():
239
241
state_dict .setdefault (symbol , set ()).update (
240
- map (get_state_name , product ([ q_a ] , end_states ))
242
+ map (get_state_name , zip ( repeat ( q_a ) , end_states ))
241
243
)
242
244
243
245
self ._final_states = set (map (get_state_name , product (self ._final_states , other ._final_states )))
244
246
self ._transitions = new_transitions
245
247
246
- @classmethod
247
- def __get_next_state_name (cls ):
248
- return next (cls ._state_name_counter )
249
-
250
248
251
249
class UnionToken (InfixOperator ):
252
250
"""Subclass of infix operator defining the union operator."""
@@ -328,19 +326,24 @@ def op(self, left, right):
328
326
class StringToken(Literal):
    """Subclass of literal token defining a string literal."""

    def __init__(self, text, counter):
        """
        Initialize token with its matched text and the shared state-name
        counter used when building its NFA.
        """
        super().__init__(text)
        self.counter = counter

    def val(self):
        """Return an NFARegexBuilder accepting exactly this token's text."""
        return NFARegexBuilder.from_string_literal(self.text, self.counter)
335
334
336
335
337
class WildcardToken(Literal):
    """Subclass of literal token defining a wildcard literal."""

    def __init__(self, text, input_symbols, counter):
        """
        Initialize token with its matched text, the alphabet the wildcard
        ranges over, and the shared state-name counter used when building
        its NFA.
        """
        super().__init__(text)
        self.input_symbols = input_symbols
        self.counter = counter

    def val(self):
        """Return an NFARegexBuilder matching any single input symbol."""
        return NFARegexBuilder.wildcard(self.input_symbols, self.counter)
347
345
348
346
349
def add_concat_tokens (token_list ):
@@ -372,17 +375,18 @@ def add_concat_tokens(token_list):
372
375
def get_regex_lexer(input_symbols):
    """Get lexer for parsing regular expressions.

    Args:
        input_symbols: alphabet used when expanding wildcard ('.') tokens.

    Returns:
        A Lexer with one registered token per regex construct.
    """
    lexer = Lexer()
    # One counter per lexer: every token factory below closes over it, so
    # all NFAs built during a single parse draw globally unique state names.
    state_name_counter = count(0)

    lexer.register_token(LeftParen, r'\(')
    lexer.register_token(RightParen, r'\)')
    lexer.register_token(lambda text: StringToken(text, state_name_counter), r'[A-Za-z0-9]')
    lexer.register_token(UnionToken, r'\|')
    lexer.register_token(IntersectionToken, r'\&')
    lexer.register_token(ShuffleToken, r'\^')
    lexer.register_token(KleeneStarToken, r'\*')
    lexer.register_token(KleenePlusToken, r'\+')
    lexer.register_token(OptionToken, r'\?')
    lexer.register_token(lambda text: WildcardToken(text, input_symbols, state_name_counter), r'\.')

    return lexer
388
392
@@ -391,7 +395,7 @@ def parse_regex(regexstr, input_symbols):
391
395
"""Return an NFARegexBuilder corresponding to regexstr."""
392
396
393
397
if len (regexstr ) == 0 :
394
- return NFARegexBuilder .from_string_literal (regexstr )
398
+ return NFARegexBuilder .from_string_literal (regexstr , count ( 0 ) )
395
399
396
400
lexer = get_regex_lexer (input_symbols )
397
401
lexed_tokens = lexer .lex (regexstr )
0 commit comments