Skip to content

Commit f5ed05a

Browse files
authored
Better numbering usage + other small updates (#130)
1 parent e9d87b5 commit f5ed05a

File tree

3 files changed

+48
-47
lines changed

3 files changed

+48
-47
lines changed

automata/fa/dfa.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -633,7 +633,7 @@ def disjoint_state_fn(state_pair):
633633

634634
def isempty(self):
635635
"""Return True if this DFA is completely empty."""
636-
return len(self._compute_reachable_states() & self.final_states) == 0
636+
return self._compute_reachable_states().isdisjoint(self.final_states)
637637

638638
def isfinite(self):
639639
"""
@@ -1247,7 +1247,7 @@ def get_name_original(states):
12471247

12481248
# Add NFA states to DFA as it is constructed from NFA.
12491249
dfa_transitions[current_state_name] = {}
1250-
if (current_states & target_nfa.final_states):
1250+
if not current_states.isdisjoint(target_nfa.final_states):
12511251
dfa_final_states.add(current_state_name)
12521252

12531253
# Enqueue the next set of current states for the generated DFA.

automata/fa/nfa.py

+11-14
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python3
22
"""Classes and methods for working with nondeterministic finite automata."""
33
from collections import deque
4-
from itertools import chain, count, product
4+
from itertools import chain, count, product, repeat
55

66
import networkx as nx
77
from frozendict import frozendict
@@ -208,7 +208,7 @@ def _eliminate_lambda(self):
208208
else:
209209
state_transition_dict[input_symbol] = next_current_states
210210

211-
if (new_final_states & lambda_enclosure):
211+
if not new_final_states.isdisjoint(lambda_enclosure):
212212
new_final_states.add(state)
213213

214214
if state in new_transitions:
@@ -238,7 +238,7 @@ def eliminate_lambda(self):
238238

239239
def _check_for_input_rejection(self, current_states):
240240
"""Raise an error if the given config indicates rejected input."""
241-
if not (current_states & self.final_states):
241+
if current_states.isdisjoint(self.final_states):
242242
raise exceptions.RejectionException(
243243
'the NFA stopped on all non-final states ({})'.format(
244244
', '.join(str(state) for state in current_states)))
@@ -459,17 +459,17 @@ def intersection(self, other):
459459
epsilon_transitions_a = transitions_a.get('')
460460
if epsilon_transitions_a is not None:
461461
state_dict = new_transitions.setdefault(curr_state, {})
462-
state_dict.setdefault('', set()).update(product(epsilon_transitions_a, [q_b]))
463-
next_states_iterables.append(product(epsilon_transitions_a, [q_b]))
462+
state_dict.setdefault('', set()).update(zip(epsilon_transitions_a, repeat(q_b)))
463+
next_states_iterables.append(zip(epsilon_transitions_a, repeat(q_b)))
464464

465465
# Get transition dict for states in other
466466
transitions_b = other.transitions.get(q_b, {})
467467
# Add epsilon transitions for second set of transitions
468468
epsilon_transitions_b = transitions_b.get('')
469469
if epsilon_transitions_b is not None:
470470
state_dict = new_transitions.setdefault(curr_state, {})
471-
state_dict.setdefault('', set()).update(product([q_a], epsilon_transitions_b))
472-
next_states_iterables.append(product([q_a], epsilon_transitions_b))
471+
state_dict.setdefault('', set()).update(zip(repeat(q_a), epsilon_transitions_b))
472+
next_states_iterables.append(zip(repeat(q_a), epsilon_transitions_b))
473473

474474
# Add all transitions moving over same input symbols
475475
for symbol in new_input_symbols:
@@ -522,11 +522,11 @@ def shuffle_product(self, other):
522522

523523
transitions_a = self.transitions.get(q_a, {})
524524
for symbol, end_states in transitions_a.items():
525-
state_dict.setdefault(symbol, set()).update(product(end_states, [q_b]))
525+
state_dict.setdefault(symbol, set()).update(zip(end_states, repeat(q_b)))
526526

527527
transitions_b = other.transitions.get(q_b, {})
528528
for symbol, end_states in transitions_b.items():
529-
state_dict.setdefault(symbol, set()).update(product([q_a], end_states))
529+
state_dict.setdefault(symbol, set()).update(zip(repeat(q_a), end_states))
530530

531531
return self.__class__(
532532
states=new_states,
@@ -658,10 +658,7 @@ def left_quotient(self, other):
658658

659659
if old_transitions_dict:
660660
for symbol, end_states in old_transitions_dict.items():
661-
new_state_dict[symbol] = {
662-
(end_state, state_b, True)
663-
for end_state in end_states
664-
}
661+
new_state_dict[symbol] = set(zip(end_states, repeat(state_b), repeat(True)))
665662

666663
return self.__class__(
667664
states=new_states,
@@ -760,7 +757,7 @@ def is_final_state(states_pair):
760757
# If at least one of the current states is a final state, the
761758
# condition should satisfy
762759
return any(
763-
len(nfa.final_states & nfa._lambda_closures[state]) > 0
760+
not nfa.final_states.isdisjoint(nfa._lambda_closures[state])
764761
for state in states
765762
)
766763

automata/regex/parser.py

+35-31
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
"""Classes and methods for parsing regexes into NFAs."""
33

44
from collections import deque
5-
from itertools import chain, count, product, zip_longest
5+
from itertools import chain, count, product, repeat, zip_longest
66

77
from automata.base.utils import get_renaming_function
88
from automata.regex.lexer import Lexer
@@ -17,50 +17,51 @@
1717
class NFARegexBuilder:
1818
"""Builder class designed for speed in parsing regular expressions into NFAs."""
1919

20-
__slots__ = ('_transitions', '_initial_state', '_final_states')
21-
_state_name_counter = count(0)
20+
__slots__ = ('_transitions', '_initial_state', '_final_states', '_state_name_counter')
2221

23-
def __init__(self, *, transitions, initial_state, final_states):
22+
def __init__(self, *, transitions, initial_state, final_states, counter):
2423
"""
2524
Initialize new builder class
2625
"""
2726

2827
self._transitions = transitions
2928
self._initial_state = initial_state
3029
self._final_states = final_states
30+
self._state_name_counter = counter
3131

3232
@classmethod
33-
def from_string_literal(cls, literal):
33+
def from_string_literal(cls, literal, counter):
3434
"""
3535
Initialize this builder accepting only the given string literal
3636
"""
3737

3838
transitions = {
39-
cls.__get_next_state_name(): {symbol: set()}
39+
next(counter): {symbol: set()}
4040
for symbol in literal
4141
}
4242

4343
for start_state, path in transitions.items():
4444
for end_states in path.values():
4545
end_states.add(start_state+1)
4646

47-
final_state = cls.__get_next_state_name()
47+
final_state = next(counter)
4848
transitions[final_state] = {}
4949

5050
return cls(
5151
transitions=transitions,
5252
initial_state=min(transitions.keys()),
53-
final_states={final_state}
53+
final_states={final_state},
54+
counter=counter
5455
)
5556

5657
@classmethod
57-
def wildcard(cls, input_symbols):
58+
def wildcard(cls, input_symbols, counter):
5859
"""
5960
Initialize this builder for a wildcard with the given input symbols
6061
"""
6162

62-
initial_state = cls.__get_next_state_name()
63-
final_state = cls.__get_next_state_name()
63+
initial_state = next(counter)
64+
final_state = next(counter)
6465

6566
transitions = {
6667
initial_state: {symbol: {final_state} for symbol in input_symbols},
@@ -70,7 +71,8 @@ def wildcard(cls, input_symbols):
7071
return cls(
7172
transitions=transitions,
7273
initial_state=initial_state,
73-
final_states={final_state}
74+
final_states={final_state},
75+
counter=counter
7476
)
7577

7678
def union(self, other):
@@ -79,7 +81,7 @@ def union(self, other):
7981
"""
8082
self._transitions.update(other._transitions)
8183

82-
new_initial_state = self.__get_next_state_name()
84+
new_initial_state = next(self._state_name_counter)
8385

8486
# Add epsilon transitions from new start state to old ones
8587
self._transitions[new_initial_state] = {
@@ -129,9 +131,9 @@ def intersection(self, other):
129131
if epsilon_transitions_a is not None:
130132
state_dict = new_transitions.setdefault(curr_state_name, {})
131133
state_dict.setdefault('', set()).update(
132-
map(get_state_name, product(epsilon_transitions_a, [q_b]))
134+
map(get_state_name, zip(epsilon_transitions_a, repeat(q_b)))
133135
)
134-
next_states_iterables.append(product(epsilon_transitions_a, [q_b]))
136+
next_states_iterables.append(zip(epsilon_transitions_a, repeat(q_b)))
135137

136138
# Get transition dict for states in other
137139
transitions_b = other._transitions.get(q_b, {})
@@ -140,9 +142,9 @@ def intersection(self, other):
140142
if epsilon_transitions_b is not None:
141143
state_dict = new_transitions.setdefault(curr_state_name, {})
142144
state_dict.setdefault('', set()).update(
143-
map(get_state_name, product([q_a], epsilon_transitions_b))
145+
map(get_state_name, zip(repeat(q_a), epsilon_transitions_b))
144146
)
145-
next_states_iterables.append(product([q_a], epsilon_transitions_b))
147+
next_states_iterables.append(zip(repeat(q_a), epsilon_transitions_b))
146148

147149
# Add all transitions moving over same input symbols
148150
for symbol in new_input_symbols:
@@ -190,7 +192,7 @@ def kleene_plus(self):
190192
"""
191193
Apply the kleene plus operation to the NFA represented by this builder
192194
"""
193-
new_initial_state = self.__get_next_state_name()
195+
new_initial_state = next(self._state_name_counter)
194196

195197
self._transitions[new_initial_state] = {
196198
'': {self._initial_state}
@@ -205,7 +207,7 @@ def option(self):
205207
"""
206208
Apply the option operation to the NFA represented by this builder
207209
"""
208-
new_initial_state = self.__get_next_state_name()
210+
new_initial_state = next(self._state_name_counter)
209211

210212
self._transitions[new_initial_state] = {
211213
'': {self._initial_state}
@@ -232,21 +234,17 @@ def shuffle_product(self, other):
232234

233235
for symbol, end_states in transitions_a.items():
234236
state_dict.setdefault(symbol, set()).update(
235-
map(get_state_name, product(end_states, [q_b]))
237+
map(get_state_name, zip(end_states, repeat(q_b)))
236238
)
237239

238240
for symbol, end_states in transitions_b.items():
239241
state_dict.setdefault(symbol, set()).update(
240-
map(get_state_name, product([q_a], end_states))
242+
map(get_state_name, zip(repeat(q_a), end_states))
241243
)
242244

243245
self._final_states = set(map(get_state_name, product(self._final_states, other._final_states)))
244246
self._transitions = new_transitions
245247

246-
@classmethod
247-
def __get_next_state_name(cls):
248-
return next(cls._state_name_counter)
249-
250248

251249
class UnionToken(InfixOperator):
252250
"""Subclass of infix operator defining the union operator."""
@@ -328,19 +326,24 @@ def op(self, left, right):
328326
class StringToken(Literal):
329327
"""Subclass of literal token defining a string literal."""
330328

329+
def __init__(self, text, counter):
330+
super().__init__(text)
331+
self.counter = counter
332+
331333
def val(self):
332-
return NFARegexBuilder.from_string_literal(self.text)
334+
return NFARegexBuilder.from_string_literal(self.text, self.counter)
333335

334336

335337
class WildcardToken(Literal):
336338
"""Subclass of literal token defining a wildcard literal."""
337339

338-
def __init__(self, text, input_symbols):
340+
def __init__(self, text, input_symbols, counter):
339341
super().__init__(text)
340342
self.input_symbols = input_symbols
343+
self.counter = counter
341344

342345
def val(self):
343-
return NFARegexBuilder.wildcard(self.input_symbols)
346+
return NFARegexBuilder.wildcard(self.input_symbols, self.counter)
344347

345348

346349
def add_concat_tokens(token_list):
@@ -372,17 +375,18 @@ def add_concat_tokens(token_list):
372375
def get_regex_lexer(input_symbols):
373376
"""Get lexer for parsing regular expressions."""
374377
lexer = Lexer()
378+
state_name_counter = count(0)
375379

376380
lexer.register_token(LeftParen, r'\(')
377381
lexer.register_token(RightParen, r'\)')
378-
lexer.register_token(StringToken, r'[A-Za-z0-9]')
382+
lexer.register_token(lambda text: StringToken(text, state_name_counter), r'[A-Za-z0-9]')
379383
lexer.register_token(UnionToken, r'\|')
380384
lexer.register_token(IntersectionToken, r'\&')
381385
lexer.register_token(ShuffleToken, r'\^')
382386
lexer.register_token(KleeneStarToken, r'\*')
383387
lexer.register_token(KleenePlusToken, r'\+')
384388
lexer.register_token(OptionToken, r'\?')
385-
lexer.register_token(lambda text: WildcardToken(text, input_symbols), r'\.')
389+
lexer.register_token(lambda text: WildcardToken(text, input_symbols, state_name_counter), r'\.')
386390

387391
return lexer
388392

@@ -391,7 +395,7 @@ def parse_regex(regexstr, input_symbols):
391395
"""Return an NFARegexBuilder corresponding to regexstr."""
392396

393397
if len(regexstr) == 0:
394-
return NFARegexBuilder.from_string_literal(regexstr)
398+
return NFARegexBuilder.from_string_literal(regexstr, count(0))
395399

396400
lexer = get_regex_lexer(input_symbols)
397401
lexed_tokens = lexer.lex(regexstr)

0 commit comments

Comments
 (0)