2828 POSSESSIVE_REPEAT : (POSSESSIVE_REPEAT , SUCCESS , POSSESSIVE_REPEAT_ONE ),
2929}
3030
31- class _CompileData :
32- __slots__ = ('code' , 'repeat_count' )
33- def __init__ (self ):
34- self .code = []
35- self .repeat_count = 0
# Sets of lowercase characters which have the same uppercase.
# Each inner tuple is one equivalence class of code points; the trailing
# comment on each entry shows the characters (escapes are used where the
# glyphs are easily confused or altered by Unicode normalisation).
_equivalences = (
    # LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I
    (0x69, 0x131),  # iı
    # LATIN SMALL LETTER S, LATIN SMALL LETTER LONG S
    (0x73, 0x17f),  # sſ
    # MICRO SIGN, GREEK SMALL LETTER MU
    (0xb5, 0x3bc),  # \u00b5 \u03bc
    # COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI
    (0x345, 0x3b9, 0x1fbe),  # \u0345 \u03b9 \u1fbe
    # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS, GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
    (0x390, 0x1fd3),  # \u0390 \u1fd3
    # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS, GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
    (0x3b0, 0x1fe3),  # \u03b0 \u1fe3
    # GREEK SMALL LETTER BETA, GREEK BETA SYMBOL
    (0x3b2, 0x3d0),  # βϐ
    # GREEK SMALL LETTER EPSILON, GREEK LUNATE EPSILON SYMBOL
    (0x3b5, 0x3f5),  # εϵ
    # GREEK SMALL LETTER THETA, GREEK THETA SYMBOL
    (0x3b8, 0x3d1),  # θϑ
    # GREEK SMALL LETTER KAPPA, GREEK KAPPA SYMBOL
    (0x3ba, 0x3f0),  # κϰ
    # GREEK SMALL LETTER PI, GREEK PI SYMBOL
    (0x3c0, 0x3d6),  # πϖ
    # GREEK SMALL LETTER RHO, GREEK RHO SYMBOL
    (0x3c1, 0x3f1),  # ρϱ
    # GREEK SMALL LETTER FINAL SIGMA, GREEK SMALL LETTER SIGMA
    (0x3c2, 0x3c3),  # ςσ
    # GREEK SMALL LETTER PHI, GREEK PHI SYMBOL
    (0x3c6, 0x3d5),  # φϕ
    # LATIN SMALL LETTER S WITH DOT ABOVE, LATIN SMALL LETTER LONG S WITH DOT ABOVE
    (0x1e61, 0x1e9b),  # \u1e61 \u1e9b
    # LATIN SMALL LIGATURE LONG S T, LATIN SMALL LIGATURE ST
    (0xfb05, 0xfb06),  # \ufb05 \ufb06
)

# Maps the lowercase code to lowercase codes which have the same uppercase.
# For every code point in an equivalence class the value is the tuple of the
# *other* members of that class, in the order they appear in _equivalences.
_ignorecase_fixes = {i: tuple(j for j in t if i != j)
                     for t in _equivalences for i in t}
3670
def _combine_flags(flags, add_flags, del_flags,
                   TYPE_FLAGS=_parser.TYPE_FLAGS):
    """Return *flags* with *add_flags* switched on and *del_flags* switched off.

    When *add_flags* contains any bit from TYPE_FLAGS, every TYPE_FLAGS bit
    already present in *flags* is cleared first, so the newly added type
    flag replaces the previous one instead of being OR-ed with it.

    TYPE_FLAGS is bound to ``_parser.TYPE_FLAGS`` once, at definition time,
    through the default argument.
    """
    if add_flags & TYPE_FLAGS:
        flags &= ~TYPE_FLAGS
    # Additions are applied before removals, so a bit present in both
    # add_flags and del_flags ends up cleared.
    return (flags | add_flags) & ~del_flags
4276
43- def _compile (data , pattern , flags ):
77+ def _compile (code , pattern , flags ):
4478 # internal: compile a (sub)pattern
45- code = data .code
4679 emit = code .append
4780 _len = len
4881 LITERAL_CODES = _LITERAL_CODES
@@ -115,19 +148,15 @@ def _compile(data, pattern, flags):
115148 skip = _len (code ); emit (0 )
116149 emit (av [0 ])
117150 emit (av [1 ])
118- _compile (data , av [2 ], flags )
151+ _compile (code , av [2 ], flags )
119152 emit (SUCCESS )
120153 code [skip ] = _len (code ) - skip
121154 else :
122155 emit (REPEATING_CODES [op ][0 ])
123156 skip = _len (code ); emit (0 )
124157 emit (av [0 ])
125158 emit (av [1 ])
126- # now op is in (MIN_REPEAT, MAX_REPEAT, POSSESSIVE_REPEAT)
127- if op != POSSESSIVE_REPEAT :
128- emit (data .repeat_count )
129- data .repeat_count += 1
130- _compile (data , av [2 ], flags )
159+ _compile (code , av [2 ], flags )
131160 code [skip ] = _len (code ) - skip
132161 emit (REPEATING_CODES [op ][1 ])
133162 elif op is SUBPATTERN :
@@ -136,7 +165,7 @@ def _compile(data, pattern, flags):
136165 emit (MARK )
137166 emit ((group - 1 )* 2 )
138167 # _compile_info(code, p, _combine_flags(flags, add_flags, del_flags))
139- _compile (data , p , _combine_flags (flags , add_flags , del_flags ))
168+ _compile (code , p , _combine_flags (flags , add_flags , del_flags ))
140169 if group :
141170 emit (MARK )
142171 emit ((group - 1 )* 2 + 1 )
@@ -148,7 +177,7 @@ def _compile(data, pattern, flags):
148177 # pop their stack if they reach it
149178 emit (ATOMIC_GROUP )
150179 skip = _len (code ); emit (0 )
151- _compile (data , av , flags )
180+ _compile (code , av , flags )
152181 emit (SUCCESS )
153182 code [skip ] = _len (code ) - skip
154183 elif op in SUCCESS_CODES :
@@ -163,7 +192,7 @@ def _compile(data, pattern, flags):
163192 if lo != hi :
164193 raise error ("look-behind requires fixed-width pattern" )
165194 emit (lo ) # look behind
166- _compile (data , av [1 ], flags )
195+ _compile (code , av [1 ], flags )
167196 emit (SUCCESS )
168197 code [skip ] = _len (code ) - skip
169198 elif op is AT :
@@ -182,7 +211,7 @@ def _compile(data, pattern, flags):
182211 for av in av [1 ]:
183212 skip = _len (code ); emit (0 )
184213 # _compile_info(code, av, flags)
185- _compile (data , av , flags )
214+ _compile (code , av , flags )
186215 emit (JUMP )
187216 tailappend (_len (code )); emit (0 )
188217 code [skip ] = _len (code ) - skip
@@ -210,12 +239,12 @@ def _compile(data, pattern, flags):
210239 emit (op )
211240 emit (av [0 ]- 1 )
212241 skipyes = _len (code ); emit (0 )
213- _compile (data , av [1 ], flags )
242+ _compile (code , av [1 ], flags )
214243 if av [2 ]:
215244 emit (JUMP )
216245 skipno = _len (code ); emit (0 )
217246 code [skipyes ] = _len (code ) - skipyes + 1
218- _compile (data , av [2 ], flags )
247+ _compile (code , av [2 ], flags )
219248 code [skipno ] = _len (code ) - skipno
220249 else :
221250 code [skipyes ] = _len (code ) - skipyes + 1
@@ -582,17 +611,17 @@ def isstring(obj):
def _code(p, flags):
    """Compile the parsed pattern *p* and return the resulting code list.

    The effective flags are the union of ``p.state.flags`` and *flags*.
    The returned list holds, in order: whatever ``_compile_info`` emits,
    whatever ``_compile`` emits for ``p.data``, and a final SUCCESS opcode.
    """
    flags = p.state.flags | flags
    code = []

    # compile info block
    _compile_info(code, p, flags)

    # compile the pattern
    _compile(code, p.data, flags)

    # terminate the program
    code.append(SUCCESS)

    return code
596625
def _hex_code(code):
    """Return *code* (an iterable of ints) rendered as ``'[0x…, 0x…]'``.

    Each value is zero-padded to a fixed width of ``_sre.CODESIZE * 2 + 2``
    characters: two hex digits per byte of a code unit plus the ``0x``
    prefix.
    """
    # Width is the same for every element, so compute it once.
    width = _sre.CODESIZE * 2 + 2
    return '[%s]' % ', '.join('%#0*x' % (width, x) for x in code)
@@ -693,21 +722,14 @@ def print_2(*args):
693722 else :
694723 print_ (FAILURE )
695724 i += 1
696- elif op in (REPEAT_ONE , MIN_REPEAT_ONE ,
725+ elif op in (REPEAT , REPEAT_ONE , MIN_REPEAT_ONE ,
697726 POSSESSIVE_REPEAT , POSSESSIVE_REPEAT_ONE ):
698727 skip , min , max = code [i : i + 3 ]
699728 if max == MAXREPEAT :
700729 max = 'MAXREPEAT'
701730 print_ (op , skip , min , max , to = i + skip )
702731 dis_ (i + 3 , i + skip )
703732 i += skip
704- elif op is REPEAT :
705- skip , min , max , repeat_index = code [i : i + 4 ]
706- if max == MAXREPEAT :
707- max = 'MAXREPEAT'
708- print_ (op , skip , min , max , repeat_index , to = i + skip )
709- dis_ (i + 4 , i + skip )
710- i += skip
711733 elif op is GROUPREF_EXISTS :
712734 arg , skip = code [i : i + 2 ]
713735 print_ (op , arg , skip , to = i + skip )
@@ -762,11 +784,11 @@ def compile(p, flags=0):
762784 else :
763785 pattern = None
764786
765- data = _code (p , flags )
787+ code = _code (p , flags )
766788
767789 if flags & SRE_FLAG_DEBUG :
768790 print ()
769- dis (data . code )
791+ dis (code )
770792
771793 # map in either direction
772794 groupindex = p .state .groupdict
@@ -775,6 +797,7 @@ def compile(p, flags=0):
775797 indexgroup [i ] = k
776798
777799 return _sre .compile (
778- pattern , flags | p .state .flags , data .code ,
779- p .state .groups - 1 , groupindex , tuple (indexgroup ),
780- data .repeat_count )
800+ pattern , flags | p .state .flags , code ,
801+ p .state .groups - 1 ,
802+ groupindex , tuple (indexgroup )
803+ )
0 commit comments