Skip to content

Commit b1aa195

Browse files
committed
Fredrik Lundh: here's the 96.6% version of SRE
1 parent 0292d78 commit b1aa195

File tree

6 files changed

+743
-303
lines changed

6 files changed

+743
-303
lines changed

Lib/sre.py

Lines changed: 105 additions & 18 deletions
Original file line number | Original file line | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
# -*- Mode: Python; tab-width: 4 -*-
2
#
1
#
3
# Secret Labs' Regular Expression Engine
2
# Secret Labs' Regular Expression Engine
4
# $Id$
3
# $Id$
@@ -7,39 +6,127 @@
7
#
6
#
8
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
7
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
9
#
8
#
10-
# This code can only be used for 1.6 alpha testing. All other use
11-
# require explicit permission from Secret Labs AB.
12-
#
13
# Portions of this engine have been developed in cooperation with
9
# Portions of this engine have been developed in cooperation with
14
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
10
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
15
# other compatibility work.
11
# other compatibility work.
16
#
12
#
17

13

18-
"""
19-
this is a long string
20-
"""
21-
22
import sre_compile
14
import sre_compile
23

15

16+
# flags
17+
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
18+
L = LOCALE = sre_compile.SRE_FLAG_LOCALE
19+
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
20+
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
21+
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
22+
24
# --------------------------------------------------------------------
23
# --------------------------------------------------------------------
25
# public interface
24
# public interface
26

25

27-
def compile(pattern, flags=0):
26+
# FIXME: add docstrings
28-
return sre_compile.compile(pattern, _fixflags(flags))
29

27

30
def match(pattern, string, flags=0):
    """Compile pattern with flags and return the result of calling the
    compiled pattern's match() method on string."""
    compiled = _compile(pattern, flags)
    return compiled.match(string)
32

30

33
def search(pattern, string, flags=0):
    """Compile pattern with flags and return the result of calling the
    compiled pattern's search() method on string."""
    compiled = _compile(pattern, flags)
    return compiled.search(string)
33+
34+
def sub(pattern, repl, string, count=0):
    """Compile pattern (default flags) and return the result of its
    sub() method called with repl, string and count."""
    compiled = _compile(pattern)
    return compiled.sub(repl, string, count)
36+
37+
def subn(pattern, repl, string, count=0):
    """Compile pattern (default flags) and return the result of its
    subn() method called with repl, string and count."""
    compiled = _compile(pattern)
    return compiled.subn(repl, string, count)
39+
40+
def split(pattern, string, maxsplit=0):
    """Compile pattern (default flags) and return the result of its
    split() method called with string and maxsplit."""
    compiled = _compile(pattern)
    return compiled.split(string, maxsplit)
35

42

36-
# FIXME: etc
43+
def findall(pattern, string, maxsplit=0):
    """Compile pattern (default flags) and return the result of its
    findall() method called with string and maxsplit."""
    # NOTE(review): the maxsplit parameter looks copied from split();
    # confirm what the pattern object's findall() expects as its second
    # argument before relying on it.
    compiled = _compile(pattern)
    return compiled.findall(string, maxsplit)
45+
46+
def compile(pattern, flags=0):
    """Return the compiled form of pattern for the given flags, as
    produced (and cached) by _compile()."""
    compiled = _compile(pattern, flags)
    return compiled
48+
49+
def escape(pattern):
    """Return pattern with every character outside a-z, A-Z and 0-9
    prefixed by a backslash.

    A NUL character is written as the four characters \\000 rather than
    a backslash followed by a raw NUL.  The result is built by joining
    on an empty slice of pattern, so it has the same string type.
    """
    pieces = []
    for char in pattern:
        if "a" <= char <= "z" or "A" <= char <= "Z" or "0" <= char <= "9":
            # alphanumerics are copied through untouched
            pieces.append(char)
        elif char == "\000":
            pieces.append("\\000")
        else:
            pieces.append("\\" + char)
    return pattern[:0].join(pieces)
37

59

38
# --------------------------------------------------------------------
60
# --------------------------------------------------------------------
39-
# helpers
61+
# internals
62+
63+
# compiled patterns, keyed on (pattern type, pattern, flags)
_cache = {}
# once the cache holds this many entries it is emptied before the next insert
_MAXCACHE = 100

def _compile(pattern, flags=0):
    # internal: compile pattern
    #
    # Anything that is neither a plain nor a unicode string is handed
    # back untouched (the caller presumably passed an already compiled
    # pattern).  Strings are looked up in _cache first and compiled via
    # sre_compile.compile() only on a miss.
    kind = type(pattern)
    if kind is not type("") and kind is not type(u""):
        return pattern
    cache_key = (kind, pattern, flags)
    try:
        return _cache[cache_key]
    except KeyError:
        pass
    compiled = sre_compile.compile(pattern, flags)
    # crude eviction: drop everything once the size limit is reached
    if len(_cache) >= _MAXCACHE:
        _cache.clear()
    _cache[cache_key] = compiled
    return compiled
81+
82+
def _sub(pattern, template, string, count=0):
    # internal: pattern.sub implementation hook
    # delegates to _subn() and keeps only the first item of its result
    # (the substituted string), discarding the substitution count
    outcome = _subn(pattern, template, string, count)
    return outcome[0]
85+
86+
def _expand(match, template):
    # internal: expand template
    # FIXME: nothing is expanded yet -- the match object is ignored and
    # the template is returned unchanged
    expanded = template
    return expanded
40

89

41-
def _fixflags(flags):
90+
def _subn(pattern, template, string, count=0):
    # internal: pattern.subn implementation hook
    #
    # pattern must provide cursor(string), returning an object whose
    # search() method yields successive match objects (or a false value
    # when there are no more).  template is either a callable taking a
    # match object, or a replacement template handed to _expand().
    # A count of 0 means "replace every match".
    #
    # Returns a 2-tuple (new_string, number_of_substitutions).
    if callable(template):
        # use the caller's function itself; binding the callable()
        # builtin here would replace each match by a truth value and
        # make the final join fail
        replacement_for = template
    else:
        # FIXME: prepare template
        def replacement_for(match, template=template):
            return _expand(match, template)
    n = i = 0
    s = []
    append = s.append
    c = pattern.cursor(string)
    while not count or n < count:
        m = c.search()
        if not m:
            break
        j = m.start()
        if j > i:
            # copy the unmatched text between the previous match and this one
            append(string[i:j])
        append(replacement_for(m))
        i = m.end()
        n = n + 1
    if i < len(string):
        # copy whatever follows the last match
        append(string[i:])
    # join on an empty slice of string so the result has string's type
    return string[:0].join(s), n
45

115

116+
def _split(pattern, string, maxsplit=0):
    # internal: pattern.split implementation hook
    #
    # pattern must provide cursor(string), returning an object whose
    # search() method yields successive match objects (or a false value
    # when there are no more).  A maxsplit of 0 means "split at every
    # match".  Returns the list of pieces of string between the matches.
    n = i = 0
    s = []
    append = s.append
    c = pattern.cursor(string)
    while not maxsplit or n < maxsplit:
        m = c.search()
        if not m:
            break
        j = m.start()
        append(string[i:j])
        i = m.end()
        n = n + 1
    # always emit the remainder, even when it is empty: a separator at
    # the very end of the string delimits a trailing empty field, and an
    # empty input still yields one (empty) field
    append(string[i:])
    return s

Lib/sre_compile.py

Lines changed: 70 additions & 64 deletions
Original file line number | Original file line | Diff line number | Diff line change
@@ -14,9 +14,6 @@
14
# other compatibility work.
14
# other compatibility work.
15
#
15
#
16

16

17-
# FIXME: <fl> formalize (objectify?) and document the compiler code
18-
# format, so that other frontends can use this compiler
19-
20
import array, string, sys
17
import array, string, sys
21

18

22
import _sre
19
import _sre
@@ -45,64 +42,70 @@ def append(self, code):
45
self.data.append(code)
42
self.data.append(code)
46
def todata(self):
43
def todata(self):
47
# print self.data
44
# print self.data
48-
return array.array(WORDSIZE, self.data).tostring()
45+
try:
49-
46+
return array.array(WORDSIZE, self.data).tostring()
50-
def _lower(literal):
47+
except OverflowError:
51-
# return _sre._lower(literal) # FIXME
48+
print self.data
52-
return string.lower(literal)
49+
raise
53

50

54-
def _compile(code, pattern, flags):
51+
def _compile(code, pattern, flags, level=0):
55
append = code.append
52
append = code.append
56
for op, av in pattern:
53
for op, av in pattern:
57
if op is ANY:
54
if op is ANY:
58-
if "s" in flags:
55+
if flags & SRE_FLAG_DOTALL:
59-
append(CODES[op]) # any character at all!
56+
append(OPCODES[op]) # any character at all!
60
else:
57
else:
61-
append(CODES[NOT_LITERAL])
58+
append(OPCODES[CATEGORY])
62-
append(10)
59+
append(CHCODES[CATEGORY_NOT_LINEBREAK])
63
elif op in (SUCCESS, FAILURE):
60
elif op in (SUCCESS, FAILURE):
64-
append(CODES[op])
61+
append(OPCODES[op])
65
elif op is AT:
62
elif op is AT:
66-
append(CODES[op])
63+
append(OPCODES[op])
67-
append(POSITIONS[av])
64+
if flags & SRE_FLAG_MULTILINE:
65+
append(ATCODES[AT_MULTILINE[av]])
66+
else:
67+
append(ATCODES[av])
68
elif op is BRANCH:
68
elif op is BRANCH:
69-
append(CODES[op])
69+
append(OPCODES[op])
70
tail = []
70
tail = []
71
for av in av[1]:
71
for av in av[1]:
72
skip = len(code); append(0)
72
skip = len(code); append(0)
73-
_compile(code, av, flags)
73+
_compile(code, av, flags, level)
74-
append(CODES[JUMP])
74+
append(OPCODES[JUMP])
75
tail.append(len(code)); append(0)
75
tail.append(len(code)); append(0)
76
code[skip] = len(code) - skip
76
code[skip] = len(code) - skip
77
append(0) # end of branch
77
append(0) # end of branch
78
for tail in tail:
78
for tail in tail:
79
code[tail] = len(code) - tail
79
code[tail] = len(code) - tail
80
elif op is CALL:
80
elif op is CALL:
81-
append(CODES[op])
81+
append(OPCODES[op])
82
skip = len(code); append(0)
82
skip = len(code); append(0)
83-
_compile(code, av, flags)
83+
_compile(code, av, flags, level+1)
84-
append(CODES[SUCCESS])
84+
append(OPCODES[SUCCESS])
85
code[skip] = len(code) - skip
85
code[skip] = len(code) - skip
86
elif op is CATEGORY: # not used by current parser
86
elif op is CATEGORY: # not used by current parser
87-
append(CODES[op])
87+
append(OPCODES[op])
88-
append(CATEGORIES[av])
88+
if flags & SRE_FLAG_LOCALE:
89+
append(CH_LOCALE[CHCODES[av]])
90+
else:
91+
append(CHCODES[av])
89
elif op is GROUP:
92
elif op is GROUP:
90-
if "i" in flags:
93+
if flags & SRE_FLAG_IGNORECASE:
91-
append(CODES[MAP_IGNORE[op]])
94+
append(OPCODES[OP_IGNORE[op]])
92
else:
95
else:
93-
append(CODES[op])
96+
append(OPCODES[op])
94-
append(av)
97+
append(av-1)
95
elif op is IN:
98
elif op is IN:
96-
if "i" in flags:
99+
if flags & SRE_FLAG_IGNORECASE:
97-
append(CODES[MAP_IGNORE[op]])
100+
append(OPCODES[OP_IGNORE[op]])
98
def fixup(literal):
101
def fixup(literal):
99-
return ord(_lower(literal))
102+
return ord(literal.lower())
100
else:
103
else:
101-
append(CODES[op])
104+
append(OPCODES[op])
102
fixup = ord
105
fixup = ord
103
skip = len(code); append(0)
106
skip = len(code); append(0)
104
for op, av in av:
107
for op, av in av:
105-
append(CODES[op])
108+
append(OPCODES[op])
106
if op is NEGATE:
109
if op is NEGATE:
107
pass
110
pass
108
elif op is LITERAL:
111
elif op is LITERAL:
@@ -111,77 +114,80 @@ def fixup(literal):
111
append(fixup(av[0]))
114
append(fixup(av[0]))
112
append(fixup(av[1]))
115
append(fixup(av[1]))
113
elif op is CATEGORY:
116
elif op is CATEGORY:
114-
append(CATEGORIES[av])
117+
if flags & SRE_FLAG_LOCALE:
118+
append(CH_LOCALE[CHCODES[av]])
119+
else:
120+
append(CHCODES[av])
115
else:
121
else:
116
raise ValueError, "unsupported set operator"
122
raise ValueError, "unsupported set operator"
117-
append(CODES[FAILURE])
123+
append(OPCODES[FAILURE])
118
code[skip] = len(code) - skip
124
code[skip] = len(code) - skip
119
elif op in (LITERAL, NOT_LITERAL):
125
elif op in (LITERAL, NOT_LITERAL):
120-
if "i" in flags:
126+
if flags & SRE_FLAG_IGNORECASE:
121-
append(CODES[MAP_IGNORE[op]])
127+
append(OPCODES[OP_IGNORE[op]])
122-
append(ord(_lower(av)))
128+
append(ord(av.lower()))
123
else:
129
else:
124-
append(CODES[op])
130+
append(OPCODES[op])
125
append(ord(av))
131
append(ord(av))
126
elif op is MARK:
132
elif op is MARK:
127-
append(CODES[op])
133+
append(OPCODES[op])
128
append(av)
134
append(av)
129
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
135
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
130
lo, hi = av[2].getwidth()
136
lo, hi = av[2].getwidth()
131
if lo == 0:
137
if lo == 0:
132
raise SyntaxError, "cannot repeat zero-width items"
138
raise SyntaxError, "cannot repeat zero-width items"
133
if lo == hi == 1 and op is MAX_REPEAT:
139
if lo == hi == 1 and op is MAX_REPEAT:
134-
append(CODES[MAX_REPEAT_ONE])
140+
append(OPCODES[MAX_REPEAT_ONE])
135
skip = len(code); append(0)
141
skip = len(code); append(0)
136
append(av[0])
142
append(av[0])
137
append(av[1])
143
append(av[1])
138-
_compile(code, av[2], flags)
144+
_compile(code, av[2], flags, level+1)
139-
append(CODES[SUCCESS])
145+
append(OPCODES[SUCCESS])
140
code[skip] = len(code) - skip
146
code[skip] = len(code) - skip
141
else:
147
else:
142-
append(CODES[op])
148+
append(OPCODES[op])
143
skip = len(code); append(0)
149
skip = len(code); append(0)
144
append(av[0])
150
append(av[0])
145
append(av[1])
151
append(av[1])
146-
_compile(code, av[2], flags)
152+
_compile(code, av[2], flags, level+1)
147
if op is MIN_REPEAT:
153
if op is MIN_REPEAT:
148-
append(CODES[MIN_UNTIL])
154+
append(OPCODES[MIN_UNTIL])
149
else:
155
else:
150-
# FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
156+
append(OPCODES[MAX_UNTIL])
151-
append(CODES[MAX_UNTIL])
152
code[skip] = len(code) - skip
157
code[skip] = len(code) - skip
153
elif op is SUBPATTERN:
158
elif op is SUBPATTERN:
154-
## group = av[0]
159+
group = av[0]
155-
## if group:
160+
if group:
156-
## append(CODES[MARK])
161+
append(OPCODES[MARK])
157-
## append((group-1)*2)
162+
append((group-1)*2)
158-
_compile(code, av[1], flags)
163+
_compile(code, av[1], flags, level+1)
159-
## if group:
164+
if group:
160-
## append(CODES[MARK])
165+
append(OPCODES[MARK])
161-
## append((group-1)*2+1)
166+
append((group-1)*2+1)
162
else:
167
else:
163
raise ValueError, ("unsupported operand type", op)
168
raise ValueError, ("unsupported operand type", op)
164

169

165-
def compile(p, flags=()):
170+
def compile(p, flags=0):
166
# convert pattern list to internal format
171
# convert pattern list to internal format
167
if type(p) in (type(""), type(u"")):
172
if type(p) in (type(""), type(u"")):
168
import sre_parse
173
import sre_parse
169
pattern = p
174
pattern = p
170
p = sre_parse.parse(p)
175
p = sre_parse.parse(p)
171
else:
176
else:
172
pattern = None
177
pattern = None
173-
# print p.getwidth()
178+
flags = p.pattern.flags | flags
174-
# print p
175
code = Code()
179
code = Code()
176-
_compile(code, p.data, p.pattern.flags)
180+
_compile(code, p.data, flags)
177-
code.append(CODES[SUCCESS])
181+
code.append(OPCODES[SUCCESS])
178-
# print list(code.data)
179
data = code.todata()
182
data = code.todata()
180
if 0: # debugging
183
if 0: # debugging
181
print
184
print
182
print "-" * 68
185
print "-" * 68
183
import sre_disasm
186
import sre_disasm
184
sre_disasm.disasm(data)
187
sre_disasm.disasm(data)
185
print "-" * 68
188
print "-" * 68
186-
# print len(data), p.pattern.groups, len(p.pattern.groupdict)
189+
return _sre.compile(
187-
return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)
190+
pattern, flags,
191+
data,
192+
p.pattern.groups-1, p.pattern.groupdict
193+
)

0 commit comments

Comments
 (0)