Skip to content

Commit 8e7390a

Browse files
adding terminal indicators on edges; adding edge values for IntCompletionDAWGs; adding tests for all
1 parent fa6cd76 commit 8e7390a

File tree

3 files changed

+129 -47 lines changed

3 files changed

+129 -47 lines changed

dawg_python/dawgs.py

Lines changed: 70 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -149,13 +149,13 @@ def edges(self, prefix=""):
149149
if index is None:
150150
return res
151151

152-
completer = wrapper.Completer(self.dct, self.guide)
153-
if not completer.start_edges(index, b_prefix):
152+
edge_follower = wrapper.EdgeFollower(self.dct, self.guide)
153+
if not edge_follower.start(index, b_prefix):
154154
return res
155155

156-
res.append(completer.decoded_key)
157-
while completer.next_edge():
158-
res.append(completer.decoded_key)
156+
res.append(edge_follower.get_cur_edge())
157+
while edge_follower.next():
158+
res.append(edge_follower.get_cur_edge())
159159

160160
return res
161161

@@ -166,13 +166,13 @@ def iteredges(self, prefix=""):
166166
if index is None:
167167
return
168168

169-
completer = wrapper.Completer(self.dct, self.guide)
170-
if not completer.start_edges(index, b_prefix):
169+
edge_follower = wrapper.EdgeFollower(self.dct, self.guide)
170+
if not edge_follower.start(index, b_prefix):
171171
return
172172

173-
yield completer.decoded_key
174-
while completer.next_edge():
175-
yield completer.decoded_key
173+
yield edge_follower.get_cur_edge()
174+
while edge_follower.next():
175+
yield edge_follower.get_cur_edge()
176176

177177
def iterkeys(self, prefix=""):
178178
b_prefix = prefix.encode('utf8')
@@ -312,15 +312,14 @@ def iterkeys(self, prefix=""):
312312
yield u_key
313313

314314
def items(self, prefix=""):
315+
index = self.dct.ROOT
315316
if not isinstance(prefix, bytes):
316317
prefix = prefix.encode('utf8')
317-
res = []
318-
319-
index = self.dct.ROOT
320318
if prefix:
321319
index = self.dct.follow_bytes(prefix, index)
322320
if not index:
323-
return res
321+
return
322+
res = []
324323

325324
completer = wrapper.Completer(self.dct, self.guide)
326325
completer.start(index, prefix)
@@ -333,11 +332,30 @@ def items(self, prefix=""):
333332

334333
return res
335334

336-
def iteritems(self, prefix=""):
335+
def edges(self, prefix=""):
336+
index = self.dct.ROOT
337337
if not isinstance(prefix, bytes):
338338
prefix = prefix.encode('utf8')
339+
if prefix:
340+
index = self.dct.follow_bytes(prefix, index)
341+
if not index:
342+
return
343+
res = []
344+
345+
edge_follower = wrapper.EdgeFollower(self.dct, self.guide)
346+
if not edge_follower.start(index, prefix):
347+
return res
348+
349+
res.append(edge_follower.decoded_key)
350+
while edge_follower.next():
351+
res.append(edge_follower.decoded_key)
339352

353+
return res
354+
355+
def iteritems(self, prefix=""):
340356
index = self.dct.ROOT
357+
if not isinstance(prefix, bytes):
358+
prefix = prefix.encode('utf8')
341359
if prefix:
342360
index = self.dct.follow_bytes(prefix, index)
343361
if not index:
@@ -497,6 +515,43 @@ class IntCompletionDAWG(CompletionDAWG, IntDAWG):
497515
Dict-like class based on DAWG.
498516
It can store integer values for unicode keys and support key completion.
499517
"""
518+
def edges(self, prefix=""):
519+
index = self.dct.ROOT
520+
if not isinstance(prefix, bytes):
521+
prefix = prefix.encode('utf8')
522+
if prefix:
523+
index = self.dct.follow_bytes(prefix, index)
524+
if not index:
525+
return
526+
res = []
527+
528+
edge_follower = wrapper.EdgeFollower(self.dct, self.guide)
529+
if not edge_follower.start(index, prefix):
530+
return res
531+
532+
res.append((edge_follower.decoded_key, edge_follower.value()))
533+
while edge_follower.next():
534+
res.append((edge_follower.decoded_key, edge_follower.value()))
535+
536+
return res
537+
538+
def iteredges(self, prefix=""):
539+
index = self.dct.ROOT
540+
if not isinstance(prefix, bytes):
541+
prefix = prefix.encode('utf8')
542+
if prefix:
543+
index = self.dct.follow_bytes(prefix, index)
544+
if not index:
545+
return
546+
547+
edge_follower = wrapper.EdgeFollower(self.dct, self.guide)
548+
if not edge_follower.start(index, prefix):
549+
return
550+
551+
yield (edge_follower.decoded_key, edge_follower.value())
552+
while edge_follower.next():
553+
yield (edge_follower.decoded_key, edge_follower.value())
554+
500555
def items(self, prefix=""):
501556
if not isinstance(prefix, bytes):
502557
prefix = prefix.encode('utf8')

dawg_python/wrapper.py

Lines changed: 45 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,29 +17,29 @@ def __init__(self):
1717
"Root index"
1818

1919
def has_value(self, index):
20-
"Checks if a given index is related to the end of a key."
20+
#Checks if a given index is related to the end of a key.
2121
return units.has_leaf(self._units[index])
2222

2323
def value(self, index):
24-
"Gets a value from a given index."
24+
#Gets a value from a given index.
2525
offset = units.offset(self._units[index])
2626
value_index = (index ^ offset) & units.PRECISION_MASK
2727
return units.value(self._units[value_index])
2828

2929
def read(self, fp):
30-
"Reads a dictionary from an input stream."
30+
#Reads a dictionary from an input stream.
3131
base_size = struct.unpack(str("=I"), fp.read(4))[0]
3232
self._units.fromfile(fp, base_size)
3333

3434
def contains(self, key):
35-
"Exact matching."
35+
#Exact matching.
3636
index = self.follow_bytes(key, self.ROOT)
3737
if index is None:
3838
return False
3939
return self.has_value(index)
4040

4141
def find(self, key):
42-
"Exact matching (returns value)"
42+
#Exact matching (returns value)
4343
index = self.follow_bytes(key, self.ROOT)
4444
if index is None:
4545
return -1
@@ -48,7 +48,7 @@ def find(self, key):
4848
return self.value(index)
4949

5050
def follow_char(self, label, index):
51-
"Follows a transition"
51+
#Follows a transition
5252
offset = units.offset(self._units[index])
5353
next_index = (index ^ offset ^ label) & units.PRECISION_MASK
5454

@@ -58,7 +58,7 @@ def follow_char(self, label, index):
5858
return next_index
5959

6060
def follow_bytes(self, s, index):
61-
"Follows transitions."
61+
#Follows transitions.
6262
for ch in s:
6363
index = self.follow_char(int_from_byte(ch), index)
6464
if index is None:
@@ -95,27 +95,17 @@ def size(self):
9595
return len(self._units)
9696

9797

98-
class Completer(object):
99-
98+
class EdgeFollower(object):
10099
def __init__(self, dic=None, guide=None):
101100
self._dic = dic
102101
self._guide = guide
103102

104103
def value(self):
105-
return self._dic.value(self._last_index)
104+
if self._dic.has_value(self._cur_index):
105+
return self._dic.value(self._cur_index)
106+
return False
106107

107108
def start(self, index, prefix=b""):
108-
"initial setup for a completer next() action on some prefix"
109-
110-
self.key = bytearray(prefix)
111-
112-
if self._guide.size():
113-
self._index_stack = [index]
114-
self._last_index = self._dic.ROOT
115-
else:
116-
self._index_stack = []
117-
118-
def start_edges(self, index, prefix=b""):
119109
"""initial setup for a completer next_edge() action on some prefix. If
120110
there's a child for this prefix, we add that as the one item on the
121111
index_stack. Otherwise, leave the stack empty, so next_edge() fails"""
@@ -124,6 +114,7 @@ def start_edges(self, index, prefix=b""):
124114
self.base_key_len = len(self.key)
125115
self._parent_index = index
126116
self._sib_index = None
117+
self._cur_index = None
127118
if self._guide.size():
128119
child_label = self._guide.child(index) # UCharType
129120

@@ -132,19 +123,21 @@ def start_edges(self, index, prefix=b""):
132123
next_index = self._dic.follow_char(child_label, index)
133124
if index is not None:
134125
self._sib_index = next_index
126+
self._cur_index = self._sib_index
135127
self.key.append(child_label)
136128
self.decoded_key = self.key.decode('utf-8')
137129
return True
138130

139-
def next_edge(self):
140-
"Gets the next edge (not necessarily a terminal)"
131+
def next(self):
132+
#Gets the next edge (not necessarily a terminal)
141133

142134
if not self._sib_index:
143135
return False
144136

145137
sibling_label = self._guide.sibling(self._sib_index)
146138
self._sib_index = self._dic.follow_char(sibling_label,
147139
self._parent_index)
140+
self._cur_index = self._sib_index
148141
if not self._sib_index:
149142
return False
150143

@@ -153,13 +146,13 @@ def next_edge(self):
153146
try:
154147
self.decoded_key = self.key.decode('utf-8')
155148
except UnicodeDecodeError:
156-
#this sibling is multi-character. keep following its children til
149+
#this sibling is a multibyte char. keep following its children til
157150
#something is decodable
158-
cur_index = self._sib_index
159151
while True:
160152
child_label = self._guide.child(self._sib_index)
161-
cur_index = self._dic.follow_char(child_label, cur_index)
162-
if not cur_index:
153+
self._cur_index = self._dic.follow_char(child_label,
154+
self._cur_index)
155+
if not self._cur_index:
163156
return False
164157
self.key.append(child_label)
165158
try:
@@ -169,8 +162,32 @@ def next_edge(self):
169162
pass
170163
return True
171164

165+
def get_cur_edge(self):
166+
return (self.decoded_key, self._dic.has_value(self._cur_index))
167+
168+
169+
class Completer(object):
170+
171+
def __init__(self, dic=None, guide=None):
172+
self._dic = dic
173+
self._guide = guide
174+
175+
def value(self):
176+
return self._dic.value(self._last_index)
177+
178+
def start(self, index, prefix=b""):
179+
#initial setup for a completer next() action on some prefix
180+
181+
self.key = bytearray(prefix)
182+
183+
if self._guide.size():
184+
self._index_stack = [index]
185+
self._last_index = self._dic.ROOT
186+
else:
187+
self._index_stack = []
188+
172189
def next(self):
173-
"Gets the next key"
190+
#Gets the next key
174191

175192
if not self._index_stack:
176193
return False

tests/test_dawg.py

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -48,17 +48,19 @@ def test_keys(self):
4848

4949
def test_edges(self):
5050
d = self.dawg()
51-
assert d.edges() == ['b', 'f']
52-
assert d.edges('f') == ['fo']
51+
assert d.edges() == [('b', False), ('f', True)]
52+
assert d.edges('b') == [('ba', False)]
53+
assert d.edges('fo') == [('foo', True)]
5354

5455
def test_iterkeys(self):
5556
d = self.dawg()
5657
assert list(d.iterkeys()) == d.keys()
5758

5859
def test_iter_edges(self):
5960
d = self.dawg()
60-
assert list(d.iteredges()) == ['b', 'f']
61-
assert list(d.edges('f')) == ['fo']
61+
assert list(d.iteredges()) == [('b', False), ('f', True)]
62+
assert list(d.iteredges('b')) == [('ba', False)]
63+
assert list(d.edges('fo')) == [('foo', True)]
6264

6365
def test_completion(self):
6466
d = self.dawg()
@@ -129,3 +131,11 @@ def test_completion_keys_with_prefix(self):
129131

130132
def test_completion_items(self):
131133
assert self.dawg().items() == sorted(self.payload.items(), key=lambda r: r[0])
134+
135+
def test_completion_edges(self):
136+
assert self.dawg().edges('ba') == [('bar', 5)]
137+
assert self.dawg().edges('foob') == [('fooba', False)]
138+
139+
def test_completion_iteredges(self):
140+
assert list(self.dawg().iteredges('ba')) == [('bar', 5)]
141+
assert list(self.dawg().iteredges('foob')) == [('fooba', False)]

0 commit comments

Comments
 (0)