Skip to content

Commit fa6cd76

Browse files
edges() working with unicode; added working iteredges(); added basic tests
1 parent 926d6e8 commit fa6cd76

File tree

3 files changed

+48
-7
lines changed

3 files changed

+48
-7
lines changed

dawg_python/dawgs.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,14 +153,27 @@ def edges(self, prefix=""):
153153
if not completer.start_edges(index, b_prefix):
154154
return res
155155

156-
key = completer.key.decode('utf8')
157-
res.append(key)
156+
res.append(completer.decoded_key)
158157
while completer.next_edge():
159-
key = completer.key.decode('utf8')
160-
res.append(key)
158+
res.append(completer.decoded_key)
161159

162160
return res
163161

162+
def iteredges(self, prefix=""):
163+
b_prefix = prefix.encode('utf8')
164+
165+
index = self.dct.follow_bytes(b_prefix, self.dct.ROOT)
166+
if index is None:
167+
return
168+
169+
completer = wrapper.Completer(self.dct, self.guide)
170+
if not completer.start_edges(index, b_prefix):
171+
return
172+
173+
yield completer.decoded_key
174+
while completer.next_edge():
175+
yield completer.decoded_key
176+
164177
def iterkeys(self, prefix=""):
165178
b_prefix = prefix.encode('utf8')
166179
index = self.dct.follow_bytes(b_prefix, self.dct.ROOT)

dawg_python/wrapper.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
from __future__ import absolute_import, unicode_literals
33
import struct
44
import array
5-
import pdb
65

76
from . import units
87
from .compat import int_from_byte
@@ -122,6 +121,7 @@ def start_edges(self, index, prefix=b""):
122121
index_stack. Otherwise, leave the stack empty, so next_edge() fails"""
123122

124123
self.key = bytearray(prefix)
124+
self.base_key_len = len(self.key)
125125
self._parent_index = index
126126
self._sib_index = None
127127
if self._guide.size():
@@ -133,6 +133,7 @@ def start_edges(self, index, prefix=b""):
133133
if index is not None:
134134
self._sib_index = next_index
135135
self.key.append(child_label)
136+
self.decoded_key = self.key.decode('utf-8')
136137
return True
137138

138139
def next_edge(self):
@@ -146,9 +147,26 @@ def next_edge(self):
146147
self._parent_index)
147148
if not self._sib_index:
148149
return False
149-
150-
self.key.pop()
150+
151+
self.key = self.key[:self.base_key_len]
151152
self.key.append(sibling_label)
153+
try:
154+
self.decoded_key = self.key.decode('utf-8')
155+
except UnicodeDecodeError:
156+
# This sibling label is only part of a multi-byte UTF-8 character; keep
157+
# following its children until the accumulated key is decodable.
158+
cur_index = self._sib_index
159+
while True:
160+
child_label = self._guide.child(self._sib_index)
161+
cur_index = self._dic.follow_char(child_label, cur_index)
162+
if not cur_index:
163+
return False
164+
self.key.append(child_label)
165+
try:
166+
self.decoded_key = self.key.decode('utf-8')
167+
break
168+
except UnicodeDecodeError:
169+
pass
152170
return True
153171

154172
def next(self):

tests/test_dawg.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,20 @@ def test_keys(self):
4646
d = self.dawg()
4747
assert d.keys() == sorted(self.keys)
4848

49+
def test_edges(self):
50+
d = self.dawg()
51+
assert d.edges() == ['b', 'f']
52+
assert d.edges('f') == ['fo']
53+
4954
def test_iterkeys(self):
5055
d = self.dawg()
5156
assert list(d.iterkeys()) == d.keys()
5257

58+
def test_iter_edges(self):
59+
d = self.dawg()
60+
assert list(d.iteredges()) == ['b', 'f']
61+
assert list(d.edges('f')) == ['fo']
62+
5363
def test_completion(self):
5464
d = self.dawg()
5565

0 commit comments

Comments
 (0)