Skip to content

Commit 87c7a80

Browse files
authored
Added Trie Data Structure (#299)
1 parent 53e7638 commit 87c7a80

File tree

8 files changed

+244
-3
lines changed

8 files changed

+244
-3
lines changed

pydatastructs/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
from .miscellaneous_data_structures import *
44
from .utils import *
55
from .graphs import *
6+
from .strings import *

pydatastructs/strings/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
__all__ = []
2+
3+
from . import trie
4+
from .trie import (
5+
Trie
6+
)
7+
8+
__all__.extend(trie.__all__)

pydatastructs/strings/tests/__init__.py

Whitespace-only changes.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from pydatastructs import Trie
2+
3+
def test_Trie():
    """
    Exercises insertion, membership, prefix listing and deletion on a Trie.
    """
    strings = ["A", "to", "tea", "ted", "ten", "i", "in", "inn"]
    trie = Trie()
    for word in strings:
        trie.insert(word)

    # Everything that was inserted must be reported as present.
    for word in strings:
        assert trie.is_present(word)

    # Prefix queries; the "t" and "te" results are compared order-insensitively.
    t_words = sorted(trie.strings_with_prefix("t"))
    assert t_words == ['tea', 'ted', 'ten', 'to']
    te_words = sorted(trie.strings_with_prefix("te"))
    assert te_words == ["tea", "ted", "ten"]
    assert trie.strings_with_prefix("i") == ["i", "in", "inn"]
    # Lookups are case-sensitive: only "A" was inserted, not "a".
    assert trie.strings_with_prefix("a") == []

    remove_order = ["to", "tea", "ted", "ten", "inn", "in", "A"]

    # Deleting a string that is not in the trie reports None.
    assert trie.delete("z") is None

    # After each deletion only the deleted string disappears; every string
    # still tracked in `strings` must remain present.
    for removed in remove_order:
        trie.delete(removed)
        for candidate in strings:
            if candidate != removed:
                assert trie.is_present(candidate)
            else:
                assert not trie.is_present(candidate)
        strings.remove(removed)

pydatastructs/strings/trie.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
from pydatastructs.utils.misc_util import TrieNode
2+
from collections import deque
3+
import copy
4+
5+
__all__ = [
6+
'Trie'
7+
]
8+
9+
Stack = Queue = deque
10+
11+
class Trie(object):
    """
    Represents the trie data structure for storing strings.

    Examples
    ========

    >>> from pydatastructs import Trie
    >>> trie = Trie()
    >>> trie.insert("a")
    >>> trie.insert("aa")
    >>> trie.strings_with_prefix("a")
    ['a', 'aa']
    >>> trie.is_present("aa")
    True
    >>> trie.delete("aa")
    True
    >>> trie.is_present("aa")
    False

    References
    ==========

    .. [1] https://en.wikipedia.org/wiki/Trie
    """

    __slots__ = ['root']

    @classmethod
    def methods(cls):
        """
        Returns the names of the methods implemented by this class.
        """
        return ['__new__', 'insert', 'is_present', 'delete',
                'strings_with_prefix']

    def __new__(cls):
        obj = object.__new__(cls)
        # The root is a character-less node; every stored string hangs off it.
        obj.root = TrieNode()
        return obj

    def insert(self, string: str) -> None:
        """
        Inserts the given string into the trie.

        Parameters
        ==========

        string: str

        Returns
        =======

        None
        """
        walk = self.root
        for char in string:
            child = walk.get_child(char)
            if child is None:
                # No edge for this character yet, so extend the path.
                child = TrieNode(char)
                walk.add_child(child)
            walk = child
        # Mark the last node so the string is distinguishable from a prefix.
        walk.is_terminal = True

    def is_present(self, string: str) -> bool:
        """
        Checks if the given string is present as a prefix in the trie.

        Only the existence of the path is checked; the terminal flag of
        the final node is not consulted.

        Parameters
        ==========

        string: str

        Returns
        =======

        True if the given string is present as a prefix;
        False in all other cases.
        """
        walk = self.root
        for char in string:
            walk = walk.get_child(char)
            if walk is None:
                return False
        return True

    def delete(self, string: str) -> bool:
        """
        Deletes the given string from the trie.

        Nodes that become childless are pruned upwards until a node
        with other children, a terminal node or the root is reached.

        Parameters
        ==========

        string: str

        Returns
        =======

        True if successfully deleted;
        None if the string is not present in the trie.

        Note
        ====

        A string whose path exists only as a prefix of longer stored
        strings also yields True, although nothing is removed.
        """
        # Record every node on the path, root first, so pruning can walk back.
        path = []
        walk = self.root
        for char in string:
            path.append(walk)
            walk = walk.get_child(char)
            if walk is None:
                return None
        path.append(walk)

        idx = len(path) - 1
        path[idx].is_terminal = False
        # idx >= 1 keeps the root itself from ever being removed.
        while idx >= 1 and not path[idx]._children:
            path[idx - 1].remove_child(path[idx].char)
            idx -= 1
            if path[idx].is_terminal:
                # This ancestor ends another stored string; stop pruning.
                break
        return True

    def strings_with_prefix(self, string: str) -> list:
        """
        Generates a list of all strings with the given prefix.

        Parameters
        ==========

        string: str

        Returns
        =======

        strings: list
            The list of strings with the given prefix.
        """

        def _collect(prefix: str, node: TrieNode, strings: list) -> None:
            # Iterative depth-first walk below `node`; each stack entry pairs
            # a node with the text spelled out by its ancestors.
            TrieNode_stack = Stack()
            TrieNode_stack.append((node, prefix))
            while TrieNode_stack:
                walk, curr_prefix = TrieNode_stack.pop()
                spelled = curr_prefix + walk.char
                if walk.is_terminal:
                    strings.append(spelled)
                for child in walk._children.values():
                    TrieNode_stack.append((child, spelled))

        strings = []
        prefix = ""
        walk = self.root
        for char in string:
            walk = walk.get_child(char)
            if walk is None:
                # No stored string starts with the requested prefix.
                return strings
            prefix += char
        if walk.is_terminal:
            # The prefix itself is a stored string: report it in full,
            # not just the character held by its last node.
            strings.append(prefix)
        for child in walk._children.values():
            _collect(prefix, child, strings)
        return strings

pydatastructs/utils/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
GraphEdge,
1212
Set,
1313
CartesianTreeNode,
14-
RedBlackTreeNode
14+
RedBlackTreeNode,
15+
TrieNode
1516
)
1617
__all__.extend(misc_util.__all__)

pydatastructs/utils/misc_util.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
'GraphEdge',
99
'Set',
1010
'CartesianTreeNode',
11-
'RedBlackTreeNode'
11+
'RedBlackTreeNode',
12+
'TrieNode'
1213
]
1314

1415
_check_type = lambda a, t: isinstance(a, t)
@@ -394,6 +395,39 @@ def __new__(cls, key, data=None):
394395
obj.parent, obj.size = [None]*2
395396
return obj
396397

398+
class TrieNode(Node):
    """
    Represents nodes in the trie data structure.

    Parameters
    ==========

    char: The character stored in the current node.
        Optional, by default None.
    """

    __slots__ = ['char', '_children', 'is_terminal']

    @classmethod
    def methods(cls):
        """
        Returns the names of the methods implemented by this class.
        """
        return ['__new__', 'add_child', 'get_child', 'remove_child']

    def __new__(cls, char=None):
        node = Node.__new__(cls)
        node.char = char
        # Maps a child's character to the child node.
        node._children = {}
        node.is_terminal = False
        return node

    def add_child(self, trie_node) -> None:
        """
        Registers ``trie_node`` as a child, keyed by its character.
        An existing child with the same character is replaced.
        """
        key = trie_node.char
        self._children[key] = trie_node

    def get_child(self, char: str):
        """
        Returns the child stored under ``char``, or None if there is none.
        """
        return self._children.get(char)

    def remove_child(self, char: str) -> None:
        """
        Removes the child stored under ``char``.
        Raises KeyError if no such child exists.
        """
        del self._children[char]
430+
397431
def _comp(u, v, tcomp):
398432
"""
399433
Overloaded comparator for comparing

pydatastructs/utils/tests/test_code_quality.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ def _apis():
9696
pyds.DisjointSetForest, pyds.BinomialTree, pyds.TreeNode, pyds.MAryTreeNode,
9797
pyds.LinkedListNode, pyds.BinomialTreeNode, pyds.AdjacencyListGraphNode,
9898
pyds.AdjacencyMatrixGraphNode, pyds.GraphEdge, pyds.Set, pyds.BinaryIndexedTree,
99-
pyds.CartesianTree, pyds.CartesianTreeNode, pyds.Treap, pyds.RedBlackTreeNode, pyds.RedBlackTree]
99+
pyds.CartesianTree, pyds.CartesianTreeNode, pyds.Treap, pyds.RedBlackTreeNode, pyds.RedBlackTree,
100+
pyds.Trie, pyds.TrieNode]
100101

101102
def test_public_api():
102103
pyds = pydatastructs

0 commit comments

Comments
 (0)