-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathiterative_tries.py
290 lines (235 loc) · 10.2 KB
/
iterative_tries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Callable, Any
@dataclass
class AbstractNode(ABC):
"""
Abstract class for nodes in a trie.
"""
char: str
children: Any
@abstractmethod
def has_max_children(self, max_children: int) -> bool:
"""
Check whether the node has at most `max_children` children.
"""
pass
@dataclass
class AbstractTrie(ABC):
"""
Abstract class for trie data structures.
"""
children: Any
@abstractmethod
def _go_through_trie(self,
word: str,
if_found: Callable[[AbstractNode, AbstractNode], None],
if_not_found: Callable[[AbstractNode, str], bool],
when_completed: Callable[[], bool]
) -> bool:
"""
Go through the trie structure with the given word and call the appropriate
functions when a node for a character is found, not found, or when the traversal
is completed. Returns a boolean value representing the success of the operation.
"""
pass
def contains(self, word: str) -> bool:
"""
Checks whether the trie contains the given word.
Returns `True` if the word is found, `False` otherwise.
"""
return self._go_through_trie(
word,
if_found=lambda parent, child: None, # do nothing when a node is found
if_not_found=lambda parent, rest_word: False, # if a node is not found, the word is not contained in the trie
when_completed=lambda: True # the word is contained in trie if traversal is completed
)
@abstractmethod
def _delete_child_from_parent(parent_children, child: AbstractNode) -> None:
"""
Given a list/array/dictionary of children, delete the given child.
"""
pass
def delete(self, word: str) -> bool:
"""
Deletes the given word from the trie.
Returns `True` if the word was deleted, `False` otherwise.
"""
marked_parent = None
child_to_delete = None
# A node is only (potentially deleted), if it has at most one child.
# If it has more, this means that multiple words have this same prefix.
def mark_parent(parent, child):
nonlocal marked_parent, child_to_delete
if not child.has_max_children(1): # reset mark, if the child has more than one child
marked_parent = None
child_to_delete = None
elif marked_parent == None: # set mark, if it has at most one child
marked_parent = parent
child_to_delete = child
# Only if the word is contained in the trie, it can be deleted.
# This is ensured, by checking whether the traversal was completed.
def delete_child_from_marked():
nonlocal marked_parent, child_to_delete
self._delete_child_from_parent(marked_parent.children, child_to_delete)
return True
return self._go_through_trie(
word,
if_found=lambda parent, child: mark_parent(parent, child),
if_not_found=lambda parent, rest_word: False,
when_completed=lambda: delete_child_from_marked()
)
@abstractmethod
def _add_empty_child(self, parent_children, character: str):
"""
Create a new child node with the given character and add it to the parent's children.
"""
pass
def insert(self, word: str) -> bool:
"""
Inserts the given word into the trie.
Returns `True` if the word was inserted, `False` if it already is contained.
"""
# Construct a new subtrie for the rest of the word and add it to the parent's children.
def insert_child(parent, rest_word):
current_child = parent
for character in rest_word:
current_child = self._add_empty_child(current_child.children, character)
return True
return self._go_through_trie(
word,
if_found=lambda parent, child: None, # do nothing when a node is found
if_not_found=lambda parent, rest_word: insert_child(parent, rest_word),
when_completed=lambda: False # if the traversal is completed, the word is already contained
)
@dataclass
class VarSizeNode(AbstractNode):
"""
Class for nodes in a variable size trie.
"""
children: list[VarSizeNode]
def has_max_children(self, max_children):
if self.children is None: return True
# The list contains exactly the number of children.
return len(self.children) <= max_children
@dataclass
class VarSizeTrie(AbstractTrie):
"""
Class for variable size trie data structures.
For each node, its character is stored as a string and the children are stored in a list.
"""
children: list[VarSizeNode]
def _go_through_trie(self, word: str, if_found, if_not_found, when_completed) -> bool:
parent = self # parent is initialized with a VarSizeTrie -> later it will be a VarSizeNode
for i, character in enumerate(word):
found = False
# one has to enumerate through all children to find the right one
for child in parent.children:
if character == child.char:
if_found(parent, child)
parent = child # child is a VarSizeNode!
found = True
break
if not found: return if_not_found(parent, word[i:])
return when_completed()
def _delete_child_from_parent(self, parent_children, child):
parent_children.remove(child) # default list method does the trick
def _add_empty_child(self, parent_children, character):
# new children list can be initialized with an empty list
new_child = VarSizeNode(char=character, children=[])
parent_children.append(new_child)
return new_child
@staticmethod
def create_trie(words: list[str]) -> VarSizeTrie:
trie = VarSizeTrie(children=[])
for word in words: trie.insert(word)
return trie
@dataclass
class FixedSizeNode(AbstractNode):
"""
Class for nodes in a variable size trie.
"""
children: list[FixedSizeNode]
def has_max_children(self, max_children):
if self.children is None: return True
# The list contains more elements than the number of children.
# Thus one has to filter out the None elements.
return sum(1 for _ in filter(None.__ne__, self.children)) <= max_children
@dataclass
class FixedSizeTrie(AbstractTrie):
"""
Class for fixed size trie data structures.
For each node, the children are stored in a fixed size list and the character to index mapping is stored in a list.
"""
children: list[FixedSizeNode]
char_to_idx: list[int] # necessary for O(1) access! Converts a character to an index in the children list.
alphabet: str
def _go_through_trie(self, word: str, if_found, if_not_found, when_completed) -> bool:
parent = self
for i, character in enumerate(word):
# get the child by the index: No need to enumerate through all children!
child = parent.children[self.char_to_idx[ord(character)]]
if child: # if the child is not None
if_found(parent, child)
parent = child
else: return if_not_found(parent, word[i:])
return when_completed()
def _delete_child_from_parent(self, parent_children, child):
# To delete a child, one simply has to set it to None.
parent_children[self.char_to_idx[ord(child.char)]] = None
def _add_empty_child(self, parent_children, character):
new_children = [None] * len(self.alphabet) # init new children array with many None elements
new_child = FixedSizeNode(char=character, children=new_children)
parent_children[self.char_to_idx[ord(character)]] = new_child
return new_child
@staticmethod
def create_trie(alphabet: str, words: list[str]) -> FixedSizeTrie:
# Find the highest index in the alphabet to determine the size of the children list.
highest_index = max([ord(char) for char in alphabet.strip('\x00')])
char_to_idx: list[int] = [None] * (highest_index+1)
children: list[FixedSizeNode] = [None] * len(alphabet)
for i, char in enumerate(alphabet):
char_to_idx[ord(char)] = i # map the character to the index
trie = FixedSizeTrie(char_to_idx=char_to_idx, alphabet=alphabet, children=children)
for word in words: trie.insert(word)
return trie
@dataclass
class HashNode(AbstractNode):
"""
Class for nodes in a variable size trie.
"""
children: dict[str, HashNode]
def has_max_children(self, max_children):
if self.children is None: return True
# The dictionary's keys contain exactly the number of children.
return len(self.children.keys()) <= max_children
@dataclass
class HashTrie(AbstractTrie):
"""
Class for hash trie data structures.
For each node, the children are stored in a dictionary.
"""
children: dict[str, HashNode]
def _go_through_trie(self, word: str, if_found, if_not_found, when_completed) -> bool:
parent = self
for i, character in enumerate(word):
# get the child by the character: No need to enumerate through all children!
if character in parent.children:
child = parent.children[character]
if_found(parent, child)
parent = child
else: return if_not_found(parent, word[i:])
return when_completed()
def _delete_child_from_parent(self, parent_children, child):
parent_children.pop(child.char) # default dictionary method does the trick
def _add_empty_child(self, parent_children, character):
# new children dictionary can be initialized with an empty dictionary
new_child = HashNode(char=character, children={})
parent_children[character] = new_child
return new_child
@staticmethod
def create_trie(words: list[str]) -> HashTrie:
trie = HashTrie({})
for word in words: trie.insert(word)
return trie