-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathfilter.go
233 lines (203 loc) · 6.89 KB
/
filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
// Filtering and Tree Walking
//
// The Filter function recursively removes or manipulates nodes with a
// FilterFunction:
//
//	newNodes := gedcom.Filter(node, document, func(node gedcom.Node) (gedcom.Node, bool) {
//		if node.Tag().Is(gedcom.TagIndividual) {
//			// false means it will not traverse children, since an
//			// individual can never be inside of another individual.
//			return node, false
//		}
//
//		return nil, false
//	})
//
//	// Remove all tags that are not official.
//	newNodes := gedcom.Filter(node, document, gedcom.OfficialTagFilter())
//
// Some examples of Filter functions include BlacklistTagFilter,
// OfficialTagFilter, SimpleNameFilter and WhitelistTagFilter.
package gedcom
// FilterFunction is the callback used with the Filter function. It is invoked
// once for each node that the traversal visits.
//
// The node (as passed in through the parameter) will be replaced with newNode.
// You should return the same node argument if you do not want the node to be
// changed.
//
// The traverseChildren result decides if the traversal should continue
// through the children of node. If the traversal continues (traverseChildren is
// true) the FilterFunction will always receive the children of the original
// node, and not the children (if any) of newNode. If traverseChildren is false
// the children of newNode are attached to the result as they are, without
// being passed to the FilterFunction.
//
// If the newNode is nil then it will be removed and the children will not be
// traversed, regardless of the traverseChildren value.
type FilterFunction func(node Node) (newNode Node, traverseChildren bool)
// Filter returns a new nested node structure by recursively filtering root and
// all of its children through the callback fn.
//
// See FilterFunction for the contract that fn has to follow. It returns nil
// when fn removes root itself.
//
// Several ready-made filters are available, including WhitelistTagFilter,
// BlacklistTagFilter and OfficialTagFilter.
//
// Some nodes, such as an IndividualNode or FamilyNode, cannot be created
// without being attached to a document. The copies made by Filter are created
// against the supplied document rather than the one the original nodes belong
// to.
func Filter(root Node, document *Document, fn FilterFunction) Node {
	// Every top-level call starts with a fresh entity map and no current
	// family.
	return filter(root, fn, entityMap{}, document, nil)
}
// filter is the recursive worker behind Filter.
//
// It passes root to fn and returns nil when fn yields a nil node. Otherwise it
// makes a childless copy of the node returned by fn and then fills in the
// children:
//
//   - when fn asked to keep traversing, every child of the original root is
//     filtered recursively and the surviving results are attached;
//   - otherwise the children of the node returned by fn are attached as they
//     are, without being copied or filtered.
//
// entityMap carries the families that have already been recreated during this
// traversal, and family is the family that copied nodes are created against.
func filter(root Node, fn FilterFunction, entityMap entityMap, document *Document, family *FamilyNode) Node {
	replacement, descend := fn(root)
	if IsNil(replacement) {
		return nil
	}

	// A node that knows its family switches the current family for itself and
	// everything below it. NOTE(review): GetOrAssign presumably returns the
	// value already stored for this family and only runs the callback the
	// first time - confirm against entityMap.
	if noder, ok := replacement.(FamilyNoder); ok {
		original := noder.Family()
		assigned := entityMap.GetOrAssign(original, func() interface{} {
			return document.AddFamily(original.Pointer())
		})
		family = assigned.(*FamilyNode)
	}

	copied := shallowCopyNode(replacement, document, family)

	if !descend {
		// The callback took responsibility for the subtree, so its children
		// are reused directly.
		for _, child := range replacement.Nodes() {
			copied.AddNode(child)
		}

		return copied
	}

	// Traverse the children of the original node, not of the replacement.
	for _, child := range root.Nodes() {
		if filtered := filter(child, fn, entityMap, document, family); filtered != nil {
			copied.AddNode(filtered)
		}
	}

	return copied
}
// shallowCopyNode creates a new node with the same tag, value and pointer as
// node, but without copying any of its children.
//
// Some nodes require a document (such as an IndividualNode) or a family (such
// as a ChildNode) to be created. The copy must not be attached to the entities
// of the original node, so the document and family to use are passed in.
//
// Callers should reuse the same document and family across the nodes they
// copy. Creating fresh entities for every node would leave the copies
// fractured and impossible to traverse as one structure.
func shallowCopyNode(node Node, document *Document, family *FamilyNode) Node {
	return newNode(document, family, node.Tag(), node.Value(), node.Pointer())
}
// WhitelistTagFilter returns a FilterFunction that keeps a node only when its
// tag is one of the provided tags. Kept nodes have their children traversed;
// every other node is removed.
//
// This is the opposite of BlacklistTagFilter.
//
// See the Filter function.
func WhitelistTagFilter(tags ...Tag) FilterFunction {
	return func(node Node) (Node, bool) {
		allowed := false
		for _, candidate := range tags {
			if candidate.Is(node.Tag()) {
				allowed = true
				break
			}
		}

		if !allowed {
			return nil, false
		}

		return node, true
	}
}
// BlacklistTagFilter returns a FilterFunction that removes any node whose tag
// is one of the provided tags. All other nodes are kept and their children are
// traversed.
//
// This is the opposite of WhitelistTagFilter.
//
// See the Filter function.
func BlacklistTagFilter(tags ...Tag) FilterFunction {
	return func(node Node) (Node, bool) {
		blocked := false
		for _, candidate := range tags {
			if candidate.Is(node.Tag()) {
				blocked = true
				break
			}
		}

		if blocked {
			return nil, false
		}

		return node, true
	}
}
// OfficialTagFilter returns a FilterFunction that keeps only the nodes that
// have an official tag. See Tag.IsOfficial for more information.
//
// The children of a node are traversed only when the node itself is official.
//
// See the Filter function.
func OfficialTagFilter() FilterFunction {
	return func(node Node) (Node, bool) {
		official := node.Tag().IsOfficial()
		kept := NodeCondition(official, node, nil)

		return kept, official
	}
}
// SimpleNameFilter returns a FilterFunction that flattens NAME nodes using the
// provided format.
//
// Each NameNode is replaced with a new NAME node whose value is the formatted
// name and whose pointer is the same as the original. The children of the
// original name are not traversed. Any other node is kept unchanged and its
// children are traversed.
//
// This is useful for comparing names when the components of the name (title,
// suffix, etc) are less important than the name itself.
func SimpleNameFilter(format NameFormat) FilterFunction {
	return func(node Node) (Node, bool) {
		name, isName := node.(*NameNode)
		if !isName {
			return node, true
		}

		// The replacement is created without a document or family.
		flattened := newNode(nil, nil, TagName, name.Format(format), name.Pointer())

		return flattened, false
	}
}
// OnlyVitalsTagFilter returns a FilterFunction that removes all tags except
// for the vital information of an individual.
//
// The vital nodes are (or multiples in the same individual of): name, birth,
// baptism, death and burial. Below these only the name components, the date
// and the place are retained.
func OnlyVitalsTagFilter() FilterFunction {
	vitals := WhitelistTagFilter(
		// Level 0: the individual has to be allowed, otherwise its children
		// would never be reached.
		TagIndividual,

		// Level 1: the name and the vital events.
		TagName,
		TagBirth,
		TagBaptism,
		TagDeath,
		TagBurial,

		// Level 2: these should only ever appear as direct children of the
		// level 1 tags.
		TagGivenName,
		TagSurname,
		TagSurnamePrefix,
		TagNamePrefix,
		TagNameSuffix,
		TagTitle,
		TagDate,
		TagPlace,
	)

	return vitals
}
// RemoveEmptyDeathTagFilter returns a FilterFunction that removes any Death
// (DEAT) event that does not have any child nodes (which would otherwise be
// information like the date or place).
//
// Some applications use the death tag as a marker without any further
// information, which can cause problems when comparing individuals.
func RemoveEmptyDeathTagFilter() FilterFunction {
	return func(node Node) (Node, bool) {
		death, isDeath := node.(*DeathNode)
		if !isDeath || len(death.Nodes()) > 0 {
			// Not a death event, or one that carries details: keep it.
			return node, true
		}

		return nil, false
	}
}
// RemoveDuplicateNamesFilter returns a FilterFunction that replaces every
// IndividualNode with a new individual that does not contain repeated names.
//
// Two NameNodes are considered duplicates when their String() values are
// equal. The first occurrence is kept and later ones are dropped. All other
// children are carried over in their original order.
//
// Nodes that are not individuals are kept unchanged and their children are
// traversed.
func RemoveDuplicateNamesFilter() FilterFunction {
	return func(node Node) (Node, bool) {
		individual, ok := node.(*IndividualNode)
		if !ok {
			// Keep traversing so that individuals below this node are still
			// reached.
			return node, true
		}

		deduplicated := newIndividualNode(individual.Document(),
			individual.Pointer())

		seen := map[string]bool{}
		for _, child := range individual.children {
			if name, isName := child.(*NameNode); isName {
				key := name.String()
				if seen[key] {
					continue
				}

				seen[key] = true
			}

			deduplicated.AddNode(child)
		}

		// The children have already been copied across above, and an
		// individual cannot contain another individual, so there is no need
		// to recurse any further.
		return deduplicated, false
	}
}