-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnormalize.go
61 lines (49 loc) · 1.74 KB
/
normalize.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
// Package txt provides text normalization helpers.
package txt
import (
"strings"
)
// Normalizer is a function that takes a string and returns its normalized
// form. Several normalizers can be chained by passing them to Normalize.
type Normalizer func(text string) string
// Built-in normalizers. Each one is a Normalizer value, so any combination of
// them can be handed to Normalize in whatever order the caller needs.
var (
	// NormalizerToLower converts all characters to lowercase.
	// (The name differs from its Normalize* siblings; it is kept as-is
	// because it is part of the exported API.)
	NormalizerToLower Normalizer = strings.ToLower

	// NormalizeSingleSpace converts excess whitespace into a single space by
	// delegating to replaceSpaces.
	NormalizeSingleSpace Normalizer = func(s string) string {
		collapsed := replaceSpaces(s)
		return collapsed
	}

	// NormalizePunctuation removes most punctuation: , ; ! ? " ' ( ) &
	// Periods and slashes are not removed.
	NormalizePunctuation Normalizer = func(s string) string {
		stripped := removeChars(s, ",", ";", "!", "?", "\"", "'", "(", ")", "&")
		return stripped
	}

	// NormalizeAllPunctuation removes all punctuation: everything
	// NormalizePunctuation removes, plus periods, slashes and backslashes.
	NormalizeAllPunctuation Normalizer = func(s string) string {
		withoutCommon := NormalizePunctuation(s)
		return removeChars(withoutCommon, ".", "/", "\\")
	}

	// NormalizeSpecial removes the special characters
	// ! @ # % ^ & * ( ) - _ + = [ ] { } ; ' < > ? ~ and the backtick.
	NormalizeSpecial Normalizer = func(s string) string {
		cleaned := removeChars(s, "!", "@", "#", "%", "^", "&", "*", "(", ")", "-", "_", "+", "=", "[", "]", "{", "}", ";", "'", "<", ">", "?", "~", "`")
		return cleaned
	}

	// NormalizeHyphens replaces every hyphen with a space.
	NormalizeHyphens Normalizer = func(s string) string {
		const hyphen, space = "-", " "
		return strings.ReplaceAll(s, hyphen, space)
	}
)
// DefaultNormalizer is the list of normalizers that Normalize applies, in the
// order listed here, when it is called without any options.
//
// NOTE(review): NormalizeSingleSpace runs before NormalizeHyphens (which turns
// "-" into " ") and NormalizeSpecial (which deletes characters), so the result
// may still contain consecutive spaces, e.g. for "a - b". Confirm that this
// order is intended before relying on single-spaced output.
var DefaultNormalizer = []Normalizer{
NormalizerToLower,
NormalizePunctuation,
NormalizeSingleSpace,
NormalizeHyphens,
NormalizeSpecial,
}
// Normalize applies the given normalizers to text in the order they are
// provided, feeding each one the output of the previous one, and returns the
// final result. If no options are provided, the normalizers listed in
// DefaultNormalizer are applied instead.
//
// The options slice is only read. Falling back to the defaults never appends
// into the caller's backing array, so a call such as Normalize(s, buf[:0]...)
// leaves buf untouched.
func Normalize(text string, options ...Normalizer) string {
	// len is 0 for a nil slice too, so a single check covers both "no
	// arguments" and an explicitly spread nil or empty slice.
	if len(options) == 0 {
		options = DefaultNormalizer
	}
	for _, normalize := range options {
		text = normalize(text)
	}
	return text
}