-
Notifications
You must be signed in to change notification settings - Fork 1
/
util.go
50 lines (42 loc) · 1006 Bytes
/
util.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
package tfidf
import (
"regexp"
"strings"
"unicode"
)
// stripSpacesRegex matches every run of one or more whitespace characters
// (the RE2 class `\s`: space, tab, newline, form feed and carriage return).
// It is compiled once during package initialisation and reused by
// StripSpacesRegex.
//
// The quantifier is `+` rather than `{2,}` so that a lone newline or tab is
// matched as well; with `{2,}` a single newline between two words was left in
// place. MustCompile is used because the pattern is a constant: a compile
// failure is a programming error and should surface at start-up instead of
// leaving a nil regexp behind.
var stripSpacesRegex = regexp.MustCompile(`\s+`)
// StripSpacesRegex returns a copy of t in which every match of the
// package-level stripSpacesRegex pattern has been replaced by a single
// space character. The input string itself is not modified.
func StripSpacesRegex(t string) string {
	return stripSpacesRegex.ReplaceAllString(t, " ")
}
// StripSpacesLoop collapses every run of whitespace in t into a single space
// character, using one pass over the runes instead of a regular expression.
//
// Whitespace is whatever unicode.IsSpace reports, so tabs, newlines, carriage
// returns and Unicode space characters are all normalised to ' '. Leading and
// trailing runs are collapsed to one space as well; they are not trimmed.
func StripSpacesLoop(t string) string {
	// inRun is true while the previous rune was whitespace, i.e. the single
	// space for the current run has already been emitted.
	inRun := false
	return strings.Map(func(r rune) rune {
		if !unicode.IsSpace(r) {
			inRun = false
			return r
		}
		if inRun {
			// A negative rune tells strings.Map to drop the character.
			return -1
		}
		inRun = true
		// Always emit a plain space for the first rune of a run. Passing r
		// through unchanged would leak '\r', '\v', '\f' or NBSP into the
		// output (e.g. "a\r\nb" used to become "a\rb").
		return ' '
	}, t)
}