-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathsearcher.go
140 lines (127 loc) · 4.23 KB
/
searcher.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// SPDX-License-Identifier: MIT OR Unlicense
package main
import (
"errors"
str "github.com/boyter/go-string"
"regexp"
"runtime"
"strings"
"sync"
)
// SearcherWorker is the search stage of the pipeline: Start reads *FileJob
// values from input, records where each query term matches, and forwards the
// jobs that end up with a non-zero score to output.
type SearcherWorker struct {
	// input feeds jobs to Start; output receives the matching jobs and is
	// closed by Start once input has been drained.
	input, output chan *FileJob

	// searchParams is the parsed form of SearchString, filled in by Start.
	searchParams []searchParams

	// FileCount is the count of the number of files that have been processed.
	FileCount int64
	// BinaryCount is the count of the number of binary files.
	BinaryCount int64
	// MinfiedCount (sic) is not touched by the methods in this file;
	// presumably a count of minified files - confirm against the callers.
	MinfiedCount int64

	// SearchString holds the raw query pieces handed to ParseQuery by Start.
	SearchString []string
	// CaseSensitive selects the case sensitive literal search for plain,
	// quoted and fuzzy terms instead of the case insensitive one.
	CaseSensitive bool
	// MatchLimit is passed as the limit argument to every search call.
	MatchLimit int
	// InstanceId is not read by the methods in this file.
	InstanceId int
}
// NewSearcherWorker returns a worker wired to the supplied channels. The
// query starts out as an empty, non-nil slice and MatchLimit defaults to -1;
// every other field is left at its zero value.
func NewSearcherWorker(input chan *FileJob, output chan *FileJob) *SearcherWorker {
	worker := new(SearcherWorker)

	worker.input = input
	worker.output = output
	worker.SearchString = make([]string, 0)

	// sensible default
	worker.MatchLimit = -1

	return worker
}
// Start runs the search stage and contains the hot path CPU call. It parses
// SearchString with ParseQuery, then launches runtime.NumCPU() goroutines that
// drain input. For every job each search term is evaluated in order and its
// match locations are stored in the job's MatchLocations under the term.
//
// Terms are combined with AND semantics: a job is dropped as soon as a term
// has no match, or, when the preceding param is Negated, as soon as the term
// does match. Jobs that finish with a non-zero score are sent to output.
//
// Start blocks until input has been closed and drained, then closes output.
func (f *SearcherWorker) Start() {
	// Build out the search params
	f.searchParams = ParseQuery(f.SearchString)

	// find runs a literal search honouring the case sensitivity setting.
	find := func(haystack, term string) [][]int {
		if f.CaseSensitive {
			return str.IndexAll(haystack, term, f.MatchLimit)
		}
		return str.IndexAllIgnoreCase(haystack, term, f.MatchLimit)
	}

	// findEach concatenates the locations of every fuzzy variant of a term.
	// MatchLimit applies to each variant separately, not to the total.
	findEach := func(haystack string, terms []string) [][]int {
		locations := [][]int{}
		for _, t := range terms {
			locations = append(locations, find(haystack, t)...)
		}
		return locations
	}

	var wg sync.WaitGroup
	workers := runtime.NumCPU()
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for res := range f.input {
				// The string form of the content is built at most once per
				// file, and only if a literal or fuzzy term needs it.
				var text string
				haveText := false

				// Now we do the actual search against the file
				for idx, needle := range f.searchParams {
					didSearch := false

					switch needle.Type {
					case Default, Quoted, Fuzzy1, Fuzzy2:
						didSearch = true
						if !haveText {
							text = string(res.Content)
							haveText = true
						}
						// TrimSuffix removes exactly one fuzzy marker. TrimRight
						// treats its argument as a set of characters and would
						// also eat trailing digits of the term itself, turning
						// "v1~1" into "v".
						switch needle.Type {
						case Fuzzy1:
							res.MatchLocations[needle.Term] = findEach(text, makeFuzzyDistanceOne(strings.TrimSuffix(needle.Term, "~1")))
						case Fuzzy2:
							res.MatchLocations[needle.Term] = findEach(text, makeFuzzyDistanceTwo(strings.TrimSuffix(needle.Term, "~2")))
						default:
							res.MatchLocations[needle.Term] = find(text, needle.Term)
						}
					case Regex:
						// NOTE: regexSearch compiles the pattern on every call,
						// so a regex term is recompiled for each file.
						locations, err := f.regexSearch(needle, &res.Content)
						if err == nil { // Error indicates a regex compile fail so safe to ignore here
							didSearch = true
							res.MatchLocations[needle.Term] = locations
						}
					}

					// We currently ignore things such as NOT and as such
					// we don't want to break out if we run into them
					// so only update the score IF there was a search
					// which also makes this by default an AND search
					if !didSearch {
						continue
					}

					found := len(res.MatchLocations[needle.Term])
					// If the previous param was a NOT we only continue when
					// nothing was found; otherwise AND logic requires a match.
					negated := idx != 0 && f.searchParams[idx-1].Type == Negated
					if (negated && found != 0) || (!negated && found == 0) {
						res.Score = 0
						break
					}

					// Without ranking this score favors the most matches which is
					// basic but better than nothing NB this is almost always
					// overridden inside the actual ranker so its only here in case
					// we ever forget that so we at least get something
					res.Score += float64(found)
				}

				if res.Score != 0 {
					f.output <- res
				}
			}
		}()
	}

	wg.Wait()
	close(f.output)
}
// regexSearch compiles needle.Term as a regular expression and returns the
// [start, end) byte offsets of its matches in *content, passing f.MatchLimit
// as the match limit (regexp treats a negative limit as "all matches").
//
// It is possible the user supplies an invalid regex; that must not crash the
// search, so a compile failure is returned as an error, with a nil result, for
// the caller to ignore. A nil content pointer is reported the same way rather
// than dereferenced.
func (f *SearcherWorker) regexSearch(needle searchParams, content *[]byte) (x [][]int, err error) {
	if content == nil {
		return nil, errors.New("regex search given nil content")
	}

	// Compile reports a bad pattern as an error, so there is no need to call
	// MustCompile and recover from its panic.
	r, compileErr := regexp.Compile(needle.Term)
	if compileErr != nil {
		return nil, errors.New("regex compile failure issue: " + compileErr.Error())
	}

	return r.FindAllIndex(*content, f.MatchLimit), nil
}