Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 13c80a5

Browse files
naivewongcodesome
authored and committed
Optimize queries using regex matchers for set lookups (#602)
* Original version of the set optimization Signed-off-by: naivewong <[email protected]> * simple set matcher Signed-off-by: naivewong <[email protected]> * simple set matcher Signed-off-by: naivewong <[email protected]> * update Signed-off-by: naivewong <[email protected]> * update Signed-off-by: naivewong <[email protected]> * add benchmark Signed-off-by: naivewong <[email protected]> * update Signed-off-by: naivewong <[email protected]> * update Signed-off-by: naivewong <[email protected]> * update benchmark Signed-off-by: naivewong <[email protected]> * update Signed-off-by: naivewong <[email protected]> * update benchmark Signed-off-by: naivewong <[email protected]> * update benchmark Signed-off-by: naivewong <[email protected]> * update benchmark Signed-off-by: naivewong <[email protected]> * update Signed-off-by: naivewong <[email protected]> * update Signed-off-by: naivewong <[email protected]> * update Signed-off-by: naivewong <[email protected]> * update Signed-off-by: naivewong <[email protected]> * use genSeries from #467 Signed-off-by: naivewong <[email protected]> * update Signed-off-by: naivewong <[email protected]>
1 parent 562e93e commit 13c80a5

File tree

4 files changed

+324
-36
lines changed

4 files changed

+324
-36
lines changed

block_test.go

+9-30
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ import (
2121
"math/rand"
2222
"os"
2323
"path/filepath"
24+
"strconv"
2425
"testing"
2526

2627
"github.com/go-kit/kit/log"
@@ -184,6 +185,11 @@ func createBlock(tb testing.TB, dir string, series []Series) string {
184185
return filepath.Join(dir, ulid.String())
185186
}
186187

188+
// Fixed label name and value stems used by genSeries so that the generated
// label sets are deterministic.
const (
189+
// defaultLabelName is the name of the label whose value is the series index;
// with a numeric suffix appended it also names the additional labels.
defaultLabelName = "labelName"
190+
// defaultLabelValue is the stem of the additional labels' values; genSeries
// appends the same numeric suffix it appended to the label name.
defaultLabelValue = "labelValue"
191+
)
192+
187193
// genSeries generates series with a given number of labels and values.
188194
func genSeries(totalSeries, labelCount int, mint, maxt int64) []Series {
189195
if totalSeries == 0 || labelCount == 0 {
@@ -193,8 +199,9 @@ func genSeries(totalSeries, labelCount int, mint, maxt int64) []Series {
193199
series := make([]Series, totalSeries)
194200
for i := 0; i < totalSeries; i++ {
195201
lbls := make(map[string]string, labelCount)
196-
for len(lbls) < labelCount {
197-
lbls[randString()] = randString()
202+
lbls[defaultLabelName] = strconv.Itoa(i)
203+
for j := 1; len(lbls) < labelCount; j++ {
204+
lbls[defaultLabelName+strconv.Itoa(j)] = defaultLabelValue + strconv.Itoa(j)
198205
}
199206
samples := make([]tsdbutil.Sample, 0, maxt-mint+1)
200207
for t := mint; t <= maxt; t++ {
@@ -224,31 +231,3 @@ func populateSeries(lbls []map[string]string, mint, maxt int64) []Series {
224231
}
225232
return series
226233
}
227-
228-
const letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
229-
const (
230-
letterIdxBits = 6 // 6 bits to represent a letter index
231-
letterIdxMask = 1<<letterIdxBits - 1 // All 1-bits, as many as letterIdxBits
232-
letterIdxMax = 63 / letterIdxBits // # of letter indices fitting in 63 bits
233-
)
234-
235-
// randString generates random string.
236-
func randString() string {
237-
maxLength := int32(50)
238-
length := rand.Int31n(maxLength)
239-
b := make([]byte, length+1)
240-
// A rand.Int63() generates 63 random bits, enough for letterIdxMax characters!
241-
for i, cache, remain := length, rand.Int63(), letterIdxMax; i >= 0; {
242-
if remain == 0 {
243-
cache, remain = rand.Int63(), letterIdxMax
244-
}
245-
if idx := int(cache & letterIdxMask); idx < len(letterBytes) {
246-
b[i] = letterBytes[idx]
247-
i--
248-
}
249-
cache >>= letterIdxBits
250-
remain--
251-
}
252-
253-
return string(b)
254-
}

labels/selector.go

+7-6
Original file line number | Diff line number | Diff line change
@@ -63,14 +63,15 @@ func NewEqualMatcher(name, value string) Matcher {
6363
return &EqualMatcher{name: name, value: value}
6464
}
6565

66-
type regexpMatcher struct {
66+
type RegexpMatcher struct {
6767
name string
6868
re *regexp.Regexp
6969
}
7070

71-
func (m regexpMatcher) Name() string { return m.name }
72-
func (m regexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
73-
func (m regexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) }
71+
func (m RegexpMatcher) Name() string { return m.name }
72+
func (m RegexpMatcher) Matches(v string) bool { return m.re.MatchString(v) }
73+
func (m RegexpMatcher) String() string { return fmt.Sprintf("%s=~%q", m.name, m.re.String()) }
74+
func (m RegexpMatcher) Value() string { return m.re.String() }
7475

7576
// NewRegexpMatcher returns a new matcher verifying that a value matches
7677
// the regular expression pattern.
@@ -79,7 +80,7 @@ func NewRegexpMatcher(name, pattern string) (Matcher, error) {
7980
if err != nil {
8081
return nil, err
8182
}
82-
return &regexpMatcher{name: name, re: re}, nil
83+
return &RegexpMatcher{name: name, re: re}, nil
8384
}
8485

8586
// NewMustRegexpMatcher returns a new matcher verifying that a value matches
@@ -90,7 +91,7 @@ func NewMustRegexpMatcher(name, pattern string) Matcher {
9091
if err != nil {
9192
panic(err)
9293
}
93-
return &regexpMatcher{name: name, re: re}
94+
return &RegexpMatcher{name: name, re: re}
9495

9596
}
9697

querier.go

+77
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ import (
1717
"fmt"
1818
"sort"
1919
"strings"
20+
"unicode/utf8"
2021

2122
"github.com/pkg/errors"
2223
"github.com/prometheus/tsdb/chunkenc"
@@ -266,6 +267,62 @@ func (q *blockQuerier) Close() error {
266267
return merr.Err()
267268
}
268269

270+
// regexMetaCharacterBytes is the bitmap consulted by isRegexMetaCharacter.
// For an ASCII byte b, bit (b/16) of element (b%16) is set when b is one of
// the characters .+*?()|[]{}^$ (the backslash is deliberately not in the set).
//
// The bitmap is built by the variable's own initializer instead of an init
// function: init functions only run after every package-level variable has
// been initialized, so a package-level initializer that ends up calling
// isRegexMetaCharacter would otherwise observe an empty bitmap.
var regexMetaCharacterBytes = func() [16]byte {
	var bitmap [16]byte
	for _, b := range []byte(`.+*?()|[]{}^$`) {
		bitmap[b%16] |= 1 << (b / 16)
	}
	return bitmap
}()

// isRegexMetaCharacter reports whether byte b needs to be escaped.
// Bytes at or above utf8.RuneSelf (i.e. parts of multi-byte UTF-8 sequences)
// are never meta characters.
func isRegexMetaCharacter(b byte) bool {
	return b < utf8.RuneSelf && regexMetaCharacterBytes[b%16]&(1<<(b/16)) != 0
}
283+
284+
func findSetMatches(pattern string) []string {
285+
// Return empty matches if the wrapper from Prometheus is missing.
286+
if len(pattern) < 6 || pattern[:4] != "^(?:" || pattern[len(pattern)-2:] != ")$" {
287+
return nil
288+
}
289+
escaped := false
290+
sets := []*strings.Builder{&strings.Builder{}}
291+
for i := 4; i < len(pattern)-2; i++ {
292+
if escaped {
293+
switch {
294+
case isRegexMetaCharacter(pattern[i]):
295+
sets[len(sets)-1].WriteByte(pattern[i])
296+
case pattern[i] == '\\':
297+
sets[len(sets)-1].WriteByte('\\')
298+
default:
299+
return nil
300+
}
301+
escaped = false
302+
} else {
303+
switch {
304+
case isRegexMetaCharacter(pattern[i]):
305+
if pattern[i] == '|' {
306+
sets = append(sets, &strings.Builder{})
307+
} else {
308+
return nil
309+
}
310+
case pattern[i] == '\\':
311+
escaped = true
312+
default:
313+
sets[len(sets)-1].WriteByte(pattern[i])
314+
}
315+
}
316+
}
317+
matches := make([]string, 0, len(sets))
318+
for _, s := range sets {
319+
if s.Len() > 0 {
320+
matches = append(matches, s.String())
321+
}
322+
}
323+
return matches
324+
}
325+
269326
// PostingsForMatchers assembles a single postings iterator against the index reader
270327
// based on the given matchers.
271328
func PostingsForMatchers(ix IndexReader, ms ...labels.Matcher) (index.Postings, error) {
@@ -346,6 +403,14 @@ func postingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings, error
346403
return ix.Postings(em.Name(), em.Value())
347404
}
348405

406+
// Fast-path for set matching.
407+
if em, ok := m.(*labels.RegexpMatcher); ok {
408+
setMatches := findSetMatches(em.Value())
409+
if len(setMatches) > 0 {
410+
return postingsForSetMatcher(ix, em.Name(), setMatches)
411+
}
412+
}
413+
349414
tpls, err := ix.LabelValues(m.Name())
350415
if err != nil {
351416
return nil, err
@@ -411,6 +476,18 @@ func inversePostingsForMatcher(ix IndexReader, m labels.Matcher) (index.Postings
411476
return index.Merge(rit...), nil
412477
}
413478

479+
func postingsForSetMatcher(ix IndexReader, name string, matches []string) (index.Postings, error) {
480+
var its []index.Postings
481+
for _, match := range matches {
482+
if it, err := ix.Postings(name, match); err == nil {
483+
its = append(its, it)
484+
} else {
485+
return nil, err
486+
}
487+
}
488+
return index.Merge(its...), nil
489+
}
490+
414491
func mergeStrings(a, b []string) []string {
415492
maxl := len(a)
416493
if len(b) > len(a) {

0 commit comments

Comments
 (0)