Skip to content

Commit 9724431

Browse files
authored
Add the Memory Protector to limit the resource usage of the query operations (#599)
1 parent 765af71 commit 9724431

29 files changed

+580
-25
lines changed

CHANGES.md

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Release Notes.
2121
- Add the "api version" service to gRPC and HTTP server.
2222
- Metadata: Wait for the existing registration to be removed before registering the node.
2323
- Stream: Introduce the batch scan to improve the performance of the query and limit the memory usage.
24+
- Add the memory protector to guard the system's memory usage. It limits the memory consumed by query operations.
2425

2526
### Bug Fixes
2627

banyand/dquery/dquery.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ func (q *queryService) Name() string {
9494

9595
func (q *queryService) FlagSet() *run.FlagSet {
9696
fs := run.NewFlagSet("distributed-query")
97-
fs.DurationVar(&q.slowQuery, "dst-slow-query", 0, "distributed slow query threshold, 0 means no slow query log")
97+
fs.DurationVar(&q.slowQuery, "dst-slow-query", 5*time.Second, "distributed slow query threshold, 0 means no slow query log")
9898
return fs
9999
}
100100

banyand/liaison/grpc/property.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ import (
6161
"github.com/apache/skywalking-banyandb/pkg/query"
6262
)
6363

64-
const defaultQueryTimeout = 30 * time.Second
64+
const defaultQueryTimeout = 10 * time.Second
6565

6666
type propertyServer struct {
6767
propertyv1.UnimplementedPropertyServiceServer

banyand/measure/measure.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525

2626
commonv1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/common/v1"
2727
databasev1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/database/v1"
28+
"github.com/apache/skywalking-banyandb/banyand/protector"
2829
"github.com/apache/skywalking-banyandb/banyand/queue"
2930
"github.com/apache/skywalking-banyandb/pkg/logger"
3031
"github.com/apache/skywalking-banyandb/pkg/partition"
@@ -53,6 +54,7 @@ type option struct {
5354

5455
type measure struct {
5556
databaseSupplier schema.Supplier
57+
pm *protector.Memory
5658
indexTagMap map[string]struct{}
5759
l *logger.Logger
5860
schema *databasev1.Measure
@@ -122,14 +124,16 @@ type measureSpec struct {
122124
topNAggregations []*databasev1.TopNAggregation
123125
}
124126

125-
func openMeasure(shardNum uint32, db schema.Supplier, spec measureSpec, l *logger.Logger, pipeline queue.Queue,
127+
func openMeasure(shardNum uint32, db schema.Supplier, spec measureSpec,
128+
l *logger.Logger, pipeline queue.Queue, pm *protector.Memory,
126129
) (*measure, error) {
127130
m := &measure{
128131
shardNum: shardNum,
129132
schema: spec.schema,
130133
indexRules: spec.indexRules,
131134
topNAggregations: spec.topNAggregations,
132135
l: l,
136+
pm: pm,
133137
}
134138
if err := m.parseSpec(); err != nil {
135139
return nil, err

banyand/measure/measure_suite_test.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"github.com/apache/skywalking-banyandb/banyand/metadata"
2929
"github.com/apache/skywalking-banyandb/banyand/metadata/embeddedserver"
3030
"github.com/apache/skywalking-banyandb/banyand/observability"
31+
"github.com/apache/skywalking-banyandb/banyand/protector"
3132
"github.com/apache/skywalking-banyandb/banyand/query"
3233
"github.com/apache/skywalking-banyandb/banyand/queue"
3334
"github.com/apache/skywalking-banyandb/pkg/logger"
@@ -75,8 +76,9 @@ func setUp() (*services, func()) {
7576
gomega.Expect(err).NotTo(gomega.HaveOccurred())
7677

7778
metricSvc := observability.NewMetricService(metadataService, pipeline, "test", nil)
79+
pm := protector.NewMemory(metricSvc)
7880
// Init Measure Service
79-
measureService, err := measure.NewService(context.TODO(), metadataService, pipeline, nil, metricSvc)
81+
measureService, err := measure.NewService(metadataService, pipeline, nil, metricSvc, pm)
8082
gomega.Expect(err).NotTo(gomega.HaveOccurred())
8183
preloadMeasureSvc := &preloadMeasureService{metaSvc: metadataService}
8284
querySvc, err := query.NewService(context.TODO(), nil, measureService, metadataService, pipeline)

banyand/measure/metadata.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import (
3434
"github.com/apache/skywalking-banyandb/banyand/metadata"
3535
"github.com/apache/skywalking-banyandb/banyand/metadata/schema"
3636
"github.com/apache/skywalking-banyandb/banyand/observability"
37+
"github.com/apache/skywalking-banyandb/banyand/protector"
3738
"github.com/apache/skywalking-banyandb/banyand/queue"
3839
"github.com/apache/skywalking-banyandb/pkg/logger"
3940
resourceSchema "github.com/apache/skywalking-banyandb/pkg/schema"
@@ -267,6 +268,7 @@ type supplier struct {
267268
pipeline queue.Queue
268269
omr observability.MetricsRegistry
269270
l *logger.Logger
271+
pm *protector.Memory
270272
path string
271273
option option
272274
}
@@ -279,6 +281,7 @@ func newSupplier(path string, svc *service) *supplier {
279281
pipeline: svc.localPipeline,
280282
option: svc.option,
281283
omr: svc.omr,
284+
pm: svc.pm,
282285
}
283286
}
284287

@@ -288,7 +291,7 @@ func (s *supplier) OpenResource(shardNum uint32, supplier resourceSchema.Supplie
288291
schema: measureSchema,
289292
indexRules: spec.IndexRules(),
290293
topNAggregations: spec.TopN(),
291-
}, s.l, s.pipeline)
294+
}, s.l, s.pipeline, s.pm)
292295
}
293296

294297
func (s *supplier) ResourceSchema(md *commonv1.Metadata) (resourceSchema.ResourceSchema, error) {
@@ -351,5 +354,5 @@ func (s *portableSupplier) OpenResource(shardNum uint32, _ resourceSchema.Suppli
351354
schema: measureSchema,
352355
indexRules: spec.IndexRules(),
353356
topNAggregations: spec.TopN(),
354-
}, s.l, nil)
357+
}, s.l, nil, nil)
355358
}

banyand/measure/query.go

+5
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,7 @@ func (s *measure) searchBlocks(ctx context.Context, result *queryResult, sids []
353353
return fmt.Errorf("cannot init tstIter: %w", tstIter.Error())
354354
}
355355
var hit int
356+
var totalBlockBytes uint64
356357
for tstIter.nextBlock() {
357358
if hit%checkDoneEvery == 0 {
358359
select {
@@ -366,10 +367,14 @@ func (s *measure) searchBlocks(ctx context.Context, result *queryResult, sids []
366367
p := tstIter.piHeap[0]
367368
bc.init(p.p, p.curBlock, qo)
368369
result.data = append(result.data, bc)
370+
totalBlockBytes += bc.bm.uncompressedSizeBytes
369371
}
370372
if tstIter.Error() != nil {
371373
return fmt.Errorf("cannot iterate tstIter: %w", tstIter.Error())
372374
}
375+
if err := s.pm.AcquireResource(ctx, totalBlockBytes); err != nil {
376+
return err
377+
}
373378
result.sidToIndex = make(map[common.SeriesID]int)
374379
for i, si := range originalSids {
375380
result.sidToIndex[si] = i

banyand/measure/service.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
databasev1 "github.com/apache/skywalking-banyandb/api/proto/banyandb/database/v1"
2929
"github.com/apache/skywalking-banyandb/banyand/metadata"
3030
"github.com/apache/skywalking-banyandb/banyand/observability"
31+
"github.com/apache/skywalking-banyandb/banyand/protector"
3132
"github.com/apache/skywalking-banyandb/banyand/queue"
3233
"github.com/apache/skywalking-banyandb/pkg/bus"
3334
"github.com/apache/skywalking-banyandb/pkg/logger"
@@ -60,6 +61,7 @@ type service struct {
6061
omr observability.MetricsRegistry
6162
schemaRepo *schemaRepo
6263
l *logger.Logger
64+
pm *protector.Memory
6365
root string
6466
option option
6567
maxDiskUsagePercent int
@@ -147,11 +149,12 @@ func (s *service) GracefulStop() {
147149
}
148150

149151
// NewService returns a new service.
150-
func NewService(_ context.Context, metadata metadata.Repo, pipeline queue.Server, metricPipeline queue.Server, omr observability.MetricsRegistry) (Service, error) {
152+
func NewService(metadata metadata.Repo, pipeline queue.Server, metricPipeline queue.Server, omr observability.MetricsRegistry, pm *protector.Memory) (Service, error) {
151153
return &service{
152154
metadata: metadata,
153155
pipeline: pipeline,
154156
metricPipeline: metricPipeline,
155157
omr: omr,
158+
pm: pm,
156159
}, nil
157160
}

banyand/protector/protector.go

+191
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
// Licensed to Apache Software Foundation (ASF) under one or more contributor
2+
// license agreements. See the NOTICE file distributed with
3+
// this work for additional information regarding copyright
4+
// ownership. Apache Software Foundation (ASF) licenses this file to you under
5+
// the Apache License, Version 2.0 (the "License"); you may
6+
// not use this file except in compliance with the License.
7+
// You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
// Package protector provides a set of protectors that stop the query services when the resource usage exceeds the limit.
19+
package protector
20+
21+
import (
22+
"context"
23+
"errors"
24+
"fmt"
25+
"runtime/metrics"
26+
"sync/atomic"
27+
"time"
28+
29+
"github.com/dustin/go-humanize"
30+
31+
"github.com/apache/skywalking-banyandb/banyand/observability"
32+
"github.com/apache/skywalking-banyandb/pkg/cgroups"
33+
"github.com/apache/skywalking-banyandb/pkg/logger"
34+
"github.com/apache/skywalking-banyandb/pkg/meter"
35+
"github.com/apache/skywalking-banyandb/pkg/run"
36+
)
37+
38+
// scope is the "memory_protector" metrics sub-scope under the observability
// root scope; NewMemory passes it to the metrics registry when creating gauges.
var scope = observability.RootScope.SubScope("memory_protector")
39+
40+
// Memory is a protector that stops the query services when the memory usage exceeds the limit.
41+
type Memory struct {
42+
omr observability.MetricsRegistry
43+
limitGauge meter.Gauge
44+
usageGauge meter.Gauge
45+
l *logger.Logger
46+
closed chan struct{}
47+
blockedChan chan struct{}
48+
allowedPercent int
49+
allowedBytes run.Bytes
50+
limit uint64
51+
usage uint64
52+
}
53+
54+
// NewMemory creates a new Memory protector.
55+
func NewMemory(omr observability.MetricsRegistry) *Memory {
56+
queueSize := cgroups.CPUs()
57+
factory := omr.With(scope)
58+
59+
return &Memory{
60+
omr: omr,
61+
blockedChan: make(chan struct{}, queueSize),
62+
closed: make(chan struct{}),
63+
64+
limitGauge: factory.NewGauge("limit"),
65+
usageGauge: factory.NewGauge("usage"),
66+
}
67+
}
68+
69+
// AcquireResource attempts to acquire a `size` amount of memory.
70+
func (m *Memory) AcquireResource(ctx context.Context, size uint64) error {
71+
if m.limit == 0 {
72+
return nil
73+
}
74+
start := time.Now()
75+
76+
select {
77+
case m.blockedChan <- struct{}{}:
78+
defer func() { <-m.blockedChan }()
79+
case <-ctx.Done():
80+
return fmt.Errorf("context canceled while waiting for blocked queue slot: %w", ctx.Err())
81+
}
82+
83+
for {
84+
currentUsage := atomic.LoadUint64(&m.usage)
85+
if currentUsage+size <= m.limit {
86+
return nil
87+
}
88+
89+
select {
90+
case <-time.After(100 * time.Millisecond):
91+
continue
92+
case <-ctx.Done():
93+
return fmt.Errorf(
94+
"context canceled: memory acquisition failed (currentUsage: %d, limit: %d, size: %d, blockedDuration: %v): %w",
95+
currentUsage, m.limit, size, time.Since(start), ctx.Err(),
96+
)
97+
}
98+
}
99+
}
100+
101+
// Name returns the name of the protector.
102+
func (m *Memory) Name() string {
103+
return "memory-protector"
104+
}
105+
106+
// FlagSet returns the flag set for the protector.
107+
func (m *Memory) FlagSet() *run.FlagSet {
108+
flagS := run.NewFlagSet(m.Name())
109+
flagS.IntVarP(&m.allowedPercent, "allowed-percent", "", 75,
110+
"Allowed bytes of memory usage. If the memory usage exceeds this value, the query services will stop. "+
111+
"Setting a large value may evict data from the OS page cache, causing high disk I/O.")
112+
flagS.VarP(&m.allowedBytes, "allowed-bytes", "", "Allowed percentage of total memory usage. If usage exceeds this value, the query services will stop. "+
113+
"This takes effect only if `allowed-bytes` is 0. If usage is too high, it may cause OS page cache eviction.")
114+
return flagS
115+
}
116+
117+
// Validate validates the protector's flags.
118+
func (m *Memory) Validate() error {
119+
if m.allowedPercent <= 0 || m.allowedPercent > 100 {
120+
if m.allowedBytes <= 0 {
121+
return errors.New("allowed-bytes must be greater than 0")
122+
}
123+
return errors.New("allowed-percent must be in the range (0, 100]")
124+
}
125+
return nil
126+
}
127+
128+
// PreRun initializes the protector.
129+
func (m *Memory) PreRun(context.Context) error {
130+
m.l = logger.GetLogger(m.Name())
131+
if m.allowedBytes > 0 {
132+
m.limit = uint64(m.allowedBytes)
133+
m.l.Info().
134+
Str("limit", humanize.Bytes(m.limit)).
135+
Msg("memory protector enabled")
136+
} else {
137+
cgLimit, err := cgroups.MemoryLimit()
138+
if err != nil {
139+
m.l.Warn().Err(err).Msg("failed to get memory limit from cgroups, disable memory protector")
140+
return nil
141+
}
142+
if cgLimit <= 0 || cgLimit > 1e18 {
143+
m.l.Warn().Int64("cgroup_memory_limit", cgLimit).Msg("cgroup memory limit is invalid, disable memory protector")
144+
return nil
145+
}
146+
m.limit = uint64(cgLimit) * uint64(m.allowedPercent) / 100
147+
m.l.Info().
148+
Str("limit", humanize.Bytes(m.limit)).
149+
Str("cgroup_limit", humanize.Bytes(uint64(cgLimit))).
150+
Int("percent", m.allowedPercent).
151+
Msg("memory protector enabled")
152+
}
153+
m.limitGauge.Set(float64(m.limit))
154+
return nil
155+
}
156+
157+
// GracefulStop stops the protector.
158+
func (m *Memory) GracefulStop() {
159+
close(m.closed)
160+
}
161+
162+
// Serve starts the protector.
163+
func (m *Memory) Serve() run.StopNotify {
164+
if m.limit == 0 {
165+
return m.closed
166+
}
167+
go func() {
168+
ticker := time.NewTicker(5 * time.Second)
169+
defer ticker.Stop()
170+
171+
for {
172+
select {
173+
case <-m.closed:
174+
return
175+
case <-ticker.C:
176+
samples := []metrics.Sample{
177+
{Name: "/memory/classes/total:bytes"},
178+
}
179+
metrics.Read(samples)
180+
usedBytes := samples[0].Value.Uint64()
181+
182+
atomic.StoreUint64(&m.usage, usedBytes)
183+
184+
if usedBytes > m.limit {
185+
m.l.Warn().Str("used", humanize.Bytes(usedBytes)).Str("limit", humanize.Bytes(m.limit)).Msg("memory usage exceeds limit")
186+
}
187+
}
188+
}
189+
}()
190+
return m.closed
191+
}

0 commit comments

Comments
 (0)