forked from buildfarm/buildfarm
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathshard-server.config.example
218 lines (185 loc) · 9.22 KB
/
shard-server.config.example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# an instance specification
instances {
name: "shard"
# the digest function for this instance, required
# to match out of band between the client and server
# since resource names must be determined on the client
# for a valid upload
digest_function: SHA256
# the implicit type specifier for this instance
# a shard instance is a cooperative member of a cluster
# that communicates over a backplane for its operation
# a shard instance implies a backend-retained ActionCache,
# Operation store, and CAS index and registration, with
# Workers representing egalitarian shards of the CAS
shard_instance_config: {
# Enable an agent in this instance to monitor the
# Operation store to ensure that dispatched Operations
# with expired worker leases are requeued
# At least one agent within the cluster must exist to
# ensure that worker leases on operations are handled.
# While not exclusive, it is safe to run the monitor
# on multiple instances concurrently.
run_dispatched_monitor: true
# The interval at which to run the dispatch monitor's
# lease expiration check
dispatched_monitor_interval_seconds: 1
# Enable an agent in this instance to acquire Execute
# request entries cooperatively from an arrival queue
# on the backplane.
# If true, this instance will validate and transform
# an arrival queue ExecuteRequest as an ExecuteEntry into
# a QueueEntry with a heavyweight ready-to-run QueuedOperation
# available in the CAS via a fixed size.
# At least one agent within the cluster must exist to
# bring operations from the arrival queue to the ready-to-run queue,
# or no operation execution will take place.
# The operation queuer acquires each arrival queue entry exclusively,
# so it is safe to run, and should be run, on multiple instances
# concurrently.
run_operation_queuer: true
# The maximum size of a single blob accepted via a
# ByteStream#write or ContentAddressableStorage#batchUpdateBlobs
# To meet API standards, a request which exceeds this size receives
# a transient UNAVAILABLE response, which, in bazel's case, induces
# a fallback to non-remote recovery, rather than a catastrophic
# failure.
max_blob_size: 4294967296
# A backplane specification hosted with redis cluster
redis_shard_backplane_config: {
# The URI of the redis cluster endpoint. This must
# be a single URI, regardless of the layout of the cluster
redis_uri: "redis://localhost:6379"
# The size of the redis connection pool
jedis_pool_max_total: 4000
# The redis key used to store a hash of registered Workers
# to their registration expiration time. After a worker's
# registration has expired, they are no longer considered
# as shards of the CAS
workers_hash_name: "Workers"
# A redis pubsub channel key where changes to the cluster
# membership are announced
worker_channel: "WorkerChannel"
# A redis key prefix for all ActionCache entries, suffixed
# with the action's key and mapping to an ActionResult
action_cache_prefix: "ActionCache"
# The ttl, in seconds, maintained for ActionCache entries. This is
# not refreshed on a getActionResult hit
action_cache_expire: 2419200 # 4 weeks
# A redis key prefix for all blacklisted actions, suffixed
# with the action's key hash. An action which is blacklisted
# should be rejected for all requests where it is identified via
# its RequestMetadata
# To meet API standards, a request which matches this condition
# receives a transient UNAVAILABLE response, which, in bazel's
# case, can induce a fallback to non-remote recovery, rather
# than a catastrophic failure.
action_blacklist_prefix: "ActionBlacklist"
# The ttl, in seconds, maintained for action_blacklist entries.
action_blacklist_expire: 3600 # 1 hour
# A redis key prefix for all Operations, suffixed with the
# operation's name and mapping to an Operation which reflects
# the cluster perceived state of that Operation
operation_prefix: "Operation"
# The ttl, in seconds, maintained for all Operations, updated on
# each modification
operation_expire: 604800 # 1 week
# The redis key used to store a list of ExecuteEntrys
# awaiting transformation into QueueEntrys. These are queued
# by an instance which receives an ExecuteRequest, and
# dequeued by an operation_queuer agent.
# Redis keyspace manipulation is used here to support multi-
# key commands.
# The string contained within {} must match that of
# processing_list_name.
pre_queued_operations_list_name: "{Arrival}:PreQueuedOperations"
# The redis key of a list used to ensure reliable processing of
# arrival queue entries together with operation watch monitoring.
# The string contained within {} must match that of
# pre_queued_operations_list_name.
processing_list_name: "{Arrival}:ProcessingOperations"
# A redis key prefix for operations which are being dequeued
# from the arrival queue. The key is suffixed with the operation
# name and contains the expiration time in epoch milliseconds
# after which the operation is considered lost.
processing_prefix: "Processing"
# The delay in milliseconds used to populate processing operation
# entries
processing_timeout_millis: 10000
# The redis key used to store a list of QueueEntrys
# awaiting execution by workers. These are queued
# by an operation_queuer agent, and dequeued by a worker.
# Redis keyspace manipulation is used here to support multi-
# key commands.
# The string contained within {} must match that of
# dispatching_list_name.
queued_operations_list_name: "{Execution}:QueuedOperations"
# The redis key of a list used to ensure reliable processing of
# ready-to-run queue entries together with operation watch
# monitoring.
# The string contained within {} must match that of
# queued_operations_list_name.
dispatching_list_name: "{Execution}:DispatchingOperations"
# A redis key prefix for operations which are being dequeued
# from the ready-to-run queue. The key is suffixed with the
# operation name and contains the expiration time in epoch
# milliseconds after which the operation is considered lost.
dispatching_prefix: "Dispatching"
# The delay in milliseconds used to populate dispatching operation
# entries
dispatching_timeout_millis: 10000
# The redis key of a hash of operation names to the worker
# lease for its execution. Entries in this hash are monitored
# by the dispatched_monitor for expiration, and the worker
# is expected to extend a lease in a timely fashion to indicate
# continued handling of an operation.
dispatched_operations_hash_name: "DispatchedOperations"
# A redis pubsub channel prefix suffixed by an operation name
# where updates and keepalives are transmitted as it makes its
# way through the various processing elements of the cluster.
operation_channel_prefix: "OperationChannel"
# A redis key prefix suffixed with a blob digest that maps to a
# set of workers which advertise that blob's availability.
# This set must be intersected with the set of active worker
# leases to be considered meaningful.
cas_prefix: "ContentAddressableStorage"
# The ttl, in seconds, maintained for cas entries. This is not
# refreshed on any read access of the blob.
cas_expire: 604800 # 1 week
# Enable an agent in the backplane client which subscribes
# to worker_channel and operation_channel events. If this is
# disabled, the responsiveness of both watchers and the CAS is
# reduced.
# When in doubt, leave this enabled.
subscribe_to_backplane: true
# Enable an agent in the backplane client which monitors
# watched operations and ensures that they are in a known,
# maintained, or expirable state.
# This field is intended to distinguish servers with active
# listeners of the backplane from workers with passive reuse
# of it.
# When in doubt, leave this enabled on servers.
run_failsafe_operation: true
# The maximum length that the ready-to-run queue is allowed
# to reach via queue invocations. Reaching this limit induces
# back-pressure on the arrival queue and is intended as a
# flow control mechanism for execution.
# Average QueuedOperation size in relation to CAS size and
# churn should influence safe values here.
max_queue_depth: 100000
# The maximum length that the arrival queue is allowed
# to reach via execute invocations. Reaching this limit results
# in Execute errors of RESOURCE_EXHAUSTED and is intended as a
# safety check for the backplane storage.
# Redis cluster storage size should influence safe values here.
max_pre_queue_depth: 1000000
}
}
}
# the listening port of the buildfarm grpc server
port: 8980
# the instance to which all requests with an empty/missing
# instance name are routed
#
# this can be empty as well, to indicate that there is no
# default instance
default_instance_name: "shard"