# examples/shard-server.config.example

# an instance specification
instances {
  name: "shard"

  # the digest function for this instance, required
  # to match out of band between the client and server
  # since resource names must be determined on the client
  # for a valid upload
  digest_function: SHA256

  # the implicit type specifier for this instance.
  # a shard instance is a cooperative member of a cluster
  # that communicates over a backplane for its operation.
  # a shard instance implies a backend-retained ActionCache,
  # Operation store, and CAS index and registration, with
  # Workers representing egalitarian shards of the CAS.
  shard_instance_config: {
    # Enable an agent in this instance to monitor the
    # Operation store to ensure that dispatched Operations
    # with expired worker leases are requeued
    # At least one agent within the cluster must exist to
    # ensure that worker leases on operations are handled.
    # While not exclusive, it is safe to run the monitor
    # on multiple instances concurrently.
    run_dispatched_monitor: true

    # The interval at which to run the dispatch monitor's
    # lease expiration check
    dispatched_monitor_interval_seconds: 1

    # Enable an agent in this instance to acquire Execute
    # request entries cooperatively from an arrival queue
    # on the backplane.
    # If true, this instance will validate and transform
    # an arrival queue ExecuteRequest as an ExecuteEntry into
    # a QueueEntry with a heavyweight ready-to-run QueuedOperation
    # available in the CAS via a fixed size.
    # At least one agent within the cluster must exist to
    # bring operations from the arrival queue to the ready-to-run queue,
    # or no operation execution will take place.
    # The operation queuer is not exclusive: entries are acquired
    # cooperatively, so it is safe to run the queuer on multiple
    # instances concurrently.
    run_operation_queuer: true

    # The maximum size of a single blob accepted via a
    # ByteStream#write or ContentAddressableStorage#batchUpdateBlobs.
    # To meet API standards, a request which exceeds this size receives
    # a transient UNAVAILABLE response, which, in bazel's case, induces
    # a fallback to non-remote recovery, rather than a catastrophic
    # failure.
    max_blob_size: 4294967296

    # A backplane specification hosted with redis cluster
    redis_shard_backplane_config: {
      # The URI of the redis cluster endpoint. This must
      # be a single URI, regardless of the layout of the cluster
      redis_uri: "redis://localhost:6379"

      # The size of the redis connection pool
      jedis_pool_max_total: 4000

      # The redis key used to store a hash of registered Workers
      # to their registration expiration time. After a worker's
      # registration has expired, it is no longer considered
      # a shard of the CAS.
      workers_hash_name: "Workers"

      # A redis pubsub channel key where changes to the cluster
      # membership are announced
      worker_channel: "WorkerChannel"

      # A redis key prefix for all ActionCache entries, suffixed
      # with the action's key and mapping to an ActionResult
      action_cache_prefix: "ActionCache"

      # The ttl maintained for ActionCache entries. This is not
      # refreshed on getActionResult hit
      action_cache_expire: 2419200 # 4 weeks

      # A redis key prefix for all blacklisted actions, suffixed
      # with the action's key hash. An action which is blacklisted
      # should be rejected for all requests where it is identified via
      # its RequestMetadata
      # To meet API standards, a request which matches this condition
      # receives a transient UNAVAILABLE response, which, in bazel's
      # case, can induce a fallback to non-remote recovery, rather
      # than a catastrophic failure.
      action_blacklist_prefix: "ActionBlacklist"

      # The ttl maintained for action_blacklist entries.
      action_blacklist_expire: 3600 # 1 hour

      # A redis key prefix for all Operations, suffixed with the
      # operation's name and mapping to an Operation which reflects
      # the cluster perceived state of that Operation
      operation_prefix: "Operation"

      # The ttl maintained for all Operations, updated on each
      # modification
      operation_expire: 604800 # 1 week

      # The redis key used to store a list of ExecuteEntrys
      # awaiting transformation into QueueEntrys. These are queued
      # by an instance which receives an ExecuteRequest, and
      # dequeued by an operation_queuer agent.
      # Redis keyspace manipulation is used here to support multi-
      # key commands.
      # The string contained within {} must match that of
      # processing_list_name.
      pre_queued_operations_list_name: "{Arrival}:PreQueuedOperations"

      # The redis key of a list used to ensure reliable processing of
      # arrival queue entries together with operation watch monitoring.
      # The string contained within {} must match that of
      # pre_queued_operations_list_name.
      processing_list_name: "{Arrival}:ProcessingOperations"

      # A redis key prefix for operations which are being dequeued
      # from the arrival queue. The key is suffixed with the operation
      # name and contains the expiration time in epoch milliseconds
      # after which the operation is considered lost.
      processing_prefix: "Processing"

      # The delay in milliseconds used to populate processing operation
      # entries
      processing_timeout_millis: 10000

      # The redis key used to store a list of QueueEntrys
      # awaiting execution by workers. These are queued
      # by an operation_queuer agent, and dequeued by a worker.
      # Redis keyspace manipulation is used here to support multi-
      # key commands.
      # The string contained within {} must match that of
      # dispatching_list_name.
      queued_operations_list_name: "{Execution}:QueuedOperations"

      # The redis key of a list used to ensure reliable processing of
      # ready-to-run queue entries together with operation watch
      # monitoring.
      # The string contained within {} must match that of
      # queued_operations_list_name.
      dispatching_list_name: "{Execution}:DispatchingOperations"

      # A redis key prefix for operations which are being dequeued
      # from the ready-to-run queue. The key is suffixed with the
      # operation name and contains the expiration time in epoch
      # milliseconds after which the operation is considered lost.
      dispatching_prefix: "Dispatching"

      # The delay in milliseconds used to populate dispatching operation
      # entries
      dispatching_timeout_millis: 10000

      # The redis key of a hash of operation names to the worker
      # lease for its execution. Entries in this hash are monitored
      # by the dispatched_monitor for expiration, and the worker
      # is expected to extend a lease in a timely fashion to indicate
      # continued handling of an operation.
      dispatched_operations_hash_name: "DispatchedOperations"

      # A redis pubsub channel prefix suffixed by an operation name
      # where updates and keepalives are transmitted as it makes its
      # way through the various processing elements of the cluster.
      operation_channel_prefix: "OperationChannel"

      # A redis key prefix suffixed with a blob digest that maps to a
      # set of workers which advertise that blob's availability.
      # This set must be intersected with the set of active worker
      # leases to be considered meaningful.
      cas_prefix: "ContentAddressableStorage"

      # The ttl maintained for cas entries. This is not refreshed on
      # any read access of the blob.
      cas_expire: 604800 # 1 week

      # Enable an agent in the backplane client which subscribes
      # to worker_channel and operation_channel events. If this is
      # disabled, the responsiveness of watchers and of the CAS is
      # reduced.
      # When in doubt, leave this enabled.
      subscribe_to_backplane: true

      # Enable an agent in the backplane client which monitors
      # watched operations and ensures that they are in a known,
      # maintained, or expirable state.
      # This field is intended to distinguish servers with active
      # listeners of the backplane from workers with passive reuse
      # of it.
      # When in doubt, leave this enabled on servers.
      run_failsafe_operation: true

      # The maximum length that the ready-to-run queue is allowed
      # to reach via queue invocations. Reaching this limit induces
      # back-pressure on the arrival queue and is intended as a
      # flow control mechanism for execution.
      # Average QueuedOperation size in relation to CAS size and
      # churn should influence safe values here.
      max_queue_depth: 100000

      # The maximum length that the arrival queue is allowed
      # to reach via execute invocations. Reaching this limit results
      # in Execute errors of RESOURCE_EXHAUSTED and is intended as a
      # safety check for the backplane storage.
      # Redis cluster storage size should influence safe values here.
      max_pre_queue_depth: 1000000
    }
  }
}

# the listening port of the buildfarm grpc server
port: 8980

# the instance to which all requests with an empty/missing
# instance name are routed
#
# this can be empty as well, to indicate that there is no
# default instance
default_instance_name: "shard"