work
gabrielrussoc committed Nov 13, 2024
1 parent 221cdd5 commit 5a6bb1e
Showing 2 changed files with 74 additions and 26 deletions.
56 changes: 44 additions & 12 deletions container/incremental_load.sh.tpl
@@ -59,17 +59,36 @@ function import_config() {
local TAG="$1"
shift 1

local registry_output="$(mktemp)"
echo "${registry_output}" >> "${TEMP_FILES}"
"${RUNFILES}/%{registry_tool}" -- "${registry_output}" "image" "$@" &
# Start the local registry binary in the background with all the layers we want to load
local registry_stdout="$(mktemp -t 2>/dev/null || mktemp -t 'rules_docker_registry_output')"
echo "${registry_stdout}" >> "${TEMP_FILES}"
"${RUNFILES}/%{registry_tool}" > "${registry_stdout}" "$@" &
local registry_pid=$!

# If we can do that, symlinking the layer diff blobs into the containerd
# content dir is a way to skip downloading them, and then keeping the
# downloaded copy. After creating the snapshot of the image, we don't
# need the layer blobs anymore, but there's no way to prune the content
# store. As they aren't really needed, it's OK if the symlinks
# eventually dangle.
# This is an optimization that only affects systems using containerd storage, namely RBE.
# In this case, when we 'docker pull', the docker client will ask the snapshotter what to do.
# The snapshotter will either say:
# 1. I don't have this, go ahead and pull it; OR
# 2. I already have it, you don't need to pull anything.
# In case of (1.), we store the pulled data in a place called the 'content store'.
# When using sysbox docker-in-docker, this 'content store' is local to every daemon.
# Once we need to create a container, we copy the image from the 'content store' into
# the snapshotter, which is shared across all daemons.
# In the worst case, if many actions try to pull the same image on a cold snapshotter,
# we would end up with one copy of the image in every daemon's 'content store'.
# The trick: instead of actually copying the image into the content store, we just drop symlinks
# from the action inputs into the 'content store'. The docker client is smart enough to pull only
# what's missing from its content store.
# To recap:
# 1. We symlink the image layers into the local daemon's 'content store'.
# 2. We docker pull from our local registry.
# 3. The docker client asks the snapshotter whether it needs to pull the image or not.
# 4. If the snapshotter says yes, we start the pull operation.
# 5. The pull operation is a no-op due to (1.), i.e. all the layers are already present.
# 6. When we create a container, we load the image into the snapshotter.
# 7. The snapshotter dedupes images in case of race conditions.
# Once loaded in the snapshotter, we don't care about the content store anymore, so it's fine
# if those symlinks dangle.
if [[ -w "/var/lib/containerd/io.containerd.content.v1.content/blobs/sha256" ]]; then
shift 1
while test $# -gt 0
@@ -87,12 +106,25 @@ function import_config() {
done
fi

local ref=$(tail -f "${registry_output}" | head -1)
"${DOCKER}" pull "${ref}"
# Read the reference we can pull from
local ref=$(tail -f "${registry_stdout}" | head -n 1)
# Pull it
"${DOCKER}" pull "${ref}" >&2
# Kill the registry process for cleanup
kill "${registry_pid}"

# Print the id to keep compatibility with other scripts parsing this output,
# since 'docker load' used to print the sha
local image_id=$("${DOCKER}" inspect --format "{{ .Id }}" "${ref}" | awk -F'sha256:' '{print $2}')
echo sha256:${image_id}

echo "Tagging ${image_id} as ${TAG}"
"${DOCKER}" tag "${ref}" "${TAG}"
"${DOCKER}" rmi "${ref}"

# Clean up the temporary tag created by docker pull
# This DOES NOT delete the image, just the tag.
# By default, docker pull creates a tag based on the reference you pulled from.
"${DOCKER}" rmi "${ref}" >&2
}

function read_variables() {
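The hunks above implement the flow described in the comment block inside import_config: symlink the layer blobs into the containerd content store, pull from a throwaway local registry, print the image id, re-tag, and drop the temporary reference. Below is a minimal end-to-end sketch of that flow, not part of this commit; the REGISTRY_TOOL path, layer.tar.gz, config.json and my-image:latest are hypothetical placeholders, and a polling loop stands in for the script's tail -f.

#!/usr/bin/env bash
set -euo pipefail

CONTENT_DIR="/var/lib/containerd/io.containerd.content.v1.content/blobs/sha256"
REGISTRY_TOOL="./registry_tool"   # hypothetical; the real template resolves it via runfiles
LAYER="layer.tar.gz"              # hypothetical action input with a sidecar layer.tar.gz.sha256
DIGEST="$(cat "${LAYER}.sha256")" # bare hex digest

# 1. Seed the containerd content store with a symlink instead of a copy
#    (assuming containerd names blobs by their bare hex digest).
if [[ -w "${CONTENT_DIR}" && ! -e "${CONTENT_DIR}/${DIGEST}" ]]; then
  ln -s "$(realpath "${LAYER}")" "${CONTENT_DIR}/${DIGEST}" || true
fi

# 2. Start the local registry and wait for the pullable reference it prints.
"${REGISTRY_TOOL}" config.json "${LAYER}.sha256" "${LAYER}" > registry_stdout &
registry_pid=$!
until [[ -s registry_stdout ]]; do sleep 0.1; done
ref="$(head -n 1 registry_stdout)"  # e.g. 127.0.0.1:<port>/registry-<rand>.local@sha256:<digest>

# 3. Pull: layers already present in the content store are not downloaded again.
docker pull "${ref}" >&2
kill "${registry_pid}"

# 4. Re-tag under the name the rest of the tooling expects, then drop the
#    temporary reference created by the pull (the image itself is kept).
docker tag "${ref}" my-image:latest
docker rmi "${ref}" >&2

The real template additionally records the registry's stdout file in ${TEMP_FILES} for later cleanup and echoes the sha256 image id, so callers that used to parse 'docker load' output keep working.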
44 changes: 30 additions & 14 deletions container/registry_tool.py
@@ -19,6 +19,8 @@
import ssl
import sys
import tempfile
import argparse
import random

from bazel_tools.tools.python.runfiles import runfiles

@@ -85,11 +87,16 @@
MANIFEST_MEDIA_TYPE = "application/vnd.docker.distribution.manifest.v2+json"


def _generate_random_string(length):
characters = 'abcdefghijklmnopqrstuvwxyz0123456789'
return ''.join(random.choice(characters) for _ in range(length))


class DockerV2Registry:
def __init__(self, repo, config_path, *layers):
def __init__(self, config_path, *layers):
r = runfiles.Create()

self._repo = repo
self._repo_name = "registry-%s.local" % _generate_random_string(10)
self._registry_blobs = {}
self._manifest = collections.OrderedDict(
[
@@ -106,7 +113,9 @@ def __init__(self, repo, config_path, *layers):
("layers", []),
]
)
for layer_digest_path, layer_path in zip(layers[::2], layers[1::2]):
digests = layers[::2]
tarballs = layers[1::2]
for layer_digest_path, layer_path in zip(digests, tarballs):
assert layer_digest_path == layer_path + ".sha256"
blob_data = self._blob(
DIFF_MEDIA_TYPE,
@@ -133,14 +142,14 @@ def _blob(self, media_type, blob_path, digest_path):
def handler(self):
_manifest_data = self._manifest_data
_manifest_digest = self._manifest_digest
_repo = self._repo
_repo_name = self._repo_name
_registry_blobs = self._registry_blobs

class _RegistryHandler(http.server.BaseHTTPRequestHandler):
def _is_manifest(self, path):
return path in (
"/v2/%s/manifests/latest" % _repo,
"/v2/%s/manifests/%s" % (_repo, _manifest_digest),
"/v2/%s/manifests/latest" % _repo_name,
"/v2/%s/manifests/%s" % (_repo_name, _manifest_digest),
)

def _send_blob(self, head):
@@ -158,7 +167,7 @@ def _send_blob(self, head):
self.wfile.write(_manifest_data)
return

if self.path.startswith("/v2/%s/blobs/sha256:" % _repo):
if self.path.startswith("/v2/%s/blobs/sha256:" % _repo_name):
_, _, digest = self.path.rpartition("/")
if digest in _registry_blobs:
media_type, path, size = _registry_blobs[digest]
@@ -183,21 +192,28 @@ def do_GET(self):
return _RegistryHandler

def image_ref(self):
return "%s@%s" % (self._repo, self._manifest_digest)
return "%s@%s" % (self._repo_name, self._manifest_digest)


if __name__ == "__main__":
args = sys.argv[sys.argv.index("--") + 1 :]
registry = DockerV2Registry(*args[1:])
parser = argparse.ArgumentParser(
description="Simple local registry binary that allows docker clients to pull local image layers efficiently"
)
parser.add_argument('config_path', type=str, help='The path to the image config file')
parser.add_argument('layer_pairs', nargs='+', help='Layer tarballs and digests')
args = parser.parse_args()

registry = DockerV2Registry(args.config_path, *args.layer_pairs)
httpd = http.server.HTTPServer(("127.0.0.1", 0), registry.handler())
ctx = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
with tempfile.NamedTemporaryFile() as certfile:
certfile.write(SSL_CERT)
certfile.flush()
ctx.load_cert_chain(certfile=certfile.name)
httpd.socket = ctx.wrap_socket(httpd.socket, server_side=True)
with open(args[0], "a") as f:
f.write(
"%s/%s\n" % ("%s:%s" % httpd.socket.getsockname(), registry.image_ref())
)

address_with_port = "%s:%s" % httpd.socket.getsockname()
pullable_image = "%s/%s" % (address_with_port, registry.image_ref())
print(pullable_image, flush=True)

httpd.serve_forever()
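For context, DockerV2Registry serves just enough of the Docker Registry V2 HTTP API for 'docker pull' to work: a manifest endpoint under the randomly generated registry-<rand>.local repository name, a blob endpoint keyed by sha256 digest, TLS with the bundled self-signed certificate, and a single pullable reference printed on stdout. The following is a hedged sketch of exercising those endpoints by hand, not part of this commit; REGISTRY_TOOL, config.json and layer.tar.gz are hypothetical placeholders, and curl's -k flag is needed because of the self-signed certificate.

REGISTRY_TOOL="./registry_tool"   # hypothetical invocation of the tool
"${REGISTRY_TOOL}" config.json layer.tar.gz.sha256 layer.tar.gz > registry_stdout &
registry_pid=$!
until [[ -s registry_stdout ]]; do sleep 0.1; done
ref="$(head -n 1 registry_stdout)"

host_port="${ref%%/*}"            # 127.0.0.1:<port>
repo_at_digest="${ref#*/}"        # registry-<rand>.local@sha256:<manifest-digest>
repo="${repo_at_digest%%@*}"
digest="${repo_at_digest#*@}"

# Manifest endpoint: served for both "latest" and the manifest digest.
curl -k -H "Accept: application/vnd.docker.distribution.manifest.v2+json" \
  "https://${host_port}/v2/${repo}/manifests/${digest}"

# Blob endpoint: the config and layer blobs are served by sha256 digest
# (substitute a digest listed in the manifest above).
curl -k -o layer.blob "https://${host_port}/v2/${repo}/blobs/sha256:<layer-digest>"

kill "${registry_pid}"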
