Skip to content

Commit 167b9b9

Browse files
authored
Simplifies check whether the CI image should be rebuilt (apache#12181)
Rather than counting changed layers in the image (which was enigmatic, difficult and prone to some magic number) we rely now on random file generated while building the image. We are using the docker image caching mechanism here. The random file will be regenerated only when the previous layer (which is about installling Airflow dependencies for the first time) gets rebuild. And for us this is the indication, that the building the image will take quite some time. This layer should be relatively static - even if setup.py changes the CI image is designed in the way that the first time installation of Airflow dependencies is not invalidated. This should lead to faster and less frequent rebuild for people using Breeze and static checks.
1 parent 4e362c1 commit 167b9b9

File tree

5 files changed

+103
-90
lines changed

5 files changed

+103
-90
lines changed

Dockerfile.ci

+8-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,9 @@ ARG RUNTIME_APT_DEPS="\
132132
sqlite3 \
133133
tmux \
134134
unzip \
135-
vim"
135+
vim \
136+
xxd"
137+
ENV RUNTIME_APT_DEP=${RUNTIME_APT_DEPS}
136138

137139
ARG ADDITIONAL_RUNTIME_APT_DEPS=""
138140
ENV ADDITIONAL_RUNTIME_APT_DEPS=${ADDITIONAL_RUNTIME_APT_DEPS}
@@ -278,6 +280,11 @@ RUN if [[ ${AIRFLOW_PRE_CACHED_PIP_PACKAGES} == "true" ]]; then \
278280
--constraint "${AIRFLOW_CONSTRAINTS_LOCATION}" && pip uninstall --yes apache-airflow; \
279281
fi
280282

283+
# Generate random hex dump file so that we can determine whether it's faster to rebuild the image
284+
# using current cache (when our dump is the same as the remote onb) or better to pull
285+
# the new image (when it is different)
286+
RUN head -c 30 /dev/urandom | xxd -ps >/build-cache-hash
287+
281288
# Link dumb-init for backwards compatibility (so that older images also work)
282289
RUN ln -sf /usr/bin/dumb-init /usr/local/bin/dumb-init
283290

breeze

+2-4
Original file line numberDiff line numberDiff line change
@@ -3040,11 +3040,9 @@ function breeze::run_build_command() {
30403040
build_images::prepare_prod_build
30413041
build_images::build_prod_images
30423042
else
3043+
30433044
build_images::prepare_ci_build
3044-
md5sum::calculate_md5sum_for_all_files
3045-
build_images::build_ci_image
3046-
md5sum::update_all_md5
3047-
build_images::build_ci_image_manifest
3045+
build_images::rebuild_ci_image_if_needed
30483046
fi
30493047
;;
30503048
cleanup_image | run_exec)

manifests/.gitignore

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
*.json
1+
*

scripts/ci/libraries/_build_images.sh

+80-83
Original file line numberDiff line numberDiff line change
@@ -136,10 +136,18 @@ function build_images::confirm_image_rebuild() {
136136
;;
137137
esac
138138
elif [[ -t 0 ]]; then
139+
echo
140+
echo
141+
echo "Make sure that you rebased to latest master before rebuilding!"
142+
echo
139143
# Check if this script is run interactively with stdin open and terminal attached
140144
"${AIRFLOW_SOURCES}/confirm" "${ACTION} image ${THE_IMAGE_TYPE}-python${PYTHON_MAJOR_MINOR_VERSION}"
141145
RES=$?
142146
elif [[ ${DETECTED_TERMINAL:=$(tty)} != "not a tty" ]]; then
147+
echo > "${DETECTED_TERMINAL}"
148+
echo > "${DETECTED_TERMINAL}"
149+
echo "Make sure that you rebased to latest master before rebuilding!" > "${DETECTED_TERMINAL}"
150+
echo > "${DETECTED_TERMINAL}"
143151
# Make sure to use output of tty rather than stdin/stdout when available - this way confirm
144152
# will works also in case of pre-commits (git does not pass stdin/stdout to pre-commit hooks)
145153
# shellcheck disable=SC2094
@@ -151,6 +159,10 @@ function build_images::confirm_image_rebuild() {
151159
export DETECTED_TERMINAL=/dev/tty
152160
# Make sure to use /dev/tty first rather than stdin/stdout when available - this way confirm
153161
# will works also in case of pre-commits (git does not pass stdin/stdout to pre-commit hooks)
162+
echo > "${DETECTED_TERMINAL}"
163+
echo > "${DETECTED_TERMINAL}"
164+
echo "Make sure that you rebased to latest master before rebuilding!" > "${DETECTED_TERMINAL}"
165+
echo > "${DETECTED_TERMINAL}"
154166
# shellcheck disable=SC2094
155167
"${AIRFLOW_SOURCES}/confirm" "${ACTION} image ${THE_IMAGE_TYPE}-python${PYTHON_MAJOR_MINOR_VERSION}" \
156168
<"${DETECTED_TERMINAL}" >"${DETECTED_TERMINAL}"
@@ -193,123 +205,106 @@ function build_images::confirm_image_rebuild() {
193205
# We cannot use docker registry APIs as they are available only with authorisation
194206
# But this image can be pulled without authentication
195207
function build_images::build_ci_image_manifest() {
196-
docker inspect "${AIRFLOW_CI_IMAGE}" >"manifests/${AIRFLOW_CI_BASE_TAG}.json"
197208
docker build \
198-
--build-arg AIRFLOW_CI_BASE_TAG="${AIRFLOW_CI_BASE_TAG}" \
199209
--tag="${AIRFLOW_CI_LOCAL_MANIFEST_IMAGE}" \
200210
-f- . <<EOF
201-
ARG AIRFLOW_CI_BASE_TAG
202211
FROM scratch
203212
204-
COPY "manifests/${AIRFLOW_CI_BASE_TAG}.json" .
213+
COPY "manifests/local-build-cache-hash" /build-cache-hash
205214
206215
CMD ""
207216
EOF
208217
}
209218

210219
#
211-
# Retrieves information about layers in the local IMAGE
212-
# it stores list of SHAs of image layers in the file pointed at by TMP_MANIFEST_LOCAL_SHA
220+
# Retrieves information about build cache hash random file from the local image
213221
#
214-
function build_images::get_local_image_info() {
215-
TMP_MANIFEST_LOCAL_JSON=$(mktemp)
216-
TMP_MANIFEST_LOCAL_SHA=$(mktemp)
222+
function build_images::get_local_build_cache_hash() {
223+
217224
set +e
218225
# Remove the container just in case
219-
docker rm --force "local-airflow-manifest" 2>/dev/null >/dev/null
220-
# Create manifest from the local manifest image
221-
if ! docker create --name "local-airflow-manifest" "${AIRFLOW_CI_LOCAL_MANIFEST_IMAGE}" 2>/dev/null; then
222-
echo
223-
echo "Local manifest image not available"
224-
echo
226+
docker rm --force "local-airflow-ci-container" 2>/dev/null >/dev/null
227+
if ! docker create --name "local-airflow-ci-container" "${AIRFLOW_CI_IMAGE}" 2>/dev/null; then
228+
>&2 echo
229+
>&2 echo "Local airflow CI image not available"
230+
>&2 echo
225231
LOCAL_MANIFEST_IMAGE_UNAVAILABLE="true"
232+
export LOCAL_MANIFEST_IMAGE_UNAVAILABLE
233+
touch "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}"
226234
return
227235
fi
228-
# Create manifest from the local manifest image
229-
docker cp "local-airflow-manifest:${AIRFLOW_CI_BASE_TAG}.json" "${TMP_MANIFEST_LOCAL_JSON}"
230-
sed 's/ *//g' "${TMP_MANIFEST_LOCAL_JSON}" | grep '^"sha256:' >"${TMP_MANIFEST_LOCAL_SHA}"
231-
docker rm --force "local-airflow-manifest" 2>/dev/null >/dev/null
236+
docker cp "local-airflow-ci-container:/build-cache-hash" \
237+
"${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}" 2> /dev/null \
238+
|| touch "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}"
232239
set -e
240+
echo
241+
echo "Local build cache hash: '$(cat "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}")'"
242+
echo
233243
}
234244

235-
#
236-
# Retrieves information about layers in the remote IMAGE
237-
# it stores list of SHAs of image layers in the file pointed at by TMP_MANIFEST_REMOTE_SHA
238-
# This cannot be done easily with existing APIs of Dockerhub because they require additional authentication
239-
# even for public images. Therefore instead we are downloading a specially prepared manifest image
240-
# which is built together with the main image. This special manifest image is prepared during
241-
# building of the main image and contains single JSON file being result of docker inspect on that image
242-
# This image is from scratch so it is very tiny
243-
function build_images::get_remote_image_info() {
245+
# Retrieves information about the build cache hash random file from the remote image.
246+
# We actually use manifest image for that, which is a really, really small image to pull!
247+
# The problem is that inspecting information about remote image cannot be done easily with existing APIs
248+
# of Dockerhub because they require additional authentication even for public images.
249+
# Therefore instead we are downloading a specially prepared manifest image
250+
# which is built together with the main image and pushed with it. This special manifest image is prepared
251+
# during building of the main image and contains single file which is randomly built during the docker
252+
# build in the right place in the image (right after installing all dependencies of Apache Airflow
253+
# for the first time). When this random file gets regenerated it means that either base image has
254+
# changed or some of the earlier layers was modified - which means that it is usually faster to pull
255+
# that image first and then rebuild it - because this will likely be faster
256+
function build_images::get_remote_image_build_cache_hash() {
244257
set +e
245258
# Pull remote manifest image
246259
if ! docker pull "${AIRFLOW_CI_REMOTE_MANIFEST_IMAGE}" 2>/dev/null >/dev/null; then
247-
echo >&2
248-
echo >&2 "Remote docker registry unreachable"
249-
echo >&2
260+
>&2 echo
261+
>&2 echo "Remote docker registry unreachable"
262+
>&2 echo
250263
REMOTE_DOCKER_REGISTRY_UNREACHABLE="true"
264+
export REMOTE_DOCKER_REGISTRY_UNREACHABLE
265+
touch "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}"
251266
return
252267
fi
253268
set -e
254-
255-
# Docker needs the file passed to --cidfile to not exist, so we can't use mktemp
256-
TMP_CONTAINER_DIR="$(mktemp -d)"
257-
TMP_CONTAINER_ID="${TMP_CONTAINER_DIR}/remote-airflow-manifest-$$.container_id"
258-
FILES_TO_CLEANUP_ON_EXIT+=("$TMP_CONTAINER_ID")
259-
260-
TMP_MANIFEST_REMOTE_JSON=$(mktemp)
261-
TMP_MANIFEST_REMOTE_SHA=$(mktemp)
262-
# Create container out of the manifest image without running it
263-
docker create --cidfile "${TMP_CONTAINER_ID}" "${AIRFLOW_CI_REMOTE_MANIFEST_IMAGE}" 2>/dev/null >/dev/null
269+
# Create container dump out of the manifest image without actually running it
270+
docker create --cidfile "${REMOTE_IMAGE_CONTAINER_ID_FILE}" "${AIRFLOW_CI_REMOTE_MANIFEST_IMAGE}" \
271+
2>/dev/null >/dev/null || true
264272
# Extract manifest and store it in local file
265-
docker cp "$(cat "${TMP_CONTAINER_ID}"):${AIRFLOW_CI_BASE_TAG}.json" "${TMP_MANIFEST_REMOTE_JSON}"
266-
# Filter everything except SHAs of image layers
267-
sed 's/ *//g' "${TMP_MANIFEST_REMOTE_JSON}" | grep '^"sha256:' >"${TMP_MANIFEST_REMOTE_SHA}"
268-
docker rm --force "$(cat "${TMP_CONTAINER_ID}")" 2>/dev/null >/dev/null
273+
docker cp "$(cat "${REMOTE_IMAGE_CONTAINER_ID_FILE}"):/build-cache-hash" \
274+
"${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}" 2> /dev/null \
275+
|| touch "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}"
276+
docker rm --force "$(cat "${REMOTE_IMAGE_CONTAINER_ID_FILE}")" 2>/dev/null || true
277+
echo
278+
echo "Remote build cache hash: '$(cat "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}")'"
279+
echo
269280
}
270281

271-
# The Number determines the cut-off between local building time and pull + build time.
272-
# It is a bit experimental and it will have to be kept
273-
# updated as we keep on changing layers. The cut-off point is at the moment when we do first
274-
# pip install "https://github.com/apache/airflow/archive/${AIRFLOW_BRANCH}.tar...
275-
# you can get it via this command:
276-
# docker history --no-trunc apache/airflow:master-python3.6-ci | \
277-
# grep ^sha256 | grep -n "pip uninstall" | awk 'BEGIN {FS=":"} {print $1 }'
278-
#
279-
# This command returns the number of layer in docker history where pip uninstall is called. This is the
280-
# line that will take a lot of time to run and at this point it's worth to pull the image from repo
281-
# if there are at least NN changed layers in your docker file, you should pull the image.
282-
#
283-
# Note that this only matters if you have any of the important files changed since the last build
284-
# of your image such as Dockerfile.ci, setup.py etc.
285-
#
286-
MAGIC_CUT_OFF_NUMBER_OF_LAYERS=36
287-
288282
# Compares layers from both remote and local image and set FORCE_PULL_IMAGES to true in case
289283
# More than the last NN layers are different.
290-
function build_images::compare_layers() {
291-
NUM_DIFF=$(diff -y --suppress-common-lines "${TMP_MANIFEST_REMOTE_SHA}" "${TMP_MANIFEST_LOCAL_SHA}" |
292-
wc -l || true)
293-
rm -f "${TMP_MANIFEST_REMOTE_JSON}" "${TMP_MANIFEST_REMOTE_SHA}" "${TMP_MANIFEST_LOCAL_JSON}" "${TMP_MANIFEST_LOCAL_SHA}"
294-
echo
295-
echo "Number of layers different between the local and remote image: ${NUM_DIFF}"
296-
echo
297-
# This is where setup py is rebuilt - it will usually take a looooot of time to build it, so it is
298-
# Better to pull here
299-
if ((NUM_DIFF >= MAGIC_CUT_OFF_NUMBER_OF_LAYERS)); then
284+
function build_images::compare_local_and_remote_build_cache_hash() {
285+
set +e
286+
local remote_hash
287+
remote_hash=$(cat "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}")
288+
local local_hash
289+
local_hash=$(cat "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}")
290+
291+
if [[ ${remote_hash} != "${local_hash}" ||
292+
${local_hash} == "" ]]; then
300293
echo
301294
echo
302-
echo "WARNING! Your image and the dockerhub image differ significantly"
295+
echo "Your image and the dockerhub have different or missing build cache hashes."
296+
echo "Local hash: '${local_hash}'. Remote hash: '${remote_hash}'."
303297
echo
304298
echo "Forcing pulling the images. It will be faster than rebuilding usually."
305299
echo "You can avoid it by setting SKIP_CHECK_REMOTE_IMAGE to true"
306300
echo
307301
export FORCE_PULL_IMAGES="true"
308302
else
309303
echo
310-
echo "No need to pull the image. Local rebuild will be faster"
304+
echo "No need to pull the image. Yours and remote cache hashes are the same!"
311305
echo
312306
fi
307+
set -e
313308
}
314309

315310
# Prints summary of the build parameters
@@ -335,7 +330,6 @@ function build_images::get_docker_image_names() {
335330
# CI image to build
336331
export AIRFLOW_CI_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_CI_BASE_TAG}"
337332

338-
339333
# Base production image tag - used to build kubernetes tag as well
340334
if [[ ${FORCE_AIRFLOW_PROD_BASE_TAG=} == "" ]]; then
341335
export AIRFLOW_PROD_BASE_TAG="${BRANCH_NAME}-python${PYTHON_MAJOR_MINOR_VERSION}"
@@ -396,6 +390,9 @@ function build_images::prepare_ci_build() {
396390
# In case rebuild is needed, it determines (by comparing layers in local and remote image)
397391
# Whether pull is needed before rebuild.
398392
function build_images::rebuild_ci_image_if_needed() {
393+
verbosity::print_info
394+
verbosity::print_info "Checking if pull or just build for ${THE_IMAGE_TYPE} is needed."
395+
verbosity::print_info
399396
if [[ -f "${BUILT_CI_IMAGE_FLAG_FILE}" ]]; then
400397
verbosity::print_info
401398
verbosity::print_info "${THE_IMAGE_TYPE} image already built locally."
@@ -418,6 +415,7 @@ function build_images::rebuild_ci_image_if_needed() {
418415

419416
local needs_docker_build="false"
420417
md5sum::check_if_docker_build_is_needed
418+
build_images::get_local_build_cache_hash
421419
if [[ ${needs_docker_build} == "true" ]]; then
422420
if [[ ${SKIP_CHECK_REMOTE_IMAGE:=} != "true" && ${DOCKER_CACHE} == "pulled" ]]; then
423421
# Check if remote image is different enough to force pull
@@ -427,14 +425,12 @@ function build_images::rebuild_ci_image_if_needed() {
427425
echo
428426
echo "Checking if the remote image needs to be pulled"
429427
echo
430-
build_images::get_remote_image_info
431-
if [[ ${REMOTE_DOCKER_REGISTRY_UNREACHABLE:=} != "true" ]]; then
432-
build_images::get_local_image_info
433-
if [[ ${LOCAL_MANIFEST_IMAGE_UNAVAILABLE:=} != "true" ]]; then
434-
build_images::compare_layers
435-
else
436-
FORCE_PULL_IMAGES="true"
437-
fi
428+
build_images::get_remote_image_build_cache_hash
429+
if [[ ${REMOTE_DOCKER_REGISTRY_UNREACHABLE:=} != "true" && \
430+
${LOCAL_MANIFEST_IMAGE_UNAVAILABLE:=} != "true" ]]; then
431+
build_images::compare_local_and_remote_build_cache_hash
432+
else
433+
FORCE_PULL_IMAGES="true"
438434
fi
439435
fi
440436
SKIP_REBUILD="false"
@@ -453,6 +449,7 @@ function build_images::rebuild_ci_image_if_needed() {
453449
verbosity::print_info "Build start: ${THE_IMAGE_TYPE} image."
454450
verbosity::print_info
455451
build_images::build_ci_image
452+
build_images::get_local_build_cache_hash
456453
md5sum::update_all_md5
457454
build_images::build_ci_image_manifest
458455
verbosity::print_info

scripts/ci/libraries/_initialization.sh

+12-1
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,12 @@ function initialization::initialize_test_variables() {
458458
export TEST_TYPE=${TEST_TYPE:=""}
459459
}
460460

461+
function initialization::initialize_build_image_variables() {
462+
REMOTE_IMAGE_CONTAINER_ID_FILE="${AIRFLOW_SOURCES}/manifests/remote-airflow-manifest-image"
463+
LOCAL_IMAGE_BUILD_CACHE_HASH_FILE="${AIRFLOW_SOURCES}/manifests/local-build-cache-hash"
464+
REMOTE_IMAGE_BUILD_CACHE_HASH_FILE="${AIRFLOW_SOURCES}/manifests/remote-build-cache-hash"
465+
}
466+
461467
# Common environment that is initialized by both Breeze and CI scripts
462468
function initialization::initialize_common_environment() {
463469
initialization::create_directories
@@ -475,6 +481,7 @@ function initialization::initialize_common_environment() {
475481
initialization::initialize_git_variables
476482
initialization::initialize_github_variables
477483
initialization::initialize_test_variables
484+
initialization::initialize_build_image_variables
478485
}
479486

480487
function initialization::set_default_python_version_if_empty() {
@@ -727,6 +734,10 @@ function initialization::make_constants_read_only() {
727734
readonly BUILT_CI_IMAGE_FLAG_FILE
728735
readonly INIT_SCRIPT_FILE
729736

737+
readonly REMOTE_IMAGE_CONTAINER_ID_FILE
738+
readonly LOCAL_IMAGE_BUILD_CACHE_HASH_FILE
739+
readonly REMOTE_IMAGE_BUILD_CACHE_HASH_FILE
740+
730741
}
731742

732743
# converts parameters to json array
@@ -749,6 +760,6 @@ function initialization::ga_output() {
749760

750761
function initialization::ga_env() {
751762
if [[ ${GITHUB_ENV=} != "" ]]; then
752-
echo "${1}=${2}" >> "${GITHUB_ENV}"
763+
echo "${1}=${2}" >>"${GITHUB_ENV}"
753764
fi
754765
}

0 commit comments

Comments
 (0)