@@ -136,10 +136,18 @@ function build_images::confirm_image_rebuild() {
             ;;
         esac
     elif [[ -t 0 ]]; then
+        echo
+        echo
+        echo "Make sure that you rebased to latest master before rebuilding!"
+        echo
         # Check if this script is run interactively with stdin open and terminal attached
         "${AIRFLOW_SOURCES}/confirm" "${ACTION} image ${THE_IMAGE_TYPE}-python${PYTHON_MAJOR_MINOR_VERSION}"
         RES=$?
     elif [[ ${DETECTED_TERMINAL:=$(tty)} != "not a tty" ]]; then
+        echo > "${DETECTED_TERMINAL}"
+        echo > "${DETECTED_TERMINAL}"
+        echo "Make sure that you rebased to latest master before rebuilding!" > "${DETECTED_TERMINAL}"
+        echo > "${DETECTED_TERMINAL}"
         # Make sure to use output of tty rather than stdin/stdout when available - this way confirm
         # will works also in case of pre-commits (git does not pass stdin/stdout to pre-commit hooks)
         # shellcheck disable=SC2094
@@ -151,6 +159,10 @@ function build_images::confirm_image_rebuild() {
         export DETECTED_TERMINAL=/dev/tty
         # Make sure to use /dev/tty first rather than stdin/stdout when available - this way confirm
         # will works also in case of pre-commits (git does not pass stdin/stdout to pre-commit hooks)
+        echo > "${DETECTED_TERMINAL}"
+        echo > "${DETECTED_TERMINAL}"
+        echo "Make sure that you rebased to latest master before rebuilding!" > "${DETECTED_TERMINAL}"
+        echo > "${DETECTED_TERMINAL}"
         # shellcheck disable=SC2094
         "${AIRFLOW_SOURCES}/confirm" "${ACTION} image ${THE_IMAGE_TYPE}-python${PYTHON_MAJOR_MINOR_VERSION}" \
             < "${DETECTED_TERMINAL}" > "${DETECTED_TERMINAL}"
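
Both branches above rely on the same terminal-detection trick: when stdin/stdout are not attached (as happens under git pre-commit hooks), the confirmation prompt talks to the controlling terminal directly. Below is a minimal, hedged sketch of that pattern; the `ask_on_terminal` helper is made up for illustration, and only the `tty` / `/dev/tty` fallback mirrors what `confirm_image_rebuild` does.

#!/usr/bin/env bash
# Hedged sketch: ask a yes/no question even when stdin/stdout are redirected.
function ask_on_terminal() {
    local question="${1}"
    local answer=""
    if [[ -t 0 ]]; then
        # stdin is already a terminal - ask directly
        read -r -p "${question} [y/N] " answer
    elif [[ ${DETECTED_TERMINAL:=$(tty)} != "not a tty" ]]; then
        # a terminal path was detected (or passed in via DETECTED_TERMINAL)
        read -r -p "${question} [y/N] " answer <"${DETECTED_TERMINAL}" >"${DETECTED_TERMINAL}"
    elif [[ -c /dev/tty ]]; then
        # last resort used under pre-commit hooks: talk to /dev/tty directly
        read -r -p "${question} [y/N] " answer </dev/tty >/dev/tty
    else
        # no terminal at all - refuse rather than hang waiting for input
        return 2
    fi
    [[ ${answer} =~ ^[yY] ]]
}

ask_on_terminal "Rebuild the CI image" && echo "rebuilding" || echo "skipped"
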
@@ -193,123 +205,106 @@ function build_images::confirm_image_rebuild() {
 # We cannot use docker registry APIs as they are available only with authorisation
 # But this image can be pulled without authentication
 function build_images::build_ci_image_manifest() {
-    docker inspect "${AIRFLOW_CI_IMAGE}" > "manifests/${AIRFLOW_CI_BASE_TAG}.json"
     docker build \
-        --build-arg AIRFLOW_CI_BASE_TAG="${AIRFLOW_CI_BASE_TAG}" \
         --tag="${AIRFLOW_CI_LOCAL_MANIFEST_IMAGE}" \
         -f- . <<EOF
-ARG AIRFLOW_CI_BASE_TAG
 FROM scratch
 
-COPY "manifests/${AIRFLOW_CI_BASE_TAG}.json" .
+COPY "manifests/local-build-cache-hash" /build-cache-hash
 
 CMD ""
 EOF
 }
 
 #
-# Retrieves information about layers in the local IMAGE
-# it stores list of SHAs of image layers in the file pointed at by TMP_MANIFEST_LOCAL_SHA
+# Retrieves information about build cache hash random file from the local image
 #
-function build_images::get_local_image_info() {
-    TMP_MANIFEST_LOCAL_JSON=$(mktemp)
-    TMP_MANIFEST_LOCAL_SHA=$(mktemp)
+function build_images::get_local_build_cache_hash() {
+
     set +e
     # Remove the container just in case
-    docker rm --force "local-airflow-manifest" 2>/dev/null >/dev/null
-    # Create manifest from the local manifest image
-    if ! docker create --name "local-airflow-manifest" "${AIRFLOW_CI_LOCAL_MANIFEST_IMAGE}" 2>/dev/null; then
-        echo
-        echo "Local manifest image not available"
-        echo
+    docker rm --force "local-airflow-ci-container" 2>/dev/null >/dev/null
+    if ! docker create --name "local-airflow-ci-container" "${AIRFLOW_CI_IMAGE}" 2>/dev/null; then
+        >&2 echo
+        >&2 echo "Local airflow CI image not available"
+        >&2 echo
         LOCAL_MANIFEST_IMAGE_UNAVAILABLE="true"
+        export LOCAL_MANIFEST_IMAGE_UNAVAILABLE
+        touch "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}"
         return
     fi
-    # Create manifest from the local manifest image
-    docker cp "local-airflow-manifest:${AIRFLOW_CI_BASE_TAG}.json" "${TMP_MANIFEST_LOCAL_JSON}"
-    sed 's/ *//g' "${TMP_MANIFEST_LOCAL_JSON}" | grep '^"sha256:' > "${TMP_MANIFEST_LOCAL_SHA}"
-    docker rm --force "local-airflow-manifest" 2>/dev/null >/dev/null
+    docker cp "local-airflow-ci-container:/build-cache-hash" \
+        "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}" 2>/dev/null \
+        || touch "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}"
     set -e
+    echo
+    echo "Local build cache hash: '$(cat "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}")'"
+    echo
 }
 
-#
-# Retrieves information about layers in the remote IMAGE
-# it stores list of SHAs of image layers in the file pointed at by TMP_MANIFEST_REMOTE_SHA
-# This cannot be done easily with existing APIs of Dockerhub because they require additional authentication
-# even for public images. Therefore instead we are downloading a specially prepared manifest image
-# which is built together with the main image. This special manifest image is prepared during
-# building of the main image and contains single JSON file being result of docker inspect on that image
-# This image is from scratch so it is very tiny
-function build_images::get_remote_image_info() {
+# Retrieves information about the build cache hash random file from the remote image.
+# We actually use manifest image for that, which is a really, really small image to pull!
+# The problem is that inspecting information about remote image cannot be done easily with existing APIs
+# of Dockerhub because they require additional authentication even for public images.
+# Therefore instead we are downloading a specially prepared manifest image
+# which is built together with the main image and pushed with it. This special manifest image is prepared
+# during building of the main image and contains single file which is randomly built during the docker
+# build in the right place in the image (right after installing all dependencies of Apache Airflow
+# for the first time). When this random file gets regenerated it means that either base image has
+# changed or some of the earlier layers was modified - which means that it is usually faster to pull
+# that image first and then rebuild it - because this will likely be faster
+function build_images::get_remote_image_build_cache_hash() {
     set +e
     # Pull remote manifest image
     if ! docker pull "${AIRFLOW_CI_REMOTE_MANIFEST_IMAGE}" 2>/dev/null >/dev/null; then
-        echo >&2
-        echo >&2 "Remote docker registry unreachable"
-        echo >&2
+        >&2 echo
+        >&2 echo "Remote docker registry unreachable"
+        >&2 echo
         REMOTE_DOCKER_REGISTRY_UNREACHABLE="true"
+        export REMOTE_DOCKER_REGISTRY_UNREACHABLE
+        touch "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}"
         return
     fi
     set -e
-
-    # Docker needs the file passed to --cidfile to not exist, so we can't use mktemp
-    TMP_CONTAINER_DIR="$(mktemp -d)"
-    TMP_CONTAINER_ID="${TMP_CONTAINER_DIR}/remote-airflow-manifest-$$.container_id"
-    FILES_TO_CLEANUP_ON_EXIT+=("$TMP_CONTAINER_ID")
-
-    TMP_MANIFEST_REMOTE_JSON=$(mktemp)
-    TMP_MANIFEST_REMOTE_SHA=$(mktemp)
-    # Create container out of the manifest image without running it
-    docker create --cidfile "${TMP_CONTAINER_ID}" "${AIRFLOW_CI_REMOTE_MANIFEST_IMAGE}" 2>/dev/null >/dev/null
+    # Create container dump out of the manifest image without actually running it
+    docker create --cidfile "${REMOTE_IMAGE_CONTAINER_ID_FILE}" "${AIRFLOW_CI_REMOTE_MANIFEST_IMAGE}" \
+        2>/dev/null >/dev/null || true
     # Extract manifest and store it in local file
-    docker cp "$(cat "${TMP_CONTAINER_ID}"):${AIRFLOW_CI_BASE_TAG}.json" "${TMP_MANIFEST_REMOTE_JSON}"
-    # Filter everything except SHAs of image layers
-    sed 's/ *//g' "${TMP_MANIFEST_REMOTE_JSON}" | grep '^"sha256:' > "${TMP_MANIFEST_REMOTE_SHA}"
-    docker rm --force "$(cat "${TMP_CONTAINER_ID}")" 2>/dev/null >/dev/null
+    docker cp "$(cat "${REMOTE_IMAGE_CONTAINER_ID_FILE}"):/build-cache-hash" \
+        "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}" 2>/dev/null \
+        || touch "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}"
+    docker rm --force "$(cat "${REMOTE_IMAGE_CONTAINER_ID_FILE}")" 2>/dev/null || true
+    echo
+    echo "Remote build cache hash: '$(cat "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}")'"
+    echo
 }
 
-# The Number determines the cut-off between local building time and pull + build time.
-# It is a bit experimental and it will have to be kept
-# updated as we keep on changing layers. The cut-off point is at the moment when we do first
-# pip install "https://github.com/apache/airflow/archive/${AIRFLOW_BRANCH}.tar...
-# you can get it via this command:
-# docker history --no-trunc apache/airflow:master-python3.6-ci | \
-#      grep ^sha256 | grep -n "pip uninstall" | awk 'BEGIN {FS=":"} {print $1 }'
-#
-# This command returns the number of layer in docker history where pip uninstall is called. This is the
-# line that will take a lot of time to run and at this point it's worth to pull the image from repo
-# if there are at least NN changed layers in your docker file, you should pull the image.
-#
-# Note that this only matters if you have any of the important files changed since the last build
-# of your image such as Dockerfile.ci, setup.py etc.
-#
-MAGIC_CUT_OFF_NUMBER_OF_LAYERS=36
-
 # Compares layers from both remote and local image and set FORCE_PULL_IMAGES to true in case
 # More than the last NN layers are different.
-function build_images::compare_layers() {
-    NUM_DIFF=$(diff -y --suppress-common-lines "${TMP_MANIFEST_REMOTE_SHA}" "${TMP_MANIFEST_LOCAL_SHA}" |
-        wc -l || true)
-    rm -f "${TMP_MANIFEST_REMOTE_JSON}" "${TMP_MANIFEST_REMOTE_SHA}" "${TMP_MANIFEST_LOCAL_JSON}" "${TMP_MANIFEST_LOCAL_SHA}"
-    echo
-    echo "Number of layers different between the local and remote image: ${NUM_DIFF}"
-    echo
-    # This is where setup py is rebuilt - it will usually take a looooot of time to build it, so it is
-    # Better to pull here
-    if (( NUM_DIFF >= MAGIC_CUT_OFF_NUMBER_OF_LAYERS )); then
+function build_images::compare_local_and_remote_build_cache_hash() {
+    set +e
+    local remote_hash
+    remote_hash=$(cat "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}")
+    local local_hash
+    local_hash=$(cat "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}")
+
+    if [[ ${remote_hash} != "${local_hash}" ||
+        ${local_hash} == "" ]]; then
         echo
         echo
-        echo "WARNING! Your image and the dockerhub image differ significantly"
+        echo "Your image and the dockerhub have different or missing build cache hashes."
+        echo "Local hash: '${local_hash}'. Remote hash: '${remote_hash}'."
         echo
         echo "Forcing pulling the images. It will be faster than rebuilding usually."
         echo "You can avoid it by setting SKIP_CHECK_REMOTE_IMAGE to true"
         echo
         export FORCE_PULL_IMAGES="true"
     else
         echo
-        echo "No need to pull the image. Local rebuild will be faster"
+        echo "No need to pull the image. Yours and remote cache hashes are the same!"
         echo
     fi
+    set -e
 }
 
 # Prints summary of the build parameters
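
The functions above all revolve around one trick: a random hash file baked into the CI image is also copied into a tiny FROM scratch "manifest" image, and because a scratch image cannot be run, the file is read back with `docker create` plus `docker cp` on a stopped container. Below is a hedged, self-contained round-trip sketch of that mechanism; the image tag, the way the random hash is produced, and the output paths are placeholders, not the values Breeze actually uses.

#!/usr/bin/env bash
# Hedged sketch of the manifest-image round trip (placeholder names throughout).
set -euo pipefail

MANIFEST_IMAGE="example/airflow-ci-manifest:sketch"   # placeholder tag

# 1. Produce a random "build cache hash" file. In the real setup this file is
#    generated inside the Dockerfile.ci build, right after dependencies install.
mkdir -p manifests
head -c 16 /dev/urandom | md5sum | cut -d ' ' -f 1 > manifests/local-build-cache-hash

# 2. Bake that single file into a FROM scratch image - tiny enough that pulling
#    it is always far cheaper than pulling the full CI image.
docker build --tag="${MANIFEST_IMAGE}" -f- . <<EOF
FROM scratch
COPY manifests/local-build-cache-hash /build-cache-hash
CMD ""
EOF

# 3. Read the file back: scratch images cannot be run, so create a stopped
#    container and copy the file out, exactly as the functions above do.
cid=$(docker create "${MANIFEST_IMAGE}")
docker cp "${cid}:/build-cache-hash" ./extracted-build-cache-hash
docker rm --force "${cid}" >/dev/null
echo "Round-tripped hash: '$(cat ./extracted-build-cache-hash)'"
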
@@ -335,7 +330,6 @@ function build_images::get_docker_image_names() {
     # CI image to build
     export AIRFLOW_CI_IMAGE="${DOCKERHUB_USER}/${DOCKERHUB_REPO}:${AIRFLOW_CI_BASE_TAG}"
 
-
     # Base production image tag - used to build kubernetes tag as well
     if [[ ${FORCE_AIRFLOW_PROD_BASE_TAG=} == "" ]]; then
         export AIRFLOW_PROD_BASE_TAG="${BRANCH_NAME}-python${PYTHON_MAJOR_MINOR_VERSION}"
@@ -396,6 +390,9 @@ function build_images::prepare_ci_build() {
 # In case rebuild is needed, it determines (by comparing layers in local and remote image)
 # Whether pull is needed before rebuild.
 function build_images::rebuild_ci_image_if_needed() {
+    verbosity::print_info
+    verbosity::print_info "Checking if pull or just build for ${THE_IMAGE_TYPE} is needed."
+    verbosity::print_info
     if [[ -f "${BUILT_CI_IMAGE_FLAG_FILE}" ]]; then
         verbosity::print_info
         verbosity::print_info "${THE_IMAGE_TYPE} image already built locally."
@@ -418,6 +415,7 @@ function build_images::rebuild_ci_image_if_needed() {
 
     local needs_docker_build="false"
     md5sum::check_if_docker_build_is_needed
+    build_images::get_local_build_cache_hash
     if [[ ${needs_docker_build} == "true" ]]; then
         if [[ ${SKIP_CHECK_REMOTE_IMAGE:=} != "true" && ${DOCKER_CACHE} == "pulled" ]]; then
             # Check if remote image is different enough to force pull
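
`md5sum::check_if_docker_build_is_needed` is defined elsewhere in Breeze; as a rough, hedged approximation, that kind of check compares the md5 sums of the files that influence the image (Dockerfile.ci, setup.py, and so on) against the sums cached at the previous build and sets `needs_docker_build` on any mismatch. The file list and cache location below are illustrative only, not the actual Breeze ones.

# Hedged approximation of an md5-based "does the image need rebuilding" check.
function sketch_check_if_docker_build_is_needed() {
    local cache_dir="${BUILD_CACHE_DIR:-/tmp/airflow-build-cache}"   # assumed location
    local files=("Dockerfile.ci" "setup.py" "setup.cfg")             # assumed file list
    mkdir -p "${cache_dir}"
    needs_docker_build="false"
    local file new_md5 old_md5
    for file in "${files[@]}"; do
        new_md5=$(md5sum "${file}" | cut -d ' ' -f 1)
        old_md5=$(cat "${cache_dir}/$(basename "${file}").md5sum" 2>/dev/null || true)
        if [[ ${new_md5} != "${old_md5}" ]]; then
            echo "${file} changed since the last build - image rebuild is needed."
            needs_docker_build="true"
        fi
    done
}

# usage: run from the Airflow source root, then inspect ${needs_docker_build}
# sketch_check_if_docker_build_is_needed
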
@@ -427,14 +425,12 @@ function build_images::rebuild_ci_image_if_needed() {
             echo
             echo "Checking if the remote image needs to be pulled"
             echo
-            build_images::get_remote_image_info
-            if [[ ${REMOTE_DOCKER_REGISTRY_UNREACHABLE:=} != "true" ]]; then
-                build_images::get_local_image_info
-                if [[ ${LOCAL_MANIFEST_IMAGE_UNAVAILABLE:=} != "true" ]]; then
-                    build_images::compare_layers
-                else
-                    FORCE_PULL_IMAGES="true"
-                fi
+            build_images::get_remote_image_build_cache_hash
+            if [[ ${REMOTE_DOCKER_REGISTRY_UNREACHABLE:=} != "true" && \
+                ${LOCAL_MANIFEST_IMAGE_UNAVAILABLE:=} != "true" ]]; then
+                build_images::compare_local_and_remote_build_cache_hash
+            else
+                FORCE_PULL_IMAGES="true"
             fi
         fi
         SKIP_REBUILD="false"
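
Taken together, the decision in this hunk reduces to a plain string comparison of two one-line hash files: any mismatch, or an empty local hash, means pulling is preferred over a local rebuild. A docker-free, hedged demo of that rule follows, with temporary files standing in for the real `LOCAL_IMAGE_BUILD_CACHE_HASH_FILE` and `REMOTE_IMAGE_BUILD_CACHE_HASH_FILE` paths.

#!/usr/bin/env bash
# Hedged, docker-free demo of the hash comparison rule used above.
LOCAL_IMAGE_BUILD_CACHE_HASH_FILE=$(mktemp)
REMOTE_IMAGE_BUILD_CACHE_HASH_FILE=$(mktemp)
echo "abc123" > "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}"   # pretend remote hash
echo "def456" > "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}"    # pretend local hash

local_hash=$(cat "${LOCAL_IMAGE_BUILD_CACHE_HASH_FILE}")
remote_hash=$(cat "${REMOTE_IMAGE_BUILD_CACHE_HASH_FILE}")
if [[ -z "${local_hash}" || "${remote_hash}" != "${local_hash}" ]]; then
    export FORCE_PULL_IMAGES="true"    # hashes differ: pulling is usually faster
else
    export FORCE_PULL_IMAGES="false"   # same hash: a local rebuild will be fast
fi
echo "FORCE_PULL_IMAGES=${FORCE_PULL_IMAGES}"
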
@@ -453,6 +449,7 @@ function build_images::rebuild_ci_image_if_needed() {
         verbosity::print_info "Build start: ${THE_IMAGE_TYPE} image."
         verbosity::print_info
         build_images::build_ci_image
+        build_images::get_local_build_cache_hash
         md5sum::update_all_md5
         build_images::build_ci_image_manifest
         verbosity::print_info