Skip to content

Commit 59e7ee9

Browse files
authored
comparable_patch.sh: Zero out .gnu_debuglink CRCs (adoptium#3730) (adoptium#3736)
1 parent 5e136f8 commit 59e7ee9

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

tooling/reproducible/ReproducibleBuilds.md

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ The patching process involves:
4848
- Remove Vendor strings embedded in executables, classes and text files.
4949
- Remove module-info differences due to "hash" of Signed module executables
5050
- Remove any non-deterministic build process artifact strings, like Manifest Created-By stamps.
51+
- Zero out CRC in .gnu_debuglink ELF sections to eliminate .debuginfo-induced differences.
5152

5253
### How to setup and run comparable_patch.sh on Windows
5354

tooling/reproducible/comparable_patch.sh

+39
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,43 @@ function neutraliseReleaseFile() {
478478
fi
479479
}
480480

481+
# The last four bytes of a .gnu_debuglink ELF section contain a
482+
# 32-bit cyclic redundancy check (CRC32) of the separate .debuginfo
483+
# file. A given .debuginfo file will differ when compiled in
484+
# different environments. Specifically, if some declaration in a
485+
# system header is referenced by OpenJDK source code, and that
486+
# declaration's line number changes between environments, the
487+
# .debuginfo files will have many bytewise differences. Even
488+
# seemingly inconsequential header changes will result in large
489+
# .debuginfo differences, for example, additions of new preprocessor
490+
# macros, or comment additions and deletions. The CRC32 is thus
491+
# sensitive to almost any textual changes to system headers. This
492+
# function changes the four bytes to zeroes.
493+
function neutraliseDebuglinkCRCs() {
494+
if [[ "$OS" =~ CYGWIN* ]] || [[ "$OS" =~ Darwin* ]]; then
495+
# Assume Cygwin and Darwin toolchains do not produce .gnu_debuglink sections.
496+
return
497+
fi
498+
elf_magic="^7f454c46$"
499+
# Does not handle filenames with newlines because the hexdump format does not support \0.
500+
find "${JDK_DIR}" -type f \! -name '*.debuginfo' -print -exec hexdump -n 4 -e '4/1 "%.2x" "\n"' '{}' ';' \
501+
| grep --no-group-separator -B 1 "${elf_magic}" | grep -v "${elf_magic}" \
502+
| while read -r -d $'\n' file; do
503+
if objdump -Fsj .gnu_debuglink "${file}" >/dev/null 2>&1; then
504+
echo "Zeroing .gnu_debuglink cyclic redundancy check bytes in ${file}"
505+
section="$(objdump -Fsj .gnu_debuglink "${file}")"
506+
section_offset_within_file=$((16#$(echo "${section}" | awk '/^Contents of section/ { sub(/\x29/, ""); sub(/0x/, ""); print $9; }')))
507+
contents_line="$(echo "${section}" | sed '/^Contents of section.*$/q' | wc -l)"
508+
section_bytes_in_hex="$(echo "${section}" | tail -q -n +$((contents_line + 1)) | cut -b 7-41 | tr -d ' \n')"
509+
check_length=4
510+
hex_chars_per_byte=2
511+
check_offset_within_section="$((${#section_bytes_in_hex} / hex_chars_per_byte - check_length))"
512+
check_offset_within_file="$((section_offset_within_file + check_offset_within_section))"
513+
printf "%0.s\0" $(seq 1 "${check_length}") | dd of="${file}" bs=1 seek="${check_offset_within_file}" count="${check_length}" conv=notrunc status=none
514+
fi
515+
done
516+
}
517+
481518
# Remove some non-JDK files that some Vendors distribute
482519
# - NEWS : Some Vendors provide a NEWS text file
483520
# - demo : Not all vendors distribute the demo examples
@@ -551,6 +588,8 @@ neutraliseManifests
551588

552589
neutraliseReleaseFile
553590

591+
neutraliseDebuglinkCRCs
592+
554593
removeNonJdkFiles
555594

556595
echo "***********"

0 commit comments

Comments
 (0)