diff --git a/CHANGELOG.md b/CHANGELOG.md index b537521e7..ba9a03670 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,12 +8,14 @@ All notable changes to this project will be documented in this file. - airflow: check for correct permissions and ownerships in /stackable folder via `check-permissions-ownership.sh` provided in stackable-base image ([#1054]). +- druid: check for correct permissions and ownerships in /stackable folder via + `check-permissions-ownership.sh` provided in stackable-base image ([#1039]). - hadoop: check for correct permissions and ownerships in /stackable folder via `check-permissions-ownership.sh` provided in stackable-base image ([#1029]). - hbase: check for correct permissions and ownerships in /stackable folder via `check-permissions-ownership.sh` provided in stackable-base image ([#1028]). -- druid: check for correct permissions and ownerships in /stackable folder via - `check-permissions-ownership.sh` provided in stackable-base image ([#1039]). +- hive: check for correct permissions and ownerships in /stackable folder via + `check-permissions-ownership.sh` provided in stackable-base image ([#1040]). - spark-connect-client: A new image for Spark connect tests and demos ([#1034]) - nifi: check for correct permissions and ownerships in /stackable folder via `check-permissions-ownership.sh` provided in stackable-base image ([#1027]). @@ -31,9 +33,10 @@ All notable changes to this project will be documented in this file. ### Fixed +- druid: reduce docker image size by removing the recursive chown/chmods in the final image ([#1039]). - hadoop: reduce docker image size by removing the recursive chown/chmods in the final image ([#1029]). - hbase: reduce docker image size by removing the recursive chown/chmods in the final image ([#1028]). -- druid: reduce docker image size by removing the recursive chown/chmods in the final image ([#1039]). +- hive: reduce docker image size by removing the recursive chown/chmods in the final image ([#1040]). - nifi: reduce docker image size by removing the recursive chown/chmods in the final image ([#1027]). - opa: reduce docker image size by removing the recursive chown/chmods in the final image ([#1038]). - spark-k8s: reduce docker image size by removing the recursive chown/chmods in the final image ([#1042]). @@ -47,6 +50,7 @@ All notable changes to this project will be documented in this file. [#1034]: https://github.com/stackabletech/docker-images/pull/1034 [#1038]: https://github.com/stackabletech/docker-images/pull/1038 [#1039]: https://github.com/stackabletech/docker-images/pull/1039 +[#1040]: https://github.com/stackabletech/docker-images/pull/1040 [#1042]: https://github.com/stackabletech/docker-images/pull/1042 [#1044]: https://github.com/stackabletech/docker-images/pull/1044 [#1050]: https://github.com/stackabletech/docker-images/pull/1050 diff --git a/hive/Dockerfile b/hive/Dockerfile index d3f5f61ae..acc54fab5 100644 --- a/hive/Dockerfile +++ b/hive/Dockerfile @@ -13,12 +13,20 @@ FROM stackable/image/java-devel AS hive-builder ARG PRODUCT ARG HADOOP ARG JMX_EXPORTER +ARG AWS_JAVA_SDK_BUNDLE +ARG AZURE_STORAGE +ARG AZURE_KEYVAULT_CORE ARG STACKABLE_USER_UID # Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.) # This can be used to speed up builds when disk space is of no concern. ARG DELETE_CACHES="true" +# It is useful to see which version of Hadoop is used at a glance +# Therefore the use of the full name here +# TODO: Do we really need all of Hadoop in here? +COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop /stackable/hadoop-${HADOOP} + COPY --chown=${STACKABLE_USER_UID}:0 hive/stackable /stackable USER ${STACKABLE_USER_UID} @@ -58,6 +66,18 @@ rm -rf /stackable/apache-hive-${PRODUCT}-src curl "https://repo.stackable.tech/repository/packages/jmx-exporter/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" -o "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" ln -s "/stackable/jmx/jmx_prometheus_javaagent-${JMX_EXPORTER}.jar" /stackable/jmx/jmx_prometheus_javaagent.jar +# The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards +# This way the build will fail should one of the files not be available anymore in a later Hadoop version! + +# Add S3 Support for Hive (support for s3a://) +cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ +cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ + +# Add Azure ABFS support (support for abfs://) +cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ +cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ +cp /stackable/hadoop-${HADOOP}/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar /stackable/apache-hive-metastore-${PRODUCT}-bin/lib/ + # We're removing these to make the intermediate layer smaller # This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available # and we are sometimes running into errors because we're out of space. @@ -67,6 +87,9 @@ if [ "${DELETE_CACHES}" = "true" ] ; then rm -rf /stackable/.npm/* rm -rf /stackable/.cache/* fi + +# change groups +chmod --recursive g=u /stackable EOF @@ -75,9 +98,6 @@ FROM stackable/image/java-base AS final ARG PRODUCT ARG HADOOP ARG RELEASE -ARG AWS_JAVA_SDK_BUNDLE -ARG AZURE_STORAGE -ARG AZURE_KEYVAULT_CORE ARG STACKABLE_USER_UID @@ -106,47 +126,45 @@ LABEL io.k8s.display-name="${NAME}" WORKDIR /stackable COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/apache-hive-metastore-${PRODUCT}-bin +COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/hadoop-${HADOOP} /stackable/hadoop-${HADOOP} +COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/jmx /stackable/jmx -# It is useful to see which version of Hadoop is used at a glance -# Therefore the use of the full name here -# TODO: Do we really need all of Hadoop in here? -COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/hadoop /stackable/hadoop-${HADOOP} +COPY hive/licenses /licenses RUN < /stackable/package_manifest.txt +chown ${STACKABLE_USER_UID}:0 /stackable/package_manifest.txt +chmod g=u /stackable/package_manifest.txt rm -rf /var/cache/yum ln -s /stackable/apache-hive-metastore-${PRODUCT}-bin /stackable/hive-metastore +chown -h ${STACKABLE_USER_UID}:0 /stackable/hive-metastore +chmod g=u /stackable/hive-metastore ln -s /stackable/hadoop-${HADOOP} /stackable/hadoop +chown -h ${STACKABLE_USER_UID}:0 /stackable/hadoop +chmod g=u /stackable/hadoop -# The next two sections for S3 and Azure use hardcoded version numbers on purpose instead of wildcards -# This way the build will fail should one of the files not be available anymore in a later Hadoop version! +# fix missing permissions +chmod --recursive g=u /stackable/jmx +EOF -# Add S3 Support for Hive (support for s3a://) -cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-aws-${HADOOP}.jar /stackable/hive-metastore/lib/ -cp /stackable/hadoop/share/hadoop/tools/lib/aws-java-sdk-bundle-${AWS_JAVA_SDK_BUNDLE}.jar /stackable/hive-metastore/lib/ +# ---------------------------------------- +# Checks +# This section is to run final checks to ensure the created final images +# adhere to several minimal requirements like: +# - check file permissions and ownerships +# ---------------------------------------- -# Add Azure ABFS support (support for abfs://) -cp /stackable/hadoop/share/hadoop/tools/lib/hadoop-azure-${HADOOP}.jar /stackable/hive-metastore/lib/ -cp /stackable/hadoop/share/hadoop/tools/lib/azure-storage-${AZURE_STORAGE}.jar /stackable/hive-metastore/lib/ -cp /stackable/hadoop/share/hadoop/tools/lib/azure-keyvault-core-${AZURE_KEYVAULT_CORE}.jar /stackable/hive-metastore/lib/ - -# All files and folders owned by root group to support running as arbitrary users. -# This is best practice as all container users will belong to the root group (0). -chown -R ${STACKABLE_USER_UID}:0 /stackable -chmod -R g=u /stackable +# Check that permissions and ownership in /stackable are set correctly +# This will fail and stop the build if any mismatches are found. +RUN <