From af4b9fdffaac1747bf6263984babafb839b52ec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 11 Dec 2023 15:08:30 +0100 Subject: [PATCH 01/50] Add tweedie package --- docker/scripts/install_R_version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/scripts/install_R_version b/docker/scripts/install_R_version index 704db5d7b208..23775daaa954 100755 --- a/docker/scripts/install_R_version +++ b/docker/scripts/install_R_version @@ -96,7 +96,7 @@ checkRInstallLog R.install.2.log 'dplyr', 'R.utils', 'AUC', 'mlbench', 'flexclust', 'randomForest', \ 'bit64', 'HDtweedie', 'jsonlite', 'statmod', 'roxygen2', 'testthat', 'Rcpp', 'fpc', 'RUnit', \ 'ade4', 'e1071', 'ggplot2', 'LiblineaR', 'optparse', 'uuid', 'usethis', 'plot3Drgl', \ - 'IRdisplay', 'plotly' \ + 'IRdisplay', 'plotly', 'tweedie' \ )) \ " 2>&1 | tee R.install.3.log checkRInstallLog R.install.3.log From 8e3861f42280fd6016c515480722681ea5226875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 11 Dec 2023 15:47:18 +0100 Subject: [PATCH 02/50] fix pyarrow typo --- h2o-py/test-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-py/test-requirements.txt b/h2o-py/test-requirements.txt index 81e6a14bb743..8b56a0737865 100644 --- a/h2o-py/test-requirements.txt +++ b/h2o-py/test-requirements.txt @@ -45,4 +45,4 @@ sphinx==3.0.4; python_version <= '3.8' sphinx==6.1.3; python_version > '3.8' datatable=1.0.0; python_version <= 3.9 polars=0.19.17; python_version > 3.9 -pyarrows=14.0.1; python_version > 3.9 +pyarrow=14.0.1; python_version > 3.9 From 87ba69ec46d2977935162516685c10aaf4b874eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 11 Dec 2023 15:58:08 +0100 Subject: [PATCH 03/50] Increase the DEFAULT_IMAGE_VERSION_TAG in scripts/jenkins/groovy/buildConfig.groovy --- scripts/jenkins/groovy/buildConfig.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/scripts/jenkins/groovy/buildConfig.groovy b/scripts/jenkins/groovy/buildConfig.groovy index 73f6f89c4c78..852c38178bc7 100644 --- a/scripts/jenkins/groovy/buildConfig.groovy +++ b/scripts/jenkins/groovy/buildConfig.groovy @@ -14,7 +14,7 @@ class BuildConfig { private static final String DEFAULT_HADOOP_IMAGE_NAME = 'dev-build-hadoop' private static final String DEFAULT_RELEASE_IMAGE_NAME = 'dev-release' - public static final int DEFAULT_IMAGE_VERSION_TAG = 42 + public static final int DEFAULT_IMAGE_VERSION_TAG = 43 public static final String AWSCLI_IMAGE = DOCKER_REGISTRY + '/opsh2oai/awscli' public static final String S3CMD_IMAGE = DOCKER_REGISTRY + '/opsh2oai/s3cmd' From 55c9c01b60eda3cd773d9a6aeccad452e1e4453e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 11 Dec 2023 16:58:51 +0100 Subject: [PATCH 04/50] Add missing quotes to test-requirements.txt --- h2o-py/test-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/h2o-py/test-requirements.txt b/h2o-py/test-requirements.txt index 8b56a0737865..2672c1ae17a9 100644 --- a/h2o-py/test-requirements.txt +++ b/h2o-py/test-requirements.txt @@ -43,6 +43,6 @@ pygments==2.7.4; python_version <= '3.7' pygments==2.15.1; python_version > '3.7' sphinx==3.0.4; python_version <= '3.8' sphinx==6.1.3; python_version > '3.8' -datatable=1.0.0; python_version <= 3.9 -polars=0.19.17; python_version > 3.9 -pyarrow=14.0.1; python_version > 3.9 +datatable=1.0.0; python_version <= '3.9' +polars=0.19.17; python_version > '3.9' +pyarrow=14.0.1; python_version > '3.9' From 0de141a71b85810f917ee39ac90c4d1d2a13a7db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 12 Dec 2023 12:02:40 +0100 Subject: [PATCH 05/50] Increase DEFAULT_IMAGE_VERSION_TAG --- scripts/jenkins/groovy/buildConfig.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/jenkins/groovy/buildConfig.groovy 
b/scripts/jenkins/groovy/buildConfig.groovy index 852c38178bc7..d1417254ecfe 100644 --- a/scripts/jenkins/groovy/buildConfig.groovy +++ b/scripts/jenkins/groovy/buildConfig.groovy @@ -14,7 +14,7 @@ class BuildConfig { private static final String DEFAULT_HADOOP_IMAGE_NAME = 'dev-build-hadoop' private static final String DEFAULT_RELEASE_IMAGE_NAME = 'dev-release' - public static final int DEFAULT_IMAGE_VERSION_TAG = 43 + public static final int DEFAULT_IMAGE_VERSION_TAG = 44 public static final String AWSCLI_IMAGE = DOCKER_REGISTRY + '/opsh2oai/awscli' public static final String S3CMD_IMAGE = DOCKER_REGISTRY + '/opsh2oai/s3cmd' From e578cf723fb5c0c06ca1a1182495b26610c8e7c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 12 Dec 2023 13:11:43 +0100 Subject: [PATCH 06/50] Fix more typos --- h2o-py/test-requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/h2o-py/test-requirements.txt b/h2o-py/test-requirements.txt index 2672c1ae17a9..8e45560b1982 100644 --- a/h2o-py/test-requirements.txt +++ b/h2o-py/test-requirements.txt @@ -43,6 +43,6 @@ pygments==2.7.4; python_version <= '3.7' pygments==2.15.1; python_version > '3.7' sphinx==3.0.4; python_version <= '3.8' sphinx==6.1.3; python_version > '3.8' -datatable=1.0.0; python_version <= '3.9' -polars=0.19.17; python_version > '3.9' -pyarrow=14.0.1; python_version > '3.9' +datatable==1.0.0; python_version <= '3.9' +polars==0.19.17; python_version > '3.9' +pyarrow==14.0.1; python_version > '3.9' From 615c3d5ae929a47265478bc39f1871a9d6369a46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 12 Dec 2023 14:15:23 +0100 Subject: [PATCH 07/50] specify python versions for packages we had issues in the build process --- h2o-py/test-requirements.txt | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/h2o-py/test-requirements.txt b/h2o-py/test-requirements.txt index 8e45560b1982..a743bf393db5 100644 --- 
a/h2o-py/test-requirements.txt +++ b/h2o-py/test-requirements.txt @@ -10,7 +10,8 @@ twine==1.10.0 urllib3==1.26.5 grip==4.4.0 wheel==0.38.0 -numpy==1.23.5 +numpy<=1.23.5; python_version < '3.8' +numpy==1.23.5; python_version >= '3.8' scipy==1.2.3; python_version == '3.6' scipy==1.6.3; python_version == '3.7' scipy==1.10.1; python_version > '3.7' @@ -34,15 +35,16 @@ shap==0.29.1 boto3==1.18.65 lifelines>=0.19.5 xlsxwriter==1.3.7 -xgboost==1.7.6 +xgboost==1.7.6; python_version >= '3.8' causalml==0.12.3; python_version <= '3.7' causalml==0.14.1; python_version > '3.7' and python_version < '3.11' colorama -# not directly required, pinned by Snyk to avoid a vulnerability -pygments==2.7.4; python_version <= '3.7' -pygments==2.15.1; python_version > '3.7' -sphinx==3.0.4; python_version <= '3.8' -sphinx==6.1.3; python_version > '3.8' datatable==1.0.0; python_version <= '3.9' polars==0.19.17; python_version > '3.9' pyarrow==14.0.1; python_version > '3.9' +# not directly required, pinned by Snyk to avoid a vulnerability +pygments==2.7.4; python_version < '3.5' +pygments==2.15.1; python_version >= '3.7' +sphinx==3.0.4; python_version < '3.5' +sphinx==3.5.4; python_version >= '3.6' and python_version <= '3.8' +sphinx==6.1.3; python_version > '3.8' From 773dcdc88981b05b7d053dc6776f274526ec52b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 13 Dec 2023 15:51:16 +0100 Subject: [PATCH 08/50] compile python3.6 Changed version of wheel as 0.38.0 was yanked out of pypi due to circular dependency --- docker/scripts/install_python_version | 25 ++++++++++++++++++++----- h2o-py/test-requirements.txt | 3 ++- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/docker/scripts/install_python_version b/docker/scripts/install_python_version index 2296597eefeb..44d2cda6676f 100755 --- a/docker/scripts/install_python_version +++ b/docker/scripts/install_python_version @@ -34,16 +34,31 @@ for python_version in "${array[@]}"; do if [[ ${major} == "2" 
]] ; then pkg_suffix="" ; else pkg_suffix=${major} ; fi apt-get update - - DEBIAN_FRONTEND=noninteractive apt-get -y install python${python_version} python${python_version}-dev python${pkg_suffix}-setuptools - if [[ ${major} -ge 3 && ${minor} -ge 7 ]]; then - DEBIAN_FRONTEND=noninteractive apt-get -y install python${python_version}-distutils + if [[ ${python_version} == "3.6" ]] + then + DEBIAN_FRONTEND=noninteractive apt-get install -y build-essential checkinstall libreadline-dev libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev clang + pushd /usr/src + wget -nv https://www.python.org/ftp/python/3.6.15/Python-3.6.15.tar.xz + echo "bc04aa6c2a1a172a35012abd668538cd Python-3.6.15.tar.xz" > Python-3.6.15.tar.xz.md5 + md5sum -c Python-3.6.15.tar.xz.md5 || exit 1 # unexpected checksum + tar xf Python-3.6.15.tar.xz + cd Python-3.6.15 + export CC="clang" # needed to avoid SegFault during install + ./configure + make + make altinstall + popd + else + DEBIAN_FRONTEND=noninteractive apt-get -y install python${python_version} python${python_version}-dev python${pkg_suffix}-setuptools + if [[ ${major} -ge 3 && ${minor} -ge 7 ]]; then + DEBIAN_FRONTEND=noninteractive apt-get -y install python${python_version}-distutils + fi fi apt-get clean rm -rf /var/cache/apt/* echo "###### Installing pip for Python ${python_version} ######" - if [[ ${major} == "2" ]] || [[ ${python_version} == "3.5" ]] + if [[ ${major} == "2" ]] || [[ ${python_version} == "3.5" ]] || [[ ${python_version} == "3.6" ]] then curl https://bootstrap.pypa.io/pip/${python_version}/get-pip.py --output get-pip.py python${python_version} get-pip.py diff --git a/h2o-py/test-requirements.txt b/h2o-py/test-requirements.txt index a743bf393db5..41ae53f0c109 100644 --- a/h2o-py/test-requirements.txt +++ b/h2o-py/test-requirements.txt @@ -9,7 +9,8 @@ cython==0.29.34 twine==1.10.0 urllib3==1.26.5 grip==4.4.0 -wheel==0.38.0 +wheel<=0.42.0; python_version < '3.7' +wheel==0.42.0; python_version 
>= '3.7' numpy<=1.23.5; python_version < '3.8' numpy==1.23.5; python_version >= '3.8' scipy==1.2.3; python_version == '3.6' From c47915dbd433d8ae49f8b5a082d94f9b71d465dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 13 Dec 2023 21:13:49 +0100 Subject: [PATCH 09/50] Add htmlTable for Hmisc in R --- docker/scripts/install_R_version | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/scripts/install_R_version b/docker/scripts/install_R_version index 23775daaa954..4b775aa5d4e4 100755 --- a/docker/scripts/install_R_version +++ b/docker/scripts/install_R_version @@ -133,6 +133,7 @@ checkRInstallLog R.install.5.log # uplift 0.3.5 requires coin installed above /usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); library(devtools); \ install_version('ROCR', version = '1.0-7'); \ + install_version('htmlTable', version = '1.13'); \ install_version('Hmisc', version = '4.3-0'); \ install_version('uplift', version = '0.3.5'); \ " 2>&1 | tee R.install.6.log From 8a5b410d27ab1a395c7a30246b6ab16ac383e4e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Fri, 15 Dec 2023 18:28:45 +0100 Subject: [PATCH 10/50] Install old pkgs --- docker/jenkins-images/Dockerfile-r | 10 + docker/scripts/install_R_version | 398 +++++++++++++++++++++++------ 2 files changed, 323 insertions(+), 85 deletions(-) diff --git a/docker/jenkins-images/Dockerfile-r b/docker/jenkins-images/Dockerfile-r index 8ec2be69ba31..e9b592580a9f 100644 --- a/docker/jenkins-images/Dockerfile-r +++ b/docker/jenkins-images/Dockerfile-r @@ -4,6 +4,16 @@ FROM harbor.h2o.ai/opsh2oai/h2o-3/dev-r-base:${FROM_VERSION} ARG R_VERSION ENV R_VERSION=${R_VERSION} +# Old R versions (at least 3.3 and 3.4) are written in a way that is not compilable. This is likely due to the change in +# gcc-10 (GCC now defaults to -fno-common. As a result, global variable accesses are more efficient on various targets. 
+# In C, global variables with multiple tentative definitions now result in linker errors. With -fcommon such definitions +# are silently merged during linking.). I tried using -fcommon but without much luck so due to the time constrain I +# decided to use gcc-9 instead. +RUN apt-get update && apt-get install -y gcc-9 gfortran-9 g++-9 tcl-dev tk-dev && apt-get clean && apt-get autoremove -y && \ + rm -rf /var/cache/apt/* &&\ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 99 --slave /usr/bin/g++ g++ /usr/bin/g++-9 \ + --slave /usr/bin/gcov gcov /usr/bin/gcov-9 && update-alternatives --install /usr/bin/gfortran f95 /usr/bin/gfortran-9 99 + COPY scripts/install_R_version /tmp/ RUN \ chmod +x /tmp/install_R_version && \ diff --git a/docker/scripts/install_R_version b/docker/scripts/install_R_version index 4b775aa5d4e4..1d6f6f0ebcd6 100755 --- a/docker/scripts/install_R_version +++ b/docker/scripts/install_R_version @@ -28,9 +28,12 @@ rm R-${R_VERSION}.tar.gz # Configure and make cd R-${R_VERSION} -./configure --prefix=${R_VERSION_HOME} --with-x=no +# some graphic packages require tcltk so it needs to be configured here +./configure --prefix=${R_VERSION_HOME} --with-x=no --enable-R-shlib=yes --with-tcltk=yes \ + --with-tk-config=/usr/lib/tkConfig.sh --with-tcl-config=/usr/lib/tclConfig.sh make make install +mkdir -p ${R_VERSION_HOME}/lib/R/library || true chmod a+w -R ${R_VERSION_HOME}/lib/R/library # Cleanup @@ -58,104 +61,329 @@ chmod a+x /usr/bin/activate_R_${R_VERSION} echo "Activating R ${R_VERSION}" activate_R_${R_VERSION} + +function header() { + set +x + echo + echo $* + echo $* | sed -e 's/./=/g' + set -x +} + function checkRInstallLog() { log_file=$1 - if grep -E '(packages .* are not available)|(installation of package .* had non-zero exit status)' $log_file; then + if grep -E '(packages .* are not available)|(installation of package .* had non-zero exit status|ERROR: dependenc.*not available for package)' $log_file; then echo "Errors 
detected in R package installation." exit 1 fi } +function getAndInstall() { + NAME="$2" + VERSION="$3" + if isAlreadyPresent "$NAME" "$VERSION" ; then + echo ":: " Package $NAME with version $VERSION or above is already present. Skipping installation. + else + header Downloading $2 + wget "$1" -O "$2.tar.gz" || return 1 + header Installing $2 + /usr/local/R/current/bin/R CMD INSTALL "$2.tar.gz" 2>&1 | tee "$2.log" + checkRInstallLog "$2.log" + rm "$2.tar.gz" "$2.log" + isAlreadyPresent "$NAME" "$VERSION" || exit 1 # make sure it's installed + fi +} + +function isAlreadyPresent(){ + NAME="$1" + VERSION="$2" + /usr/local/R/current/bin/R -q -e "if (require('$NAME') &&\ + utils::compareVersion('$VERSION', as.character(packageVersion('$NAME'))) <= 0) q('no', 0) else q('no', 1)" &> /dev/null +} + + +function getAndInstallFromCRAN(){ + set +x + NAME="$1" + VERSION="$2" + getAndInstall "https://cran.r-project.org/src/contrib/Archive/${NAME}/${NAME}_${VERSION}.tar.gz" "$NAME" "$VERSION" || \ + getAndInstall "https://cran.r-project.org/src/contrib/${NAME}_${VERSION}.tar.gz" "$NAME" "$VERSION" || exit 1 + set -x +} + +# How to get dependencies? +#========================= +# Since we can't rely on `versions` package to get older packages (Microsoft's CRAN (MRAN) is discontinued), we have to +# install the packages in some other way. What I did was opened the dev-r-3.3.3-jdk-8 and got the installed packages by +# using the following: +# +######################################################################################################################## +# # Get packages and freeze them +# write.csv(installed.packages(), "pkgs.csv") +######################################################################################################################## +# +# Now that I got the packages, their versions and their dependencies, I am able to topologically sort them to ensure +# I won't try to install a package that has is missing some dependencies. 
Then I install it from CRAN using the above +# defined shell function `getAndInstallFromCRAN`. +# +# To make it repeatable I created the following script in R that uses `igraph` package for creating the graph representing +# packages and its dependencies that are then topologically sorted and printed out so it can be easily copy-pasted here. +# Some packages may never reach CRAN and those should be added to the exclude_pkgs vector (like the `isofor` package) +# and then installed separately. +# +######################################################################################################################## +# exclude_pkgs <- c("isofor") +# +# # Load them, sort them topologically and generate the code to install them +# pkgs <- read.csv("/Users/tomasfryda/tmp/jenkins_logs/pkgs.csv") +# pkgs <- pkgs[pkgs$Priority != "base" | is.na(pkgs$Priority),] +# rownames(pkgs) <- pkgs$Package +# +# library(igraph) +# +# i2n <- setNames(pkgs$Package, seq_len(nrow(pkgs))) +# n2i <- setNames(names(i2n), i2n) +# +# g <- make_empty_graph(directed = TRUE) +# g <- add_vertices(g, nrow(pkgs), label = i2n) +# +# for (n in i2n) { +# deps <- paste0(pkgs[n, "Depends"], ", ", pkgs[n, "Imports"]) +# cat(n, "->", deps, "\n") +# if (is.na(deps)) +# next +# deps <- strsplit(deps, ",\\s*", perl = TRUE)[[1]] +# for (d in deps) { +# print(d) +# d <- strsplit(d, "\\s+", perl = TRUE)[[1]][[1]] +# print(d) +# if (d %in% i2n) { +# g <- add_edges(g, c(n2i[[n]], n2i[[d]])) +# } +# } +# } +# +# ts <- topo_sort(g, mode = "in") +# ts <- ts[!ts %in% n2i[exclude_pkgs]] +# cat(paste("getAndInstallFromCRAN", i2n[ts], pkgs[i2n[ts], "Version"]), sep = "\n") +######################################################################################################################## + + # Install dependencies echo "Installing dependencies for R ${R_VERSION}" -/usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); \ - install.packages(c( \ - 'versions' \ - )) \ -" 2>&1 | tee R.install.0.log 
-checkRInstallLog R.install.0.log -/usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); library(versions);\ - install.versions(c('usethis'), c('2.0.1')); -" 2>&1 | tee R.install.00.log -checkRInstallLog R.install.00.log -/usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); library(versions);\ - install.packages(c( \ - 'devtools' \ - )) \ -" 2>&1 | tee R.install.1.log -checkRInstallLog R.install.1.log -# Install dependency proxy for e1071 and dependency robustbase for fpc in version suited for R 3.3.3 -/usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); library(devtools); \ - install_version('rgl', version = '0.100.54'); \ - install_version('proxy', version = '0.4-20'); \ - install_version('robustbase', version = '0.93-7'); \ -" 2>&1 | tee R.install.2.log -checkRInstallLog R.install.2.log -/usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); \ - install.packages(c( \ - 'dplyr', 'R.utils', 'AUC', 'mlbench', 'flexclust', 'randomForest', \ - 'bit64', 'HDtweedie', 'jsonlite', 'statmod', 'roxygen2', 'testthat', 'Rcpp', 'fpc', 'RUnit', \ - 'ade4', 'e1071', 'ggplot2', 'LiblineaR', 'optparse', 'uuid', 'usethis', 'plot3Drgl', \ - 'IRdisplay', 'plotly', 'tweedie' \ - )) \ -" 2>&1 | tee R.install.3.log -checkRInstallLog R.install.3.log -# Latest XGBoost (from source) does not compile; see https://github.com/h2oai/h2o-3/issues/7830 -#/usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); \ -# install.packages(c('xgboost'), type='source') \ -#" 2>&1 | tee R.install.2.log -/usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); library(devtools); \ - install_version('xgboost', version = '1.0.0.2', build = TRUE); \ -" 2>&1 | tee R.install.4.log -checkRInstallLog R.install.4.log -# RCUrl 1.95-4.12 is the last to support R 3.3 -# survival 2.44-1.1 is the last to support R 3.3 -# slam 0.1-40 is the last to support R 3.3 -# latticeExtra (dependency of Hmisc) 0.6-28 is the last 
to support R 3.3 -# caTools (dependency of ROCR) requires R 3.6.0 in latest version -# text2vec requires R 3.6.0 in latest version -# mvtnorm (dependency of coin) -# coin (dependency of uplift, 1.0 is the last to support R 3.3) -/usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); library(devtools); \ - install_version('RCurl', version = '1.95-4.12'); \ - install_version('survival', version = '2.44-1.1'); \ - install_version('slam', version = '0.1-40'); \ - install_version('latticeExtra', version = '0.6-28'); \ - install_version('caTools', version = '1.17.1.4'); \ - install_version('text2vec', version = '0.5.0'); \ - install_version('mvtnorm', version = '1.0'); \ - install_version('coin', version = '1.0'); \ -" 2>&1 | tee R.install.5.log -checkRInstallLog R.install.5.log -# install packages that need special deps installed above -# ROCR 1.0-7 is the last to support R 3.5 and below -# Hmisc 4.3-0 is the last to support survival 2.44 -# uplift 0.3.5 requires coin installed above -/usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); library(devtools); \ - install_version('ROCR', version = '1.0-7'); \ - install_version('htmlTable', version = '1.13'); \ - install_version('Hmisc', version = '4.3-0'); \ - install_version('uplift', version = '0.3.5'); \ -" 2>&1 | tee R.install.6.log -checkRInstallLog R.install.6.log + +getAndInstallFromCRAN abind 1.4-5 +getAndInstallFromCRAN acepack 1.4.1 +getAndInstallFromCRAN AUC 0.3.0 +getAndInstallFromCRAN backports 1.3.0 +getAndInstallFromCRAN base64enc 0.1-3 +getAndInstallFromCRAN bit 4.0.4 +getAndInstallFromCRAN bitops 1.0-7 +getAndInstallFromCRAN boot 1.3-18 +getAndInstallFromCRAN brew 1.0-6 +getAndInstallFromCRAN brio 1.1.2 +getAndInstallFromCRAN clipr 0.7.1 +getAndInstallFromCRAN cluster 2.0.5 +getAndInstallFromCRAN codetools 0.2-15 +getAndInstallFromCRAN colorspace 2.0-2 +getAndInstallFromCRAN commonmark 1.7 +getAndInstallFromCRAN cpp11 0.4.1 +getAndInstallFromCRAN crayon 1.4.2 
+getAndInstallFromCRAN curl 4.3.2 +getAndInstallFromCRAN data.table 1.14.2 +getAndInstallFromCRAN DEoptimR 1.0-9 +getAndInstallFromCRAN digest 0.6.28 +getAndInstallFromCRAN diptest 0.76-0 +getAndInstallFromCRAN evaluate 0.14 +getAndInstallFromCRAN fansi 0.5.0 +getAndInstallFromCRAN farver 2.1.0 +getAndInstallFromCRAN fastmap 1.1.0 +getAndInstallFromCRAN foreign 0.8-67 +getAndInstallFromCRAN formatR 1.11 +getAndInstallFromCRAN Formula 1.2-4 +getAndInstallFromCRAN fs 1.5.0 +getAndInstallFromCRAN futile.options 1.0.1 +getAndInstallFromCRAN generics 0.1.1 +getAndInstallFromCRAN getopt 1.20.3 +getAndInstallFromCRAN gitcreds 0.1.1 +getAndInstallFromCRAN glue 1.5.0 +getAndInstallFromCRAN gtable 0.3.0 +getAndInstallFromCRAN gtools 3.9.2 +getAndInstallFromCRAN HDtweedie 1.1 +getAndInstallFromCRAN highlight 0.5.0 +getAndInstallFromCRAN ini 0.3.1 +getAndInstallFromCRAN isoband 0.2.5 +getAndInstallFromCRAN iterators 1.0.13 +getAndInstallFromCRAN jsonlite 1.7.2 +getAndInstallFromCRAN kernlab 0.9-29 +getAndInstallFromCRAN KernSmooth 2.23-15 +getAndInstallFromCRAN labeling 0.4.2 +getAndInstallFromCRAN lattice 0.20-34 +getAndInstallFromCRAN lazyeval 0.2.2 +getAndInstallFromCRAN LiblineaR 1.94-2 +getAndInstallFromCRAN magrittr 2.0.1 +getAndInstallFromCRAN MASS 7.3-45 +getAndInstallFromCRAN mclust 5.4.8 +getAndInstallFromCRAN mime 0.12 +getAndInstallFromCRAN misc3d 0.9-1 +getAndInstallFromCRAN mlbench 2.1-3 +getAndInstallFromCRAN MLmetrics 1.0.0 +getAndInstallFromCRAN modeltools 0.2-23 +getAndInstallFromCRAN mvtnorm 1.0-0 +getAndInstallFromCRAN nnet 7.3-12 +getAndInstallFromCRAN packrat 0.7.0 +getAndInstallFromCRAN pixmap 0.4-12 +getAndInstallFromCRAN pkgconfig 2.0.3 +getAndInstallFromCRAN praise 1.0.0 +getAndInstallFromCRAN prettyunits 1.1.1 +getAndInstallFromCRAN proxy 0.4-20 +getAndInstallFromCRAN ps 1.6.0 +getAndInstallFromCRAN R.methodsS3 1.8.1 +getAndInstallFromCRAN R6 2.5.1 +getAndInstallFromCRAN randomForest 4.6-14 +getAndInstallFromCRAN rappdirs 0.3.3 +getAndInstallFromCRAN 
RColorBrewer 1.1-2 +getAndInstallFromCRAN Rcpp 1.0.7 +getAndInstallFromCRAN RcppParallel 5.1.4 +getAndInstallFromCRAN remotes 2.4.1 +getAndInstallFromCRAN rlang 0.4.12 +getAndInstallFromCRAN rpart 4.1-10 +getAndInstallFromCRAN rprojroot 2.0.2 +getAndInstallFromCRAN rstudioapi 0.13 +getAndInstallFromCRAN RUnit 0.4.32 +getAndInstallFromCRAN slam 0.1-40 +getAndInstallFromCRAN sourcetools 0.1.7 +getAndInstallFromCRAN SparseM 1.81 +getAndInstallFromCRAN sparsepp 1.22 +getAndInstallFromCRAN spatial 7.3-11 +getAndInstallFromCRAN statmod 1.4.36 +getAndInstallFromCRAN stringi 1.7.5 +getAndInstallFromCRAN svd 0.5 +getAndInstallFromCRAN sys 3.4 +getAndInstallFromCRAN systemfonts 0.2.3 +getAndInstallFromCRAN tweedie 2.3.5 +getAndInstallFromCRAN utf8 1.2.2 +getAndInstallFromCRAN uuid 1.0-3 +getAndInstallFromCRAN versions 0.3 +getAndInstallFromCRAN viridisLite 0.4.0 +getAndInstallFromCRAN whisker 0.4 +getAndInstallFromCRAN withr 2.4.2 +getAndInstallFromCRAN xfun 0.28 +getAndInstallFromCRAN xml2 1.3.2 +getAndInstallFromCRAN xtable 1.8-4 +getAndInstallFromCRAN yaml 2.2.1 +getAndInstallFromCRAN zip 2.2.0 +getAndInstallFromCRAN checkmate 2.0.0 +getAndInstallFromCRAN bit64 4.0.5 +getAndInstallFromCRAN caTools 1.17.1.4 +getAndInstallFromCRAN RCurl 1.95-4.12 +getAndInstallFromCRAN munsell 0.5.0 +getAndInstallFromCRAN diffobj 0.3.5 +getAndInstallFromCRAN robustbase 0.93-7 +getAndInstallFromCRAN lambda.r 1.2.4 +getAndInstallFromCRAN optparse 1.7.1 +getAndInstallFromCRAN cli 3.1.0 +getAndInstallFromCRAN gridExtra 2.3 +getAndInstallFromCRAN foreach 1.5.1 +getAndInstallFromCRAN Matrix 1.2-8 +getAndInstallFromCRAN nlme 3.1-131 +getAndInstallFromCRAN sp 1.4-5 +getAndInstallFromCRAN class 7.3-14 +getAndInstallFromCRAN prabclus 2.3-2 +getAndInstallFromCRAN plot3D 1.4 +getAndInstallFromCRAN flexmix 2.3-17 +getAndInstallFromCRAN R.oo 1.24.0 +getAndInstallFromCRAN processx 3.5.2 +getAndInstallFromCRAN latticeExtra 0.6-28 +getAndInstallFromCRAN RcppArmadillo 0.10.7.3.0 +getAndInstallFromCRAN cachem 
1.0.6 +getAndInstallFromCRAN ellipsis 0.3.2 +getAndInstallFromCRAN htmltools 0.5.2 +getAndInstallFromCRAN later 1.3.0 +getAndInstallFromCRAN lifecycle 1.0.1 +getAndInstallFromCRAN purrr 0.3.4 +getAndInstallFromCRAN desc 1.4.0 +getAndInstallFromCRAN stringr 1.4.0 +getAndInstallFromCRAN askpass 1.1 +getAndInstallFromCRAN highr 0.9 +getAndInstallFromCRAN tinytex 0.35 +getAndInstallFromCRAN rversions 2.1.1 +getAndInstallFromCRAN gplots 3.1.1 +getAndInstallFromCRAN futile.logger 1.4.3 +getAndInstallFromCRAN sessioninfo 1.2.1 +getAndInstallFromCRAN glmnet 2.0-2 +getAndInstallFromCRAN irlba 2.3.3 +getAndInstallFromCRAN survival 2.44-1.1 +getAndInstallFromCRAN xgboost 1.0.0.2 +getAndInstallFromCRAN mgcv 1.8-17 +getAndInstallFromCRAN ade4 1.7-18 +getAndInstallFromCRAN e1071 1.7-9 +getAndInstallFromCRAN flexclust 1.4-0 +getAndInstallFromCRAN fpc 2.2-9 +getAndInstallFromCRAN R.utils 2.11.0 +getAndInstallFromCRAN callr 3.7.0 +getAndInstallFromCRAN xopen 1.0.0 +getAndInstallFromCRAN memoise 2.0.0 +getAndInstallFromCRAN vctrs 0.3.8 +getAndInstallFromCRAN crosstalk 1.2.0 +getAndInstallFromCRAN fontawesome 0.2.2 +getAndInstallFromCRAN htmlwidgets 1.5.4 +getAndInstallFromCRAN jquerylib 0.1.4 +getAndInstallFromCRAN sass 0.4.0 +getAndInstallFromCRAN promises 1.2.0.1 +getAndInstallFromCRAN scales 1.1.1 +getAndInstallFromCRAN pkgload 1.2.3 +getAndInstallFromCRAN openssl 1.4.5 +getAndInstallFromCRAN knitr 1.36 +getAndInstallFromCRAN ROCR 1.0-7 +getAndInstallFromCRAN text2vec 0.5.0 +getAndInstallFromCRAN coin 1.0-0 +getAndInstallFromCRAN gbm 2.1.3 +getAndInstallFromCRAN penalized 0.9-51 +getAndInstallFromCRAN RItools 0.1-17 +getAndInstallFromCRAN pkgbuild 1.2.0 +getAndInstallFromCRAN webshot 0.5.2 +getAndInstallFromCRAN pillar 1.6.4 +getAndInstallFromCRAN tidyselect 1.1.1 +getAndInstallFromCRAN bslib 0.3.1 +getAndInstallFromCRAN httpuv 1.6.3 +getAndInstallFromCRAN credentials 1.3.0 +getAndInstallFromCRAN httr 1.4.2 +getAndInstallFromCRAN rsconnect 0.8.24 +getAndInstallFromCRAN htmlTable 
2.3.0 +getAndInstallFromCRAN rmarkdown 2.11 +getAndInstallFromCRAN roxygen2 7.1.2 +getAndInstallFromCRAN tables 0.9.6 +getAndInstallFromCRAN rcmdcheck 1.4.0 +getAndInstallFromCRAN repr 1.1.3 +getAndInstallFromCRAN tibble 3.1.6 +getAndInstallFromCRAN shiny 1.7.1 +getAndInstallFromCRAN gert 1.3.0 +getAndInstallFromCRAN gh 1.3.0 +getAndInstallFromCRAN uplift 0.3.5 +getAndInstallFromCRAN IRdisplay 1.0 +getAndInstallFromCRAN dplyr 1.0.7 +getAndInstallFromCRAN ggplot2 3.3.5 +getAndInstallFromCRAN rematch2 2.1.2 +getAndInstallFromCRAN miniUI 0.1.1.1 +getAndInstallFromCRAN shinyjs 2.0.0 +getAndInstallFromCRAN usethis 2.0.1 +getAndInstallFromCRAN tidyr 1.1.4 +getAndInstallFromCRAN viridis 0.6.2 +getAndInstallFromCRAN pkgdown 1.3.0 +getAndInstallFromCRAN waldo 0.3.1 +getAndInstallFromCRAN manipulateWidget 0.11.1 +getAndInstallFromCRAN plotly 4.10.0 +getAndInstallFromCRAN Hmisc 4.3-0 +getAndInstallFromCRAN testthat 3.1.0 +getAndInstallFromCRAN rgl 0.100.54 +getAndInstallFromCRAN devtools 2.4.2 +getAndInstallFromCRAN plot3Drgl 1.0.2 # dependecies from GitHub -# The R_REMOTES_UPGRADE=never disables automatic upgrade of dependencies. 
-# mainly because of systemfonts that is not supported in newer versions by R 3.3 -# is also a dependency of pkgdown which would try otherwise to upgrade it to a version -# which doesn't support R < 3.6 R_REMOTES_UPGRADE=never /usr/local/R/current/bin/R -e "chooseCRANmirror(graphics=FALSE, ind=1); library(devtools); \ - install_version('systemfonts', version = '0.2.3'); \ - install_version('glmnet', version = '2.0-2'); \ - install_version('gbm', version = '2.1.3'); \ - install_version('MLmetrics', version = '1.0.0'); \ - install_version('pkgdown', version = '1.3.0'); \ install_github('Zelazny7/isofor') \ " 2>&1 | tee R.install.7.log checkRInstallLog R.install.7.log rm R.install.*.log -# install LiblineaR from S3 -wget -q -O /LiblineaR_1.94-2.tar.gz https://cran.r-project.org/src/contrib/Archive/LiblineaR/LiblineaR_1.94-2.tar.gz -/usr/local/R/current/bin/R CMD INSTALL /LiblineaR_1.94-2.tar.gz -rm /LiblineaR_1.94-2.tar.gz From 56b5e4b92f4d8f849f3b9390b234d18855decbb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 18 Dec 2023 10:50:25 +0100 Subject: [PATCH 11/50] Update Dockerfile-build-base --- docker/jenkins-images/Dockerfile-build-base | 25 ++++++++++++--------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/docker/jenkins-images/Dockerfile-build-base b/docker/jenkins-images/Dockerfile-build-base index cdddb4a2d79f..351a29d48852 100644 --- a/docker/jenkins-images/Dockerfile-build-base +++ b/docker/jenkins-images/Dockerfile-build-base @@ -1,17 +1,20 @@ ARG FROM_VERSION FROM harbor.h2o.ai/opsh2oai/h2o-3/dev-r-3.4.1:${FROM_VERSION} -RUN \ - curl -sL https://deb.nodesource.com/setup_16.x | bash - && \ - apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y \ - texlive \ - texlive-fonts-extra \ - texlive-htmlxml \ - texinfo \ - texlive-bibtex-extra \ - texlive-formats-extra \ - texlive-generic-extra && \ + +RUN apt-get update && apt-get install -y ca-certificates curl gnupg && mkdir -p 
/etc/apt/keyrings && \ +curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ +NODE_MAJOR=16 echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ +DEBIAN_FRONTEND=noninteractive apt-get install -y \ + nodejs \ + texlive \ + texlive-fonts-extra \ + texlive-latex-extra \ + texlive-latex-recommended \ + texlive-plain-generic \ + texinfo \ + texlive-bibtex-extra \ + texlive-formats-extra && \ apt-get clean && \ apt-get autoremove -y && \ rm -rf /var/cache/apt/* From 29aca57252f8a083df459631513465ab7883f8b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 2 Jan 2024 11:33:10 +0100 Subject: [PATCH 12/50] fix issues with nodesource setup script in python and jdk-other images --- docker/jenkins-images/Dockerfile-jdk-others-base | 4 +++- docker/jenkins-images/Dockerfile-python-base | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docker/jenkins-images/Dockerfile-jdk-others-base b/docker/jenkins-images/Dockerfile-jdk-others-base index e469ec2d7037..73f15440535b 100644 --- a/docker/jenkins-images/Dockerfile-jdk-others-base +++ b/docker/jenkins-images/Dockerfile-jdk-others-base @@ -3,7 +3,9 @@ FROM harbor.h2o.ai/opsh2oai/h2o-3/dev-base:${FROM_VERSION} RUN \ add-apt-repository -y ppa:deadsnakes && \ - curl -sL https://deb.nodesource.com/setup_16.x | bash - && \ + apt-get update && apt-get install -y ca-certificates curl gnupg && mkdir -p /etc/apt/keyrings && \ + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ + NODE_MAJOR=16 echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ 
python3-virtualenv libkrb5-dev nodejs && \ diff --git a/docker/jenkins-images/Dockerfile-python-base b/docker/jenkins-images/Dockerfile-python-base index 364b0e5d6647..393c1c5f5a72 100644 --- a/docker/jenkins-images/Dockerfile-python-base +++ b/docker/jenkins-images/Dockerfile-python-base @@ -3,7 +3,9 @@ FROM harbor.h2o.ai/opsh2oai/h2o-3/dev-jdk-8-base:${FROM_VERSION} RUN \ add-apt-repository -y ppa:deadsnakes && \ - curl -sL https://deb.nodesource.com/setup_16.x | bash - && \ + apt-get update && apt-get install -y ca-certificates curl gnupg && mkdir -p /etc/apt/keyrings && \ + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ + NODE_MAJOR=16 echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ apt-get update -q -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ gconf-service libasound2 libatk1.0-0 libatk-bridge2.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 \ From 3de9fa399bc4bd9667fc255ea8ab29433cc992ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 2 Jan 2024 12:24:23 +0100 Subject: [PATCH 13/50] Extract NODE_MAJOR env var outside of the complicated RUN statements --- docker/jenkins-images/Dockerfile-build-base | 3 ++- docker/jenkins-images/Dockerfile-jdk-others-base | 4 +++- docker/jenkins-images/Dockerfile-python-base | 4 +++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/docker/jenkins-images/Dockerfile-build-base b/docker/jenkins-images/Dockerfile-build-base index 351a29d48852..1266a0ae0a9a 100644 --- a/docker/jenkins-images/Dockerfile-build-base +++ b/docker/jenkins-images/Dockerfile-build-base @@ -1,10 +1,11 @@ ARG FROM_VERSION FROM harbor.h2o.ai/opsh2oai/h2o-3/dev-r-3.4.1:${FROM_VERSION} +ENV NODE_MAJOR '16' RUN apt-get update && apt-get install -y ca-certificates curl gnupg && mkdir -p /etc/apt/keyrings 
&& \ curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ -NODE_MAJOR=16 echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ +echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ nodejs \ texlive \ diff --git a/docker/jenkins-images/Dockerfile-jdk-others-base b/docker/jenkins-images/Dockerfile-jdk-others-base index 73f15440535b..186272455fb8 100644 --- a/docker/jenkins-images/Dockerfile-jdk-others-base +++ b/docker/jenkins-images/Dockerfile-jdk-others-base @@ -1,11 +1,13 @@ ARG FROM_VERSION FROM harbor.h2o.ai/opsh2oai/h2o-3/dev-base:${FROM_VERSION} +ENV NODE_MAJOR '16' + RUN \ add-apt-repository -y ppa:deadsnakes && \ apt-get update && apt-get install -y ca-certificates curl gnupg && mkdir -p /etc/apt/keyrings && \ curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ - NODE_MAJOR=16 echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ + echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ python3-virtualenv libkrb5-dev nodejs && \ diff --git a/docker/jenkins-images/Dockerfile-python-base b/docker/jenkins-images/Dockerfile-python-base index 393c1c5f5a72..beed493d07d0 100644 --- a/docker/jenkins-images/Dockerfile-python-base +++ b/docker/jenkins-images/Dockerfile-python-base @@ -1,11 +1,13 @@ ARG FROM_VERSION FROM 
harbor.h2o.ai/opsh2oai/h2o-3/dev-jdk-8-base:${FROM_VERSION} +ENV NODE_MAJOR '16' + RUN \ add-apt-repository -y ppa:deadsnakes && \ apt-get update && apt-get install -y ca-certificates curl gnupg && mkdir -p /etc/apt/keyrings && \ curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ - NODE_MAJOR=16 echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ + echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ apt-get update -q -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ gconf-service libasound2 libatk1.0-0 libatk-bridge2.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 \ From 269f41751f1ddff7015efae8e203f1615c024967 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 3 Jan 2024 11:38:44 +0100 Subject: [PATCH 14/50] Add --no-tty to gpg call to resolve issue with non-existing /dev/tty --- docker/jenkins-images/Dockerfile-build-base | 2 +- docker/jenkins-images/Dockerfile-jdk-others-base | 2 +- docker/jenkins-images/Dockerfile-python-base | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/jenkins-images/Dockerfile-build-base b/docker/jenkins-images/Dockerfile-build-base index 1266a0ae0a9a..e0c16fb367ec 100644 --- a/docker/jenkins-images/Dockerfile-build-base +++ b/docker/jenkins-images/Dockerfile-build-base @@ -4,7 +4,7 @@ FROM harbor.h2o.ai/opsh2oai/h2o-3/dev-r-3.4.1:${FROM_VERSION} ENV NODE_MAJOR '16' RUN apt-get update && apt-get install -y ca-certificates curl gnupg && mkdir -p /etc/apt/keyrings && \ -curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ +curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg 
--no-tty --yes --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ nodejs \ diff --git a/docker/jenkins-images/Dockerfile-jdk-others-base b/docker/jenkins-images/Dockerfile-jdk-others-base index 186272455fb8..89e3db419942 100644 --- a/docker/jenkins-images/Dockerfile-jdk-others-base +++ b/docker/jenkins-images/Dockerfile-jdk-others-base @@ -6,7 +6,7 @@ ENV NODE_MAJOR '16' RUN \ add-apt-repository -y ppa:deadsnakes && \ apt-get update && apt-get install -y ca-certificates curl gnupg && mkdir -p /etc/apt/keyrings && \ - curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --no-tty --yes --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ diff --git a/docker/jenkins-images/Dockerfile-python-base b/docker/jenkins-images/Dockerfile-python-base index beed493d07d0..86fd25d44237 100644 --- a/docker/jenkins-images/Dockerfile-python-base +++ b/docker/jenkins-images/Dockerfile-python-base @@ -6,7 +6,7 @@ ENV NODE_MAJOR '16' RUN \ add-apt-repository -y ppa:deadsnakes && \ apt-get update && apt-get install -y ca-certificates curl gnupg && mkdir -p /etc/apt/keyrings && \ - curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ + curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --no-tty --yes --dearmor -o /etc/apt/keyrings/nodesource.gpg && \ echo "deb 
[signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | tee /etc/apt/sources.list.d/nodesource.list && \ apt-get update -q -y && \ DEBIAN_FRONTEND=noninteractive apt-get install -y \ From f1dcf2d686c6aa0da8f8703121e5ac539f90973c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Thu, 4 Jan 2024 11:57:37 +0100 Subject: [PATCH 15/50] Enable Tweedie test --- ...NOPASS.R => runit_GLM_tweedie_ml_dispersion_estimation_only.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename h2o-r/tests/testdir_algos/glm/{runit_GLM_tweedie_ml_dispersion_estimation_only_NOPASS.R => runit_GLM_tweedie_ml_dispersion_estimation_only.R} (100%) diff --git a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only_NOPASS.R b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R similarity index 100% rename from h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only_NOPASS.R rename to h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R From 3f2e7860abfa95bd5d6240c19840b41dbf93844f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Fri, 5 Jan 2024 12:41:52 +0100 Subject: [PATCH 16/50] Use make install instead of make altinstall when installing python 3.6 to solve issue with virtualenv --- docker/scripts/install_python_version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/scripts/install_python_version b/docker/scripts/install_python_version index 44d2cda6676f..78787eef231b 100755 --- a/docker/scripts/install_python_version +++ b/docker/scripts/install_python_version @@ -46,7 +46,7 @@ for python_version in "${array[@]}"; do export CC="clang" # needed to avoid SegFault during install ./configure make - make altinstall + make install popd else DEBIAN_FRONTEND=noninteractive apt-get -y install python${python_version} python${python_version}-dev 
python${pkg_suffix}-setuptools From e0c36cca5bd94e02ddb2238095996d90b4c70d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 8 Jan 2024 11:08:29 +0100 Subject: [PATCH 17/50] Increase default build python version to 3.7 --- docker/scripts/build-h2o-3 | 2 +- scripts/jenkins/groovy/buildH2O3.groovy | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/scripts/build-h2o-3 b/docker/scripts/build-h2o-3 index ca6a8447e8c4..42d00d00a469 100755 --- a/docker/scripts/build-h2o-3 +++ b/docker/scripts/build-h2o-3 @@ -8,7 +8,7 @@ if [[ ! ${H2O_BRANCH} ]]; then fi if [[ ! -n ${PYTHON_VERSION} ]]; then echo "Using default Python version" - PYTHON_VERSION='3.6' + PYTHON_VERSION='3.7' fi # cd home diff --git a/scripts/jenkins/groovy/buildH2O3.groovy b/scripts/jenkins/groovy/buildH2O3.groovy index 1a6f6eaf9581..55d9f102bf9d 100644 --- a/scripts/jenkins/groovy/buildH2O3.groovy +++ b/scripts/jenkins/groovy/buildH2O3.groovy @@ -1,6 +1,6 @@ def call(final pipelineContext) { - final String PYTHON_VERSION = '3.6' + final String PYTHON_VERSION = '3.7' final String R_VERSION = '3.4.1' final String JAVA_VERSION = '8' From a7870d733b872828d8c8e7299502202923093c47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 8 Jan 2024 14:30:33 +0100 Subject: [PATCH 18/50] Update R library path due to new OS version --- docker/scripts/install_R_version | 4 ++++ scripts/jenkins/groovy/buildH2O3Public.groovy | 2 +- scripts/jenkins/groovy/defineTestStages.groovy | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docker/scripts/install_R_version b/docker/scripts/install_R_version index 1d6f6f0ebcd6..3af9eb8514be 100755 --- a/docker/scripts/install_R_version +++ b/docker/scripts/install_R_version @@ -33,8 +33,12 @@ cd R-${R_VERSION} --with-tk-config=/usr/lib/tkConfig.sh --with-tcl-config=/usr/lib/tclConfig.sh make make install + +# R4.0 uses just lib R3.5 uses lib64 mkdir -p 
${R_VERSION_HOME}/lib/R/library || true +mkdir -p ${R_VERSION_HOME}/lib64/R/library || true chmod a+w -R ${R_VERSION_HOME}/lib/R/library +chmod a+w -R ${R_VERSION_HOME}/lib64/R/library # Cleanup cd ../.. diff --git a/scripts/jenkins/groovy/buildH2O3Public.groovy b/scripts/jenkins/groovy/buildH2O3Public.groovy index e615f08ede04..7d459dcf61b8 100644 --- a/scripts/jenkins/groovy/buildH2O3Public.groovy +++ b/scripts/jenkins/groovy/buildH2O3Public.groovy @@ -1,6 +1,6 @@ def call(final pipelineContext) { - final String PYTHON_VERSION = '3.6' + final String PYTHON_VERSION = '3.7' final String R_VERSION = '3.4.1' final String JAVA_VERSION = '8' diff --git a/scripts/jenkins/groovy/defineTestStages.groovy b/scripts/jenkins/groovy/defineTestStages.groovy index 6dfb48561732..aa6b85c5016c 100644 --- a/scripts/jenkins/groovy/defineTestStages.groovy +++ b/scripts/jenkins/groovy/defineTestStages.groovy @@ -942,7 +942,7 @@ private void invokeStageUsingDefinition(final stageDef, final pipelineContext) { private void invokeStage(final pipelineContext, final body) { final String DEFAULT_JAVA = '8' - final String DEFAULT_PYTHON = '3.6' + final String DEFAULT_PYTHON = '3.7' final String DEFAULT_R = '3.5.3' final int DEFAULT_TIMEOUT = 60 final String DEFAULT_EXECUTION_SCRIPT = 'h2o-3/scripts/jenkins/groovy/defaultStage.groovy' From 29a38c1bd250b4bc32c426ee14dac1f6c1f3849d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 10 Jan 2024 14:38:34 +0100 Subject: [PATCH 19/50] Install chromium directly (apt now installs just a file that tells you to install it using snap which requires running systemd in the docker) --- docker/jenkins-images/Dockerfile-python-base | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docker/jenkins-images/Dockerfile-python-base b/docker/jenkins-images/Dockerfile-python-base index 86fd25d44237..492677a8cbf6 100644 --- a/docker/jenkins-images/Dockerfile-python-base +++ 
b/docker/jenkins-images/Dockerfile-python-base @@ -15,11 +15,13 @@ RUN \ libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libx11-xcb-dev libxcb1 libxcomposite1 libxcomposite-dev \ libxcursor1 libxcursor-dev libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 \ ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils libblas-dev liblapack-dev \ - chromium-browser \ - libkrb5-dev \ - nodejs && \ + libgbm1 libkrb5-dev nodejs && \ apt-get clean && \ - rm -rf /var/cache/apt/* + rm -rf /var/cache/apt/* && \ + wget "https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/Linux_x64%2F1244635%2Fchrome-linux.zip?alt=media" -O chrome-linux.zip && \ + unzip chrome-linux.zip -d /opt/ && \ + rm chrome-linux.zip && \ + ln -s /opt/chrome-linux/chrome /usr/bin/chromium-browser ENV CHROME_BIN=/usr/bin/chromium-browser From a38f6db7bafbe46450b511ff6a9c67e1ae466fd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Fri, 12 Jan 2024 16:18:38 +0100 Subject: [PATCH 20/50] Update python 3.6 pkgs to resolve statsmodels and scipy version incompatibility --- h2o-py/test-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/h2o-py/test-requirements.txt b/h2o-py/test-requirements.txt index 41ae53f0c109..2680d2860b85 100644 --- a/h2o-py/test-requirements.txt +++ b/h2o-py/test-requirements.txt @@ -13,13 +13,13 @@ wheel<=0.42.0; python_version < '3.7' wheel==0.42.0; python_version >= '3.7' numpy<=1.23.5; python_version < '3.8' numpy==1.23.5; python_version >= '3.8' -scipy==1.2.3; python_version == '3.6' +scipy==1.5.4; python_version == '3.6' scipy==1.6.3; python_version == '3.7' scipy==1.10.1; python_version > '3.7' pandas==0.24.2; python_version == '3.6' pandas==1.3.5; python_version > '3.6' pyarrow==10.0.1; python_version > '3.6' -statsmodels==0.9.0; python_version == '3.6' +statsmodels==0.12.2; python_version == '3.6' statsmodels==0.13.5; python_version > '3.6' 
patsy==0.5.3 scikit-learn==0.24.2; python_version <= '3.7' From 373556d376465adbe635ac63655ce0b201276552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 16 Jan 2024 13:02:57 +0100 Subject: [PATCH 21/50] update R tests --- .../glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R | 8 +++++--- .../runit_pubdev_8683_glm_gamma_dispersion_factor_large.R | 4 ++-- .../runit_pubdev_8774_gamma_fix_dispersion_parameter.R | 4 ++-- .../glm/runit_pubdev_8775_gamma_null_model.R | 4 ++-- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R index d7dcaaf8e182..3fd691ec5bcf 100644 --- a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R +++ b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R @@ -97,6 +97,8 @@ train_models <- function(simData, tweedie_p, phi) { test_helper <- function(p, phi, offset) { + eps <- 1e-3 + simData <- generate_data(p, phi, offset) attach(train_models(simData, p, phi)) @@ -109,7 +111,7 @@ test_helper <- function(p, phi, offset) { p = p, phi = hdispersion ) - h2o.loglikelihood(hfit) - ) < 1e-5) + ) < eps) # are we better than R's implementation or at least the same? 
smaller the negative likelihood the better hnll <- nll( @@ -124,10 +126,10 @@ test_helper <- function(p, phi, offset) { p = p, phi = rdispersion ) - expect_true(hnll <= rnll || abs(hnll - rnll) < 1e-5) + expect_true(hnll <= rnll || abs(hnll - rnll) < eps) # check dispersion - allowed_dispersion_difference <- 1.01*abs(phi - rdispersion) + allowed_dispersion_difference <- 1.05*abs(phi - rdispersion) print(allowed_dispersion_difference) cat("H2o: ", hdispersion,"; R: ", rdispersion, "\n") expect_true(abs(phi - hdispersion) < allowed_dispersion_difference) diff --git a/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R b/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R index da6b457a6f7c..471eaaf647a6 100644 --- a/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R +++ b/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R @@ -28,8 +28,8 @@ test_glm_gammas <- function() { generate_dataset<-function(f1R, numRows, numCols, pow, phi, mu) { resp <- tweedie::rtweedie(numRows, xi=pow, mu, phi, power=pow) - f1h2o <- as.h2o.data.frame(f1R) - resph2o <- as.h2o.data.frame(as.data.frame(resp)) + f1h2o <- as.h2o(f1R) + resph2o <- as.h2o(as.data.frame(resp)) finalFrame <- h2o.cbind(f1h2o, resph2o) return(finalFrame) } diff --git a/h2o-r/tests/testdir_algos/glm/runit_pubdev_8774_gamma_fix_dispersion_parameter.R b/h2o-r/tests/testdir_algos/glm/runit_pubdev_8774_gamma_fix_dispersion_parameter.R index 69e7500bb0df..d280733a6663 100644 --- a/h2o-r/tests/testdir_algos/glm/runit_pubdev_8774_gamma_fix_dispersion_parameter.R +++ b/h2o-r/tests/testdir_algos/glm/runit_pubdev_8774_gamma_fix_dispersion_parameter.R @@ -72,8 +72,8 @@ test_glm_gamma_fix_dispersion_parameter <- function() { generate_dataset<-function(f1R, numRows, numCols, pow, phi, mu) { resp <- tweedie::rtweedie(numRows, xi=pow, mu, phi, power=pow) - f1h2o <- as.h2o.data.frame(f1R) - resph2o <- 
as.h2o.data.frame(as.data.frame(resp)) + f1h2o <- as.h2o(f1R) + resph2o <- as.h2o(as.data.frame(resp)) finalFrame <- h2o.cbind(f1h2o, resph2o) return(finalFrame) } diff --git a/h2o-r/tests/testdir_algos/glm/runit_pubdev_8775_gamma_null_model.R b/h2o-r/tests/testdir_algos/glm/runit_pubdev_8775_gamma_null_model.R index 80e839b631db..97188c04085e 100644 --- a/h2o-r/tests/testdir_algos/glm/runit_pubdev_8775_gamma_null_model.R +++ b/h2o-r/tests/testdir_algos/glm/runit_pubdev_8775_gamma_null_model.R @@ -31,8 +31,8 @@ test_gamma_null_model <- function() { generate_dataset<-function(f1R, numRows, numCols, pow, phi, mu) { resp <- tweedie::rtweedie(numRows, xi=pow, mu, phi, power=pow) - f1h2o <- as.h2o.data.frame(f1R) - resph2o <- as.h2o.data.frame(as.data.frame(resp)) + f1h2o <- as.h2o(f1R) + resph2o <- as.h2o(as.data.frame(resp)) finalFrame <- h2o.cbind(f1h2o, resph2o) return(finalFrame) } From fcd2cad05dd648d306c40e9ddb23aabea38cdd85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 16 Jan 2024 17:23:50 +0100 Subject: [PATCH 22/50] Add new error message for cacert test --- h2o-py/tests/testdir_misc/pyunit_cacert_conf.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/h2o-py/tests/testdir_misc/pyunit_cacert_conf.py b/h2o-py/tests/testdir_misc/pyunit_cacert_conf.py index 04b411da366e..e30a540150e7 100644 --- a/h2o-py/tests/testdir_misc/pyunit_cacert_conf.py +++ b/h2o-py/tests/testdir_misc/pyunit_cacert_conf.py @@ -32,7 +32,8 @@ def test_cacert_in_config(): except H2OConnectionError as e: # any response is a good response - TLS handshake was successful which proves the certificate was used strErr = str(e) - assert "HTTP 404 Not Found" in strErr or "X509: NO_CERTIFICATE_OR_CRL_FOUND" in strErr + assert "HTTP 404 Not Found" in strErr or "X509: NO_CERTIFICATE_OR_CRL_FOUND" in strErr or \ + "[X509] no certificate or crl found" in strErr if __name__ == "__main__": From 51d8f2438ac9ad0857351bd10eff0479daf82aea Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 16 Jan 2024 17:34:36 +0100 Subject: [PATCH 23/50] Deal with lifelines >= 0.27 incompatibility with py < 3.9 --- h2o-py/test-requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/h2o-py/test-requirements.txt b/h2o-py/test-requirements.txt index 2680d2860b85..8bd35a62e27c 100644 --- a/h2o-py/test-requirements.txt +++ b/h2o-py/test-requirements.txt @@ -34,7 +34,8 @@ sphinx_rtd_theme==0.2.4 sphinxcontrib-osexample>=0.1.1 shap==0.29.1 boto3==1.18.65 -lifelines>=0.19.5 +lifelines<0.27; python_version < '3.9' +lifelines>=0.27; python_version >= '3.9' xlsxwriter==1.3.7 xgboost==1.7.6; python_version >= '3.8' causalml==0.12.3; python_version <= '3.7' From 58aab3cbd7571cc93eecbbe359e3259ef928249f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 16 Jan 2024 18:07:03 +0100 Subject: [PATCH 24/50] Add more debugging info to the Tweedie test --- ...t_GLM_tweedie_ml_dispersion_estimation_only.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R index 3fd691ec5bcf..18fb0e69aa98 100644 --- a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R +++ b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R @@ -113,6 +113,9 @@ test_helper <- function(p, phi, offset) { ) - h2o.loglikelihood(hfit) ) < eps) + cat("Difference in negative log-likelihood calculation between R and H2O: ", + abs(nll(simData, mu = hmu, p = p, phi = hdispersion) - h2o.loglikelihood(hfit)), "\n", sep="") + # are we better than R's implementation or at least the same? 
smaller the negative likelihood the better hnll <- nll( simData, @@ -127,11 +130,18 @@ test_helper <- function(p, phi, offset) { phi = rdispersion ) expect_true(hnll <= rnll || abs(hnll - rnll) < eps) + cat("H2O negative log-likelihood: ", hnll, "\n", "R negative log-likelihood: ", rnll, "\n", + "H2O is better: ", hnll < rnll, "\n", + "H2O and R are roughly similar: ", abs(hnll - rnll) < eps, "\n", sep="" + ) # check dispersion - allowed_dispersion_difference <- 1.05*abs(phi - rdispersion) - print(allowed_dispersion_difference) - cat("H2o: ", hdispersion,"; R: ", rdispersion, "\n") + allowed_dispersion_difference <- 1.01*abs(phi - rdispersion) + cat("Dispersion tolerance: ", allowed_dispersion_difference, "\n", sep="") + cat("H2O Dispersion Estimation: ", hdispersion, "\nR Dispersion Estimation: ", rdispersion, "\n", + "H2O is as close as R or closer to the true dispersion: ", abs(phi - hdispersion) < allowed_dispersion_difference, + "\n", sep="" + ) expect_true(abs(phi - hdispersion) < allowed_dispersion_difference) } From ee38e5fb340b6c58b86b59c1d6996d527230174a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 16 Jan 2024 18:23:17 +0100 Subject: [PATCH 25/50] Fix runit_pubdev_8683_glm_gamma_dispersion_factor_large.R --- .../glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R b/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R index 471eaaf647a6..3884edbd19c5 100644 --- a/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R +++ b/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R @@ -82,7 +82,7 @@ compareH2ORGLM <- family = "gamma", lambda = 0, nfolds = 0, - dispersion_factor_method = "ml", + dispersion_parameter_method = "ml", compute_p_values = TRUE ) print("Comparing H2O and R 
GLM model coefficients....") From 619bce2e29687e169cd9227df63440a2da32e358 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 17 Jan 2024 10:36:28 +0100 Subject: [PATCH 26/50] fix matplotlib issue in python demos and add more debugging info to tweedie ml dispersion estimation --- h2o-py/tests/pydemo_utils/utilsPY.py | 7 +++++-- .../glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/h2o-py/tests/pydemo_utils/utilsPY.py b/h2o-py/tests/pydemo_utils/utilsPY.py index eb8181feaf0e..121c2b384b24 100644 --- a/h2o-py/tests/pydemo_utils/utilsPY.py +++ b/h2o-py/tests/pydemo_utils/utilsPY.py @@ -41,8 +41,11 @@ def ipy_valid_lines(block): for line in lines: if "import matplotlib.pyplot as plt" in line or "%matplotlib inline" in line: import matplotlib - matplotlib.use('Agg', warn=False) - + try: + matplotlib.use('Agg', warn=False) + except TypeError: + matplotlib.use('Agg') + # remove ipython magic functions lines = [line for line in lines if not line.startswith('%')] diff --git a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R index 18fb0e69aa98..406ec7c314f0 100644 --- a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R +++ b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R @@ -67,7 +67,12 @@ train_models <- function(simData, tweedie_p, phi) { weights = weight, offset = offset_col ) + cat("\n\nRfit:\n") + print(coef(rfit)) + cat("\nH2Ofit:\n") + print(hfit@model$coefficients_table) + cat("\n\n") rdispersion <- summary(rfit)$dispersion # not a MLE if (tweedie_p > 1.4 && tweedie_p < 1.75 && !(phi == 1000 && tweedie_p == 1.7)) { # R's implementation can take very long time to finish for some other values From 8bfb7da1409542768d231c94e5638c7dabee1aca Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 17 Jan 2024 13:26:31 +0100 Subject: [PATCH 27/50] Fix h2o-py/demos/uplift_random_forest_compare_causalml.ipynb by removing the treatment from test set during prediction --- h2o-py/demos/uplift_random_forest_compare_causalml.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-py/demos/uplift_random_forest_compare_causalml.ipynb b/h2o-py/demos/uplift_random_forest_compare_causalml.ipynb index a9ff799ddc77..12e4a7c42492 100644 --- a/h2o-py/demos/uplift_random_forest_compare_causalml.ipynb +++ b/h2o-py/demos/uplift_random_forest_compare_causalml.ipynb @@ -108,7 +108,7 @@ " y=train_df[response_column].values)\n", "\n", "#test_df = train_df\n", - "causalml_preds = causalml_uplift_model.predict(test_df.values)" + "causalml_preds = causalml_uplift_model.predict(test_df.drop(\"treatment\", axis=1).values)" ] }, { From 20a3d2f8987f8c59cdf367f5cdc4ebeb93ec8fd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Thu, 18 Jan 2024 10:11:49 +0100 Subject: [PATCH 28/50] Make use of datatable and polars opt-in not opt-out --- h2o-py/h2o/frame.py | 5 ++- h2o-py/h2o/utils/shared_utils.py | 20 +++++++-- ...h_15729_15936_datatable_polars_2_pandas.py | 45 ++++++++----------- ...9_15936_datatable_polars_2_pandas_large.py | 21 ++++----- 4 files changed, 46 insertions(+), 45 deletions(-) diff --git a/h2o-py/h2o/frame.py b/h2o-py/h2o/frame.py index 70fb407d7f8f..f091a00cf0df 100644 --- a/h2o-py/h2o/frame.py +++ b/h2o-py/h2o/frame.py @@ -1966,7 +1966,7 @@ def as_data_frame(self, use_pandas=True, header=True): >>> airlines['FlightNum'] = airlines['FlightNum'].asfactor() >>> df = airlines.as_data_frame() >>> df - """ + """ if can_use_pandas() and use_pandas: import pandas if (can_use_datatable()) or (can_use_polars() and can_use_pyarrow()): # can use multi-thread @@ -1987,7 +1987,8 @@ def as_data_frame(self, use_pandas=True, header=True): frame.pop(0) return frame - def 
convert_with_polars(selfself, fileName): + + def convert_with_polars(self, fileName): import polars as pl dt_frame = pl.read_csv(fileName, null_values = "") return dt_frame.to_pandas() diff --git a/h2o-py/h2o/utils/shared_utils.py b/h2o-py/h2o/utils/shared_utils.py index 15cd23656609..d016e0b78e76 100644 --- a/h2o-py/h2o/utils/shared_utils.py +++ b/h2o-py/h2o/utils/shared_utils.py @@ -121,7 +121,7 @@ def temp_ctr(): def is_module_available(mod): - if local_env(mod+"_disabled"): # fast track if module is explicitly disabled + if local_env(mod+"_disabled"): # fast track if module is explicitly disabled return False if mod in sys.modules and sys.modules[mod] is not None: # fast track + safer in unusual environments return True @@ -129,20 +129,30 @@ def is_module_available(mod): import importlib.util return importlib.util.find_spec(mod) is not None + +def is_module_enabled(mod): + return local_env(mod+"_enabled") and is_module_available(mod) + + def can_use_pandas(): return is_module_available('pandas') + def can_use_datatable(): - return is_module_available('datatable') and sys.version_info.major == 3 and sys.version_info.minor <= 9 + return is_module_enabled('datatable') and sys.version_info.major == 3 and sys.version_info.minor <= 9 + def can_install_datatable(): return sys.version_info.major == 3 and sys.version_info.minor <= 9 + def can_install_polars(): return sys.version_info.major == 3 and sys.version_info.minor > 9 + def can_use_polars(): - return is_module_available('polars') and sys.version_info.major == 3 and sys.version_info.minor > 9 + return is_module_enabled('polars') and sys.version_info.major == 3 and sys.version_info.minor > 9 + def can_use_pyarrow(): if can_use_pandas() and sys.version_info.minor > 9: @@ -152,9 +162,11 @@ def can_use_pyarrow(): else: return False + def can_use_numpy(): return is_module_available('numpy') + _url_safe_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~" _url_chars_map = [chr(i) if chr(i) in 
_url_safe_chars else "%%%02X" % i for i in range(256)] @@ -376,7 +388,7 @@ def slice_is_normalized(s): h2o_predictor_class = "hex.genmodel.tools.PredictCsv" -def mojo_predict_pandas(dataframe, mojo_zip_path, genmodel_jar_path=None, classpath=None, java_options=None, +def mojo_predict_pandas(dataframe, mojo_zip_path, genmodel_jar_path=None, classpath=None, java_options=None, verbose=False, setInvNumNA=False, predict_contributions=False, predict_calibrated=False): """ MOJO scoring function to take a Pandas frame and use MOJO model as zip file to score. diff --git a/h2o-py/tests/testdir_misc/pyunit_gh_15729_15936_datatable_polars_2_pandas.py b/h2o-py/tests/testdir_misc/pyunit_gh_15729_15936_datatable_polars_2_pandas.py index c233515bce6a..219c34b779c8 100644 --- a/h2o-py/tests/testdir_misc/pyunit_gh_15729_15936_datatable_polars_2_pandas.py +++ b/h2o-py/tests/testdir_misc/pyunit_gh_15729_15936_datatable_polars_2_pandas.py @@ -8,6 +8,7 @@ import pandas as pd from h2o.utils.threading import local_context + # if datatable or polars/pyarrow is installed, this test will show that using datatable to convert h2o frame to pandas # frame is much faster for large datasets. 
def test_frame_conversion(dataset, original_pandas_frame, module): @@ -38,7 +39,8 @@ def test_frame_conversion(dataset, original_pandas_frame, module): diff = (new_pandas_frame[colNames[ind]] - original_pandas_frame[colNames[ind]]).abs() assert diff.max() < 1e-10 -def singl_thread_pandas_conversion(dataset): + +def single_thread_pandas_conversion(dataset): with local_context(datatable_disabled=True, polars_disabled=True): print("converting h2o frame to pandas frame using single thread") h2oFrame = h2o.import_file(pyunit_utils.locate(dataset)) @@ -48,41 +50,32 @@ def singl_thread_pandas_conversion(dataset): print("H2O frame to Pandas frame conversion time with single thread for dataset {1}: {0}".format(newTime, dataset)) return h2oframe_panda + def test_polars_datatable(): file1 = "smalldata/titanic/titanic_expanded.csv" file2 = "smalldata/glm_test/multinomial_3Class_10KRow.csv" file3 = "smalldata/timeSeries/CreditCard-ts_train.csv" - original_converted_frame1 = singl_thread_pandas_conversion(file1) - original_converted_frame2 = singl_thread_pandas_conversion(file2) - original_converted_frame3 = singl_thread_pandas_conversion(file3) + original_converted_frame1 = single_thread_pandas_conversion(file1) + original_converted_frame2 = single_thread_pandas_conversion(file2) + original_converted_frame3 = single_thread_pandas_conversion(file3) - if not(can_install_datatable()): - print("datatable is not available. 
Skipping tests using datatable.") - else: - if not(can_use_datatable()): - pyunit_utils.install("datatable") - - with local_context(polars_disabled=True): # run with datatable + if can_install_datatable(): + with local_context(polars_disabled=True, datatable_enabled=True): # run with datatable + assert can_use_datatable(), "Can't use datatable" print("test data frame conversion using datatable.") test_frame_conversion(file1, original_converted_frame1, "datatable") test_frame_conversion(file2, original_converted_frame2, "datatable") test_frame_conversion(file3, original_converted_frame3, "datatable") - - if not(can_install_polars()): - print("polars, pyarrow are not available. Skipping tests using polars and pyarrow") - else: - if not(can_use_polars()): - pyunit_utils.install("polars") - if not(can_use_pyarrow()): - pyunit_utils.install("pyarrow") - - with local_context(datatable_disabled=True): - if can_use_polars() and can_use_pyarrow(): - print("test data frame conversion using polars and pyarrow.") - test_frame_conversion(file1, original_converted_frame1, "polars and pyarrow") - test_frame_conversion(file2, original_converted_frame2, "polars and pyarrow") - test_frame_conversion(file3, original_converted_frame3, "polars and pyarrow") + + if can_install_polars(): + with local_context(datatable_disabled=True, polars_enabled=True): + assert can_use_polars() and can_use_pyarrow(), "Can't use polars" + print("test data frame conversion using polars and pyarrow.") + test_frame_conversion(file1, original_converted_frame1, "polars and pyarrow") + test_frame_conversion(file2, original_converted_frame2, "polars and pyarrow") + test_frame_conversion(file3, original_converted_frame3, "polars and pyarrow") + if __name__ == "__main__": pyunit_utils.standalone_test(test_polars_datatable) diff --git a/h2o-py/tests/testdir_misc/pyunit_gh_15729_15936_datatable_polars_2_pandas_large.py b/h2o-py/tests/testdir_misc/pyunit_gh_15729_15936_datatable_polars_2_pandas_large.py index 
f0acf2df1b7e..6a1944ebd0cf 100644 --- a/h2o-py/tests/testdir_misc/pyunit_gh_15729_15936_datatable_polars_2_pandas_large.py +++ b/h2o-py/tests/testdir_misc/pyunit_gh_15729_15936_datatable_polars_2_pandas_large.py @@ -7,6 +7,7 @@ import time from h2o.utils.threading import local_context + def test_frame_conversion(dataset, original_pandas_frame, module): # convert frame using datatable or polar h2oFrame = h2o.import_file(pyunit_utils.locate(dataset)) @@ -35,6 +36,7 @@ def test_frame_conversion(dataset, original_pandas_frame, module): diff = (new_pandas_frame[colNames[ind]] - original_pandas_frame[colNames[ind]]).abs() assert diff.max() < 1e-10 + def single_thread_pandas_conversion(dataset): with local_context(datatable_disabled=True, polars_disabled=True): print("converting h2o frame to pandas frame using single thread") @@ -45,6 +47,7 @@ def single_thread_pandas_conversion(dataset): print("H2O frame to Pandas frame conversion time with single thread for dataset {1}: {0}".format(newTime, dataset)) return h2oframe_panda + # if datatable or polars/pyarrow is installed, this test will show that using datatable to convert h2o frame to pandas # frame is much faster for large datasets. def test_polars_datatable_2_pandas(): @@ -52,25 +55,17 @@ def test_polars_datatable_2_pandas(): original_converted_frame1 = single_thread_pandas_conversion(file1) # need to run conversion in single thread if can_install_datatable(): - if not(can_use_datatable()): - pyunit_utils.install("datatable") - with local_context(polars_disabled=True): # run with datatable + with local_context(polars_disabled=True, datatable_enabled=True): # run with datatable + assert can_use_datatable(), "Can't use datatable" print("test data frame conversion using datatable.") test_frame_conversion(file1, original_converted_frame1, "datatable") - else: - print("datatable is not available. 
Skipping tests using datatable.") - if can_install_polars(): - if not(can_use_polars()): - pyunit_utils.install("polars") - if not(can_use_pyarrow()): - pyunit_utils.install("pyarrow") - with local_context(datatable_disabled=True): + with local_context(datatable_disabled=True, polars_enabled=True): + assert can_use_polars() and can_use_pyarrow(), "Can't use polars" print("test data frame conversion using polars and pyarrow.") test_frame_conversion(file1, original_converted_frame1, "polars and pyarrow") - else: - print("polars, pyarrow are not available. Skipping tests using polars and pyarrow") + if __name__ == "__main__": pyunit_utils.standalone_test(test_polars_datatable_2_pandas) From 5c8d97eb9526dc845050af6c68d6251540c101e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Thu, 18 Jan 2024 16:55:12 +0100 Subject: [PATCH 29/50] Make r tweedie dispersion test more reproducible --- .../glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R index 406ec7c314f0..72937fc09fff 100644 --- a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R +++ b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R @@ -7,6 +7,7 @@ library(tweedie) generate_data <- function(tweedie_p, phi, offset) { + set.seed(12345) molsp <- 1000 x <- seq(1, 10, 1) yd <- exp(1 + 1.015 * x) @@ -57,8 +58,11 @@ train_models <- function(simData, tweedie_p, phi) { lambda = 0, compute_p_values = T, solver = "IRLSM", - calc_like = T + calc_like = T, + dispersion_epsilon = 1e-5, + seed = 12345 ) + set.seed(12345) rfit <- glm( yr ~ xt, From 5594e32c61efc79911d4b6189b6ab3f5c54ec333 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Fri, 19 Jan 2024 13:33:51 +0100 Subject: 
[PATCH 30/50] Fix logical error in an exceptional case in Tweedie dispersion estimation --- h2o-algos/src/main/java/hex/glm/DispersionUtils.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/h2o-algos/src/main/java/hex/glm/DispersionUtils.java b/h2o-algos/src/main/java/hex/glm/DispersionUtils.java index 9bb1fe9fa742..29d3280f95b6 100644 --- a/h2o-algos/src/main/java/hex/glm/DispersionUtils.java +++ b/h2o-algos/src/main/java/hex/glm/DispersionUtils.java @@ -141,12 +141,15 @@ private static double goldenRatioDispersionSearch(GLMModel.GLMParameters parms, double midLoPhi = sortedPhis.get(counter - 2); double midLoLLH = sortedLLHs.get(counter - 2); - if (midLoPhi > upperBound) { + double midHiPhi = lowPhi + d; + double midHiLLH = getTweedieLogLikelihood(parms, dinfo, midHiPhi, mu); + if (midLoPhi > midHiPhi) { midLoPhi = hiPhi - d; midLoLLH = getTweedieLogLikelihood(parms, dinfo, midLoPhi, mu); } - double midHiPhi = lowPhi + d; - double midHiLLH = getTweedieLogLikelihood(parms, dinfo, midHiPhi, mu); + assert lowerBound <= midLoPhi; + assert midLoPhi <= midHiPhi; + assert midHiPhi <= upperBound; for (; counter < iterationsLeft; counter++) { Log.info("Tweedie golden-section search[iter=" + counter + ", phis=(" + lowPhi + ", " + midLoPhi + ", " + midHiPhi + ", " + hiPhi + "), likelihoods=(" + From 9de4cf364c8ec28ddc034803cfe743f598fbbd83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Fri, 19 Jan 2024 14:46:37 +0100 Subject: [PATCH 31/50] log more information about Tweedie dispersion estimation --- h2o-algos/src/main/java/hex/glm/DispersionUtils.java | 1 + .../glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/h2o-algos/src/main/java/hex/glm/DispersionUtils.java b/h2o-algos/src/main/java/hex/glm/DispersionUtils.java index 29d3280f95b6..6676899c7f88 100644 --- a/h2o-algos/src/main/java/hex/glm/DispersionUtils.java +++ 
b/h2o-algos/src/main/java/hex/glm/DispersionUtils.java @@ -213,6 +213,7 @@ public static double estimateTweedieDispersionOnly(GLMModel.GLMParameters parms, logLikelihoodSanityChecks.add(bestLogLikelihoodFromSanityCheck); dispersionsSanityChecks.add(dispersionCurr); for (int index = 0; index < parms._max_iterations_dispersion; index++) { + Log.info("Tweedie dispersion ML estimation [iter="+index+", phi="+dispersionCurr+"]"); tDispersion.updateDispersionP(dispersionCurr); DispersionTask.ComputeMaxSumSeriesTsk computeTask = new DispersionTask.ComputeMaxSumSeriesTsk(tDispersion, parms, true); diff --git a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R index 72937fc09fff..c417a71c9060 100644 --- a/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R +++ b/h2o-r/tests/testdir_algos/glm/runit_GLM_tweedie_ml_dispersion_estimation_only.R @@ -41,14 +41,14 @@ nll <- function(simData, mu, phi, p) { train_models <- function(simData, tweedie_p, phi) { simDataH2O <- as.h2o(simData) simData <- as.data.frame(simData) - + offset <- simData[1, "offset_col"] hfit <- h2o.glm( training_frame = simDataH2O, x = 'xt', y = 'yr', weights_column = 'weight', offset_column = "offset_col", - model_id = 'simDatatest', + model_id = paste0("TweedieDispersionMLE_p", tweedie_p,"_phi", phi, "_offset", offset), family = "tweedie", link = "tweedie", tweedie_link_power = 0, From 4711d1bfc5a6104681063e34c695d9e3aa13c133 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Fri, 19 Jan 2024 15:27:41 +0100 Subject: [PATCH 32/50] Increase tolerance in pyunit_PUBDEV_6117_xgboost_compare.py --- .../testdir_algos/xgboost/pyunit_PUBDEV_6117_xgboost_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-py/tests/testdir_algos/xgboost/pyunit_PUBDEV_6117_xgboost_compare.py 
b/h2o-py/tests/testdir_algos/xgboost/pyunit_PUBDEV_6117_xgboost_compare.py index ab4865897eaa..67efc03fdc3f 100644 --- a/h2o-py/tests/testdir_algos/xgboost/pyunit_PUBDEV_6117_xgboost_compare.py +++ b/h2o-py/tests/testdir_algos/xgboost/pyunit_PUBDEV_6117_xgboost_compare.py @@ -39,7 +39,7 @@ def comparison_test(): dtrain=nativeXGBoostInput, num_boost_round=nativeXGBoostParam[1]) nativePred = nativeModel.predict(data=nativeXGBoostInput, ntree_limit=nativeXGBoostParam[1]) pyunit_utils.summarizeResult_binomial(h2oPredictD, nativePred, -1, -1, -1, - -1, tolerance=1e-10) + -1, tolerance=1e-7) else: print("******** Test skipped. This test cannot be performed in multinode environment.") From 7f94689c1d6bba3d377dbb7fc5d52f3994a37617 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Sun, 21 Jan 2024 15:50:11 +0100 Subject: [PATCH 33/50] Disable Newton's method in dispersion estimation in GLM Tweedie --- .../main/java/hex/glm/DispersionUtils.java | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/h2o-algos/src/main/java/hex/glm/DispersionUtils.java b/h2o-algos/src/main/java/hex/glm/DispersionUtils.java index 6676899c7f88..e751b79e2dbf 100644 --- a/h2o-algos/src/main/java/hex/glm/DispersionUtils.java +++ b/h2o-algos/src/main/java/hex/glm/DispersionUtils.java @@ -191,14 +191,28 @@ private static double goldenRatioDispersionSearch(GLMModel.GLMParameters parms, */ public static double estimateTweedieDispersionOnly(GLMModel.GLMParameters parms, GLMModel model, Job job, double[] beta, DataInfo dinfo) { - long currTime = System.currentTimeMillis(); - long modelBuiltTime = currTime - model._output._start_time; - long timeLeft = parms._max_runtime_secs > 0 ? 
(long) (parms._max_runtime_secs * 1000 - modelBuiltTime) - : Long.MAX_VALUE; - TweedieMLDispersionOnly tDispersion = new TweedieMLDispersionOnly(parms.train(), parms, model, beta, dinfo); + DispersionTask.GenPrediction gPred = new DispersionTask.GenPrediction(beta, model, dinfo).doAll( 1, Vec.T_NUM, dinfo._adaptedFrame); Vec mu = Scope.track(gPred.outputFrame(Key.make(), new String[]{"prediction"}, null)).vec(0); + List logLikelihoodSanityChecks = new ArrayList<>(); + List dispersionsSanityChecks = new ArrayList<>(); + logLikelihoodSanityChecks.add(getTweedieLogLikelihood(parms, dinfo, parms._init_dispersion_parameter, mu)); + dispersionsSanityChecks.add(parms._init_dispersion_parameter); + final double dispersion = goldenRatioDispersionSearch(parms, dinfo, mu,logLikelihoodSanityChecks, dispersionsSanityChecks, job); + Log.info("Tweedie dispersion estimate = "+dispersion); + return dispersion; + + /* + // FIXME: The Newton's method seems not to be reproducible on jenkins (runit_GLM_tweedie_ml_dispersion_estimation_only.R) + + long timeLeft = parms._max_runtime_secs > 0 ? 
(long) (parms._max_runtime_secs * 1000 - modelBuiltTime) + : Long.MAX_VALUE; + long currTime = System.currentTimeMillis(); + + long modelBuiltTime = currTime - model._output._start_time; + + TweedieMLDispersionOnly tDispersion = new TweedieMLDispersionOnly(parms.train(), parms, model, beta, dinfo); double dispersionCurr = tDispersion._dispersionParameter; // initial value of dispersion parameter double dispersionNew; @@ -303,6 +317,8 @@ public static double estimateTweedieDispersionOnly(GLMModel.GLMParameters parms, } else return dispersionCurr; + + */ } static class NegativeBinomialGradientAndHessian extends MRTask { From 518038a6bd6831d0b8147587f384bdee6d46cc13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 22 Jan 2024 09:10:36 +0100 Subject: [PATCH 34/50] Fix pubdev_8685_tweedie_dispersion_factor_exceed2 --- .../main/java/hex/glm/DispersionUtils.java | 2 +- ..._8685_tweedie_dispersion_factor_exceed2.py | 29 ++++++++++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/h2o-algos/src/main/java/hex/glm/DispersionUtils.java b/h2o-algos/src/main/java/hex/glm/DispersionUtils.java index e751b79e2dbf..6ecaa98a21fe 100644 --- a/h2o-algos/src/main/java/hex/glm/DispersionUtils.java +++ b/h2o-algos/src/main/java/hex/glm/DispersionUtils.java @@ -199,7 +199,7 @@ public static double estimateTweedieDispersionOnly(GLMModel.GLMParameters parms, List dispersionsSanityChecks = new ArrayList<>(); logLikelihoodSanityChecks.add(getTweedieLogLikelihood(parms, dinfo, parms._init_dispersion_parameter, mu)); dispersionsSanityChecks.add(parms._init_dispersion_parameter); - final double dispersion = goldenRatioDispersionSearch(parms, dinfo, mu,logLikelihoodSanityChecks, dispersionsSanityChecks, job); + final double dispersion = goldenRatioDispersionSearch(parms, dinfo, mu, logLikelihoodSanityChecks, dispersionsSanityChecks, job); Log.info("Tweedie dispersion estimate = "+dispersion); return dispersion; diff --git 
a/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py b/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py index 98f84d7d8907..a9efea978384 100644 --- a/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py +++ b/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py @@ -6,11 +6,19 @@ from h2o.estimators.glm import H2OGeneralizedLinearEstimator -def test_gamma_dispersion_factor(): - training_data = h2o.import_file( - "http://h2o-public-test-data.s3.amazonaws.com/smalldata/glm_test/tweedie_p3_phi1_10KRows.csv") +def test_tweedie_dispersion_factor(): + training_data = h2o.import_file(pyunit_utils.locate("smalldata/glm_test/tweedie_p3_phi1_10KRows.csv")) Y = 'x' x = ['abs.C1.', 'abs.C2.', 'abs.C3.', 'abs.C4.', 'abs.C5.'] + + # With Tweedie variance power >= 2, Tweedie distribution has no mass or density on 0 -> log likelihood would be -Inf + training_data = training_data[training_data[Y] > 0] + + model_pearson = H2OGeneralizedLinearEstimator(family='tweedie', lambda_=0, compute_p_values=True, + tweedie_variance_power=3, + dispersion_parameter_method="pearson") + model_pearson.train(training_frame=training_data, x=x, y=Y) + # train ml model with initial guess below the true disperion value model_ml = H2OGeneralizedLinearEstimator(family='tweedie', lambda_=0, compute_p_values=True, tweedie_variance_power=3, @@ -22,18 +30,19 @@ def test_gamma_dispersion_factor(): init_dispersion_parameter=1.5, dispersion_parameter_method="ml") model_ml2.train(training_frame=training_data, x=x, y=Y) - model_pearson = H2OGeneralizedLinearEstimator(family='tweedie', lambda_=0, compute_p_values=True, + model_pearson2 = H2OGeneralizedLinearEstimator(family='tweedie', lambda_=0, compute_p_values=True, tweedie_variance_power=3, dispersion_parameter_method="pearson") - model_pearson.train(training_frame=training_data, x=x, y=Y) + 
model_pearson2.train(training_frame=training_data, x=x, y=Y) true_dispersion_factor = 1.0 dispersion_parameter_estimated = model_ml._model_json["output"]["dispersion"] dispersion_parameter_estimated2 = model_ml2._model_json["output"]["dispersion"] dispersion_parameter_estimated_pearson = model_pearson._model_json["output"]["dispersion"] - print("True dispersion parameter {0}. Estiamted ml dispersion parameter {1}. Estimated pearson dispersion " - "parameter {2}.".format(true_dispersion_factor, dispersion_parameter_estimated, - dispersion_parameter_estimated_pearson)) + dispersion_parameter_estimated_pearson2 = model_pearson._model_json["output"]["dispersion"] + print("True dispersion parameter {0}. Estimated ml dispersion parameter {1}. Estimated pearson dispersion " + "parameter {2} and without the data with y==0 {3}.".format(true_dispersion_factor, dispersion_parameter_estimated, + dispersion_parameter_estimated_pearson, dispersion_parameter_estimated_pearson2)) # make sure the ml estimates are closer to the true dispersion value than the dispersion value from pearson assert abs(true_dispersion_factor - dispersion_parameter_estimated) <= abs( dispersion_parameter_estimated_pearson - true_dispersion_factor), \ @@ -50,6 +59,6 @@ def test_gamma_dispersion_factor(): if __name__ == "__main__": - pyunit_utils.standalone_test(test_gamma_dispersion_factor) + pyunit_utils.standalone_test(test_tweedie_dispersion_factor) else: - test_gamma_dispersion_factor() + test_tweedie_dispersion_factor() From 09dfcd033aa50dffa55ce3783a89adf95fc3aa73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 23 Jan 2024 11:59:26 +0100 Subject: [PATCH 35/50] Increase tolerance in pyunit_PUBDEV_6117_xgboost_compare.py to 1e-6 --- .../testdir_algos/xgboost/pyunit_PUBDEV_6117_xgboost_compare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-py/tests/testdir_algos/xgboost/pyunit_PUBDEV_6117_xgboost_compare.py 
b/h2o-py/tests/testdir_algos/xgboost/pyunit_PUBDEV_6117_xgboost_compare.py index 67efc03fdc3f..b13dc857f5a3 100644 --- a/h2o-py/tests/testdir_algos/xgboost/pyunit_PUBDEV_6117_xgboost_compare.py +++ b/h2o-py/tests/testdir_algos/xgboost/pyunit_PUBDEV_6117_xgboost_compare.py @@ -39,7 +39,7 @@ def comparison_test(): dtrain=nativeXGBoostInput, num_boost_round=nativeXGBoostParam[1]) nativePred = nativeModel.predict(data=nativeXGBoostInput, ntree_limit=nativeXGBoostParam[1]) pyunit_utils.summarizeResult_binomial(h2oPredictD, nativePred, -1, -1, -1, - -1, tolerance=1e-7) + -1, tolerance=1e-6) else: print("******** Test skipped. This test cannot be performed in multinode environment.") From 689520a23b58c616f53876f37124ada2127669d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Tue, 23 Jan 2024 15:38:42 +0100 Subject: [PATCH 36/50] Fix models.R comments (roxygen complains) --- h2o-r/h2o-package/R/models.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R index 58189a0ccb88..0ea86bfce89b 100755 --- a/h2o-r/h2o-package/R/models.R +++ b/h2o-r/h2o-package/R/models.R @@ -1,10 +1,9 @@ -#' -#' H2O Model Related Functions -#' +# +# H2O Model Related Functions +# #' @importFrom graphics strwidth par legend polygon arrows points grid #' @importFrom grDevices dev.copy dev.off png rainbow adjustcolor #' @include classes.R - NULL #----------------------------------------------------------------------------------------------------------------------- @@ -871,7 +870,7 @@ h2o.transform_frame <- function(model, fr) { #' #' # Retrieve the results to view the best predictor subsets: #' h2o.result(sweepModel) -#' '} +#' } #' #' @export h2o.result <- function(model) { From 6d3b9f5ad1ce7ffe48d7d9ae15239d411bd47a4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 24 Jan 2024 10:48:35 +0100 Subject: [PATCH 37/50] Fix tweedie dispersion estimation for 
variance power >= 2 with response == 0 --- .../main/java/hex/glm/DispersionUtils.java | 13 ++++++++++-- .../main/java/hex/glm/TweedieEstimator.java | 20 ++++++++++++------- ..._8685_tweedie_dispersion_factor_exceed2.py | 13 +++--------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/h2o-algos/src/main/java/hex/glm/DispersionUtils.java b/h2o-algos/src/main/java/hex/glm/DispersionUtils.java index 6ecaa98a21fe..18e99aa0d952 100644 --- a/h2o-algos/src/main/java/hex/glm/DispersionUtils.java +++ b/h2o-algos/src/main/java/hex/glm/DispersionUtils.java @@ -77,7 +77,8 @@ private static double getTweedieLogLikelihood(GLMModel.GLMParameters parms, Data false, false, false, - false) + false, + true) .compute(mu, dinfo._adaptedFrame.vec(parms._response_column), parms._weights_column == null @@ -191,7 +192,15 @@ private static double goldenRatioDispersionSearch(GLMModel.GLMParameters parms, */ public static double estimateTweedieDispersionOnly(GLMModel.GLMParameters parms, GLMModel model, Job job, double[] beta, DataInfo dinfo) { - + if (parms._tweedie_variance_power >= 2 && + dinfo._adaptedFrame.vec(parms._response_column).min() <= 0) { + Log.warn("Response contains zeros or negative values but "+ + "Tweedie variance power does not support zeros. "+ + "Instances with response <= 0 will be skipped."); + model.addWarning("Response contains zeros or negative values but "+ + "Tweedie variance power does not support zeros. 
"+ + "Instances with response <= 0 will be skipped."); + } DispersionTask.GenPrediction gPred = new DispersionTask.GenPrediction(beta, model, dinfo).doAll( 1, Vec.T_NUM, dinfo._adaptedFrame); Vec mu = Scope.track(gPred.outputFrame(Key.make(), new String[]{"prediction"}, null)).vec(0); diff --git a/h2o-algos/src/main/java/hex/glm/TweedieEstimator.java b/h2o-algos/src/main/java/hex/glm/TweedieEstimator.java index 7cb5c1d709bd..991e1578a220 100644 --- a/h2o-algos/src/main/java/hex/glm/TweedieEstimator.java +++ b/h2o-algos/src/main/java/hex/glm/TweedieEstimator.java @@ -48,6 +48,7 @@ public class TweedieEstimator extends MRTask { private final boolean _needDp; private final boolean _needDpDp; private final boolean _forceInversion; + private final boolean _skipZerosIfVarPowerGT2; public long _skippedRows; public long _totalRows; @@ -63,15 +64,20 @@ enum LikelihoodEstimator { public LikelihoodEstimator _method; TweedieEstimator(double variancePower, double dispersion) { - this(variancePower, dispersion, false, false, false, false); + this(variancePower, dispersion, false, false, false, false, false); } TweedieEstimator(double variancePower, double dispersion, boolean forceInversion) { - this(variancePower, dispersion, false, false, false, forceInversion); + this(variancePower, dispersion, false, false, false, forceInversion, false); } - TweedieEstimator(double variancePower, double dispersion, - boolean useSaddlepointApprox, boolean needDp, boolean needDpDp, boolean forceInversion) { + TweedieEstimator(double variancePower, double dispersion, boolean useSaddlepointApprox, boolean needDp, + boolean needDpDp, boolean forceInversion) { + this(variancePower, dispersion, useSaddlepointApprox, needDp, needDpDp, forceInversion, false); + } + + TweedieEstimator(double variancePower, double dispersion, boolean useSaddlepointApprox, boolean needDp, + boolean needDpDp, boolean forceInversion, boolean skipZerosIfVarPowerGT2) { assert variancePower >= 1 : "Tweedie variance power has 
to be greater than 1!"; assert (forceInversion || useSaddlepointApprox) && !(needDp || needDpDp) || !forceInversion || !useSaddlepointApprox; _p = variancePower; @@ -102,6 +108,7 @@ enum LikelihoodEstimator { _needDp = needDp; _needDpDp = needDpDp; _forceInversion = forceInversion; // useful when bracketing close to p=2 + _skipZerosIfVarPowerGT2 = skipZerosIfVarPowerGT2; } @@ -163,7 +170,6 @@ private double invGaussLLH(double y, double mu, double w) { private void accumulate(double llh, double grad, double hess) { - //if (Double.isFinite(llh)) //&& llh <= 0) _loglikelihood += llh; if (Double.isFinite(grad)) _llhDp += grad; @@ -174,7 +180,7 @@ private void accumulate(double llh, double grad, double hess) { private double logLikelihood(double y, double mu, double w, boolean accumulate) { if (w == 0) return 0; if (_p >= 2 && y <= 0) { - if (accumulate) accumulate(Double.NEGATIVE_INFINITY, 0, 0); + if (accumulate && !_skipZerosIfVarPowerGT2) accumulate(Double.NEGATIVE_INFINITY, 0, 0); return Double.NEGATIVE_INFINITY; } double[] llh_llhDp_llhDpDp = MemoryManager.malloc8d(3); @@ -278,7 +284,7 @@ private void tweedieSeries(double y, double mu, double w, double[] out_llh_dp_dp } public TweedieEstimator compute(Vec mu, Vec y, Vec weights) { - if (_p >= 2 && y.min() <= 0) { + if (_p >= 2 && y.min() <= 0 && !_skipZerosIfVarPowerGT2) { _loglikelihood = Double.NEGATIVE_INFINITY; _llhDp = 0; _llhDpDp = 0; diff --git a/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py b/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py index a9efea978384..eee1b77419a1 100644 --- a/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py +++ b/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py @@ -10,9 +10,6 @@ def test_tweedie_dispersion_factor(): training_data = h2o.import_file(pyunit_utils.locate("smalldata/glm_test/tweedie_p3_phi1_10KRows.csv")) Y = 'x' x 
= ['abs.C1.', 'abs.C2.', 'abs.C3.', 'abs.C4.', 'abs.C5.'] - - # With Tweedie variance power >= 2, Tweedie distribution has no mass or density on 0 -> log likelihood would be -Inf - training_data = training_data[training_data[Y] > 0] model_pearson = H2OGeneralizedLinearEstimator(family='tweedie', lambda_=0, compute_p_values=True, tweedie_variance_power=3, @@ -30,19 +27,15 @@ def test_tweedie_dispersion_factor(): init_dispersion_parameter=1.5, dispersion_parameter_method="ml") model_ml2.train(training_frame=training_data, x=x, y=Y) - model_pearson2 = H2OGeneralizedLinearEstimator(family='tweedie', lambda_=0, compute_p_values=True, - tweedie_variance_power=3, - dispersion_parameter_method="pearson") - model_pearson2.train(training_frame=training_data, x=x, y=Y) true_dispersion_factor = 1.0 dispersion_parameter_estimated = model_ml._model_json["output"]["dispersion"] dispersion_parameter_estimated2 = model_ml2._model_json["output"]["dispersion"] dispersion_parameter_estimated_pearson = model_pearson._model_json["output"]["dispersion"] - dispersion_parameter_estimated_pearson2 = model_pearson._model_json["output"]["dispersion"] + print("True dispersion parameter {0}. Estimated ml dispersion parameter {1}. 
Estimated pearson dispersion " - "parameter {2} and without the data with y==0 {3}.".format(true_dispersion_factor, dispersion_parameter_estimated, - dispersion_parameter_estimated_pearson, dispersion_parameter_estimated_pearson2)) + "parameter {2}".format(true_dispersion_factor, dispersion_parameter_estimated, + dispersion_parameter_estimated_pearson)) # make sure the ml estimates are closer to the true dispersion value than the dispersion value from pearson assert abs(true_dispersion_factor - dispersion_parameter_estimated) <= abs( dispersion_parameter_estimated_pearson - true_dispersion_factor), \ From 72be72d8d17bf8c8d1fb6048338c918eaf004a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 24 Jan 2024 12:38:23 +0100 Subject: [PATCH 38/50] Fix long line in R comment for CRAN checks --- h2o-r/h2o-package/R/models.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R index 0ea86bfce89b..dcff26351981 100755 --- a/h2o-r/h2o-package/R/models.R +++ b/h2o-r/h2o-package/R/models.R @@ -812,7 +812,9 @@ h2o.transform_frame <- function(model, fr) { #' h2o.init() #' #' # Import the prostate dataset: -#' prostate <- h2o.importFile("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv") +#' prostate <- h2o.importFile( +#' "http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv" +#' ) #' #' # Set the predictors & response: #' predictors <- c("AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS") From 6d690143015cd4c2892a6b5bcb79ef123a95456e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Thu, 25 Jan 2024 13:53:00 +0100 Subject: [PATCH 39/50] Move model_pearson back where it used to be in tweedie dispersion factor test --- ..._pubdev_8685_tweedie_dispersion_factor_exceed2.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git 
a/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py b/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py index eee1b77419a1..8bb4ba8400b9 100644 --- a/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py +++ b/h2o-py/tests/testdir_algos/glm/pyunit_pubdev_8685_tweedie_dispersion_factor_exceed2.py @@ -10,11 +10,6 @@ def test_tweedie_dispersion_factor(): training_data = h2o.import_file(pyunit_utils.locate("smalldata/glm_test/tweedie_p3_phi1_10KRows.csv")) Y = 'x' x = ['abs.C1.', 'abs.C2.', 'abs.C3.', 'abs.C4.', 'abs.C5.'] - - model_pearson = H2OGeneralizedLinearEstimator(family='tweedie', lambda_=0, compute_p_values=True, - tweedie_variance_power=3, - dispersion_parameter_method="pearson") - model_pearson.train(training_frame=training_data, x=x, y=Y) # train ml model with initial guess below the true disperion value model_ml = H2OGeneralizedLinearEstimator(family='tweedie', lambda_=0, compute_p_values=True, @@ -26,13 +21,16 @@ def test_tweedie_dispersion_factor(): tweedie_variance_power=3, init_dispersion_parameter=1.5, dispersion_parameter_method="ml") model_ml2.train(training_frame=training_data, x=x, y=Y) - + + model_pearson = H2OGeneralizedLinearEstimator(family='tweedie', lambda_=0, compute_p_values=True, + tweedie_variance_power=3, + dispersion_parameter_method="pearson") + model_pearson.train(training_frame=training_data, x=x, y=Y) true_dispersion_factor = 1.0 dispersion_parameter_estimated = model_ml._model_json["output"]["dispersion"] dispersion_parameter_estimated2 = model_ml2._model_json["output"]["dispersion"] dispersion_parameter_estimated_pearson = model_pearson._model_json["output"]["dispersion"] - print("True dispersion parameter {0}. Estimated ml dispersion parameter {1}. 
Estimated pearson dispersion " "parameter {2}".format(true_dispersion_factor, dispersion_parameter_estimated, dispersion_parameter_estimated_pearson)) From c28ef0b95534e5ac734bf5b4e754c8ade2ad42cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Thu, 25 Jan 2024 14:35:03 +0100 Subject: [PATCH 40/50] Remove an unnecessary space --- h2o-py/h2o/utils/shared_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/h2o-py/h2o/utils/shared_utils.py b/h2o-py/h2o/utils/shared_utils.py index d016e0b78e76..37e0cd2f9d79 100644 --- a/h2o-py/h2o/utils/shared_utils.py +++ b/h2o-py/h2o/utils/shared_utils.py @@ -388,7 +388,7 @@ def slice_is_normalized(s): h2o_predictor_class = "hex.genmodel.tools.PredictCsv" -def mojo_predict_pandas(dataframe, mojo_zip_path, genmodel_jar_path=None, classpath=None, java_options=None, +def mojo_predict_pandas(dataframe, mojo_zip_path, genmodel_jar_path=None, classpath=None, java_options=None, verbose=False, setInvNumNA=False, predict_contributions=False, predict_calibrated=False): """ MOJO scoring function to take a Pandas frame and use MOJO model as zip file to score. 
From 3dc2316c076b5e77c4d232b25e8e2561214f53d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 29 Jan 2024 13:59:48 +0100 Subject: [PATCH 41/50] Fix R3.5.3 init java 11 test --- scripts/jenkins/groovy/makeTarget.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/jenkins/groovy/makeTarget.groovy b/scripts/jenkins/groovy/makeTarget.groovy index c7d93e7e364c..40a6aa992f20 100644 --- a/scripts/jenkins/groovy/makeTarget.groovy +++ b/scripts/jenkins/groovy/makeTarget.groovy @@ -108,7 +108,7 @@ def call(final pipelineContext, final Closure body) { private void execMake(final String buildAction, final String h2o3dir) { sh """ - export JAVA_HOME=`find /usr/lib/jvm -name '*java*${env.JAVA_VERSION}*' -type l` + export JAVA_HOME=`find /usr/lib/jvm -name '*java*${env.JAVA_VERSION}*' -type l | head -n1` export PATH=\${JAVA_HOME}/bin:\${PATH} cd ${h2o3dir} From 0f97d8a9ec48dd9eae46a31fc7454252ad1c1b5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 31 Jan 2024 12:51:49 +0100 Subject: [PATCH 42/50] Make runit_pubdev_8683_glm_gamma_dispersion_factor_large.R more robust - if R GLM fails to converge don't fail the test --- ...v_8683_glm_gamma_dispersion_factor_large.R | 37 +++++++++++++------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R b/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R index 3884edbd19c5..a69415bfe4af 100644 --- a/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R +++ b/h2o-r/tests/testdir_algos/glm/runit_pubdev_8683_glm_gamma_dispersion_factor_large.R @@ -57,13 +57,15 @@ compareH2ORGLM <- tolerance = 2e-4) { print("Define formula for R") formula <- (df[, "resp"] ~ .) 
- rmodel <- glm( - formula = formula, - data = df[, x], - family = tweedie(var.power = vpower, link.power = - lpower), - na.action = na.omit - ) + rmodel <- tryCatch({ + glm( + formula = formula, + data = df[, x], + family = tweedie(var.power = vpower, link.power = + lpower), + na.action = na.omit + ) + }, error = function(e) NULL) h2omodel <- h2o.glm( x = x, @@ -94,12 +96,19 @@ compareH2ORGLM <- h2omodel@model$dispersion, sep = ":" )) - print(paste( - "R model dispersion estimate", - summary(rmodel)$dispersion, - sep = ":" - )) + if (!is.null(rmodel)) { + print(paste( + "R model dispersion estimate", + summary(rmodel)$dispersion, + sep = ":" + )) + } h2oDiff = abs(h2omodel@model$dispersion - truedisp) + if (is.null(rmodel)) { + cat("R model did not converge!\n", + "H2O dispersion estimate - truedisp = ", h2oDiff, sep="") + return() + } rDiff = abs(summary(rmodel)$dispersion - truedisp) if (rDiff < h2oDiff) { val = (h2oDiff - rDiff)/truedisp @@ -121,6 +130,10 @@ compareH2ORGLM <- compareCoeffs <- function(rmodel, h2omodel, tolerance, x) { print("H2O GLM model....") print(h2omodel) + if (is.null(rmodel)) { + print("R model did not converge") + return() + } print("R GLM model....") print(summary(rmodel)) h2oCoeff <- h2omodel@model$coefficients From b68c59bfa964470cd4b7c20a1a93110372355f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Thu, 1 Feb 2024 14:20:40 +0100 Subject: [PATCH 43/50] Remove packages for python < 3.5 from test-requirements.txt --- h2o-py/test-requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/h2o-py/test-requirements.txt b/h2o-py/test-requirements.txt index 8bd35a62e27c..4239a8e1fbe8 100644 --- a/h2o-py/test-requirements.txt +++ b/h2o-py/test-requirements.txt @@ -45,8 +45,6 @@ datatable==1.0.0; python_version <= '3.9' polars==0.19.17; python_version > '3.9' pyarrow==14.0.1; python_version > '3.9' # not directly required, pinned by Snyk to avoid a vulnerability -pygments==2.7.4; python_version < '3.5' 
pygments==2.15.1; python_version >= '3.7' -sphinx==3.0.4; python_version < '3.5' sphinx==3.5.4; python_version >= '3.6' and python_version <= '3.8' sphinx==6.1.3; python_version > '3.8' From 73bb536e1a688658b82f8705c950f18a9e4bfd45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 5 Feb 2024 17:46:05 +0100 Subject: [PATCH 44/50] Try to fix R issues when building on jenkins Basically, some files are not used in the final package probably due to some non-deterministic issue in roxygen2 which fills our NAMESPACE file and creates our R man pages. --- h2o-bindings/bin/gen_R.py | 7 +++---- h2o-r/build.gradle | 1 - h2o-r/h2o-DESCRIPTION.template | 1 + h2o-r/h2o-package/R/adaboost.R | 2 +- h2o-r/h2o-package/R/aggregator.R | 2 +- h2o-r/h2o-package/R/anovaglm.R | 2 +- h2o-r/h2o-package/R/classes.R | 1 + h2o-r/h2o-package/R/constants.R | 13 ++++++------ h2o-r/h2o-package/R/coxph.R | 2 +- h2o-r/h2o-package/R/decisiontree.R | 2 +- h2o-r/h2o-package/R/deeplearning.R | 2 +- h2o-r/h2o-package/R/edicts.R | 20 +++++++++---------- h2o-r/h2o-package/R/export.R | 1 + h2o-r/h2o-package/R/extendedisolationforest.R | 2 +- h2o-r/h2o-package/R/frame.R | 1 + h2o-r/h2o-package/R/gam.R | 2 +- h2o-r/h2o-package/R/gbm.R | 2 +- h2o-r/h2o-package/R/generic.R | 2 +- h2o-r/h2o-package/R/glm.R | 2 +- h2o-r/h2o-package/R/glrm.R | 2 +- h2o-r/h2o-package/R/grid.R | 10 +++++----- h2o-r/h2o-package/R/import.R | 13 ++++++------ h2o-r/h2o-package/R/infogram.R | 2 +- h2o-r/h2o-package/R/isolationforest.R | 2 +- h2o-r/h2o-package/R/isotonicregression.R | 2 +- h2o-r/h2o-package/R/kmeans.R | 2 +- h2o-r/h2o-package/R/modelselection.R | 2 +- h2o-r/h2o-package/R/naivebayes.R | 2 +- h2o-r/h2o-package/R/pca.R | 2 +- h2o-r/h2o-package/R/psvm.R | 2 +- h2o-r/h2o-package/R/randomforest.R | 2 +- h2o-r/h2o-package/R/rulefit.R | 2 +- h2o-r/h2o-package/R/segment.R | 10 +++++----- h2o-r/h2o-package/R/stackedensemble.R | 2 +- h2o-r/h2o-package/R/svd.R | 2 +- h2o-r/h2o-package/R/targetencoder.R 
| 2 +- h2o-r/h2o-package/R/upliftrandomforest.R | 2 +- h2o-r/h2o-package/R/word2vec.R | 2 +- h2o-r/h2o-package/R/xgboost.R | 2 +- 39 files changed, 69 insertions(+), 65 deletions(-) diff --git a/h2o-bindings/bin/gen_R.py b/h2o-bindings/bin/gen_R.py index 310b76433b96..fdb0150d4d28 100644 --- a/h2o-bindings/bin/gen_R.py +++ b/h2o-bindings/bin/gen_R.py @@ -38,13 +38,12 @@ def gen_module(schema, algo, module): update_param = get_customizations_for(algo, 'update_param') yield "# This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py" - yield "# Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) \n#'" - yield "# -------------------------- %s -------------------------- #" % model_name + yield "# Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) \n#" + yield "# -------------------------- %s -------------------------- #\n#'" % model_name # start documentation if doc_preamble: - yield "#'" yield reformat_block(doc_preamble, prefix="#' ") - yield "#'" + yield "#'" # start doc for signature required_params = odict([(p[0] if isinstance(p, tuple) else p, p[1] if isinstance(p, tuple) else None) diff --git a/h2o-r/build.gradle b/h2o-r/build.gradle index fb546c6fd559..da26d3fcd05b 100644 --- a/h2o-r/build.gradle +++ b/h2o-r/build.gradle @@ -252,7 +252,6 @@ setProperties.dependsOn gitbranch, getRVersion, pdflatex cpH2OAppJar.dependsOn setProperties setPackageFiles.dependsOn cpH2OAppJar buildPackageDocumentation.dependsOn setPackageFiles -genPDF.dependsOn setPackageFiles genPDF.dependsOn buildPackageDocumentation cpPDF.dependsOn genPDF cpMainPKG.dependsOn cpPDF diff --git a/h2o-r/h2o-DESCRIPTION.template b/h2o-r/h2o-DESCRIPTION.template index d4c56118bfa3..3d33b4c698b3 100644 --- a/h2o-r/h2o-DESCRIPTION.template +++ b/h2o-r/h2o-DESCRIPTION.template @@ -47,6 +47,7 @@ URL: https://github.com/h2oai/h2o-3 BugReports: https://github.com/h2oai/h2o-3/issues NeedsCompilation: no SystemRequirements: Java (>= 8, <= 17) +Encoding: 
UTF-8 Depends: R (>= 2.13.0), methods, stats diff --git a/h2o-r/h2o-package/R/adaboost.R b/h2o-r/h2o-package/R/adaboost.R index e39e5b51831a..c6ab53c647be 100644 --- a/h2o-r/h2o-package/R/adaboost.R +++ b/h2o-r/h2o-package/R/adaboost.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- AdaBoost Model -------------------------- # #' #' Build an AdaBoost model diff --git a/h2o-r/h2o-package/R/aggregator.R b/h2o-r/h2o-package/R/aggregator.R index 84b0f3ea1f65..2b2bff2ff8bb 100644 --- a/h2o-r/h2o-package/R/aggregator.R +++ b/h2o-r/h2o-package/R/aggregator.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- H2O Aggregator Model -------------------------- # #' #' Build an Aggregated Frame diff --git a/h2o-r/h2o-package/R/anovaglm.R b/h2o-r/h2o-package/R/anovaglm.R index 4c1be524aa83..ee4975cbed87 100644 --- a/h2o-r/h2o-package/R/anovaglm.R +++ b/h2o-r/h2o-package/R/anovaglm.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- ANOVA GLM -------------------------- # #' #' H2O ANOVAGLM is used to calculate Type III SS which is used to evaluate the contributions of individual predictors diff --git a/h2o-r/h2o-package/R/classes.R b/h2o-r/h2o-package/R/classes.R index 5fd4508855ae..296105062f62 100755 --- a/h2o-r/h2o-package/R/classes.R +++ b/h2o-r/h2o-package/R/classes.R @@ -8,6 +8,7 @@ #` will typically never have to reason with these objects directly, as there are #` S3 accessor methods provided for creating new objects. 
#` +NULL #----------------------------------------------------------------------------------------------------------------------- # Class Definitions diff --git a/h2o-r/h2o-package/R/constants.R b/h2o-r/h2o-package/R/constants.R index 1a623864abc8..fd302735c8d6 100755 --- a/h2o-r/h2o-package/R/constants.R +++ b/h2o-r/h2o-package/R/constants.R @@ -1,9 +1,10 @@ -#' -#' H2O Package Constants -#' -#' The API endpoints for interacting with H2O via REST are named here. -#' -#' Additionally, environment variables for the H2O package are named here. +# +# H2O Package Constants +# +# The API endpoints for interacting with H2O via REST are named here. +# +# Additionally, environment variables for the H2O package are named here. + #' Endpoint Version .h2o.__REST_API_VERSION <- 3L diff --git a/h2o-r/h2o-package/R/coxph.R b/h2o-r/h2o-package/R/coxph.R index 440cb0c27535..278f3bafdf50 100644 --- a/h2o-r/h2o-package/R/coxph.R +++ b/h2o-r/h2o-package/R/coxph.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- coxph -------------------------- # #' #' Trains a Cox Proportional Hazards Model (CoxPH) on an H2O dataset diff --git a/h2o-r/h2o-package/R/decisiontree.R b/h2o-r/h2o-package/R/decisiontree.R index cd096466cd53..85d51865a0bf 100644 --- a/h2o-r/h2o-package/R/decisiontree.R +++ b/h2o-r/h2o-package/R/decisiontree.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Decision Tree Model in H2O -------------------------- # #' #' Build a Decision Tree model diff --git a/h2o-r/h2o-package/R/deeplearning.R b/h2o-r/h2o-package/R/deeplearning.R index 1b6ed6c3aad8..bc69d06d9523 100644 --- a/h2o-r/h2o-package/R/deeplearning.R +++ b/h2o-r/h2o-package/R/deeplearning.R @@ -1,6 +1,6 @@ # This file is 
auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Deep Learning - Neural Network -------------------------- # #' #' Build a Deep Neural Network model using CPUs diff --git a/h2o-r/h2o-package/R/edicts.R b/h2o-r/h2o-package/R/edicts.R index 5591eb44ed35..bc71dfb0a0a8 100755 --- a/h2o-r/h2o-package/R/edicts.R +++ b/h2o-r/h2o-package/R/edicts.R @@ -1,13 +1,13 @@ -#' -#' TODO: No objects in this file are being used. Either remove file or use objects. -#' -#' Append a pair to a list. -#' -#' Contained here are a set of helper methods that perform type checking on the value passed in. -#' -#' @param parms a list to add the pair to -#' @param k a key, typically the name of some algorithm parameter -#' @param v a value, the value of the algorithm parameter +# +# TODO: No objects in this file are being used. Either remove file or use objects. +# +# Append a pair to a list. +# +# Contained here are a set of helper methods that perform type checking on the value passed in. +# +# @param parms a list to add the pair to +# @param k a key, typically the name of some algorithm parameter +# @param v a value, the value of the algorithm parameter .addParm <- function(parms, k, v) { cmd <- sprintf("parms$%s = v", k) diff --git a/h2o-r/h2o-package/R/export.R b/h2o-r/h2o-package/R/export.R index 7ea5a163564d..1197c089cb8a 100755 --- a/h2o-r/h2o-package/R/export.R +++ b/h2o-r/h2o-package/R/export.R @@ -3,6 +3,7 @@ #` #` Export data to local disk or HDFS. #` Save models to local disk or HDFS. +NULL #' Export an H2O Data Frame (H2OFrame) to a File or to a collection of Files. 
#' diff --git a/h2o-r/h2o-package/R/extendedisolationforest.R b/h2o-r/h2o-package/R/extendedisolationforest.R index cd278885f660..0739f7dc0fde 100644 --- a/h2o-r/h2o-package/R/extendedisolationforest.R +++ b/h2o-r/h2o-package/R/extendedisolationforest.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- extendedisolationforest -------------------------- # #' #' Trains an Extended Isolation Forest model diff --git a/h2o-r/h2o-package/R/frame.R b/h2o-r/h2o-package/R/frame.R index 45b36a11df32..b05e669c836d 100644 --- a/h2o-r/h2o-package/R/frame.R +++ b/h2o-r/h2o-package/R/frame.R @@ -34,6 +34,7 @@ #` the first N (typically 10) rows and all cols of the frame #` E$nrow <- the row count (total size, generally much larger than the local cached rows) #` E$types <- the H2O column types +NULL # since we only import data.table via requireNamespace this is required for data.table calls to # stop pretending to being data.frame and start behaving as data.table diff --git a/h2o-r/h2o-package/R/gam.R b/h2o-r/h2o-package/R/gam.R index c9c86c3fa319..28854d0a2989 100644 --- a/h2o-r/h2o-package/R/gam.R +++ b/h2o-r/h2o-package/R/gam.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Generalized Additive Model -------------------------- # #' #' Fit a General Additive Model diff --git a/h2o-r/h2o-package/R/gbm.R b/h2o-r/h2o-package/R/gbm.R index eab8529befac..4df63c27674b 100644 --- a/h2o-r/h2o-package/R/gbm.R +++ b/h2o-r/h2o-package/R/gbm.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Gradient Boosting Machine -------------------------- # #' #' Build 
gradient boosted classification or regression trees diff --git a/h2o-r/h2o-package/R/generic.R b/h2o-r/h2o-package/R/generic.R index 698e81674c8c..ada5e580c513 100644 --- a/h2o-r/h2o-package/R/generic.R +++ b/h2o-r/h2o-package/R/generic.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- generic -------------------------- # #' #' Imports a generic model into H2O. Such model can be used then used for scoring and obtaining diff --git a/h2o-r/h2o-package/R/glm.R b/h2o-r/h2o-package/R/glm.R index 002dbf0aebd7..4c59645bd341 100644 --- a/h2o-r/h2o-package/R/glm.R +++ b/h2o-r/h2o-package/R/glm.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- H2O Generalized Linear Models -------------------------- # #' #' Fit a generalized linear model diff --git a/h2o-r/h2o-package/R/glrm.R b/h2o-r/h2o-package/R/glrm.R index 85810e2c6865..6ceaad9b9db2 100644 --- a/h2o-r/h2o-package/R/glrm.R +++ b/h2o-r/h2o-package/R/glrm.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Generalized Low Rank Model -------------------------- # #' #' Generalized low rank decomposition of an H2O data frame diff --git a/h2o-r/h2o-package/R/grid.R b/h2o-r/h2o-package/R/grid.R index 9c56ee526669..f11cab231817 100644 --- a/h2o-r/h2o-package/R/grid.R +++ b/h2o-r/h2o-package/R/grid.R @@ -1,8 +1,8 @@ -#' -#' H2O Grid Support -#' -#' Provides a set of functions to launch a grid search and get -#' its results. +# +# H2O Grid Support +# +# Provides a set of functions to launch a grid search and get +# its results. 
#------------------------------------- # Grid-related functions start here :) diff --git a/h2o-r/h2o-package/R/import.R b/h2o-r/h2o-package/R/import.R index 54cfc461a4d2..1f2e2c805fc3 100755 --- a/h2o-r/h2o-package/R/import.R +++ b/h2o-r/h2o-package/R/import.R @@ -1,9 +1,10 @@ -##`' -##`' Data Import -##`' -##`' Importing data is a _lazy_ parse of the data. It adds an extra step so that a user may specify a variety of options -##`' including a header file, separator type, and in the future column type. Additionally, the import phase provides -##`' feedback on whether or not a folder or group of files may be imported together. +# +# Data Import +# +# Importing data is a _lazy_ parse of the data. It adds an extra step so that a user may specify a variety of options +# including a header file, separator type, and in the future column type. Additionally, the import phase provides +# feedback on whether or not a folder or group of files may be imported together. + #' #' Import Files into H2O diff --git a/h2o-r/h2o-package/R/infogram.R b/h2o-r/h2o-package/R/infogram.R index 1b1c2f0a3a5e..953864d80d04 100644 --- a/h2o-r/h2o-package/R/infogram.R +++ b/h2o-r/h2o-package/R/infogram.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Infogram -------------------------- # #' #' H2O Infogram diff --git a/h2o-r/h2o-package/R/isolationforest.R b/h2o-r/h2o-package/R/isolationforest.R index 4e69effcee90..9842a54161f4 100644 --- a/h2o-r/h2o-package/R/isolationforest.R +++ b/h2o-r/h2o-package/R/isolationforest.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- isolationforest -------------------------- # #' #' Trains an Isolation Forest model diff --git a/h2o-r/h2o-package/R/isotonicregression.R 
b/h2o-r/h2o-package/R/isotonicregression.R index 73313361b6fe..bcf47f1e3dfa 100644 --- a/h2o-r/h2o-package/R/isotonicregression.R +++ b/h2o-r/h2o-package/R/isotonicregression.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- isotonicregression -------------------------- # #' #' Build an Isotonic Regression model diff --git a/h2o-r/h2o-package/R/kmeans.R b/h2o-r/h2o-package/R/kmeans.R index 31b0c697b508..acbb2d4bc759 100755 --- a/h2o-r/h2o-package/R/kmeans.R +++ b/h2o-r/h2o-package/R/kmeans.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- KMeans Model in H2O -------------------------- # #' #' Performs k-means clustering on an H2O dataset diff --git a/h2o-r/h2o-package/R/modelselection.R b/h2o-r/h2o-package/R/modelselection.R index 67576a2d725a..8ecaac0592e3 100644 --- a/h2o-r/h2o-package/R/modelselection.R +++ b/h2o-r/h2o-package/R/modelselection.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Model Selection -------------------------- # #' #' H2O ModelSelection is used to build the best model with one predictor, two predictors, ... 
up to max_predictor_number diff --git a/h2o-r/h2o-package/R/naivebayes.R b/h2o-r/h2o-package/R/naivebayes.R index 227d7e22fe45..f06b93842605 100644 --- a/h2o-r/h2o-package/R/naivebayes.R +++ b/h2o-r/h2o-package/R/naivebayes.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Naive Bayes Model in H2O -------------------------- # #' #' Compute naive Bayes probabilities on an H2O dataset. diff --git a/h2o-r/h2o-package/R/pca.R b/h2o-r/h2o-package/R/pca.R index 21fb4c771f07..6f397bd09d16 100644 --- a/h2o-r/h2o-package/R/pca.R +++ b/h2o-r/h2o-package/R/pca.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Principal Components Analysis -------------------------- # #' #' Principal component analysis of an H2O data frame diff --git a/h2o-r/h2o-package/R/psvm.R b/h2o-r/h2o-package/R/psvm.R index 74d3bb4c47be..6d8fdad4909f 100644 --- a/h2o-r/h2o-package/R/psvm.R +++ b/h2o-r/h2o-package/R/psvm.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Support Vector Machine -------------------------- # #' #' Trains a Support Vector Machine model on an H2O dataset diff --git a/h2o-r/h2o-package/R/randomforest.R b/h2o-r/h2o-package/R/randomforest.R index 4284e0a1b0f9..35eb3eb5d269 100644 --- a/h2o-r/h2o-package/R/randomforest.R +++ b/h2o-r/h2o-package/R/randomforest.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Random Forest Model in H2O -------------------------- # #' #' Build a Random Forest model diff 
--git a/h2o-r/h2o-package/R/rulefit.R b/h2o-r/h2o-package/R/rulefit.R index b05323161f83..6f37975a18f1 100644 --- a/h2o-r/h2o-package/R/rulefit.R +++ b/h2o-r/h2o-package/R/rulefit.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- rulefit -------------------------- # #' #' Build a RuleFit Model diff --git a/h2o-r/h2o-package/R/segment.R b/h2o-r/h2o-package/R/segment.R index 8d76e0707779..76261276b21d 100644 --- a/h2o-r/h2o-package/R/segment.R +++ b/h2o-r/h2o-package/R/segment.R @@ -1,8 +1,8 @@ -#' -#' H2O Segmented-Data Bulk Model Training -#' -#' Provides a set of functions to train a group of models on different -#' segments (subpopulations) of the training set. +# +# H2O Segmented-Data Bulk Model Training +# +# Provides a set of functions to train a group of models on different +# segments (subpopulations) of the training set. #-------------------------------------------- # Segmented-data bulk model training function diff --git a/h2o-r/h2o-package/R/stackedensemble.R b/h2o-r/h2o-package/R/stackedensemble.R index 85dabd16db04..6cc96bc9d000 100644 --- a/h2o-r/h2o-package/R/stackedensemble.R +++ b/h2o-r/h2o-package/R/stackedensemble.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- H2O Stacked Ensemble -------------------------- # #' #' Builds a Stacked Ensemble diff --git a/h2o-r/h2o-package/R/svd.R b/h2o-r/h2o-package/R/svd.R index 1d5fc9100e5d..16b84b677143 100644 --- a/h2o-r/h2o-package/R/svd.R +++ b/h2o-r/h2o-package/R/svd.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Singular Value Decomposition -------------------------- # 
#' #' Singular value decomposition of an H2O data frame using the power method diff --git a/h2o-r/h2o-package/R/targetencoder.R b/h2o-r/h2o-package/R/targetencoder.R index 29c3637628fc..353d93e3d943 100644 --- a/h2o-r/h2o-package/R/targetencoder.R +++ b/h2o-r/h2o-package/R/targetencoder.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Target Encoder -------------------------- # #' #' Transformation of a categorical variable with a mean value of the target variable diff --git a/h2o-r/h2o-package/R/upliftrandomforest.R b/h2o-r/h2o-package/R/upliftrandomforest.R index 09aba7134ddf..5cbd190ef737 100644 --- a/h2o-r/h2o-package/R/upliftrandomforest.R +++ b/h2o-r/h2o-package/R/upliftrandomforest.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- Uplift Random Forest Model in H2O -------------------------- # #' #' Build a Uplift Random Forest model diff --git a/h2o-r/h2o-package/R/word2vec.R b/h2o-r/h2o-package/R/word2vec.R index 353fa48a3b78..debb404f3d48 100755 --- a/h2o-r/h2o-package/R/word2vec.R +++ b/h2o-r/h2o-package/R/word2vec.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # -------------------------- word2vec -------------------------- # #' #' Trains a word2vec model on a String column of an H2O data frame diff --git a/h2o-r/h2o-package/R/xgboost.R b/h2o-r/h2o-package/R/xgboost.R index 8560cae6fa91..aa463a048dff 100644 --- a/h2o-r/h2o-package/R/xgboost.R +++ b/h2o-r/h2o-package/R/xgboost.R @@ -1,6 +1,6 @@ # This file is auto-generated by h2o-3/h2o-bindings/bin/gen_R.py # Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details) -#' +# # 
-------------------------- XGBoost -------------------------- # #' #' Build an eXtreme Gradient Boosting model From 2c36fd3e25e8c2e1414d42aa10fc89f3f1ac01e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Mon, 5 Feb 2024 18:00:29 +0100 Subject: [PATCH 45/50] Fix 'The process cannot access the file because it is being used by another process' on windows in python --- h2o-py/h2o/frame.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/h2o-py/h2o/frame.py b/h2o-py/h2o/frame.py index f091a00cf0df..56d392cdf587 100644 --- a/h2o-py/h2o/frame.py +++ b/h2o-py/h2o/frame.py @@ -1970,12 +1970,16 @@ def as_data_frame(self, use_pandas=True, header=True): if can_use_pandas() and use_pandas: import pandas if (can_use_datatable()) or (can_use_polars() and can_use_pyarrow()): # can use multi-thread - with tempfile.NamedTemporaryFile(suffix=".h2oframe2Convert.csv") as exportFile: + exportFile = tempfile.NamedTemporaryFile(suffix=".h2oframe2Convert.csv", delete=False) + try: + exportFile.close() # needed for Windows h2o.export_file(self, exportFile.name, force=True) if can_use_datatable(): # use datatable for multi-thread by default return self.convert_with_datatable(exportFile.name) elif can_use_polars() and can_use_pyarrow(): # polar/pyarrow if datatable is not available return self.convert_with_polars(exportFile.name) + finally: + os.unlink(exportFile.name) warnings.warn("converting H2O frame to pandas dataframe using single-thread. 
For faster conversion using" " multi-thread, install datatable (for Python 3.9 or lower), or polars and pyarrow " "(for Python 3.10 or above).", H2ODependencyWarning) From 80a6a23892fbc7adf434a84f25b24727538d4e0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 7 Feb 2024 16:15:28 +0100 Subject: [PATCH 46/50] Make sure R images contain all the suggested packages --- docker/scripts/install_R_version | 52 ++++++++++++++++---------------- h2o-r/h2o-DESCRIPTION.template | 1 - 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/docker/scripts/install_R_version b/docker/scripts/install_R_version index 3af9eb8514be..265996d8d08e 100755 --- a/docker/scripts/install_R_version +++ b/docker/scripts/install_R_version @@ -175,10 +175,23 @@ function getAndInstallFromCRAN(){ # Install dependencies echo "Installing dependencies for R ${R_VERSION}" - +getAndInstallFromCRAN AUC 0.3.0 +getAndInstallFromCRAN DEoptimR 1.0-9 +getAndInstallFromCRAN Formula 1.2-4 +getAndInstallFromCRAN HDtweedie 1.1 +getAndInstallFromCRAN KernSmooth 2.23-15 +getAndInstallFromCRAN LiblineaR 1.94-2 +getAndInstallFromCRAN MASS 7.3-45 +getAndInstallFromCRAN MLmetrics 1.0.0 +getAndInstallFromCRAN R.methodsS3 1.8.1 +getAndInstallFromCRAN R6 2.5.1 +getAndInstallFromCRAN RColorBrewer 1.1-2 +getAndInstallFromCRAN RUnit 0.4.32 +getAndInstallFromCRAN Rcpp 1.0.12 +getAndInstallFromCRAN RcppParallel 5.1.4 +getAndInstallFromCRAN SparseM 1.81 getAndInstallFromCRAN abind 1.4-5 getAndInstallFromCRAN acepack 1.4.1 -getAndInstallFromCRAN AUC 0.3.0 getAndInstallFromCRAN backports 1.3.0 getAndInstallFromCRAN base64enc 0.1-3 getAndInstallFromCRAN bit 4.0.4 @@ -195,7 +208,6 @@ getAndInstallFromCRAN cpp11 0.4.1 getAndInstallFromCRAN crayon 1.4.2 getAndInstallFromCRAN curl 4.3.2 getAndInstallFromCRAN data.table 1.14.2 -getAndInstallFromCRAN DEoptimR 1.0-9 getAndInstallFromCRAN digest 0.6.28 getAndInstallFromCRAN diptest 0.76-0 getAndInstallFromCRAN evaluate 0.14 @@ -204,7 +216,6 @@ 
getAndInstallFromCRAN farver 2.1.0 getAndInstallFromCRAN fastmap 1.1.0 getAndInstallFromCRAN foreign 0.8-67 getAndInstallFromCRAN formatR 1.11 -getAndInstallFromCRAN Formula 1.2-4 getAndInstallFromCRAN fs 1.5.0 getAndInstallFromCRAN futile.options 1.0.1 getAndInstallFromCRAN generics 0.1.1 @@ -213,25 +224,20 @@ getAndInstallFromCRAN gitcreds 0.1.1 getAndInstallFromCRAN glue 1.5.0 getAndInstallFromCRAN gtable 0.3.0 getAndInstallFromCRAN gtools 3.9.2 -getAndInstallFromCRAN HDtweedie 1.1 getAndInstallFromCRAN highlight 0.5.0 getAndInstallFromCRAN ini 0.3.1 getAndInstallFromCRAN isoband 0.2.5 getAndInstallFromCRAN iterators 1.0.13 getAndInstallFromCRAN jsonlite 1.7.2 getAndInstallFromCRAN kernlab 0.9-29 -getAndInstallFromCRAN KernSmooth 2.23-15 getAndInstallFromCRAN labeling 0.4.2 getAndInstallFromCRAN lattice 0.20-34 getAndInstallFromCRAN lazyeval 0.2.2 -getAndInstallFromCRAN LiblineaR 1.94-2 getAndInstallFromCRAN magrittr 2.0.1 -getAndInstallFromCRAN MASS 7.3-45 getAndInstallFromCRAN mclust 5.4.8 getAndInstallFromCRAN mime 0.12 getAndInstallFromCRAN misc3d 0.9-1 getAndInstallFromCRAN mlbench 2.1-3 -getAndInstallFromCRAN MLmetrics 1.0.0 getAndInstallFromCRAN modeltools 0.2-23 getAndInstallFromCRAN mvtnorm 1.0-0 getAndInstallFromCRAN nnet 7.3-12 @@ -242,22 +248,15 @@ getAndInstallFromCRAN praise 1.0.0 getAndInstallFromCRAN prettyunits 1.1.1 getAndInstallFromCRAN proxy 0.4-20 getAndInstallFromCRAN ps 1.6.0 -getAndInstallFromCRAN R.methodsS3 1.8.1 -getAndInstallFromCRAN R6 2.5.1 getAndInstallFromCRAN randomForest 4.6-14 getAndInstallFromCRAN rappdirs 0.3.3 -getAndInstallFromCRAN RColorBrewer 1.1-2 -getAndInstallFromCRAN Rcpp 1.0.7 -getAndInstallFromCRAN RcppParallel 5.1.4 getAndInstallFromCRAN remotes 2.4.1 getAndInstallFromCRAN rlang 0.4.12 getAndInstallFromCRAN rpart 4.1-10 getAndInstallFromCRAN rprojroot 2.0.2 getAndInstallFromCRAN rstudioapi 0.13 -getAndInstallFromCRAN RUnit 0.4.32 getAndInstallFromCRAN slam 0.1-40 getAndInstallFromCRAN sourcetools 0.1.7 
-getAndInstallFromCRAN SparseM 1.81 getAndInstallFromCRAN sparsepp 1.22 getAndInstallFromCRAN spatial 7.3-11 getAndInstallFromCRAN statmod 1.4.36 @@ -277,29 +276,29 @@ getAndInstallFromCRAN xml2 1.3.2 getAndInstallFromCRAN xtable 1.8-4 getAndInstallFromCRAN yaml 2.2.1 getAndInstallFromCRAN zip 2.2.0 +getAndInstallFromCRAN robustbase 0.93-7 +getAndInstallFromCRAN class 7.3-14 +getAndInstallFromCRAN R.oo 1.24.0 +getAndInstallFromCRAN RcppArmadillo 0.10.7.3.0 getAndInstallFromCRAN checkmate 2.0.0 getAndInstallFromCRAN bit64 4.0.5 -getAndInstallFromCRAN caTools 1.17.1.4 getAndInstallFromCRAN RCurl 1.95-4.12 +getAndInstallFromCRAN caTools 1.17.1.4 getAndInstallFromCRAN munsell 0.5.0 getAndInstallFromCRAN diffobj 0.3.5 -getAndInstallFromCRAN robustbase 0.93-7 getAndInstallFromCRAN lambda.r 1.2.4 getAndInstallFromCRAN optparse 1.7.1 getAndInstallFromCRAN cli 3.1.0 getAndInstallFromCRAN gridExtra 2.3 getAndInstallFromCRAN foreach 1.5.1 getAndInstallFromCRAN Matrix 1.2-8 +getAndInstallFromCRAN latticeExtra 0.6-28 getAndInstallFromCRAN nlme 3.1-131 getAndInstallFromCRAN sp 1.4-5 -getAndInstallFromCRAN class 7.3-14 getAndInstallFromCRAN prabclus 2.3-2 getAndInstallFromCRAN plot3D 1.4 getAndInstallFromCRAN flexmix 2.3-17 -getAndInstallFromCRAN R.oo 1.24.0 getAndInstallFromCRAN processx 3.5.2 -getAndInstallFromCRAN latticeExtra 0.6-28 -getAndInstallFromCRAN RcppArmadillo 0.10.7.3.0 getAndInstallFromCRAN cachem 1.0.6 getAndInstallFromCRAN ellipsis 0.3.2 getAndInstallFromCRAN htmltools 0.5.2 @@ -312,6 +311,9 @@ getAndInstallFromCRAN askpass 1.1 getAndInstallFromCRAN highr 0.9 getAndInstallFromCRAN tinytex 0.35 getAndInstallFromCRAN rversions 2.1.1 +getAndInstallFromCRAN e1071 1.7-9 +getAndInstallFromCRAN flexclust 1.4-0 +getAndInstallFromCRAN R.utils 2.11.0 getAndInstallFromCRAN gplots 3.1.1 getAndInstallFromCRAN futile.logger 1.4.3 getAndInstallFromCRAN sessioninfo 1.2.1 @@ -321,10 +323,7 @@ getAndInstallFromCRAN survival 2.44-1.1 getAndInstallFromCRAN xgboost 1.0.0.2 
getAndInstallFromCRAN mgcv 1.8-17 getAndInstallFromCRAN ade4 1.7-18 -getAndInstallFromCRAN e1071 1.7-9 -getAndInstallFromCRAN flexclust 1.4-0 getAndInstallFromCRAN fpc 2.2-9 -getAndInstallFromCRAN R.utils 2.11.0 getAndInstallFromCRAN callr 3.7.0 getAndInstallFromCRAN xopen 1.0.0 getAndInstallFromCRAN memoise 2.0.0 @@ -341,10 +340,10 @@ getAndInstallFromCRAN openssl 1.4.5 getAndInstallFromCRAN knitr 1.36 getAndInstallFromCRAN ROCR 1.0-7 getAndInstallFromCRAN text2vec 0.5.0 +getAndInstallFromCRAN RItools 0.1-17 getAndInstallFromCRAN coin 1.0-0 getAndInstallFromCRAN gbm 2.1.3 getAndInstallFromCRAN penalized 0.9-51 -getAndInstallFromCRAN RItools 0.1-17 getAndInstallFromCRAN pkgbuild 1.2.0 getAndInstallFromCRAN webshot 0.5.2 getAndInstallFromCRAN pillar 1.6.4 @@ -361,6 +360,7 @@ getAndInstallFromCRAN tables 0.9.6 getAndInstallFromCRAN rcmdcheck 1.4.0 getAndInstallFromCRAN repr 1.1.3 getAndInstallFromCRAN tibble 3.1.6 +getAndInstallFromCRAN DT 0.31 getAndInstallFromCRAN shiny 1.7.1 getAndInstallFromCRAN gert 1.3.0 getAndInstallFromCRAN gh 1.3.0 diff --git a/h2o-r/h2o-DESCRIPTION.template b/h2o-r/h2o-DESCRIPTION.template index 3d33b4c698b3..d4c56118bfa3 100644 --- a/h2o-r/h2o-DESCRIPTION.template +++ b/h2o-r/h2o-DESCRIPTION.template @@ -47,7 +47,6 @@ URL: https://github.com/h2oai/h2o-3 BugReports: https://github.com/h2oai/h2o-3/issues NeedsCompilation: no SystemRequirements: Java (>= 8, <= 17) -Encoding: UTF-8 Depends: R (>= 2.13.0), methods, stats From 00dc1080f09e860fe18e37186c6227e16eb77b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Thu, 8 Feb 2024 18:26:07 +0100 Subject: [PATCH 47/50] Update roxygen and rlang in R images --- docker/scripts/install_R_version | 55 ++++++++++++++++---------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/docker/scripts/install_R_version b/docker/scripts/install_R_version index 265996d8d08e..83a369e3ea57 100755 --- a/docker/scripts/install_R_version +++ 
b/docker/scripts/install_R_version @@ -175,23 +175,9 @@ function getAndInstallFromCRAN(){ # Install dependencies echo "Installing dependencies for R ${R_VERSION}" -getAndInstallFromCRAN AUC 0.3.0 -getAndInstallFromCRAN DEoptimR 1.0-9 -getAndInstallFromCRAN Formula 1.2-4 -getAndInstallFromCRAN HDtweedie 1.1 -getAndInstallFromCRAN KernSmooth 2.23-15 -getAndInstallFromCRAN LiblineaR 1.94-2 -getAndInstallFromCRAN MASS 7.3-45 -getAndInstallFromCRAN MLmetrics 1.0.0 -getAndInstallFromCRAN R.methodsS3 1.8.1 -getAndInstallFromCRAN R6 2.5.1 -getAndInstallFromCRAN RColorBrewer 1.1-2 -getAndInstallFromCRAN RUnit 0.4.32 -getAndInstallFromCRAN Rcpp 1.0.12 -getAndInstallFromCRAN RcppParallel 5.1.4 -getAndInstallFromCRAN SparseM 1.81 getAndInstallFromCRAN abind 1.4-5 getAndInstallFromCRAN acepack 1.4.1 +getAndInstallFromCRAN AUC 0.3.0 getAndInstallFromCRAN backports 1.3.0 getAndInstallFromCRAN base64enc 0.1-3 getAndInstallFromCRAN bit 4.0.4 @@ -208,6 +194,7 @@ getAndInstallFromCRAN cpp11 0.4.1 getAndInstallFromCRAN crayon 1.4.2 getAndInstallFromCRAN curl 4.3.2 getAndInstallFromCRAN data.table 1.14.2 +getAndInstallFromCRAN DEoptimR 1.0-9 getAndInstallFromCRAN digest 0.6.28 getAndInstallFromCRAN diptest 0.76-0 getAndInstallFromCRAN evaluate 0.14 @@ -216,6 +203,7 @@ getAndInstallFromCRAN farver 2.1.0 getAndInstallFromCRAN fastmap 1.1.0 getAndInstallFromCRAN foreign 0.8-67 getAndInstallFromCRAN formatR 1.11 +getAndInstallFromCRAN Formula 1.2-4 getAndInstallFromCRAN fs 1.5.0 getAndInstallFromCRAN futile.options 1.0.1 getAndInstallFromCRAN generics 0.1.1 @@ -224,20 +212,25 @@ getAndInstallFromCRAN gitcreds 0.1.1 getAndInstallFromCRAN glue 1.5.0 getAndInstallFromCRAN gtable 0.3.0 getAndInstallFromCRAN gtools 3.9.2 +getAndInstallFromCRAN HDtweedie 1.1 getAndInstallFromCRAN highlight 0.5.0 getAndInstallFromCRAN ini 0.3.1 getAndInstallFromCRAN isoband 0.2.5 getAndInstallFromCRAN iterators 1.0.13 getAndInstallFromCRAN jsonlite 1.7.2 getAndInstallFromCRAN kernlab 0.9-29 +getAndInstallFromCRAN 
KernSmooth 2.23-15 getAndInstallFromCRAN labeling 0.4.2 getAndInstallFromCRAN lattice 0.20-34 getAndInstallFromCRAN lazyeval 0.2.2 +getAndInstallFromCRAN LiblineaR 1.94-2 getAndInstallFromCRAN magrittr 2.0.1 +getAndInstallFromCRAN MASS 7.3-45 getAndInstallFromCRAN mclust 5.4.8 getAndInstallFromCRAN mime 0.12 getAndInstallFromCRAN misc3d 0.9-1 getAndInstallFromCRAN mlbench 2.1-3 +getAndInstallFromCRAN MLmetrics 1.0.0 getAndInstallFromCRAN modeltools 0.2-23 getAndInstallFromCRAN mvtnorm 1.0-0 getAndInstallFromCRAN nnet 7.3-12 @@ -248,15 +241,22 @@ getAndInstallFromCRAN praise 1.0.0 getAndInstallFromCRAN prettyunits 1.1.1 getAndInstallFromCRAN proxy 0.4-20 getAndInstallFromCRAN ps 1.6.0 +getAndInstallFromCRAN R.methodsS3 1.8.1 +getAndInstallFromCRAN R6 2.5.1 getAndInstallFromCRAN randomForest 4.6-14 getAndInstallFromCRAN rappdirs 0.3.3 +getAndInstallFromCRAN RColorBrewer 1.1-2 +getAndInstallFromCRAN Rcpp 1.0.12 +getAndInstallFromCRAN RcppParallel 5.1.4 getAndInstallFromCRAN remotes 2.4.1 -getAndInstallFromCRAN rlang 0.4.12 +getAndInstallFromCRAN rlang 1.0.0 getAndInstallFromCRAN rpart 4.1-10 getAndInstallFromCRAN rprojroot 2.0.2 getAndInstallFromCRAN rstudioapi 0.13 +getAndInstallFromCRAN RUnit 0.4.32 getAndInstallFromCRAN slam 0.1-40 getAndInstallFromCRAN sourcetools 0.1.7 +getAndInstallFromCRAN SparseM 1.81 getAndInstallFromCRAN sparsepp 1.22 getAndInstallFromCRAN spatial 7.3-11 getAndInstallFromCRAN statmod 1.4.36 @@ -276,29 +276,29 @@ getAndInstallFromCRAN xml2 1.3.2 getAndInstallFromCRAN xtable 1.8-4 getAndInstallFromCRAN yaml 2.2.1 getAndInstallFromCRAN zip 2.2.0 -getAndInstallFromCRAN robustbase 0.93-7 -getAndInstallFromCRAN class 7.3-14 -getAndInstallFromCRAN R.oo 1.24.0 -getAndInstallFromCRAN RcppArmadillo 0.10.7.3.0 getAndInstallFromCRAN checkmate 2.0.0 getAndInstallFromCRAN bit64 4.0.5 -getAndInstallFromCRAN RCurl 1.95-4.12 getAndInstallFromCRAN caTools 1.17.1.4 +getAndInstallFromCRAN RCurl 1.95-4.12 getAndInstallFromCRAN munsell 0.5.0 getAndInstallFromCRAN 
diffobj 0.3.5 +getAndInstallFromCRAN robustbase 0.93-7 getAndInstallFromCRAN lambda.r 1.2.4 getAndInstallFromCRAN optparse 1.7.1 getAndInstallFromCRAN cli 3.1.0 getAndInstallFromCRAN gridExtra 2.3 getAndInstallFromCRAN foreach 1.5.1 getAndInstallFromCRAN Matrix 1.2-8 -getAndInstallFromCRAN latticeExtra 0.6-28 getAndInstallFromCRAN nlme 3.1-131 getAndInstallFromCRAN sp 1.4-5 +getAndInstallFromCRAN class 7.3-14 getAndInstallFromCRAN prabclus 2.3-2 getAndInstallFromCRAN plot3D 1.4 getAndInstallFromCRAN flexmix 2.3-17 +getAndInstallFromCRAN R.oo 1.24.0 getAndInstallFromCRAN processx 3.5.2 +getAndInstallFromCRAN latticeExtra 0.6-28 +getAndInstallFromCRAN RcppArmadillo 0.10.7.3.0 getAndInstallFromCRAN cachem 1.0.6 getAndInstallFromCRAN ellipsis 0.3.2 getAndInstallFromCRAN htmltools 0.5.2 @@ -311,9 +311,6 @@ getAndInstallFromCRAN askpass 1.1 getAndInstallFromCRAN highr 0.9 getAndInstallFromCRAN tinytex 0.35 getAndInstallFromCRAN rversions 2.1.1 -getAndInstallFromCRAN e1071 1.7-9 -getAndInstallFromCRAN flexclust 1.4-0 -getAndInstallFromCRAN R.utils 2.11.0 getAndInstallFromCRAN gplots 3.1.1 getAndInstallFromCRAN futile.logger 1.4.3 getAndInstallFromCRAN sessioninfo 1.2.1 @@ -323,7 +320,10 @@ getAndInstallFromCRAN survival 2.44-1.1 getAndInstallFromCRAN xgboost 1.0.0.2 getAndInstallFromCRAN mgcv 1.8-17 getAndInstallFromCRAN ade4 1.7-18 +getAndInstallFromCRAN e1071 1.7-9 +getAndInstallFromCRAN flexclust 1.4-0 getAndInstallFromCRAN fpc 2.2-9 +getAndInstallFromCRAN R.utils 2.11.0 getAndInstallFromCRAN callr 3.7.0 getAndInstallFromCRAN xopen 1.0.0 getAndInstallFromCRAN memoise 2.0.0 @@ -340,10 +340,10 @@ getAndInstallFromCRAN openssl 1.4.5 getAndInstallFromCRAN knitr 1.36 getAndInstallFromCRAN ROCR 1.0-7 getAndInstallFromCRAN text2vec 0.5.0 -getAndInstallFromCRAN RItools 0.1-17 getAndInstallFromCRAN coin 1.0-0 getAndInstallFromCRAN gbm 2.1.3 getAndInstallFromCRAN penalized 0.9-51 +getAndInstallFromCRAN RItools 0.1-17 getAndInstallFromCRAN pkgbuild 1.2.0 getAndInstallFromCRAN 
webshot 0.5.2 getAndInstallFromCRAN pillar 1.6.4 @@ -355,12 +355,11 @@ getAndInstallFromCRAN httr 1.4.2 getAndInstallFromCRAN rsconnect 0.8.24 getAndInstallFromCRAN htmlTable 2.3.0 getAndInstallFromCRAN rmarkdown 2.11 -getAndInstallFromCRAN roxygen2 7.1.2 +getAndInstallFromCRAN roxygen2 7.2.2 getAndInstallFromCRAN tables 0.9.6 getAndInstallFromCRAN rcmdcheck 1.4.0 getAndInstallFromCRAN repr 1.1.3 getAndInstallFromCRAN tibble 3.1.6 -getAndInstallFromCRAN DT 0.31 getAndInstallFromCRAN shiny 1.7.1 getAndInstallFromCRAN gert 1.3.0 getAndInstallFromCRAN gh 1.3.0 From b9bf5ae1d0966b712c1777463c511f8577cf8a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 14 Feb 2024 22:36:02 +0100 Subject: [PATCH 48/50] Update pkgdown --- docker/scripts/install_R_version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/scripts/install_R_version b/docker/scripts/install_R_version index 83a369e3ea57..bdc9d89e5a13 100755 --- a/docker/scripts/install_R_version +++ b/docker/scripts/install_R_version @@ -373,7 +373,7 @@ getAndInstallFromCRAN shinyjs 2.0.0 getAndInstallFromCRAN usethis 2.0.1 getAndInstallFromCRAN tidyr 1.1.4 getAndInstallFromCRAN viridis 0.6.2 -getAndInstallFromCRAN pkgdown 1.3.0 +getAndInstallFromCRAN pkgdown 1.5.1 getAndInstallFromCRAN waldo 0.3.1 getAndInstallFromCRAN manipulateWidget 0.11.1 getAndInstallFromCRAN plotly 4.10.0 From c6ffe57ffc26614363cffb2d871baae7cc3b77f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Wed, 14 Feb 2024 22:37:29 +0100 Subject: [PATCH 49/50] Add mypy and specify the explicit version of already present typing_extensions --- h2o-py/test-requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/h2o-py/test-requirements.txt b/h2o-py/test-requirements.txt index 4239a8e1fbe8..12807923acc3 100644 --- a/h2o-py/test-requirements.txt +++ b/h2o-py/test-requirements.txt @@ -48,3 +48,9 @@ pyarrow==14.0.1; python_version > '3.9' pygments==2.15.1; 
python_version >= '3.7' sphinx==3.5.4; python_version >= '3.6' and python_version <= '3.8' sphinx==6.1.3; python_version > '3.8' +typing_extensions==4.1.1; python_version < '3.7' +typing_extensions==4.7.1; python_version >= '3.7' and python_version < '3.8' +typing_extensions==4.9.0; python_version >= '3.8' +mypy==0.971; python_version < '3.7' +mypy==1.4.1; python_version >= '3.7' and python_version < '3.8' +mypy==1.8.0; python_version >= '3.8' From 50ad27b7957f337007fb7fa47e83a42a2e0bc0f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Fr=C3=BDda?= Date: Fri, 16 Feb 2024 14:21:44 +0100 Subject: [PATCH 50/50] Increase timeout in stages that are too often aborted The R medium stages didn't run tests that required a missing package, so it's understandable that the runtime there increased. Java JUnit tests are also aborted very often in master, so this PR likely doesn't cause the issue. --- scripts/jenkins/groovy/defineTestStages.groovy | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/jenkins/groovy/defineTestStages.groovy b/scripts/jenkins/groovy/defineTestStages.groovy index aa6b85c5016c..c13732b32a23 100644 --- a/scripts/jenkins/groovy/defineTestStages.groovy +++ b/scripts/jenkins/groovy/defineTestStages.groovy @@ -195,7 +195,7 @@ def call(final pipelineContext) { ], [ stageName: 'R3.5 Medium-large', target: 'test-r-medium-large', rVersion: '3.5.3', - timeoutValue: 130, component: pipelineContext.getBuildConfig().COMPONENT_R + timeoutValue: 210, component: pipelineContext.getBuildConfig().COMPONENT_R ], [ stageName: 'R3.5 Demos Medium-large', target: 'test-r-demos-medium-large', rVersion: '3.5.3', @@ -220,7 +220,7 @@ def call(final pipelineContext) { ], [ stageName: 'Java 8 JUnit', target: 'test-junit-jenkins', pythonVersion: '3.6', javaVersion: 8, - timeoutValue: 350, component: pipelineContext.getBuildConfig().COMPONENT_JAVA, + timeoutValue: 400, component: pipelineContext.getBuildConfig().COMPONENT_JAVA, 
additionalTestPackages: [pipelineContext.getBuildConfig().COMPONENT_PY], imageSpecifier: 'python-3.6-jdk-8' ], @@ -426,7 +426,7 @@ def call(final pipelineContext) { ], [ stageName: 'Java 17 JUnit', target: 'test-junit-17-jenkins', pythonVersion: '3.6', javaVersion: 17, - timeoutValue: 350, component: pipelineContext.getBuildConfig().COMPONENT_JAVA, + timeoutValue: 400, component: pipelineContext.getBuildConfig().COMPONENT_JAVA, additionalTestPackages: [pipelineContext.getBuildConfig().COMPONENT_PY], imageSpecifier: "python-3.6-jdk-17" ], @@ -452,7 +452,7 @@ def call(final pipelineContext) { ], [ stageName: 'R3.3 Medium-large', target: 'test-r-medium-large', rVersion: '3.3.3', - timeoutValue: 130, component: pipelineContext.getBuildConfig().COMPONENT_R + timeoutValue: 210, component: pipelineContext.getBuildConfig().COMPONENT_R ], [ stageName: 'R3.3 Small', target: 'test-r-small', rVersion: '3.3.3', @@ -508,7 +508,7 @@ def call(final pipelineContext) { ], [ stageName: 'Java 11 JUnit', target: 'test-junit-11-jenkins', pythonVersion: '3.6', javaVersion: 11, - timeoutValue: 340, component: pipelineContext.getBuildConfig().COMPONENT_JAVA, + timeoutValue: 400, component: pipelineContext.getBuildConfig().COMPONENT_JAVA, additionalTestPackages: [pipelineContext.getBuildConfig().COMPONENT_PY], imageSpecifier: "python-3.6-jdk-11" ],