-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathtest_python.sh
executable file
·136 lines (105 loc) · 5.73 KB
/
test_python.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: BSD-3-Clause
# Strict mode: exit on the first failing command (-e), treat expansion of an
# unset variable as an error (-u), and make a pipeline fail when any of its
# stages fails (pipefail).
set -euo pipefail
# Load the helpers from test_utils.sh located in the same directory as this
# script. NOTE(review): log_command, print_system_stats and print_ucx_config
# are not defined in this file and presumably come from there — confirm.
source "$(dirname "$0")/test_utils.sh"
rapids-logger "Create test conda environment"
# Source conda's shell integration so that `conda activate` below can be used.
. /opt/conda/etc/profile.d/conda.sh
# Generate the conda environment spec for the `test_python` file key. The
# matrix is built from RAPIDS_CUDA_VERSION with its last dot-separated
# component removed (`%.*`), the output of `arch`, and RAPIDS_PY_VERSION.
# `tee` both prints the generated spec and saves it to env.yaml.
rapids-dependency-file-generator \
--output conda \
--file_key test_python \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" | tee env.yaml
# Create the environment named `test` from the generated spec and switch to it.
rapids-mamba-retry env create --force -f env.yaml -n test
conda activate test
# TODO: Perhaps install from conda? We need distributed installed in developer
# mode to provide test utils, but that's probably not doable from conda packages.
rapids-logger "Install Dask and Distributed"
# Dask is installed straight from the `main` branch on GitHub.
pip install git+https://github.com/dask/dask@main
# Remove any checkout left in /tmp/distributed before cloning into that path,
# then install the clone in editable (`-e`) mode, as the TODO above requires.
rm -rf /tmp/distributed
git clone https://github.com/dask/distributed /tmp/distributed
pip install -e /tmp/distributed
# Print environment and system information ahead of the test runs.
rapids-print-env
print_system_stats
#######################################
# Run the Python core tests in python/ucxx/_lib/tests/ under a 2-minute
# timeout, logging the command line first via log_command.
# Globals:   none written (CMD_LINE is local to the call)
# Arguments: none
# Returns:   exit status of the `timeout ... pytest` invocation
#######################################
run_tests() {
  # `local` keeps this helper string from overwriting a script-level CMD_LINE;
  # it is still visible to log_command while this function is running.
  local CMD_LINE="timeout 2m pytest -vs python/ucxx/_lib/tests/"

  log_command "${CMD_LINE}"
  timeout 2m pytest -vs python/ucxx/_lib/tests/
}
#######################################
# Run the async Python tests in python/ucxx/_lib_async/tests/ under a
# 20-minute timeout, or only print a "skipping" notice.
# Globals:   none written (all variables are local to the call)
# Arguments:
#   $1 - PROGRESS_MODE, exported to pytest as UCXPY_PROGRESS_MODE
#   $2 - ENABLE_DELAYED_SUBMISSION, exported as UCXPY_ENABLE_DELAYED_SUBMISSION
#   $3 - ENABLE_PYTHON_FUTURE, exported as UCXPY_ENABLE_PYTHON_FUTURE
#   $4 - SKIP, integer; non-zero prints the notice instead of running pytest
# Outputs:   the logged command line (or the skip notice) on stdout
# Returns:   exit status of pytest under timeout, or 0 when skipped
#######################################
run_tests_async() {
  # Variable names match the rest of the script; `local` confines them to this
  # call so they no longer overwrite same-named script-level variables.
  local PROGRESS_MODE=$1
  local ENABLE_DELAYED_SUBMISSION=$2
  local ENABLE_PYTHON_FUTURE=$3
  local SKIP=$4
  local CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 20m pytest -vs python/ucxx/_lib_async/tests/ --durations=50"

  # Quoted so an empty value cannot collapse the test into a syntax error.
  if [ "${SKIP}" -ne 0 ]; then
    echo -e "\e[1;33mSkipping unstable test: ${CMD_LINE}\e[0m"
  else
    log_command "${CMD_LINE}"
    UCXPY_PROGRESS_MODE="${PROGRESS_MODE}" \
      UCXPY_ENABLE_DELAYED_SUBMISSION="${ENABLE_DELAYED_SUBMISSION}" \
      UCXPY_ENABLE_PYTHON_FUTURE="${ENABLE_PYTHON_FUTURE}" \
      timeout 20m pytest -vs python/ucxx/_lib_async/tests/ --durations=50
  fi
}
#######################################
# Run the ucxx.benchmarks.send_recv benchmark (cupy objects, 8MiB messages,
# allocation reuse) under a 2-minute timeout, logging the command line first.
# Globals:   none written (all variables are local to the call)
# Arguments:
#   $1 - BACKEND, passed as --backend
#   $2 - PROGRESS_MODE, passed as --progress-mode
#   $3 - ASYNCIO_WAIT, integer; non-zero appends --asyncio-wait
#   $4 - ENABLE_DELAYED_SUBMISSION, exported as UCXPY_ENABLE_DELAYED_SUBMISSION
#   $5 - ENABLE_PYTHON_FUTURE, exported as UCXPY_ENABLE_PYTHON_FUTURE
#   $6 - N_BUFFERS, passed as --n-buffers
#   $7 - SLOW, integer; non-zero prints a "slow benchmark" warning first
# Outputs:   the logged command line and optional warning on stdout
# Returns:   exit status of the benchmark under timeout
#######################################
run_py_benchmark() {
  # Variable names match the rest of the script; `local` confines them to this
  # call so they no longer overwrite same-named script-level variables.
  local BACKEND=$1
  local PROGRESS_MODE=$2
  local ASYNCIO_WAIT=$3
  local ENABLE_DELAYED_SUBMISSION=$4
  local ENABLE_PYTHON_FUTURE=$5
  local N_BUFFERS=$6
  local SLOW=$7

  # The command is assembled once as an array: the logged text and the executed
  # command come from the same words, and the optional flag needs no unquoted
  # expansion of a possibly-empty string.
  local -a benchmark_cmd=(
    timeout 2m python -m ucxx.benchmarks.send_recv
    --backend "${BACKEND}" -o cupy --reuse-alloc -n 8MiB
    --n-buffers "${N_BUFFERS}" --progress-mode "${PROGRESS_MODE}"
  )
  if [ "${ASYNCIO_WAIT}" -ne 0 ]; then
    benchmark_cmd+=(--asyncio-wait)
  fi

  # UCX_KEEPALIVE_INTERVAL=1ms is a workaround for
  # https://github.com/rapidsai/ucxx/issues/15
  local env_prefix="UCX_KEEPALIVE_INTERVAL=1ms UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE}"

  # Join the array with single spaces independently of IFS; every word gets a
  # leading space, which also separates the command from env_prefix.
  local CMD_LINE
  printf -v CMD_LINE ' %s' "${benchmark_cmd[@]}"
  CMD_LINE="${env_prefix}${CMD_LINE}"
  log_command "${CMD_LINE}"

  if [ "${SLOW}" -ne 0 ]; then
    echo -e "\e[1;33mSLOW BENCHMARK: it may seem like a deadlock but will eventually complete.\e[0m"
  fi

  UCX_KEEPALIVE_INTERVAL=1ms \
    UCXPY_ENABLE_DELAYED_SUBMISSION="${ENABLE_DELAYED_SUBMISSION}" \
    UCXPY_ENABLE_PYTHON_FUTURE="${ENABLE_PYTHON_FUTURE}" \
    "${benchmark_cmd[@]}"
}
#######################################
# Run the distributed-ucxx tests in
# python/distributed-ucxx/distributed_ucxx/tests/ under a 10-minute timeout,
# logging the command line first.
# Globals:   none written (all variables are local to the call)
# Arguments:
#   $1 - PROGRESS_MODE, exported to pytest as UCXPY_PROGRESS_MODE
#   $2 - ENABLE_DELAYED_SUBMISSION, exported as UCXPY_ENABLE_DELAYED_SUBMISSION
#   $3 - ENABLE_PYTHON_FUTURE, exported as UCXPY_ENABLE_PYTHON_FUTURE
# Outputs:   the logged command line on stdout
# Returns:   exit status of pytest under timeout
#######################################
run_distributed_ucxx_tests() {
  # Variable names match the rest of the script; `local` confines them to this
  # call so they no longer overwrite same-named script-level variables.
  local PROGRESS_MODE=$1
  local ENABLE_DELAYED_SUBMISSION=$2
  local ENABLE_PYTHON_FUTURE=$3
  local CMD_LINE="UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/"

  # Workaround for https://github.com/rapidsai/ucxx/issues/15
  # (currently disabled for this test suite)
  # CMD_LINE="UCX_KEEPALIVE_INTERVAL=1ms ${CMD_LINE}"

  log_command "${CMD_LINE}"
  UCXPY_PROGRESS_MODE="${PROGRESS_MODE}" \
    UCXPY_ENABLE_DELAYED_SUBMISSION="${ENABLE_DELAYED_SUBMISSION}" \
    UCXPY_ENABLE_PYTHON_FUTURE="${ENABLE_PYTHON_FUTURE}" \
    timeout 10m pytest -vs python/distributed-ucxx/distributed_ucxx/tests/
}
# Install the packages under test from the conda channel reported by
# rapids-download-conda-from-s3 for the "cpp" artifacts.
rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL="$(rapids-download-conda-from-s3 cpp)"
rapids-mamba-retry install --channel "${CPP_CHANNEL}" libucxx ucxx distributed-ucxx

print_ucx_config

rapids-logger "Run tests with conda package"

rapids-logger "Python Core Tests"
run_tests

rapids-logger "Python Async Tests"
# run_tests_async PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE SKIP
# Thread progress mode, first with delayed submission and Python futures both
# disabled (0 0), then with both enabled (1 1); neither run is skipped.
for feature_toggle in 0 1; do
  run_tests_async thread "${feature_toggle}" "${feature_toggle}" 0
done
rapids-logger "Python Benchmarks"
# run_py_benchmark BACKEND PROGRESS_MODE ASYNCIO_WAIT ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE NBUFFERS SLOW
run_py_benchmark ucxx-core thread 0 0 0 1 0
run_py_benchmark ucxx-core thread 1 0 0 1 0

# The ucxx-async benchmarks are skipped when RAPIDS_CUDA_VERSION is 11.2 or
# 11.2.<anything>. The pattern is anchored at the start and its dots are
# escaped, so only that release line matches (an unanchored `11.2.*` would also
# match e.g. "11.20" or "111x2"). Keeping the pattern in a variable makes Bash
# treat it purely as a regular expression.
CUDA_11_2_REGEX='^11\.2(\.|$)'
# The version does not change between iterations, so it is checked once,
# outside the loop.
if [[ ! "${RAPIDS_CUDA_VERSION}" =~ ${CUDA_11_2_REGEX} ]]; then
  for nbuf in 1 8; do
    # run_py_benchmark BACKEND PROGRESS_MODE ASYNCIO_WAIT ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE NBUFFERS SLOW
    run_py_benchmark ucxx-async thread 0 0 0 "${nbuf}" 0
    run_py_benchmark ucxx-async thread 0 0 1 "${nbuf}" 0
    run_py_benchmark ucxx-async thread 0 1 0 "${nbuf}" 0
    run_py_benchmark ucxx-async thread 0 1 1 "${nbuf}" 0
  done
fi
rapids-logger "Distributed Tests"
# run_distributed_ucxx_tests PROGRESS_MODE ENABLE_DELAYED_SUBMISSION ENABLE_PYTHON_FUTURE
# Polling progress mode runs once, with both optional features disabled.
run_distributed_ucxx_tests polling 0 0
# Thread progress mode runs with every combination of the two toggles, in the
# order (0 0), (0 1), (1 0), (1 1).
for delayed_submission in 0 1; do
  for python_future in 0 1; do
    run_distributed_ucxx_tests thread "${delayed_submission}" "${python_future}"
  done
done