-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathDockerfile.cuda
68 lines (50 loc) · 2.06 KB
/
Dockerfile.cuda
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Stage "cuda-base": UBI 9 plus the CUDA runtime libraries. Both the builder
# and the deploy stage are built FROM this stage.
#
# BASE_IMAGE_TAG defaults to "latest" (the tag this file has always used);
# pass --build-arg BASE_IMAGE_TAG=<tag> to pin the base for reproducible builds.
ARG BASE_IMAGE_TAG=latest
FROM registry.access.redhat.com/ubi9/ubi:${BASE_IMAGE_TAG} AS cuda-base

# Version suffix used in the NVIDIA RPM package names below (e.g. "12-3").
ARG CUDA_PKG_VERSION=12-3
# ENV (unlike ARG) is inherited by stages built FROM this one.
ENV CUDA_PKG_VERSION=${CUDA_PKG_VERSION}

# Register the NVIDIA CUDA repository for RHEL 9, install the runtime
# libraries, and add the /usr/local/nvidia library directories to the dynamic
# linker configuration. The dnf cache is cleaned in the same layer.
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo && \
    dnf install -y \
        cuda-cudart-${CUDA_PKG_VERSION} \
        cuda-compat-${CUDA_PKG_VERSION} \
        libcublas-${CUDA_PKG_VERSION} \
    && \
    echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \
    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf && \
    dnf clean all

ENV CUDA_HOME="/usr/local/cuda"
# Append any pre-existing LD_LIBRARY_PATH only when it is non-empty. A plain
# ":${LD_LIBRARY_PATH}" leaves a trailing ':' when the variable is unset, and
# the dynamic loader treats that empty entry as the current directory.
ENV LD_LIBRARY_PATH=/usr/local/cuda/targets/x86_64-linux/lib/:/usr/local/cuda/compat${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
# Stage "builder": adds the compiler toolchain and the CUDA development
# packages on top of cuda-base, then lets Poetry install the locked Python
# dependencies into an in-project virtualenv (/llama-cpp-python/.venv).
FROM cuda-base AS builder

# Re-declared because an ARG's scope ends with the stage that declared it.
ARG CUDA_PKG_VERSION
ENV CUDA_PKG_VERSION=${CUDA_PKG_VERSION}

# The NVIDIA CUDA repository was already registered in cuda-base and is
# inherited by this stage, so it is not added a second time here.
RUN dnf -y install \
        git gcc-toolset-12-gcc-c++ gcc-toolset-12 cmake python3.11-pip \
        cuda-nvcc-${CUDA_PKG_VERSION} \
        libcublas-devel-${CUDA_PKG_VERSION} \
        cuda-cudart-devel-${CUDA_PKG_VERSION} \
        cuda-driver-devel-${CUDA_PKG_VERSION} \
    && \
    dnf clean all && \
    pip3.11 install --no-cache-dir --upgrade pip wheel

# Put the CUDA binaries installed above (e.g. nvcc) on PATH; CUDA_HOME is
# inherited from cuda-base.
ENV PATH="${CUDA_HOME}/bin:${PATH}"
# Make Poetry create the virtualenv at <project>/.venv.
ENV POETRY_VIRTUALENVS_IN_PROJECT=1

WORKDIR /llama-cpp-python
# Only the dependency manifests are copied, so this layer and the install
# below are rebuilt only when the manifests change.
COPY pyproject.toml poetry.lock ./

# Presumably consumed by the llama-cpp-python source build to enable the CUDA
# backend — confirm against the dependency's build documentation.
ENV CMAKE_ARGS="-DGGML_CUDA=on"

# Run the install inside the gcc-toolset-12 environment so native extensions
# compile with that toolchain. --no-root skips installing the project itself:
# only pyproject.toml and poetry.lock are present in this stage, so there is
# no project source to install.
RUN pip3.11 install --no-cache-dir poetry && \
    scl enable gcc-toolset-12 -- poetry install --no-root
# Stage "deploy": runtime image. Starts from cuda-base, installs the Python
# interpreter and the user-management tools, copies the virtualenv produced by
# the builder stage, and runs the server as an unprivileged user.
FROM cuda-base AS deploy

# shadow-utils provides the groupadd/adduser commands used further down.
RUN dnf -y install \
        python3.11 \
        shadow-utils \
    && dnf clean all

WORKDIR /llama-cpp-python
COPY --from=builder /llama-cpp-python/.venv .venv/

# Put the virtualenv first on PATH so "python" resolves inside it. These stay
# two separate instructions because PATH refers to VIRTUAL_ENV.
ENV VIRTUAL_ENV=/llama-cpp-python/.venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# System account "llama" (UID 1001): primary group GID 0, supplementary group
# "llama" (GID 1001), home set to the application directory, no login shell.
RUN groupadd --system --gid 1001 llama \
    && adduser \
        --system \
        --uid 1001 \
        --gid 0 \
        --groups llama \
        --create-home \
        --home-dir /llama-cpp-python \
        --shell /sbin/nologin \
        --comment "Llama user" \
        llama

USER llama

# TODO: decide how the server gets its configuration; none is passed here.
ENTRYPOINT ["python", "-m", "llama_cpp.server"]