# Package logic: # 1. base target: # - Install tools. # - Upgrade GCC if needed. # - Install C buildkit. # - Upgrade Python if needed. # - Install Python buildkit. # - Install Torch. # 2. base-build target: # - Install tools. # - Upgrade GCC if needed. # - Install C buildkit. # - Upgrade Python if needed. # - Install Python buildkit. # - Install Torch. # 2.1. flashinfer-build target: # - Build FlashInfer wheel. # 3. gpustack target (final): # - Install FlashInfer as a Python library for GPUStack if it exists. # - Install GPUStack. # - Install Vox-Box as an independent executor for GPUStack, # see https://github.com/gpustack/gpustack/pull/2473#issue-3222391256. # - Set up the entrypoint to start GPUStack. # Arguments description: # - CUDA_VERSION is the version of NVIDIA CUDA, # which is used to point to the base image for running. # - CUDA_DEVEL_VERSION is the version of NVIDIA CUDA, # which is used to point to the base image for 3rdparty components building. # - TORCH_VERSION is the version of PyTorch, # which should be compatible with the CUDA, vLLM and other components. # - TORCH_CUDA_ARCH_LIST is the CUDA architecture list for PyTorch, # which is used to build the components that depend on PyTorch, # default is empty, which means it will be set automatically based on the CUDA version. # - FLASHINFER_VERSION is the version of FlashInfer, # which is used to build the FlashInfer wheel. # - PYTHON_VERSION is the version of Python, # which should be properly set, it must be 3.x. ARG CUDA_VERSION=12.4.1 ARG CUDA_DEVEL_VERSION=12.6.3 ARG TORCH_VERSION=2.7.1 ARG TORCH_CUDA_ARCH_LIST="" ARG FLASHINFER_VERSION=0.2.8rc1 ARG FLASHINFER_BUILD_MAX_JOBS="" ARG PYTHON_VERSION=3.11 # Stage Base # # Example build command: # docker build --tag=gpustack/gpustack:cuda-base --file=pack/Dockerfile --target=base --progress=plain . 
# FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu22.04 AS base SHELL ["/bin/bash", "-eo", "pipefail", "-c"] ARG TARGETPLATFORM ARG TARGETOS ARG TARGETARCH ENV DEBIAN_FRONTEND=noninteractive \ LANG='en_US.UTF-8' \ LANGUAGE='en_US:en' \ LC_ALL='en_US.UTF-8' RUN < /etc/timezone \ && dpkg-reconfigure --frontend noninteractive tzdata # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* \ && rm -rf /var/cache/apt EOF ## Upgrade GCC if needed RUN <= 21.04" | bc -l) )); then echo "Skipping GCC upgrade for ${VERSION_ID}..." exit 0 fi # Install apt-get install -y --no-install-recommends \ gcc-11 g++-11 gfortran-11 gfortran # Update alternatives if [[ -f /etc/alternatives/gcov-dump ]]; then update-alternatives --remove-all gcov-dump; fi; update-alternatives --install /usr/bin/gcov-dump gcov-dump /usr/bin/gcov-dump-11 10 if [[ -f /etc/alternatives/lto-dump ]]; then update-alternatives --remove-all lto-dump; fi; update-alternatives --install /usr/bin/lto-dump lto-dump /usr/bin/lto-dump-11 10 if [[ -f /etc/alternatives/gcov ]]; then update-alternatives --remove-all gcov; fi; update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-11 10 if [[ -f /etc/alternatives/gcc ]]; then update-alternatives --remove-all gcc; fi; update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 if [[ -f /etc/alternatives/gcc-nm ]]; then update-alternatives --remove-all gcc-nm; fi; update-alternatives --install /usr/bin/gcc-nm gcc-nm /usr/bin/gcc-nm-11 10 if [[ -f /etc/alternatives/cpp ]]; then update-alternatives --remove-all cpp; fi; update-alternatives --install /usr/bin/cpp cpp /usr/bin/cpp-11 10 if [[ -f /etc/alternatives/g++ ]]; then update-alternatives --remove-all g++; fi; update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10 if [[ -f /etc/alternatives/gcc-ar ]]; then update-alternatives --remove-all gcc-ar; fi; update-alternatives --install /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-11 10 if [[ -f /etc/alternatives/gcov-tool ]]; then update-alternatives --remove-all 
gcov-tool; fi; update-alternatives --install /usr/bin/gcov-tool gcov-tool /usr/bin/gcov-tool-11 10 if [[ -f /etc/alternatives/gcc-ranlib ]]; then update-alternatives --remove-all gcc-ranlib; fi; update-alternatives --install /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-11 10 if [[ -f /etc/alternatives/gfortran ]]; then update-alternatives --remove-all gfortran; fi; update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-11 10 # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* \ && rm -rf /var/cache/apt EOF ## Install C buildkit RUN </dev/null | grep libpython${PYTHON_VERSION})" ]]; then PYTHON_LIB_PREFIX=$(python3 -c "import sys; print(sys.base_prefix);") echo "${PYTHON_LIB_PREFIX}/lib" >> /etc/ld.so.conf.d/python3.conf echo "${PYTHON_LIB_PREFIX}/lib64" >> /etc/ld.so.conf.d/python3.conf fi exit 0 fi # Add deadsnakes PPA for Python versions for i in 1 2 3; do add-apt-repository -y ppa:deadsnakes/ppa && break || { echo "Attempt $i failed, retrying in 5s..."; sleep 5; } done apt-get update -y # Install apt-get install -y --no-install-recommends \ python${PYTHON_VERSION} \ python${PYTHON_VERSION}-dev \ python${PYTHON_VERSION}-venv \ python${PYTHON_VERSION}-distutils \ python${PYTHON_VERSION}-lib2to3 \ python${PYTHON_VERSION}-gdbm \ python${PYTHON_VERSION}-tk \ libibverbs-dev # Update alternatives if [[ -f /etc/alternatives/python3 ]]; then update-alternatives --remove-all python3; fi; update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 if [[ -f /etc/alternatives/python ]]; then update-alternatives --remove-all python; fi; update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 curl -sS "https://bootstrap.pypa.io/get-pip.py" | python${PYTHON_VERSION} if [[ -f /etc/alternatives/2to3 ]]; then update-alternatives --remove-all 2to3; fi; update-alternatives --install /usr/bin/2to3 2to3 /usr/bin/2to3${PYTHON_VERSION} 1 || true if [[ -f /etc/alternatives/pydoc3 ]]; then 
update-alternatives --remove-all pydoc3; fi; update-alternatives --install /usr/bin/pydoc3 pydoc3 /usr/bin/pydoc${PYTHON_VERSION} 1 || true if [[ -f /etc/alternatives/idle3 ]]; then update-alternatives --remove-all idle3; fi; update-alternatives --install /usr/bin/idle3 idle3 /usr/bin/idle${PYTHON_VERSION} 1 || true if [[ -f /etc/alternatives/python3-config ]]; then update-alternatives --remove-all python3-config; fi; update-alternatives --install /usr/bin/python3-config python3-config /usr/bin/python${PYTHON_VERSION}-config 1 || true # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* \ && rm -rf /var/cache/apt EOF ## Install Python buildkit ENV PIP_NO_CACHE_DIR=1 \ PIP_DISABLE_PIP_VERSION_CHECK=1 \ PIP_ROOT_USER_ACTION=ignore RUN </tmp/requirements.txt build cmake<4 ninja<1.11 setuptools<80 setuptools-scm packaging<25 wheel pybind11 Cython psutil==7.0.0 pipx==1.7.1 EOT pip install -r /tmp/requirements.txt # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* EOF ## Preset this to simplify configuration, ## it is the output of $(pipx environment --value PIPX_LOCAL_VENVS). ENV PIPX_HOME=/root/.local/share/pipx \ PIPX_LOCAL_VENVS=/root/.local/share/pipx/venvs \ USE_EMOJI="false" ## Install Torch ARG CUDA_VERSION ARG CUDA_DEVEL_VERSION ARG TORCH_VERSION ENV CUDA_HOME="/usr/local/cuda" \ CUDA_VERSION=${CUDA_VERSION} \ CUDA_DEVEL_VERSION=${CUDA_DEVEL_VERSION} \ TORCH_VERSION=${TORCH_VERSION} ENV LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/compat:${LD_LIBRARY_PATH}" RUN </tmp/requirements.txt torch==${TORCH_VERSION} torchvision torchaudio EOT IFS="." 
read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${CUDA_DEVEL_VERSION}" if (( $(echo "${CUDA_MAJOR}.${CUDA_MINOR} > 12.7" | bc -l) )) || [[ "${TARGETARCH}" == "amd64" ]]; then pip install --index-url https://download.pytorch.org/whl/cu${CUDA_MAJOR}${CUDA_MINOR} \ -r /tmp/requirements.txt else pip install --extra-index-url https://download.pytorch.org/whl/cpu/ \ -r /tmp/requirements.txt fi pip install \ numpy scipy # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* EOF # # Stage Build Base # # Example build command: # docker build --tag=gpustack/gpustack:cuda-base-build --file=pack/Dockerfile --target=base-build --progress=plain . # FROM nvidia/cuda:${CUDA_DEVEL_VERSION}-cudnn-devel-ubuntu22.04 AS base-build SHELL ["/bin/bash", "-eo", "pipefail", "-c"] ARG TARGETPLATFORM ARG TARGETOS ARG TARGETARCH ENV DEBIAN_FRONTEND=noninteractive \ LANG='en_US.UTF-8' \ LANGUAGE='en_US:en' \ LC_ALL='en_US.UTF-8' RUN < /etc/timezone \ && dpkg-reconfigure --frontend noninteractive tzdata # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* \ && rm -rf /var/cache/apt EOF ## Upgrade GCC if needed RUN <= 21.04" | bc -l) )); then echo "Skipping GCC upgrade for ${VERSION_ID}..." 
exit 0 fi # Install apt-get install -y --no-install-recommends \ gcc-11 g++-11 gfortran-11 gfortran # Update alternatives if [[ -f /etc/alternatives/gcov-dump ]]; then update-alternatives --remove-all gcov-dump; fi; update-alternatives --install /usr/bin/gcov-dump gcov-dump /usr/bin/gcov-dump-11 10 if [[ -f /etc/alternatives/lto-dump ]]; then update-alternatives --remove-all lto-dump; fi; update-alternatives --install /usr/bin/lto-dump lto-dump /usr/bin/lto-dump-11 10 if [[ -f /etc/alternatives/gcov ]]; then update-alternatives --remove-all gcov; fi; update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-11 10 if [[ -f /etc/alternatives/gcc ]]; then update-alternatives --remove-all gcc; fi; update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-11 10 if [[ -f /etc/alternatives/gcc-nm ]]; then update-alternatives --remove-all gcc-nm; fi; update-alternatives --install /usr/bin/gcc-nm gcc-nm /usr/bin/gcc-nm-11 10 if [[ -f /etc/alternatives/cpp ]]; then update-alternatives --remove-all cpp; fi; update-alternatives --install /usr/bin/cpp cpp /usr/bin/cpp-11 10 if [[ -f /etc/alternatives/g++ ]]; then update-alternatives --remove-all g++; fi; update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-11 10 if [[ -f /etc/alternatives/gcc-ar ]]; then update-alternatives --remove-all gcc-ar; fi; update-alternatives --install /usr/bin/gcc-ar gcc-ar /usr/bin/gcc-ar-11 10 if [[ -f /etc/alternatives/gcov-tool ]]; then update-alternatives --remove-all gcov-tool; fi; update-alternatives --install /usr/bin/gcov-tool gcov-tool /usr/bin/gcov-tool-11 10 if [[ -f /etc/alternatives/gcc-ranlib ]]; then update-alternatives --remove-all gcc-ranlib; fi; update-alternatives --install /usr/bin/gcc-ranlib gcc-ranlib /usr/bin/gcc-ranlib-11 10 if [[ -f /etc/alternatives/gfortran ]]; then update-alternatives --remove-all gfortran; fi; update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-11 10 # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* \ && rm -rf 
/var/cache/apt EOF ## Install C buildkit RUN </dev/null | grep libpython${PYTHON_VERSION})" ]]; then PYTHON_LIB_PREFIX=$(python3 -c "import sys; print(sys.base_prefix);") echo "${PYTHON_LIB_PREFIX}/lib" >> /etc/ld.so.conf.d/python3.conf echo "${PYTHON_LIB_PREFIX}/lib64" >> /etc/ld.so.conf.d/python3.conf fi exit 0 fi # Add deadsnakes PPA for Python versions for i in 1 2 3; do add-apt-repository -y ppa:deadsnakes/ppa && break || { echo "Attempt $i failed, retrying in 5s..."; sleep 5; } done apt-get update -y # Install apt-get install -y --no-install-recommends \ python${PYTHON_VERSION} \ python${PYTHON_VERSION}-dev \ python${PYTHON_VERSION}-venv \ python${PYTHON_VERSION}-distutils \ python${PYTHON_VERSION}-lib2to3 \ python${PYTHON_VERSION}-gdbm \ python${PYTHON_VERSION}-tk \ libibverbs-dev # Update alternatives if [[ -f /etc/alternatives/python3 ]]; then update-alternatives --remove-all python3; fi; update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 if [[ -f /etc/alternatives/python ]]; then update-alternatives --remove-all python; fi; update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 curl -sS "https://bootstrap.pypa.io/get-pip.py" | python${PYTHON_VERSION} if [[ -f /etc/alternatives/2to3 ]]; then update-alternatives --remove-all 2to3; fi; update-alternatives --install /usr/bin/2to3 2to3 /usr/bin/2to3${PYTHON_VERSION} 1 || true if [[ -f /etc/alternatives/pydoc3 ]]; then update-alternatives --remove-all pydoc3; fi; update-alternatives --install /usr/bin/pydoc3 pydoc3 /usr/bin/pydoc${PYTHON_VERSION} 1 || true if [[ -f /etc/alternatives/idle3 ]]; then update-alternatives --remove-all idle3; fi; update-alternatives --install /usr/bin/idle3 idle3 /usr/bin/idle${PYTHON_VERSION} 1 || true if [[ -f /etc/alternatives/python3-config ]]; then update-alternatives --remove-all python3-config; fi; update-alternatives --install /usr/bin/python3-config python3-config /usr/bin/python${PYTHON_VERSION}-config 
1 || true # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* \ && rm -rf /var/cache/apt EOF ## Install Python buildkit ENV PIP_NO_CACHE_DIR=1 \ PIP_DISABLE_PIP_VERSION_CHECK=1 \ PIP_ROOT_USER_ACTION=ignore RUN </tmp/requirements.txt build cmake<4 ninja<1.11 setuptools<80 setuptools-scm packaging<25 wheel pybind11 Cython psutil==7.0.0 pipx==1.7.1 EOT pip install -r /tmp/requirements.txt # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* EOF ## Preset this to simplify configuration, ## it is the output of $(pipx environment --value PIPX_LOCAL_VENVS). ENV PIPX_HOME=/root/.local/share/pipx \ PIPX_LOCAL_VENVS=/root/.local/share/pipx/venvs \ USE_EMOJI="false" ## Install Torch ARG CUDA_VERSION ARG CUDA_DEVEL_VERSION ARG TORCH_VERSION ENV CUDA_HOME="/usr/local/cuda" \ CUDA_VERSION=${CUDA_VERSION} \ CUDA_DEVEL_VERSION=${CUDA_DEVEL_VERSION} \ TORCH_VERSION=${TORCH_VERSION} ENV LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/compat:${LD_LIBRARY_PATH}" RUN </tmp/requirements.txt torch==${TORCH_VERSION} torchvision torchaudio EOT IFS="." read -r CUDA_MAJOR CUDA_MINOR CUDA_PATCH <<< "${CUDA_DEVEL_VERSION}" if (( $(echo "${CUDA_MAJOR}.${CUDA_MINOR} > 12.7" | bc -l) )) || [[ "${TARGETARCH}" == "amd64" ]]; then pip install --index-url https://download.pytorch.org/whl/cu${CUDA_MAJOR}${CUDA_MINOR} \ -r /tmp/requirements.txt else pip install --extra-index-url https://download.pytorch.org/whl/cpu/ \ -r /tmp/requirements.txt fi pip install \ numpy scipy # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* EOF # # Stage FlashInfer Build (linux/amd64 only) # # Example build command: # docker build --platform=linux/amd64 --tag=gpustack/gpustack:cuda-flashinfer-build --file=pack/Dockerfile --target=flashinfer-build --progress=plain . 
# FROM base-build AS flashinfer-build ARG TARGETPLATFORM ARG TARGETOS ARG TARGETARCH ARG TORCH_CUDA_ARCH_LIST ARG FLASHINFER_VERSION ARG FLASHINFER_BUILD_MAX_JOBS ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} \ FLASHINFER_VERSION=${FLASHINFER_VERSION} \ FLASHINFER_BUILD_MAX_JOBS=${FLASHINFER_BUILD_MAX_JOBS} ## Build FlashInfer RUN < 4" | bc -l) )); then export MAX_JOBS="4" fi export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}" if [[ -z "${TORCH_CUDA_ARCH_LIST}" ]]; then if (( $(echo "${CUDA_MAJOR} < 12" | bc -l) )); then export TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9" elif (( $(echo "${CUDA_MAJOR}.${CUDA_MINOR} < 12.8" | bc -l) )); then export TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0+PTX" else export TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0+PTX 10.0+PTX 12.0" fi fi export FLASHINFER_LOCAL_VERSION="cu${CUDA_MAJOR}${CUDA_MINOR}torch${TORCH_MAJOR}.${TORCH_MINOR}" export LD_PRELOAD="${CUDA_HOME}/lib64/libcudart.so:${LD_PRELOAD}" # Ensure CUDA runtime is preloaded echo "Building FlashInfer with the following environment variables:" echo " MAX_JOBS: ${MAX_JOBS}" echo " TORCH_CUDA_ARCH_LIST: ${TORCH_CUDA_ARCH_LIST}" echo " FLASHINFER_LOCAL_VERSION: ${FLASHINFER_LOCAL_VERSION}" echo " LD_PRELOAD=${LD_PRELOAD}" pushd /tmp/flashinfer \ && python -v -m flashinfer.aot \ && python -v -m build --no-isolation --wheel \ && tree -hs /tmp/flashinfer/dist \ && mv /tmp/flashinfer/dist /workspace # Cleanup rm -rf /var/tmp/* \ && rm -rf /tmp/* EOF # # Stage GPUStack # # Example build command: # docker build --tag=gpustack/gpustack:cuda --file=pack/Dockerfile --progress=plain . # FROM base AS gpustack SHELL ["/bin/bash", "-eo", "pipefail", "-c"] ARG TARGETPLATFORM ARG TARGETOS ARG TARGETARCH ARG CUDA_VERSION ARG CUDA_DEVEL_VERSION ARG TORCH_VERSION ENV CUDA_VERSION=${CUDA_VERSION} \ CUDA_DEVEL_VERSION=${CUDA_DEVEL_VERSION} \ TORCH_VERSION=${TORCH_VERSION} ## Install FlashInfer as a Python library for GPUStack if it exists RUN --mount=type=bind,from=flashinfer-build,source=/,target=/flashinfer,rw <