# gpustack/pack/Dockerfile.npu

# Packaging logic:
# 1. base target:
# - Install tools.
# - Upgrade GCC if needed.
# - Install C buildkit.
# - Upgrade Python if needed.
# - Install Python buildkit.
# - Install CANN buildkit if needed.
# 2.1. mindie-install target:
# - Add ATB models.
# - Install dependencies for MindIE into system site packages, including Torch, Torch-NPU and TorchVision,
# which is used to support multi-versions of MindIE.
# - Create a virtual environment to place MindIE: /opt/venvs/mindie.
# - Install specific version MindIE.
# 2.2. vllm-install target (built in parallel with mindie-install):
# - Create a virtual environment to place vLLM: /opt/venvs/vllm.
# - Install specific version Torch, Torch-NPU and TorchVision.
# - Install specific version vLLM and vLLM Ascend.
# 3. gpustack target (final):
# - Install GPUStack, and override the required dependencies after installation.
# - Set up the environment for CANN, NNAL and ATB models.
# - Set up the entrypoint to start GPUStack.
# Arguments description:
# - CANN_VERSION is the version of Ascend CANN,
# which is used to install the Ascend CANN Toolkit, Kernels and NNAL.
# - CANN_CHIP is the chip version of Ascend CANN,
# which is used to install the Ascend CANN Kernels.
# - MINDIE_VERSION is the version of Ascend MindIE,
# which is used to install the Ascend MindIE,
# please check https://www.hiascend.com/developer/download/community/result?module=ie%2Bpt%2Bcann for details.
# - MINDIE_TORCH_VERSION is the version of Torch used by Ascend MindIE.
# - VLLM_VERSION is the version of vLLM,
# which is used to install the vLLM.
# - VLLM_TORCH_VERSION is the version of Torch used by vLLM.
# - VLLM_ASCEND_VERSION is the version of vLLM Ascend,
# which is used to install the vLLM Ascend,
# please check https://vllm-ascend.readthedocs.io/en/stable/installation.html for details.
# - PYTHON_VERSION is the version of Python,
# which should be properly set, it must be 3.x.
# Default versions. They are declared ahead of the first FROM so that the base
# image reference below can use them; stages re-declare the ones they need.
ARG CANN_VERSION=8.2.rc1
ARG CANN_CHIP=910b
ARG MINDIE_VERSION=2.1.rc1
ARG MINDIE_TORCH_VERSION=2.1.0
ARG VLLM_VERSION=0.10.0
ARG VLLM_TORCH_VERSION=2.7.1
ARG VLLM_ASCEND_VERSION=0.10.0rc1
ARG PYTHON_VERSION=3.11

#
# Stage Base
#
# Example build command:
#   docker build --tag=gpustack/gpustack:npu-base --file=pack/Dockerfile.npu --target base --progress=plain .
#

FROM quay.io/ascend/cann:${CANN_VERSION}-${CANN_CHIP}-ubuntu22.04-py${PYTHON_VERSION} AS base

# Every RUN in this stage executes under bash with errexit and pipefail enabled.
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]

ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH

# Non-interactive apt and a UTF-8 locale for all following steps.
ENV DEBIAN_FRONTEND="noninteractive" \
    LANG="en_US.UTF-8" \
    LANGUAGE="en_US:en" \
    LC_ALL="en_US.UTF-8"
RUN <<EOF
    # Tools

    # Refresh: install the repository helpers first, then register the
    # ubuntu-toolchain-r/test PPA and refresh the package index again.
    apt-get update -y && apt-get install -y --no-install-recommends \
        software-properties-common apt-transport-https \
        ca-certificates gnupg2 lsb-release gnupg-agent \
      && apt-get update -y \
      && add-apt-repository -y ppa:ubuntu-toolchain-r/test \
      && apt-get update -y

    # Install: compilers, download/VCS/archive tools, locale and timezone data,
    # network and process diagnostics, plus tini (used by the final ENTRYPOINT).
    apt-get install -y --no-install-recommends \
        ca-certificates build-essential binutils bash openssl \
        curl wget aria2 \
        git git-lfs \
        unzip xz-utils \
        tzdata locales \
        iproute2 iputils-ping ifstat net-tools dnsutils pciutils ipmitool \
        procps sysstat htop \
        tini vim jq bc tree

    # Update locale: generate en_US.UTF-8.
    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8

    # Update timezone: point the image at Asia/Shanghai.
    rm -f /etc/localtime \
      && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
      && echo "Asia/Shanghai" > /etc/timezone \
      && dpkg-reconfigure --frontend noninteractive tzdata

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/* \
      && rm -rf /var/cache/apt
EOF
## Upgrade GCC if needed
RUN <<EOF
    # GCC

    # Only Ubuntu releases up to 21.04 get the GCC 11 upgrade; newer releases leave this step early.
    source /etc/os-release
    if (( $(echo "${VERSION_ID} > 21.04" | bc -l) )); then
        echo "Skipping GCC upgrade for ${VERSION_ID}..."
        exit 0
    fi

    # Install
    apt-get install -y --no-install-recommends \
        gcc-11 g++-11 gfortran-11 gfortran

    # Update alternatives: for each tool, drop any existing alternative group
    # and register the "-11" binary as the provider with priority 10.
    for tool in gcov-dump lto-dump gcov gcc gcc-nm cpp g++ gcc-ar gcov-tool gcc-ranlib gfortran; do
        if [[ -f /etc/alternatives/${tool} ]]; then
            update-alternatives --remove-all ${tool}
        fi
        update-alternatives --install /usr/bin/${tool} ${tool} /usr/bin/${tool}-11 10
    done

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/* \
      && rm -rf /var/cache/apt
EOF
## Install C buildkit
RUN <<EOF
    # C buildkit

    # Install the build drivers from apt.
    apt-get install -y --no-install-recommends \
        make ninja-build pkg-config ccache

    # CMake 3.31.7 comes from the upstream release tarball for the build
    # machine's architecture and is unpacked directly under /usr.
    curl --retry 3 --retry-connrefused -fL "https://github.com/Kitware/CMake/releases/download/v3.31.7/cmake-3.31.7-linux-$(uname -m).tar.gz" | tar -zx -C /usr --strip-components 1

    # Install dependencies: development headers and runtime libraries.
    apt-get install -y --no-install-recommends \
        perl-openssl-defaults perl yasm \
        zlib1g zlib1g-dev libbz2-dev libffi-dev libgdbm-dev libgdbm-compat-dev \
        openssl libssl-dev libsqlite3-dev lcov libomp-dev \
        libblas-dev liblapack-dev libopenblas-dev libblas3 liblapack3 libhdf5-dev \
        libxml2 libxslt1-dev libgl1-mesa-glx libgmpxx4ldbl \
        libncurses5-dev libreadline6-dev libsqlite3-dev \
        liblzma-dev lzma lzma-dev tk-dev uuid-dev libmpdec-dev \
        ffmpeg libjpeg-dev libpng-dev libtiff-dev libwebp-dev \
        libnuma-dev libjemalloc-dev

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/* \
      && rm -rf /var/cache/apt
EOF
## Upgrade Python if needed
ARG PYTHON_VERSION
ENV PYTHON_VERSION=${PYTHON_VERSION}
RUN <<EOF
    # Python

    # Skip the upgrade when python3 already reports the requested major.minor.
    # The versions are compared as strings: a numeric comparison (e.g. through bc)
    # would consider 3.10 and 3.1 to be the same version.
    if [[ "$(python3 --version | cut -d' ' -f2 | cut -d'.' -f1,2)" == "${PYTHON_VERSION}" ]]; then
        echo "Skipping Python upgrade for ${PYTHON_VERSION}..."
        # Make libpython discoverable by the dynamic linker if it is not already.
        if [[ -z "$(ldconfig -v 2>/dev/null | grep libpython${PYTHON_VERSION})" ]]; then
            PYTHON_LIB_PREFIX=$(python3 -c "import sys; print(sys.base_prefix);")
            echo "${PYTHON_LIB_PREFIX}/lib" >> /etc/ld.so.conf.d/python3.conf
            echo "${PYTHON_LIB_PREFIX}/lib64" >> /etc/ld.so.conf.d/python3.conf
            # Rebuild the linker cache so the new directories take effect.
            ldconfig
        fi
        exit 0
    fi

    # Add deadsnakes PPA for Python versions, retrying up to three times.
    PPA_ADDED="false"
    for i in 1 2 3; do
        if add-apt-repository -y ppa:deadsnakes/ppa; then
            PPA_ADDED="true"
            break
        fi
        echo "Attempt $i failed, retrying in 5s..."
        sleep 5
    done
    if [[ "${PPA_ADDED}" != "true" ]]; then
        # Keep going (the packages may still be available from the configured
        # repositories), but do not hide the failure.
        echo "WARNING: unable to add ppa:deadsnakes/ppa after 3 attempts, continuing without it." >&2
    fi
    apt-get update -y

    # Install
    apt-get install -y --no-install-recommends \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        python${PYTHON_VERSION}-venv \
        python${PYTHON_VERSION}-distutils \
        python${PYTHON_VERSION}-lib2to3 \
        python${PYTHON_VERSION}-gdbm \
        python${PYTHON_VERSION}-tk \
        libibverbs-dev

    # Update alternatives
    if [[ -f /etc/alternatives/python3 ]]; then update-alternatives --remove-all python3; fi; update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1
    if [[ -f /etc/alternatives/python ]]; then update-alternatives --remove-all python; fi; update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1
    # Bootstrap pip for the new interpreter. -f makes an HTTP error fail the
    # download instead of piping an error page into Python; -L follows redirects.
    curl --retry 3 --retry-connrefused -fsSL "https://bootstrap.pypa.io/get-pip.py" | python${PYTHON_VERSION}
    # The remaining helpers are optional, so a missing binary is tolerated.
    if [[ -f /etc/alternatives/2to3 ]]; then update-alternatives --remove-all 2to3; fi; update-alternatives --install /usr/bin/2to3 2to3 /usr/bin/2to3${PYTHON_VERSION} 1 || true
    if [[ -f /etc/alternatives/pydoc3 ]]; then update-alternatives --remove-all pydoc3; fi; update-alternatives --install /usr/bin/pydoc3 pydoc3 /usr/bin/pydoc${PYTHON_VERSION} 1 || true
    if [[ -f /etc/alternatives/idle3 ]]; then update-alternatives --remove-all idle3; fi; update-alternatives --install /usr/bin/idle3 idle3 /usr/bin/idle${PYTHON_VERSION} 1 || true
    if [[ -f /etc/alternatives/python3-config ]]; then update-alternatives --remove-all python3-config; fi; update-alternatives --install /usr/bin/python3-config python3-config /usr/bin/python${PYTHON_VERSION}-config 1 || true

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/* \
      && rm -rf /var/cache/apt \
      && pip cache purge
EOF
## Install Python buildkit
# pip runs without a cache, without the self-update check and without the root-user warning.
ENV PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_ROOT_USER_ACTION=ignore
RUN <<EOF
    # Buildkit

    # Write one requirement per line, then install the whole set in one pip call.
    printf '%s\n' \
        'build' \
        'cmake<4' \
        'ninja<1.11' \
        'setuptools<80' \
        'setuptools-scm' \
        'packaging<25' \
        'wheel' \
        'pybind11' \
        'Cython' \
        'psutil==7.0.0' \
        'pipx==1.7.1' \
        >/tmp/requirements.txt
    pip install -r /tmp/requirements.txt

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/*
EOF
## Preset this to simplify configuration.
# LOCAL_VENVS is the parent directory of the virtual environments created by later stages.
ENV LOCAL_VENVS=/opt/venvs \
    USE_EMOJI="false"
## Install CANN buildkit if needed
ARG CANN_VERSION
ARG CANN_CHIP
ENV CANN_VERSION=${CANN_VERSION} \
    CANN_CHIP=${CANN_CHIP} \
    CANN_HOME="/usr/local/Ascend"
RUN <<EOF
    # CANN Toolkit

    # The download version is CANN_VERSION without a trailing ".beta1", upper-cased.
    DOWNLOAD_VERSION="$(echo ${CANN_VERSION%\.beta1} | tr '[:lower:]' '[:upper:]')"

    # Nothing to do when this toolkit version is already present.
    if [[ -d "${CANN_HOME}/ascend-toolkit/${DOWNLOAD_VERSION}" ]]; then
        echo "Skipping CANN Toolkit upgrade for ${DOWNLOAD_VERSION}..."
        exit 0
    fi

    OS="$(uname -s | tr '[:upper:]' '[:lower:]')"
    ARCH="$(uname -m)"
    URL_PREFIX="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%20${DOWNLOAD_VERSION}"
    URL_SUFFIX="response-content-type=application/octet-stream"

    # Install dependencies
    cat <<EOT >/tmp/requirements.txt
attrs==24.3.0
numpy==1.26.4
decorator==5.2.1
sympy==1.14.0
cffi==1.17.1
PyYAML==6.0.2
pathlib2==2.3.7.post1
psutil==7.0.0
protobuf==6.31.0
scipy==1.15.3
requests==2.32.3
absl-py==2.2.2
EOT
    pip install -r /tmp/requirements.txt

    # Install toolkit: download the self-extracting installer and run it,
    # answering its prompt with "Y".
    TOOLKIT_FILE="Ascend-cann-toolkit_${DOWNLOAD_VERSION}_${OS}-${ARCH}.run"
    TOOLKIT_PATH="/tmp/${TOOLKIT_FILE}"
    TOOLKIT_URL="${URL_PREFIX}/${TOOLKIT_FILE}?${URL_SUFFIX}"
    curl -H 'Referer: https://www.hiascend.com/' --retry 3 --retry-connrefused -fL -o "${TOOLKIT_PATH}" "${TOOLKIT_URL}"
    chmod a+x "${TOOLKIT_PATH}"
    printf "Y\n" | "${TOOLKIT_PATH}" --install --install-for-all --install-path="${CANN_HOME}"

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/* \
      && rm -rf /var/cache/apt \
      && rm -rf /var/log/ascend \
      && rm -rf /var/log/ascend_seclog
EOF
RUN <<EOF
    # CANN Kernels

    OS="$(uname -s | tr '[:upper:]' '[:lower:]')"
    ARCH="$(uname -m)"
    # The download version is CANN_VERSION without a trailing ".beta1", upper-cased.
    DOWNLOAD_VERSION="$(echo ${CANN_VERSION%\.beta1} | tr '[:lower:]' '[:upper:]')"
    URL_PREFIX="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%20${DOWNLOAD_VERSION}"
    URL_SUFFIX="response-content-type=application/octet-stream"

    # NOTE(review): this guard looks at the toolkit directory rather than a
    # kernels-specific path. Presumably the toolkit step above creates this
    # directory when it installs, in which case the kernels would be skipped
    # as well -- confirm this is intended.
    if [[ -d "${CANN_HOME}/ascend-toolkit/${DOWNLOAD_VERSION}" ]]; then
        echo "Skipping CANN Kernels upgrade for ${DOWNLOAD_VERSION}..."
        exit 0
    fi

    # Prepare environment
    source ${CANN_HOME}/ascend-toolkit/set_env.sh

    # Install kernels: probe the architecture-specific package with a HEAD
    # request and use the architecture-less file name when the probe fails.
    KERNELS_FILE="Ascend-cann-kernels-${CANN_CHIP}_${DOWNLOAD_VERSION}_${OS}-${ARCH}.run"
    if ! curl -H 'Referer: https://www.hiascend.com/' --retry 3 --retry-connrefused -fsSIL "${URL_PREFIX}/${KERNELS_FILE}?${URL_SUFFIX}" >/dev/null 2>&1; then
        # Fallback to generic kernels
        KERNELS_FILE="Ascend-cann-kernels-${CANN_CHIP}_${DOWNLOAD_VERSION}_${OS}.run"
    fi
    KERNELS_PATH="/tmp/${KERNELS_FILE}"
    KERNELS_URL="${URL_PREFIX}/${KERNELS_FILE}?${URL_SUFFIX}"
    curl -H 'Referer: https://www.hiascend.com/' --retry 3 --retry-connrefused -fL -o "${KERNELS_PATH}" "${KERNELS_URL}"
    chmod a+x "${KERNELS_PATH}"
    printf "Y\n" |"${KERNELS_PATH}" --install --install-for-all --install-path="${CANN_HOME}"

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/* \
      && rm -rf /var/cache/apt \
      && rm -rf /var/log/ascend \
      && rm -rf /var/log/ascend_seclog
EOF
RUN <<EOF
    # CANN NNAL

    OS="$(uname -s | tr '[:upper:]' '[:lower:]')"
    ARCH="$(uname -m)"
    # The download version is CANN_VERSION without a trailing ".beta1", upper-cased.
    DOWNLOAD_VERSION="$(echo ${CANN_VERSION%\.beta1} | tr '[:lower:]' '[:upper:]')"
    URL_PREFIX="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%20${DOWNLOAD_VERSION}"
    URL_SUFFIX="response-content-type=application/octet-stream"

    # Skip only when both the toolkit version directory and the NNAL directory exist.
    if [[ -d "${CANN_HOME}/ascend-toolkit/${DOWNLOAD_VERSION}" ]] && [[ -d "${CANN_HOME}/nnal" ]]; then
        echo "Skipping CANN NNAL upgrade for ${DOWNLOAD_VERSION}..."
        exit 0
    fi

    # Prepare environment
    source ${CANN_HOME}/ascend-toolkit/set_env.sh

    # Install NNAL: download the self-extracting installer and run it,
    # answering its prompt with "Y".
    NNAL_FILE="Ascend-cann-nnal_${DOWNLOAD_VERSION}_${OS}-${ARCH}.run"
    NNAL_PATH="/tmp/${NNAL_FILE}"
    NNAL_URL="${URL_PREFIX}/${NNAL_FILE}?${URL_SUFFIX}"
    curl -H 'Referer: https://www.hiascend.com/' --retry 3 --retry-connrefused -fL -o "${NNAL_PATH}" "${NNAL_URL}"
    chmod a+x "${NNAL_PATH}"
    printf "Y\n" | "${NNAL_PATH}" --install --install-path="${CANN_HOME}"

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/* \
      && rm -rf /var/cache/apt \
      && rm -rf /var/log/ascend_seclog \
      && rm -rf /var/log/cann_atb_log
EOF
#
# Stage MindIE Install
#
# Example build command:
# docker build --tag=gpustack/gpustack:npu-mindie-install --file=pack/Dockerfile.npu --target mindie-install --progress=plain .
#
FROM base AS mindie-install

# Every RUN in this stage executes under bash with errexit and pipefail enabled.
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]

ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH

## Install MindIE
ARG MINDIE_VERSION
ARG MINDIE_TORCH_VERSION
ENV MINDIE_VERSION=${MINDIE_VERSION} \
    MINDIE_TORCH_VERSION=${MINDIE_TORCH_VERSION}

# The ATB models archive is taken from the build context; its file name encodes the
# MindIE version, target OS/architecture, Python version and Torch version.
ADD --chown=root:root pack/mindie-atb-models_${MINDIE_VERSION}_${TARGETOS}-${TARGETARCH}_py${PYTHON_VERSION}_torch${MINDIE_TORCH_VERSION}-abi0.tar.gz ${CANN_HOME}/atb-models/
RUN <<EOF
    # MindIE

    OS="$(uname -s | tr '[:upper:]' '[:lower:]')"
    ARCH="$(uname -m)"
    # The download version is MINDIE_VERSION without a trailing ".beta1", upper-cased.
    DOWNLOAD_VERSION="$(echo ${MINDIE_VERSION%\.beta1} | tr '[:lower:]' '[:upper:]')"
    URL_PREFIX="https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/MindIE/MindIE%20${DOWNLOAD_VERSION}"
    URL_SUFFIX="response-content-type=application/octet-stream"

    # Install Torch, Torch-npu, TorchVision,
    # according to Ascend Extension Installation, have the mapping requirements for the CANN_VERSION,
    # please check https://www.hiascend.com/developer/download/community/result?module=ie%2Bpt%2Bcann for details.
    cat <<EOT >/tmp/requirements.txt
torch==${MINDIE_TORCH_VERSION}
torchvision
torchaudio
EOT
    # On amd64 the PyTorch CPU wheel index is added as an extra index.
    TORCH_INDEX_ARGS=()
    if [[ "${TARGETARCH}" == "amd64" ]]; then
        TORCH_INDEX_ARGS=(--extra-index-url https://download.pytorch.org/whl/cpu/)
    fi
    pip install "${TORCH_INDEX_ARGS[@]}" \
        -r /tmp/requirements.txt
    pip install torch-npu==${MINDIE_TORCH_VERSION}.*

    # Install dependencies.
    cat <<EOT >/tmp/requirements.txt
absl-py==2.2.2
accelerate==0.34.2
aiohappyeyeballs==2.6.1
aiohttp==3.11.18
aiosignal==1.3.2
attrs==24.3.0
av==14.3.0
certifi==2024.8.30
cloudpickle==3.0.0
cpm-kernels==1.0.11
decorator==5.2.1
easydict==1.13
einops==0.8.1
et-xmlfile==1.1.0
frozenlist==1.6.0
fuzzywuzzy==0.18.0
gevent==24.2.1
geventhttpclient==2.3.1
greenlet==3.2.1
grpcio==1.71.0
icetk==0.0.4
idna==2.8
jieba==0.42.1
Jinja2==3.1.6
jsonlines==4.0.0
jsonschema-specifications==2025.4.1
jsonschema==4.23.0
latex2mathml==3.77.0
loguru==0.7.2
Markdown==3.7
matplotlib==3.9.2
mdtex2html==1.3.0
ml_dtypes==0.5.0
multidict==6.4.3
nltk==3.9.1
numba==0.61.2
numpy==1.26.4
onnx==1.17.0
openpyxl==3.1.5
pandas==2.2.3
pillow==11.2.1
prettytable==3.11.0
propcache==0.3.1
psutil==7.0.0
pyarrow==19.0.1
pydantic_core==2.23.4
pydantic==2.9.2
python-rapidjson==1.20
requests==2.32.3
rouge-score==0.1.2
rouge==1.0.1
sacrebleu==2.4.3
scipy==1.15.3
text-generation==0.7.0
thefuzz==0.22.1
tiktoken==0.7.0
tornado==6.4.2
tqdm==4.67.1
transformers==4.52.3
safetensors==0.5.3
tritonclient==2.49.0
typing_extensions==4.13.2
tzdata==2024.2
urllib3==2.4.0
yarl==1.20.0
zope.event==5.0
zope.interface==7.0.3
EOT
    pip install -r /tmp/requirements.txt

    # Install MindIE ATB models: the wheel(s) shipped in the archive added before this step.
    pip install ${CANN_HOME}/atb-models/*.whl

    # Pre process
    # - Create virtual environment to place MindIE
    python -m venv --system-site-packages ${LOCAL_VENVS}/mindie
    # - Prepare environment
    source ${CANN_HOME}/ascend-toolkit/set_env.sh
    source ${CANN_HOME}/nnal/atb/set_env.sh
    source ${LOCAL_VENVS}/mindie/bin/activate

    # Install MindIE: from version 2.1 onwards the installer file name carries an "_abi0" suffix.
    MINDIE_FILE="Ascend-mindie_${DOWNLOAD_VERSION}_${OS}-${ARCH}.run"
    IFS="." read -r MINDIE_MAJOR MINDIE_MINOR MINDIE_PATCH <<< "${MINDIE_VERSION}"
    if (( $(echo "${MINDIE_MAJOR}.${MINDIE_MINOR} >= 2.1" | bc -l) )); then
        MINDIE_FILE="Ascend-mindie_${DOWNLOAD_VERSION}_${OS}-${ARCH}_abi0.run"
    fi
    MINDIE_PATH="/tmp/${MINDIE_FILE}"
    MINDIE_URL="${URL_PREFIX}/${MINDIE_FILE}?${URL_SUFFIX}"
    curl -H 'Referer: https://www.hiascend.com/' --retry 3 --retry-connrefused -fL -o "${MINDIE_PATH}" "${MINDIE_URL}"
    chmod a+x "${MINDIE_PATH}"
    printf "Y\n" | "${MINDIE_PATH}" --install --install-path="${CANN_HOME}"

    # Post process
    # - Make MindIE service configuration writable
    chmod +w "${CANN_HOME}/mindie/${DOWNLOAD_VERSION}/mindie-service/conf"
    # - Tell GPUStack how to launch MindIE: append the venv activation to the
    #   service's set_env.sh, then drop the write permission from that file.
    cat <<EOT >>"${CANN_HOME}/mindie/${DOWNLOAD_VERSION}/mindie-service/set_env.sh"
# NB(thxCode): This is a workaround for GPUStack to activate MindIE.
source ${LOCAL_VENVS}/mindie/bin/activate || true
EOT
    chmod -w "${CANN_HOME}/mindie/${DOWNLOAD_VERSION}/mindie-service/set_env.sh"

    # Review
    pip freeze

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/* \
      && rm -rf /var/cache/apt \
      && rm -rf /var/log/ascend_seclog \
      && rm -rf /var/log/cann_atb_log \
      && rm -rf /var/log/mindie_log \
      && rm -rf ~/log
EOF
#
# Stage vLLM Install
#
# Example build command:
# docker build --tag=gpustack/gpustack:npu-vllm-install --file=pack/Dockerfile.npu --target vllm-install --progress=plain .
#
FROM base AS vllm-install

# Every RUN in this stage executes under bash with errexit and pipefail enabled.
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]

ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH

## Install vLLM (Ascend)
# The selected versions are also exported into the stage environment.
ARG VLLM_VERSION
ARG VLLM_TORCH_VERSION
ARG VLLM_ASCEND_VERSION
ENV VLLM_VERSION=${VLLM_VERSION} \
    VLLM_TORCH_VERSION=${VLLM_TORCH_VERSION} \
    VLLM_ASCEND_VERSION=${VLLM_ASCEND_VERSION}
RUN <<EOF
    # vLLM

    # Pre process
    # - Create virtual environment to place vLLM
    python -m venv --system-site-packages ${LOCAL_VENVS}/vllm
    # - Prepare environment
    source ${CANN_HOME}/ascend-toolkit/set_env.sh
    source ${CANN_HOME}/nnal/atb/set_env.sh
    source ${LOCAL_VENVS}/vllm/bin/activate

    # Install dependencies.
    cat <<EOT >/tmp/requirements.txt
ml-dtypes==0.5.0
tornado==6.4.2
gevent==24.2.1
geventhttpclient==2.3.1
sacrebleu==2.4.3
pandas==2.2.3
rouge_score==0.1.2
pybind11==2.13.6
pytest==8.4.0
cloudpickle==3.0.0
ray[default]==2.47.1
protobuf>3.20.0
grpcio==1.71.0
EOT
    pip install -r /tmp/requirements.txt

    # Install vLLM; on amd64 the PyTorch CPU wheel index is added as an extra index.
    if [[ "${TARGETARCH}" == "amd64" ]]; then
        pip install --extra-index-url https://download.pytorch.org/whl/cpu/ \
            vllm==${VLLM_VERSION}
    else
        pip install \
            vllm==${VLLM_VERSION}
    fi

    # Fix conflicting packages.
    # - In x86, triton will be installed, triton doesn't work correctly in Ascend, we need to uninstall it.
    pip uninstall -y triton || true
    # Fix conflicting packages.
    # - In Ascend, opencv-python-headless requires numpy>2.0.0, which is conflicting with Ascend, we need to reinstall it.
    pip uninstall -y numpy || true

    # Install Torch, Torch-npu, TorchVision,
    # according to Ascend Extension Installation, have the mapping requirements for the CANN_VERSION,
    # please check https://www.hiascend.com/developer/download/community/result?module=ie%2Bpt%2Bcann for details.
    cat <<EOT >/tmp/requirements.txt
torch==${VLLM_TORCH_VERSION}
torchvision
torchaudio
EOT
    if [[ "${TARGETARCH}" == "amd64" ]]; then
        pip install --extra-index-url https://download.pytorch.org/whl/cpu/ \
            -r /tmp/requirements.txt
    else
        pip install \
            -r /tmp/requirements.txt
    fi
    pip install torch-npu==${VLLM_TORCH_VERSION}.* --pre --extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi

    # Install vLLM Ascend
    pip install vllm-ascend==${VLLM_ASCEND_VERSION} --pre --extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi

    # Fix SoC version.
    # The site-packages path follows PYTHON_VERSION so the patch still finds
    # the file when the image is built with a Python version other than 3.11.
    if [[ "${CANN_CHIP}" == "310p" ]]; then
        sed -i "s/^__soc_version__.*/__soc_version__ = 'ASCEND310P3'/" ${LOCAL_VENVS}/vllm/lib/python${PYTHON_VERSION}/site-packages/vllm_ascend/_build_info.py
    fi

    # Review
    pip freeze

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/* \
      && rm -rf /var/cache/apt \
      && rm -rf ~/log
EOF
#
# Stage GPUStack
#
# Example build command:
# docker build --tag=gpustack/gpustack:npu --file=pack/Dockerfile.npu --progress=plain .
#
FROM mindie-install AS gpustack

# Every RUN in this stage executes under bash with errexit and pipefail enabled.
SHELL ["/bin/bash", "-eo", "pipefail", "-c"]

ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH

## Copy vLLM from vllm-install stage
COPY --from=vllm-install ${LOCAL_VENVS}/vllm ${LOCAL_VENVS}/vllm
RUN --mount=type=bind,target=/workspace/gpustack,rw <<EOF
    # Patch vLLM

    # Install GPUStack's _sitecustomize.py as sitecustomize.py into every
    # site-packages directory of the vLLM venv that contains the vllm package.
    for dir in lib lib64; do
        SITE_PACKAGES="${LOCAL_VENVS}/vllm/${dir}/python${PYTHON_VERSION}/site-packages"
        if [ -d "${SITE_PACKAGES}/vllm" ]; then
            cp /workspace/gpustack/gpustack/_sitecustomize.py ${SITE_PACKAGES}/sitecustomize.py
        fi
    done
EOF
## Install GPUStack
RUN --mount=type=bind,target=/workspace/gpustack,rw <<EOF
    # GPUStack

    # Build GPUStack from the bind-mounted source tree, with the pipx bin
    # directory on PATH.
    export PATH="$(pipx environment --value PIPX_BIN_DIR):${PATH}"
    cd /workspace/gpustack \
      && git config --global --add safe.directory /workspace/gpustack \
      && make build

    # Pre process
    # - Create virtual environment to place gpustack
    python -m venv --system-site-packages ${LOCAL_VENVS}/gpustack
    # - Prepare environment
    source ${LOCAL_VENVS}/gpustack/bin/activate

    # Install GPUStack,
    # vox-box relies on PyTorch 2.7, which is not compatible with MindIE.
    # The built wheel is installed into the venv and the CLI is linked into /usr/local/bin.
    WHEEL_PACKAGE="$(ls /workspace/gpustack/dist/*.whl)"
    pip install ${WHEEL_PACKAGE} \
      && ln -vsf ${LOCAL_VENVS}/gpustack/bin/gpustack /usr/local/bin/gpustack

    # Download tools, then list what landed in gpustack's third_party directory.
    gpustack download-tools --device npu
    tree -hs "$(pip show gpustack | grep Location: | head -n 1 | cut -d" " -f 2)/gpustack/third_party"

    # Activate MindIE
    #   MindIE is made up of many components and conflicts with vLLM,
    #   so MindIE has to be activated manually by GPUStack.

    # Activate vLLM: expose the vLLM venv's launcher inside the GPUStack venv.
    ln -vsf ${LOCAL_VENVS}/vllm/bin/vllm ${LOCAL_VENVS}/gpustack/bin/vllm
    # - Redirect RAY.
    rm -rf ${LOCAL_VENVS}/gpustack/bin/ray \
      && ln -vsf ${LOCAL_VENVS}/vllm/bin/ray ${LOCAL_VENVS}/gpustack/bin/ray

    # Set up environment
    mkdir -p /var/lib/gpustack \
      && chmod -R 0755 /var/lib/gpustack

    # Review
    pip freeze

    # Cleanup
    rm -rf /var/tmp/* \
      && rm -rf /tmp/* \
      && rm -rf /var/cache/apt \
      && rm -rf /workspace/gpustack/dist
EOF
## Setup environment
RUN <<EOF
    # Append one line to both /etc/profile and ~/.bashrc.
    persist() {
        echo "$1" >> /etc/profile
        echo "$1" >> ~/.bashrc
    }

    # Export Python lib
    PYTHON_LIB_PREFIX=$(python3 -c "import sys; print(sys.base_prefix);")
    persist "export LD_LIBRARY_PATH=${PYTHON_LIB_PREFIX}/lib:${PYTHON_LIB_PREFIX}/lib64:\${LD_LIBRARY_PATH}"

    # Export CANN lib
    CANN_LIB_PREFIX="${CANN_HOME}/ascend-toolkit/$(echo ${CANN_VERSION%\.beta1} | tr '[:lower:]' '[:upper:]')/$(uname -m)-linux"
    persist "export LD_LIBRARY_PATH=${CANN_LIB_PREFIX}/lib:${CANN_LIB_PREFIX}/lib64:\${LD_LIBRARY_PATH}"

    # Export CANN driver lib
    persist "export LD_LIBRARY_PATH=${CANN_HOME}/driver/lib64/common:${CANN_HOME}/driver/lib64/driver:\${LD_LIBRARY_PATH}"

    # Source CANN Toolkit environment
    persist "source ${CANN_HOME}/ascend-toolkit/set_env.sh"

    # Source CANN NNAL environment
    persist "source ${CANN_HOME}/nnal/atb/set_env.sh"

    # Source ATB model environment
    persist "source ${CANN_HOME}/atb-models/set_env.sh"

    # Export Driver tools
    persist "export PATH=${CANN_HOME}/driver/tools:\${PATH}"

    # NB(thxCode): For specific MindIE version supporting,
    # we need to process environment setting up during GPUStack deployment.

    # NB(thxCode): Any tuning environment variables should NOT be set here.
EOF
# Persist pipx venvs in the data directory
ENV PIPX_HOME=/var/lib/gpustack/pipx \
    PIPX_BIN_DIR=/var/lib/gpustack/bin

# tini is the container's init process. It starts bash, which sources /etc/profile
# (populated by the environment setup step) and then replaces itself with
# `gpustack start`, forwarding the container arguments.
ENTRYPOINT ["tini", "--", "/usr/bin/bash", "-c", "source /etc/profile && exec gpustack start \"$@\"", "--"]