feat: support deploy model in windows arm

pull/414/head
michelia 1 year ago committed by Lawrence Li
parent b2e10a65b7
commit 5c6ef4e8a0

@ -16,7 +16,7 @@ When users deploy a model, the backend is selected automatically based on the fo
### Supported Platforms
The llama-box backend works on a wide range of platforms, including Windows, MacOS, and Linux.
The llama-box backend works on a wide range of platforms, including macOS, Linux, and Windows (on the Windows ARM architecture, only CPU offloading is supported).
### Supported Models

@ -1,11 +1,57 @@
import os
import platform
import logging
import threading
logger = logging.getLogger(__name__)
def system() -> str:
    """Return the operating-system name from ``platform.uname()``, lowercased."""
    uname_info = platform.uname()
    return uname_info.system.lower()
def get_native_arch() -> str:
    """Return the machine's native CPU architecture as a lowercase string.

    On Windows, emulation can mask the native architecture, so the processor
    is queried through WMI (``Win32_Processor.Architecture``). On every other
    platform, or whenever the WMI lookup cannot be completed (``pywin32`` or
    ``wmi`` missing, COM initialization failure, empty or unrecognized
    result), the lowercase value of ``platform.machine()`` is returned.

    Returns:
        A lowercase architecture name, e.g. ``"arm64"``, ``"amd64"`` or
        whatever ``platform.machine()`` reports on the current host.
    """
    if platform.system() != "Windows":
        return platform.machine().lower()

    # Windows emulation will mask the native architecture
    # https://learn.microsoft.com/en-us/windows/arm/apps-on-arm-x86-emulation
    #
    # Mapping of Win32_Processor.Architecture codes to names:
    # https://learn.microsoft.com/en-us/windows/win32/cimwin32prov/win32-processor
    wmi_arch_names = {
        0: 'x86',
        1: 'mips',
        2: 'alpha',
        3: 'powerpc',
        5: 'arm',
        6: 'ia64',
        9: 'amd64',
        12: 'arm64',
    }

    # Only undo COM initialization that this call actually performed.
    com_initialized = False
    try:
        # Imported inside the try so that a missing or broken pywin32/wmi
        # installation degrades to the platform.machine() fallback instead
        # of raising out of this function.
        import pythoncom
        import wmi

        # COM is initialized explicitly only off the main thread.
        if threading.current_thread() is not threading.main_thread():
            pythoncom.CoInitialize()
            com_initialized = True

        processors = wmi.WMI().Win32_Processor()
        native_arch = wmi_arch_names.get(processors[0].Architecture)
        if native_arch is not None:
            return native_arch
    except Exception as e:
        # Best effort: any failure falls through to the generic lookup below.
        logger.warning("Failed to get native architecture from WMI, %s", e)
    finally:
        if com_initialized:
            pythoncom.CoUninitialize()

    return platform.machine().lower()
def arch() -> str:
arch_map = {
"x86_64": "amd64",
@ -18,8 +64,13 @@ def arch() -> str:
"arm": "arm",
"ppc64le": "ppc64le",
"s390x": "s390x",
"x86": "x86",
"mips": "mips",
"alpha": "alpha",
"powerpc": "powerpc",
"ia64": "ia64",
}
return arch_map.get(platform.machine().lower(), "unknown")
return arch_map.get(get_native_arch(), "unknown")
def device() -> str:

91
poetry.lock generated

@ -2247,7 +2247,7 @@ files = [
name = "mpmath"
version = "1.3.0"
description = "Python library for arbitrary-precision floating-point arithmetic"
optional = false
optional = true
python-versions = "*"
files = [
{file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
@ -2543,7 +2543,7 @@ files = [
name = "networkx"
version = "3.3"
description = "Python package for creating and manipulating graphs and networks"
optional = false
optional = true
python-versions = ">=3.10"
files = [
{file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"},
@ -2676,7 +2676,7 @@ files = [
name = "nvidia-cublas-cu12"
version = "12.1.3.1"
description = "CUBLAS native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"},
@ -2687,7 +2687,7 @@ files = [
name = "nvidia-cuda-cupti-cu12"
version = "12.1.105"
description = "CUDA profiling tools runtime libs."
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"},
@ -2698,7 +2698,7 @@ files = [
name = "nvidia-cuda-nvrtc-cu12"
version = "12.1.105"
description = "NVRTC native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"},
@ -2709,7 +2709,7 @@ files = [
name = "nvidia-cuda-runtime-cu12"
version = "12.1.105"
description = "CUDA Runtime native Libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"},
@ -2720,7 +2720,7 @@ files = [
name = "nvidia-cudnn-cu12"
version = "9.1.0.70"
description = "cuDNN runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"},
@ -2734,7 +2734,7 @@ nvidia-cublas-cu12 = "*"
name = "nvidia-cufft-cu12"
version = "11.0.2.54"
description = "CUFFT native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"},
@ -2745,7 +2745,7 @@ files = [
name = "nvidia-curand-cu12"
version = "10.3.2.106"
description = "CURAND native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"},
@ -2756,7 +2756,7 @@ files = [
name = "nvidia-cusolver-cu12"
version = "11.4.5.107"
description = "CUDA solver native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"},
@ -2772,7 +2772,7 @@ nvidia-nvjitlink-cu12 = "*"
name = "nvidia-cusparse-cu12"
version = "12.1.0.106"
description = "CUSPARSE native runtime libraries"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"},
@ -2797,7 +2797,7 @@ files = [
name = "nvidia-nccl-cu12"
version = "2.20.5"
description = "NVIDIA Collective Communication Library (NCCL) Runtime"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"},
@ -2808,7 +2808,7 @@ files = [
name = "nvidia-nvjitlink-cu12"
version = "12.6.77"
description = "Nvidia JIT LTO Library"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:3bf10d85bb1801e9c894c6e197e44dd137d2a0a9e43f8450e9ad13f2df0dd52d"},
@ -2820,7 +2820,7 @@ files = [
name = "nvidia-nvtx-cu12"
version = "12.1.105"
description = "NVIDIA Tools Extension"
optional = false
optional = true
python-versions = ">=3"
files = [
{file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"},
@ -3831,6 +3831,33 @@ files = [
{file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"},
]
[[package]]
name = "pywin32"
version = "308"
description = "Python for Window Extensions"
optional = false
python-versions = "*"
files = [
{file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"},
{file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"},
{file = "pywin32-308-cp310-cp310-win_arm64.whl", hash = "sha256:a5ab5381813b40f264fa3495b98af850098f814a25a63589a8e9eb12560f450c"},
{file = "pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a"},
{file = "pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b"},
{file = "pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6"},
{file = "pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897"},
{file = "pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47"},
{file = "pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091"},
{file = "pywin32-308-cp313-cp313-win32.whl", hash = "sha256:1c44539a37a5b7b21d02ab34e6a4d314e0788f1690d65b48e9b0b89f31abbbed"},
{file = "pywin32-308-cp313-cp313-win_amd64.whl", hash = "sha256:fd380990e792eaf6827fcb7e187b2b4b1cede0585e3d0c9e84201ec27b9905e4"},
{file = "pywin32-308-cp313-cp313-win_arm64.whl", hash = "sha256:ef313c46d4c18dfb82a2431e3051ac8f112ccee1a34f29c263c583c568db63cd"},
{file = "pywin32-308-cp37-cp37m-win32.whl", hash = "sha256:1f696ab352a2ddd63bd07430080dd598e6369152ea13a25ebcdd2f503a38f1ff"},
{file = "pywin32-308-cp37-cp37m-win_amd64.whl", hash = "sha256:13dcb914ed4347019fbec6697a01a0aec61019c1046c2b905410d197856326a6"},
{file = "pywin32-308-cp38-cp38-win32.whl", hash = "sha256:5794e764ebcabf4ff08c555b31bd348c9025929371763b2183172ff4708152f0"},
{file = "pywin32-308-cp38-cp38-win_amd64.whl", hash = "sha256:3b92622e29d651c6b783e368ba7d6722b1634b8e70bd376fd7610fe1992e19de"},
{file = "pywin32-308-cp39-cp39-win32.whl", hash = "sha256:7873ca4dc60ab3287919881a7d4f88baee4a6e639aa6962de25a98ba6b193341"},
{file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
]
[[package]]
name = "pywin32-ctypes"
version = "0.2.3"
@ -5026,7 +5053,7 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7
name = "sympy"
version = "1.13.3"
description = "Computer algebra system (CAS) in Python"
optional = false
optional = true
python-versions = ">=3.8"
files = [
{file = "sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73"},
@ -5223,7 +5250,7 @@ files = [
name = "torch"
version = "2.4.0"
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
optional = false
optional = true
python-versions = ">=3.8.0"
files = [
{file = "torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:4ed94583e244af51d6a8d28701ca5a9e02d1219e782f5a01dd401f90af17d8ac"},
@ -5408,7 +5435,7 @@ vision = ["Pillow (>=10.0.1,<=15.0)"]
name = "triton"
version = "3.0.0"
description = "A language and compiler for custom Deep Learning operations"
optional = false
optional = true
python-versions = "*"
files = [
{file = "triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a"},
@ -5416,11 +5443,6 @@ files = [
{file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
{file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
{file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
{file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
{file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
{file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
{file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
{file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
]
[package.dependencies]
@ -5943,6 +5965,27 @@ files = [
{file = "websockets-13.1.tar.gz", hash = "sha256:a3b3366087c1bc0a2795111edcadddb8b3b59509d5db5d7ea3fdd69f954a8878"},
]
[[package]]
name = "wmi"
version = "1.5.1"
description = "Windows Management Instrumentation"
optional = false
python-versions = "*"
files = [
{file = "WMI-1.5.1-py2.py3-none-any.whl", hash = "sha256:1d6b085e5c445141c475476000b661f60fff1aaa19f76bf82b7abb92e0ff4942"},
{file = "WMI-1.5.1.tar.gz", hash = "sha256:b6a6be5711b1b6c8d55bda7a8befd75c48c12b770b9d227d31c1737dbf0d40a6"},
]
[package.dependencies]
pywin32 = "*"
[package.extras]
all = ["pytest", "sphinx", "twine", "wheel"]
dev = ["pytest", "sphinx", "twine", "wheel"]
docs = ["sphinx"]
package = ["twine", "wheel"]
tests = ["pytest"]
[[package]]
name = "xformers"
version = "0.0.27.post2"
@ -6225,9 +6268,9 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
type = ["pytest-mypy"]
[extras]
vllm = ["mistral_common", "vllm"]
vllm = ["mistral_common", "torch", "vllm"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "bb90951ef5762b8acefa6b7a0bf063a16fe32ae696d6c187df34330026636aac"
content-hash = "71ded6f2361e0499d441d7d5e5f172365989993ca309764f7e04fadd08b1beec"

@ -45,6 +45,8 @@ mistral_common = {version = "^1.4.3", optional = true, extras = ["opencv"]}
uvicorn = "^0.30.6"
transformers = "^4.44.2"
fastapi-cdn-host = "^0.8.0"
wmi = { version="^1.5.1", markers = "platform_system == 'Windows'" }
pywin32 = { version="^308", markers = "platform_system == 'Windows'" }
[tool.poetry.group.dev.dependencies]
coverage = {extras = ["toml"], version = "^7.5.1"}
flake8 = "^7.0.0"

Loading…
Cancel
Save