From 5c6ef4e8a0c8e89370a4fd9c851777345b844a67 Mon Sep 17 00:00:00 2001 From: michelia Date: Fri, 18 Oct 2024 14:57:04 +0800 Subject: [PATCH] feat: support deploy model in windows arm --- docs/user-guide/inference-backends.md | 2 +- gpustack/utils/platform.py | 53 +++++++++++++++- poetry.lock | 91 ++++++++++++++++++++------- pyproject.toml | 2 + 4 files changed, 122 insertions(+), 26 deletions(-) diff --git a/docs/user-guide/inference-backends.md b/docs/user-guide/inference-backends.md index 6639c30..4285007 100644 --- a/docs/user-guide/inference-backends.md +++ b/docs/user-guide/inference-backends.md @@ -16,7 +16,7 @@ When users deploy a model, the backend is selected automatically based on the fo ### Supported Platforms -The llama-box backend works on a wide range of platforms, including Windows, MacOS, and Linux. +The llama-box backend works on a wide range of platforms, including MacOS, Linux, and Windows (with CPU offloading only on Windows ARM architecture). ### Supported Models diff --git a/gpustack/utils/platform.py b/gpustack/utils/platform.py index f34a872..5492701 100644 --- a/gpustack/utils/platform.py +++ b/gpustack/utils/platform.py @@ -1,11 +1,57 @@ import os import platform +import logging +import threading + + +logger = logging.getLogger(__name__) def system() -> str: return platform.uname().system.lower() +def get_native_arch() -> str: + system = platform.system() + if system == "Windows": + import pythoncom + + if threading.current_thread() is not threading.main_thread(): + pythoncom.CoInitialize() + + # Windows emulation will mask the native architecture + # https://learn.microsoft.com/en-us/windows/arm/apps-on-arm-x86-emulation + try: + import wmi + + c = wmi.WMI() + processor_info = c.Win32_Processor() + arch_num = processor_info[0].Architecture + + # https://learn.microsoft.com/en-us/windows/win32/cimwin32prov/win32-processor + arch_map = { + 0: 'x86', + 1: 'MIPS', + 2: 'Alpha', + 3: 'PowerPC', + 5: 'ARM', + 6: 'ia64', + 9: 'AMD64', + 12: 
'ARM64', + } + + arch = arch_map.get(arch_num, 'unknown') + if arch != 'unknown': + return arch.lower() + except Exception as e: + logger.warning(f"Failed to get native architecture from WMI, {e}") + finally: + if threading.current_thread() is not threading.main_thread(): + pythoncom.CoUninitialize() + + return platform.machine().lower() + + def arch() -> str: arch_map = { "x86_64": "amd64", @@ -18,8 +64,13 @@ def arch() -> str: "arm": "arm", "ppc64le": "ppc64le", "s390x": "s390x", + "x86": "x86", + "mips": "mips", + "alpha": "alpha", + "powerpc": "powerpc", + "ia64": "ia64", } - return arch_map.get(platform.machine().lower(), "unknown") + return arch_map.get(get_native_arch(), "unknown") def device() -> str: diff --git a/poetry.lock b/poetry.lock index 07cf71f..01374f7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2247,7 +2247,7 @@ files = [ name = "mpmath" version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" -optional = false +optional = true python-versions = "*" files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, @@ -2543,7 +2543,7 @@ files = [ name = "networkx" version = "3.3" description = "Python package for creating and manipulating graphs and networks" -optional = false +optional = true python-versions = ">=3.10" files = [ {file = "networkx-3.3-py3-none-any.whl", hash = "sha256:28575580c6ebdaf4505b22c6256a2b9de86b316dc63ba9e93abde3d78dfdbcf2"}, @@ -2676,7 +2676,7 @@ files = [ name = "nvidia-cublas-cu12" version = "12.1.3.1" description = "CUBLAS native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, @@ -2687,7 +2687,7 @@ files = [ name = "nvidia-cuda-cupti-cu12" version = "12.1.105" description = "CUDA profiling tools runtime libs." 
-optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, @@ -2698,7 +2698,7 @@ files = [ name = "nvidia-cuda-nvrtc-cu12" version = "12.1.105" description = "NVRTC native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, @@ -2709,7 +2709,7 @@ files = [ name = "nvidia-cuda-runtime-cu12" version = "12.1.105" description = "CUDA Runtime native Libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, @@ -2720,7 +2720,7 @@ files = [ name = "nvidia-cudnn-cu12" version = "9.1.0.70" description = "cuDNN runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"}, @@ -2734,7 +2734,7 @@ nvidia-cublas-cu12 = "*" name = "nvidia-cufft-cu12" version = "11.0.2.54" description = "CUFFT native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, @@ -2745,7 +2745,7 @@ files = [ name = "nvidia-curand-cu12" version = "10.3.2.106" description = "CURAND native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = 
"sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, @@ -2756,7 +2756,7 @@ files = [ name = "nvidia-cusolver-cu12" version = "11.4.5.107" description = "CUDA solver native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, @@ -2772,7 +2772,7 @@ nvidia-nvjitlink-cu12 = "*" name = "nvidia-cusparse-cu12" version = "12.1.0.106" description = "CUSPARSE native runtime libraries" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, @@ -2797,7 +2797,7 @@ files = [ name = "nvidia-nccl-cu12" version = "2.20.5" description = "NVIDIA Collective Communication Library (NCCL) Runtime" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"}, @@ -2808,7 +2808,7 @@ files = [ name = "nvidia-nvjitlink-cu12" version = "12.6.77" description = "Nvidia JIT LTO Library" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:3bf10d85bb1801e9c894c6e197e44dd137d2a0a9e43f8450e9ad13f2df0dd52d"}, @@ -2820,7 +2820,7 @@ files = [ name = "nvidia-nvtx-cu12" version = "12.1.105" description = "NVIDIA Tools Extension" -optional = false +optional = true python-versions = ">=3" files = [ {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, @@ -3831,6 +3831,33 @@ files = [ {file = "pytz-2024.2.tar.gz", hash = 
"sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, ] +[[package]] +name = "pywin32" +version = "308" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"}, + {file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"}, + {file = "pywin32-308-cp310-cp310-win_arm64.whl", hash = "sha256:a5ab5381813b40f264fa3495b98af850098f814a25a63589a8e9eb12560f450c"}, + {file = "pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a"}, + {file = "pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b"}, + {file = "pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6"}, + {file = "pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897"}, + {file = "pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47"}, + {file = "pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091"}, + {file = "pywin32-308-cp313-cp313-win32.whl", hash = "sha256:1c44539a37a5b7b21d02ab34e6a4d314e0788f1690d65b48e9b0b89f31abbbed"}, + {file = "pywin32-308-cp313-cp313-win_amd64.whl", hash = "sha256:fd380990e792eaf6827fcb7e187b2b4b1cede0585e3d0c9e84201ec27b9905e4"}, + {file = "pywin32-308-cp313-cp313-win_arm64.whl", hash = "sha256:ef313c46d4c18dfb82a2431e3051ac8f112ccee1a34f29c263c583c568db63cd"}, + {file = "pywin32-308-cp37-cp37m-win32.whl", hash = "sha256:1f696ab352a2ddd63bd07430080dd598e6369152ea13a25ebcdd2f503a38f1ff"}, + {file = "pywin32-308-cp37-cp37m-win_amd64.whl", hash 
= "sha256:13dcb914ed4347019fbec6697a01a0aec61019c1046c2b905410d197856326a6"}, + {file = "pywin32-308-cp38-cp38-win32.whl", hash = "sha256:5794e764ebcabf4ff08c555b31bd348c9025929371763b2183172ff4708152f0"}, + {file = "pywin32-308-cp38-cp38-win_amd64.whl", hash = "sha256:3b92622e29d651c6b783e368ba7d6722b1634b8e70bd376fd7610fe1992e19de"}, + {file = "pywin32-308-cp39-cp39-win32.whl", hash = "sha256:7873ca4dc60ab3287919881a7d4f88baee4a6e639aa6962de25a98ba6b193341"}, + {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"}, +] + [[package]] name = "pywin32-ctypes" version = "0.2.3" @@ -5026,7 +5053,7 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7 name = "sympy" version = "1.13.3" description = "Computer algebra system (CAS) in Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73"}, @@ -5223,7 +5250,7 @@ files = [ name = "torch" version = "2.4.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" -optional = false +optional = true python-versions = ">=3.8.0" files = [ {file = "torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:4ed94583e244af51d6a8d28701ca5a9e02d1219e782f5a01dd401f90af17d8ac"}, @@ -5408,7 +5435,7 @@ vision = ["Pillow (>=10.0.1,<=15.0)"] name = "triton" version = "3.0.0" description = "A language and compiler for custom Deep Learning operations" -optional = false +optional = true python-versions = "*" files = [ {file = "triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a"}, @@ -5416,11 +5443,6 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, - {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, - {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, - {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, - {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, - {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -5943,6 +5965,27 @@ files = [ {file = "websockets-13.1.tar.gz", hash = "sha256:a3b3366087c1bc0a2795111edcadddb8b3b59509d5db5d7ea3fdd69f954a8878"}, ] +[[package]] +name = "wmi" +version = "1.5.1" +description = "Windows Management Instrumentation" +optional = false +python-versions = "*" +files = [ + {file = "WMI-1.5.1-py2.py3-none-any.whl", hash = "sha256:1d6b085e5c445141c475476000b661f60fff1aaa19f76bf82b7abb92e0ff4942"}, + {file = "WMI-1.5.1.tar.gz", hash = "sha256:b6a6be5711b1b6c8d55bda7a8befd75c48c12b770b9d227d31c1737dbf0d40a6"}, +] + +[package.dependencies] +pywin32 = "*" + +[package.extras] +all = ["pytest", "sphinx", "twine", "wheel"] +dev = ["pytest", "sphinx", "twine", "wheel"] +docs = ["sphinx"] +package = 
["twine", "wheel"] +tests = ["pytest"] + [[package]] name = "xformers" version = "0.0.27.post2" @@ -6225,9 +6268,9 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] -vllm = ["mistral_common", "vllm"] +vllm = ["mistral_common", "torch", "vllm"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "bb90951ef5762b8acefa6b7a0bf063a16fe32ae696d6c187df34330026636aac" +content-hash = "71ded6f2361e0499d441d7d5e5f172365989993ca309764f7e04fadd08b1beec" diff --git a/pyproject.toml b/pyproject.toml index 3879570..87685ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,6 +45,8 @@ mistral_common = {version = "^1.4.3", optional = true, extras = ["opencv"]} uvicorn = "^0.30.6" transformers = "^4.44.2" fastapi-cdn-host = "^0.8.0" +wmi = { version="^1.5.1", markers = "platform_system == 'Windows'" } +pywin32 = { version="^308", markers = "platform_system == 'Windows'" } [tool.poetry.group.dev.dependencies] coverage = {extras = ["toml"], version = "^7.5.1"} flake8 = "^7.0.0"