feat: support using full path of llama-box

Use llama-box-<os>-<arch>-<device> first and fall back to llama-box
pull/2156/head
Yuxing Deng 7 months ago committed by Lawrence Li
parent bef3b521ef
commit 03cd217189

@ -212,3 +212,7 @@ def get_cann_chip() -> str:
# TODO(thxCode): figure out a way to discover the CANN chip version
return os.getenv("CANN_CHIP", "")
def get_executable_suffix() -> str:
    """Return the file-name suffix for executables on the current system.

    Returns:
        ``'.exe'`` when ``system()`` reports ``'windows'``; otherwise an
        empty string.
    """
    if system() == 'windows':
        return '.exe'
    return ''

@ -9,6 +9,7 @@ import psutil
from gpustack.schemas.workers import Worker
from gpustack.utils import platform
from gpustack.utils.platform import get_executable_suffix as exe
from gpustack.schemas.models import (
ModelInstance,
ModelInstanceStateEnum,
@ -19,22 +20,21 @@ from gpustack.schemas.models import (
from gpustack.utils.command import find_parameter, get_versioned_command
from gpustack.utils.compat_importlib import pkg_resources
from gpustack.worker.backends.base import InferenceServer
from gpustack.worker.tools_manager import get_llama_box_command
logger = logging.getLogger(__name__)
class LlamaBoxServer(InferenceServer):
def start(self): # noqa: C901
command_path = pkg_resources.files(
"gpustack.third_party.bin.llama-box"
).joinpath(get_llama_box_command())
base_path = pkg_resources.files("gpustack.third_party.bin").joinpath(
'llama-box'
)
command_path = get_llama_box_command(str(base_path))
if self._model.backend_version:
command_path = os.path.join(
self._config.bin_dir,
get_versioned_command(
get_llama_box_command(), self._model.backend_version
),
get_versioned_command(f'llama-box{exe()}', self._model.backend_version),
)
layers = -1
@ -216,13 +216,6 @@ def set_priority(pid: int):
logger.error(f"Failed to set priority for process {pid}: {e}")
def get_llama_box_command():
    """Return the llama-box executable file name for the current system.

    Returns:
        ``"llama-box.exe"`` when ``platform.system()`` reports
        ``"windows"``; otherwise ``"llama-box"``.
    """
    is_windows = platform.system() == "windows"
    return "llama-box" + (".exe" if is_windows else "")
def get_rpc_servers(
model_instance: ModelInstance, worker_map: Dict[int, Worker]
) -> Tuple[List[str], List[int]]:

@ -9,7 +9,7 @@ import shutil
import stat
import subprocess
import time
from typing import Optional, Dict
from typing import Optional, Dict, Union
import zipfile
import requests
@ -17,6 +17,7 @@ from gpustack.schemas.models import BackendEnum
from gpustack.utils.command import get_versioned_command
from gpustack.utils.compat_importlib import pkg_resources
from gpustack.utils import platform, envs
from gpustack.utils.platform import get_executable_suffix as exe
from gpustack.config.config import get_global_config
logger = logging.getLogger(__name__)
@ -171,8 +172,8 @@ class ToolsManager:
def download_llama_box(self):
version = BUILTIN_LLAMA_BOX_VERSION
target_dir = self.third_party_bin_path / "llama-box"
file_name = "llama-box.exe" if self._os == "windows" else "llama-box"
target_file = target_dir / file_name
target_file = Path(get_llama_box_command(target_dir))
file_name = os.path.basename(target_file)
if (
target_file.is_file()
@ -441,19 +442,17 @@ class ToolsManager:
st = os.stat(target_file)
os.chmod(target_file, st.st_mode | stat.S_IEXEC)
self._link_llama_box_rpc_server()
self._link_llama_box_rpc_server(target_file)
# Clean up temporary directory
shutil.rmtree(llama_box_tmp_dir)
def _link_llama_box_rpc_server(self):
def _link_llama_box_rpc_server(self, llama_box_file: Path):
"""
Create a symlink for llama-box-rpc-server in the bin directory.
This is used to help differentiate between the llama-box and llama-box-rpc-server processes.
"""
target_dir = self.third_party_bin_path / "llama-box"
file_name = "llama-box.exe" if self._os == "windows" else "llama-box"
llama_box_file = target_dir / file_name
if self._os == "windows":
target_rpc_server_file = target_dir / "llama-box-rpc-server.exe"
@ -787,3 +786,21 @@ class ToolsManager:
)
except subprocess.CalledProcessError as e:
raise Exception(f"Failed to install Ascend MindIE {command}: {e}")
def get_llama_box_command(
    base_path: Union[
        str,
        Path,
    ]
) -> str:
    """Resolve the path of the llama-box executable under ``base_path``.

    Two candidate file names are considered: the platform-specific
    ``llama-box-<system>-<arch>[-<device>]`` and the generic ``llama-box``,
    each followed by the executable suffix returned by ``exe()``. The
    platform-specific candidate is preferred.

    Args:
        base_path: Directory in which to look for the executable.

    Returns:
        The platform-specific path when that file exists, or when neither
        candidate exists; otherwise the generic ``llama-box`` path.
    """
    system = platform.system()
    arch = platform.arch()
    device = platform.device()
    # Omit the device segment entirely when no device is reported
    # (empty string or None), rather than emitting a dangling "-".
    device_suffix = f'-{device}' if device else ''
    full_path = os.path.join(
        base_path, f"llama-box-{system}-{arch}{device_suffix}{exe()}"
    )
    if os.path.exists(full_path):
        return full_path

    default_path = os.path.join(base_path, f"llama-box{exe()}")
    # Fall back to the generic name only when it is actually present; when
    # neither file exists, the platform-specific path is still returned.
    return default_path if os.path.exists(default_path) else full_path

Loading…
Cancel
Save