Compare commits

...

4 Commits

@ -17,6 +17,7 @@ from gpustack.schemas.dashboard import (
SystemLoadSummary,
SystemSummary,
TimeSeriesData,
InstanceInfo,
)
from gpustack.schemas.model_usage import ModelUsage
from gpustack.schemas.models import Model, ModelInstance
@ -263,8 +264,27 @@ async def get_active_models(session: AsyncSession) -> List[ModelSummary]:
ram=0,
vram=0,
)
instance_infos = []
if result.id in model_instances_by_id:
for model_instance in model_instances_by_id[result.id]:
vram = 0
for vram_e in (
model_instance.computed_resource_claim.vram or {}
).values():
vram += vram_e
instance_infos.append(
InstanceInfo(
id=model_instance.id,
name=model_instance.name,
worker_name=model_instance.worker_name,
worker_ip=model_instance.worker_ip,
gpu_indexes=model_instance.gpu_indexes,
gpu_addresses=model_instance.gpu_addresses,
ram=model_instance.computed_resource_claim.ram or 0,
vram=vram,
)
)
aggregate_resource_claim(resource_claim, model_instance)
model_summary.append(
@ -274,6 +294,7 @@ async def get_active_models(session: AsyncSession) -> List[ModelSummary]:
categories=result.categories,
resource_claim=resource_claim,
instance_count=result.instance_count,
instance_infos=instance_infos,
token_count=(
result.total_token_count
if result.total_token_count is not None

@ -1,5 +1,9 @@
from typing import List
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from gpustack.schemas.models import ModelInstance
from gpustack.server.deps import ListParamsDep, SessionDep, EngineDep
from gpustack.schemas.gpu_devices import (
GPUDevice,
@ -28,13 +32,21 @@ async def get_gpus(
media_type="text/event-stream",
)
return await GPUDevice.paginated_by_query(
gpu_device_list = await GPUDevice.paginated_by_query(
session=session,
fuzzy_fields=fuzzy_fields,
page=params.page,
per_page=params.perPage,
)
for gpu_device in gpu_device_list.items:
model_instances = await get_model_instances_by_gpu_and_worker(
session, gpu_device.index, gpu_device.worker_id
)
gpu_device.model_instances = model_instances
return gpu_device_list
@router.get("/{id}", response_model=GPUDevicePublic)
async def get_gpu(session: SessionDep, id: str):
@ -43,3 +55,17 @@ async def get_gpu(session: SessionDep, id: str):
raise NotFoundException(message="GPU device not found")
return model
async def get_model_instances_by_gpu_and_worker(
    session: SessionDep, gpu_index: int, worker_id: int
):
    """Return every ModelInstance placed on the given GPU of a worker.

    Looks up instances whose ``gpu_index`` and ``worker_id`` fields both
    match; returns an empty list when nothing is scheduled there.
    """
    return await ModelInstance.all_by_fields(
        session,
        fields={"gpu_index": gpu_index, "worker_id": worker_id},
    )

@ -148,6 +148,7 @@ async def get_model_instances(
)
instances = model.instances
count = len(instances)
total_page = math.ceil(count / params.perPage)
pagination = Pagination(

@ -1,5 +1,8 @@
from typing import List
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from gpustack.schemas.models import ModelInstance
from gpustack.api.exceptions import (
AlreadyExistsException,
@ -44,7 +47,7 @@ async def get_workers(
media_type="text/event-stream",
)
return await Worker.paginated_by_query(
worker_list = await Worker.paginated_by_query(
session=session,
fields=fields,
fuzzy_fields=fuzzy_fields,
@ -52,6 +55,12 @@ async def get_workers(
per_page=params.perPage,
)
for worker in worker_list.items:
model_instances = await get_model_instances_by_worker(session, worker.id)
worker.model_instances = model_instances
return worker_list
@router.get("/{id}", response_model=WorkerPublic)
async def get_worker(session: SessionDep, id: int):
@ -101,3 +110,14 @@ async def delete_worker(session: SessionDep, id: int):
await WorkerService(session).delete(worker)
except Exception as e:
raise InternalServerErrorException(message=f"Failed to delete worker: {e}")
async def get_model_instances_by_worker(session: SessionDep, worker_id: int):
    """Return every ModelInstance assigned to the worker with ``worker_id``.

    Returns an empty list when the worker hosts no instances.
    """
    return await ModelInstance.all_by_fields(
        session, fields={"worker_id": worker_id}
    )

@ -48,6 +48,17 @@ class ResourceClaim(BaseModel):
vram: int # in bytes
class InstanceInfo(BaseModel):
    """Per-instance summary attached to a model's dashboard entry."""

    id: int
    name: str
    worker_name: Optional[str] = None
    worker_ip: Optional[str] = None
    # Default to None so an instance that has no GPU placement yet can be
    # constructed without these fields — consistent with the other optional
    # fields in this model, which all carry defaults.
    gpu_indexes: Optional[List[int]] = None
    gpu_addresses: Optional[List[str]] = None
    ram: Optional[int] = 0  # in bytes (matches ResourceClaim units)
    vram: Optional[int] = 0  # in bytes (matches ResourceClaim units)
class ModelSummary(BaseModel):
id: int
name: str
@ -55,6 +66,7 @@ class ModelSummary(BaseModel):
instance_count: int
token_count: int
categories: Optional[List[str]] = None
instance_infos: Optional[List[InstanceInfo]] = None
class ResourceCounts(BaseModel):

@ -25,15 +25,30 @@ function download_ui() {
local ui_path="${ROOT_DIR}/gpustack/ui"
local tmp_ui_path="${ui_path}/tmp"
local tag="latest"
local local_ui_dir="/home/gpustack-ui/dist" # 本地 UI 构建目录
if [[ "${GIT_VERSION}" != "v0.0.0" ]]; then
tag="${GIT_VERSION}"
fi
# 删除旧的 ui 目录
rm -rf "${ui_path}"
mkdir -p "${tmp_ui_path}/ui"
mkdir -p "${ui_path}"
gpustack::log::info "Checking for local UI build at ${local_ui_dir}"
# 优先检查本地构建目录是否存在且非空
if [[ -d "${local_ui_dir}" ]] && [[ -n "$(ls -A ${local_ui_dir} 2>/dev/null)" ]]; then
gpustack::log::info "Local UI found at ${local_ui_dir}, copying..."
cp -a "${local_ui_dir}/." "${ui_path}"
gpustack::log::info "Local UI copied successfully."
return 0
else
gpustack::log::info "No valid local UI found at ${local_ui_dir}, proceeding with download..."
fi
gpustack::log::info "downloading '${tag}' UI assets"
# 如果本地没有,则下载远程 UI
mkdir -p "${tmp_ui_path}/ui"
if ! curl --retry 3 --retry-connrefused --retry-delay 3 -sSfL "https://gpustack-ui-1303613262.cos.accelerate.myqcloud.com/releases/${tag}.tar.gz" 2>/dev/null |
tar -xzf - --directory "${tmp_ui_path}/ui" 2>/dev/null; then
@ -48,8 +63,11 @@ function download_ui() {
gpustack::log::fatal "failed to download '${default_tag}' ui archive"
fi
fi
# 复制解压后的内容
cp -a "${tmp_ui_path}/ui/dist/." "${ui_path}"
# 清理临时目录
rm -rf "${tmp_ui_path}"
}

2
poetry.lock generated

@ -10685,4 +10685,4 @@ vllm = ["bitsandbytes", "mistral_common", "timm", "vllm"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "3b91cdb95388d8479acd383dfb3e585de2fce906b70709dff4a6a01e4d2819a7"
content-hash = "4462e07255a2ae6b0f3af20a305c4044dade8edffbdf8e5dd605cea8a03d5d96"

@ -86,6 +86,12 @@ twine = "^5.1.1"
mike = "^2.1.3"
mkdocs-glightbox = "^0.4.0"
[[tool.poetry.source]]
name = "tuna"
url = "https://pypi.tuna.tsinghua.edu.cn/simple/"
priority = "explicit"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

Loading…
Cancel
Save