Compare commits: main...test (6 Commits)

@@ -17,6 +17,7 @@ from gpustack.schemas.dashboard import (
SystemLoadSummary,
SystemSummary,
TimeSeriesData,
InstanceInfo,
)
from gpustack.schemas.model_usage import ModelUsage
from gpustack.schemas.models import Model, ModelInstance
@@ -263,8 +264,27 @@ async def get_active_models(session: AsyncSession) -> List[ModelSummary]:
ram=0,
vram=0,
)
instance_infos = []
if result.id in model_instances_by_id:
for model_instance in model_instances_by_id[result.id]:
vram = 0
for vram_e in (
model_instance.computed_resource_claim.vram or {}
).values():
vram += vram_e
instance_infos.append(
InstanceInfo(
id=model_instance.id,
name=model_instance.name,
worker_name=model_instance.worker_name,
worker_ip=model_instance.worker_ip,
gpu_indexes=model_instance.gpu_indexes,
gpu_addresses=model_instance.gpu_addresses,
ram=model_instance.computed_resource_claim.ram or 0,
vram=vram,
)
)
aggregate_resource_claim(resource_claim, model_instance)
model_summary.append(
@@ -274,6 +294,7 @@ async def get_active_models(session: AsyncSession) -> List[ModelSummary]:
categories=result.categories,
resource_claim=resource_claim,
instance_count=result.instance_count,
instance_infos=instance_infos,
token_count=(
result.total_token_count
if result.total_token_count is not None

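For reference, the per-instance VRAM computed in the hunk above is just the sum of the values in `computed_resource_claim.vram`, which maps GPU index to allocated bytes. A minimal sketch of that aggregation (the byte values are illustrative, not taken from real data):

```python
# Per-GPU VRAM claim for one model instance: GPU index -> bytes (illustrative values).
vram_claim = {0: 4_000_000_000, 1: 4_000_000_000}

# Same as the loop above: treat a missing claim as an empty dict and sum every allocation.
total_vram = sum((vram_claim or {}).values())
assert total_vram == 8_000_000_000
```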
@@ -1,5 +1,9 @@
from typing import List
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from gpustack.schemas.models import ModelInstance
from gpustack.server.deps import ListParamsDep, SessionDep, EngineDep
from gpustack.schemas.gpu_devices import (
GPUDevice,
@@ -28,13 +32,21 @@ async def get_gpus(
media_type="text/event-stream",
)
return await GPUDevice.paginated_by_query(
gpu_device_list = await GPUDevice.paginated_by_query(
session=session,
fuzzy_fields=fuzzy_fields,
page=params.page,
per_page=params.perPage,
)
for gpu_device in gpu_device_list.items:
model_instances = await get_model_instances_by_gpu_and_worker(
session, gpu_device.index, gpu_device.worker_id
)
gpu_device.model_instances = model_instances
return gpu_device_list
@router.get("/{id}", response_model=GPUDevicePublic)
async def get_gpu(session: SessionDep, id: str):
@@ -43,3 +55,17 @@ async def get_gpu(session: SessionDep, id: str):
raise NotFoundException(message="GPU device not found")
return model
async def get_model_instances_by_gpu_and_worker(
session: SessionDep, gpu_index: int, worker_id: int
):
fields = {
"gpu_index": gpu_index,
"worker_id": worker_id,
}
model_instances: List[ModelInstance] = await ModelInstance.all_by_fields(
session, fields=fields
)
return model_instances
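The helper above relies on `ModelInstance.all_by_fields` doing an exact match on the given fields. A plain-Python sketch of the same filter, with made-up instance records, just to show which rows a (gpu_index, worker_id) pair selects:

```python
# Illustrative stand-ins for model instance rows; not real data.
instances = [
    {"name": "llama-0", "worker_id": 1, "gpu_index": 0},
    {"name": "llama-1", "worker_id": 1, "gpu_index": 1},
    {"name": "qwen-0", "worker_id": 2, "gpu_index": 0},
]

def by_gpu_and_worker(items, gpu_index, worker_id):
    # Keep only instances scheduled on this worker/GPU pair,
    # mirroring the fields dict passed to all_by_fields above.
    return [i for i in items if i["gpu_index"] == gpu_index and i["worker_id"] == worker_id]

assert [i["name"] for i in by_gpu_and_worker(instances, 0, 1)] == ["llama-0"]
```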

@@ -148,6 +148,7 @@ async def get_model_instances(
)
instances = model.instances
count = len(instances)
total_page = math.ceil(count / params.perPage)
pagination = Pagination(

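The added `total_page` line uses ceiling division so that a trailing partial page is still counted. For example, 5 instances at 2 per page should report 3 pages, not 2:

```python
import math

count, per_page = 5, 2
# Ceiling division, as in the total_page calculation above.
total_page = math.ceil(count / per_page)
assert total_page == 3  # pages hold items [0, 1], [2, 3], [4]
```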
@@ -1,5 +1,8 @@
from typing import List
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from gpustack.schemas.models import ModelInstance
from gpustack.api.exceptions import (
AlreadyExistsException,
@@ -11,15 +14,16 @@ from gpustack.schemas.workers import (
WorkerCreate,
WorkerPublic,
WorkerUpdate,
WorkersPublic,
Worker,
WorkerWithInstancesPublic,
WorkerWithInstances,
)
from gpustack.server.services import WorkerService
router = APIRouter()
@router.get("", response_model=WorkersPublic)
@router.get("", response_model=WorkerWithInstancesPublic)
async def get_workers(
engine: EngineDep,
session: SessionDep,
@@ -44,7 +48,7 @@ async def get_workers(
media_type="text/event-stream",
)
return await Worker.paginated_by_query(
worker_list = await Worker.paginated_by_query(
session=session,
fields=fields,
fuzzy_fields=fuzzy_fields,
@@ -52,6 +56,18 @@ async def get_workers(
per_page=params.perPage,
)
enriched_workers = []
for worker in worker_list.items:
model_instances = await get_model_instances_by_worker(session, worker.id)
enriched_worker = WorkerWithInstances(
**worker.model_dump(), model_instances=model_instances
)
enriched_workers.append(enriched_worker)
return WorkerWithInstancesPublic(
items=enriched_workers, pagination=worker_list.pagination
)
@router.get("/{id}", response_model=WorkerPublic)
async def get_worker(session: SessionDep, id: int):
@@ -101,3 +117,14 @@ async def delete_worker(session: SessionDep, id: int):
await WorkerService(session).delete(worker)
except Exception as e:
raise InternalServerErrorException(message=f"Failed to delete worker: {e}")
async def get_model_instances_by_worker(session: SessionDep, worker_id: int):
fields = {
"worker_id": worker_id,
}
model_instances: List[ModelInstance] = await ModelInstance.all_by_fields(
session, fields=fields
)
return model_instances
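The enrichment loop above copies every field of the stored worker and attaches the per-worker instances via `WorkerWithInstances(**worker.model_dump(), model_instances=...)`. A simplified sketch of that pattern with stand-in Pydantic models (the real schemas carry many more fields):

```python
from pydantic import BaseModel

class WorkerStub(BaseModel):
    id: int
    name: str

class WorkerWithInstancesStub(WorkerStub):
    model_instances: list[dict] = []

worker = WorkerStub(id=1, name="worker-0")
instances = [{"name": "llama-0"}]  # illustrative result of the per-worker query

# Same shape as the enrichment in the route: dump the base fields, then add instances.
enriched = WorkerWithInstancesStub(**worker.model_dump(), model_instances=instances)
assert enriched.model_instances[0]["name"] == "llama-0"
```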

@@ -48,6 +48,17 @@ class ResourceClaim(BaseModel):
vram: int # in bytes
class InstanceInfo(BaseModel):
id: int
name: str
worker_name: Optional[str] = None
worker_ip: Optional[str] = None
gpu_indexes: Optional[List[int]]
gpu_addresses: Optional[List[str]]
ram: Optional[int] = 0
vram: Optional[int] = 0
class ModelSummary(BaseModel):
id: int
name: str
@@ -55,6 +66,7 @@ class ModelSummary(BaseModel):
instance_count: int
token_count: int
categories: Optional[List[str]] = None
instance_infos: Optional[List[InstanceInfo]] = None
class ResourceCounts(BaseModel):

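To make the new schema concrete, here is a self-contained copy of the `InstanceInfo` fields added above together with an example value; the field values are illustrative only.

```python
from typing import List, Optional
from pydantic import BaseModel

class InstanceInfo(BaseModel):
    id: int
    name: str
    worker_name: Optional[str] = None
    worker_ip: Optional[str] = None
    gpu_indexes: Optional[List[int]]
    gpu_addresses: Optional[List[str]]
    ram: Optional[int] = 0
    vram: Optional[int] = 0

info = InstanceInfo(
    id=1,
    name="llama-3-8b-instance-0",  # illustrative instance name
    worker_name="worker-0",
    worker_ip="10.0.0.2",
    gpu_indexes=[0, 1],
    gpu_addresses=["00:1e.0", "00:1f.0"],
    ram=2_000_000_000,
    vram=8_000_000_000,
)
assert info.vram == 8_000_000_000
```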
@@ -1,6 +1,8 @@
from datetime import datetime, timezone
from enum import Enum
from typing import Dict, Optional
from gpustack.schemas.models import ModelInstance
from pydantic import ConfigDict, BaseModel
from sqlmodel import Field, SQLModel, JSON, Column, Text
@@ -218,6 +220,10 @@ class Worker(WorkerBase, BaseModelMixin, table=True):
return False
class WorkerWithInstances(WorkerBase, BaseModelMixin):
model_instances: list[ModelInstance] = []
class WorkerCreate(WorkerBase):
pass
@@ -234,4 +240,10 @@ class WorkerPublic(
updated_at: datetime
class WorkerWithInstancesPublic(WorkerPublic):
model_instances: list[ModelInstance] = []
WorkersPublic = PaginatedList[WorkerPublic]
WorkerWithInstancesPublic = PaginatedList[WorkerWithInstancesPublic]
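`WorkerWithInstancesPublic` is first declared as a worker model carrying `model_instances` and then rebound to `PaginatedList[WorkerWithInstancesPublic]`, so the route's response model becomes a paginated page of enriched workers. A generic-model sketch of that aliasing pattern (the pagination field names here are assumptions, not the real gpustack schema):

```python
from typing import Generic, TypeVar
from pydantic import BaseModel

T = TypeVar("T")

class PaginationStub(BaseModel):
    page: int = 1
    perPage: int = 100
    total: int = 0

class PaginatedListStub(BaseModel, Generic[T]):
    items: list[T] = []
    pagination: PaginationStub = PaginationStub()

class WorkerWithInstancesStub(BaseModel):
    id: int
    model_instances: list[dict] = []

# Same aliasing idea as `WorkerWithInstancesPublic = PaginatedList[WorkerWithInstancesPublic]`.
WorkersPageStub = PaginatedListStub[WorkerWithInstancesStub]

page = WorkersPageStub(items=[WorkerWithInstancesStub(id=1)])
assert page.pagination.page == 1
```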

@@ -25,15 +25,30 @@ function download_ui() {
local ui_path="${ROOT_DIR}/gpustack/ui"
local tmp_ui_path="${ui_path}/tmp"
local tag="latest"
local local_ui_dir="/home/gpustack-ui/dist" # local UI build directory
if [[ "${GIT_VERSION}" != "v0.0.0" ]]; then
tag="${GIT_VERSION}"
fi
# remove the old ui directory
rm -rf "${ui_path}"
mkdir -p "${tmp_ui_path}/ui"
mkdir -p "${ui_path}"
gpustack::log::info "Checking for local UI build at ${local_ui_dir}"
# prefer the local build directory if it exists and is not empty
if [[ -d "${local_ui_dir}" ]] && [[ -n "$(ls -A ${local_ui_dir} 2>/dev/null)" ]]; then
gpustack::log::info "Local UI found at ${local_ui_dir}, copying..."
cp -a "${local_ui_dir}/." "${ui_path}"
gpustack::log::info "Local UI copied successfully."
return 0
else
gpustack::log::info "No valid local UI found at ${local_ui_dir}, proceeding with download..."
fi
gpustack::log::info "downloading '${tag}' UI assets"
# no usable local build, download the remote UI instead
mkdir -p "${tmp_ui_path}/ui"
if ! curl --retry 3 --retry-connrefused --retry-delay 3 -sSfL "https://gpustack-ui-1303613262.cos.accelerate.myqcloud.com/releases/${tag}.tar.gz" 2>/dev/null |
tar -xzf - --directory "${tmp_ui_path}/ui" 2>/dev/null; then
@@ -48,8 +63,11 @@ function download_ui() {
gpustack::log::fatal "failed to download '${default_tag}' ui archive"
fi
fi
# copy the extracted contents
cp -a "${tmp_ui_path}/ui/dist/." "${ui_path}"
# clean up the temporary directory
rm -rf "${tmp_ui_path}"
}
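The script change prefers a pre-built local UI directory and only falls back to downloading the release tarball. A rough Python equivalent of that flow, for illustration only (it extracts straight into the target instead of going through the tmp directory the script uses; the paths and URL pattern are taken from the script):

```python
import shutil
import tarfile
import urllib.request
from pathlib import Path

def fetch_ui(local_ui_dir: str, ui_path: str, tag: str) -> None:
    local_dir = Path(local_ui_dir)
    target = Path(ui_path)
    shutil.rmtree(target, ignore_errors=True)
    target.mkdir(parents=True, exist_ok=True)

    # Prefer a non-empty local build, as the script does.
    if local_dir.is_dir() and any(local_dir.iterdir()):
        shutil.copytree(local_dir, target, dirs_exist_ok=True)
        return

    # Otherwise stream and extract the release tarball.
    url = (
        "https://gpustack-ui-1303613262.cos.accelerate.myqcloud.com/"
        f"releases/{tag}.tar.gz"
    )
    with urllib.request.urlopen(url) as resp, tarfile.open(fileobj=resp, mode="r|gz") as tar:
        tar.extractall(path=target)
```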

poetry.lock (generated)

@@ -10685,4 +10685,4 @@ vllm = ["bitsandbytes", "mistral_common", "timm", "vllm"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<3.13"
content-hash = "3b91cdb95388d8479acd383dfb3e585de2fce906b70709dff4a6a01e4d2819a7"
content-hash = "4462e07255a2ae6b0f3af20a305c4044dade8edffbdf8e5dd605cea8a03d5d96"

@@ -86,6 +86,12 @@ twine = "^5.1.1"
mike = "^2.1.3"
mkdocs-glightbox = "^0.4.0"
[[tool.poetry.source]]
name = "tuna"
url = "https://pypi.tuna.tsinghua.edu.cn/simple/"
priority = "explicit"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
