@@ -17,6 +17,7 @@ from gpustack.schemas.dashboard import (
    SystemLoadSummary,
    SystemSummary,
    TimeSeriesData,
    InstanceInfo,
)
from gpustack.schemas.model_usage import ModelUsage
from gpustack.schemas.models import Model, ModelInstance
@@ -263,8 +264,27 @@ async def get_active_models(session: AsyncSession) -> List[ModelSummary]:
            ram=0,
            vram=0,
        )

        instance_infos = []
        if result.id in model_instances_by_id:
            for model_instance in model_instances_by_id[result.id]:
                vram = 0
                for vram_e in (
                    model_instance.computed_resource_claim.vram or {}
                ).values():
                    vram += vram_e
                instance_infos.append(
                    InstanceInfo(
                        id=model_instance.id,
                        name=model_instance.name,
                        worker_name=model_instance.worker_name,
                        worker_ip=model_instance.worker_ip,
                        gpu_indexes=model_instance.gpu_indexes,
                        gpu_addresses=model_instance.gpu_addresses,
                        ram=model_instance.computed_resource_claim.ram or 0,
                        vram=vram,
                    )
                )
                aggregate_resource_claim(resource_claim, model_instance)

        model_summary.append(
@@ -274,6 +294,7 @@ async def get_active_models(session: AsyncSession) -> List[ModelSummary]:
                categories=result.categories,
                resource_claim=resource_claim,
                instance_count=result.instance_count,
                instance_infos=instance_infos,
                token_count=(
                    result.total_token_count
                    if result.total_token_count is not None
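For readers unfamiliar with the per-GPU layout, here is a minimal sketch of the VRAM totalling the new loop performs, assuming `computed_resource_claim.vram` is a mapping of GPU index to bytes; the dataclass below is a hypothetical stand-in for illustration, not gpustack's actual schema:

```python
# Minimal sketch, not gpustack's real schema: ComputedResourceClaim here is a
# hypothetical stand-in that only models the fields the diff touches.
from dataclasses import dataclass
from typing import Dict, Optional


@dataclass
class ComputedResourceClaim:
    ram: Optional[int] = None
    vram: Optional[Dict[int, int]] = None  # VRAM in bytes, keyed by GPU index


def total_vram(claim: ComputedResourceClaim) -> int:
    # Same pattern as the diff's loop: treat a missing mapping as empty,
    # then sum the per-GPU values into a single figure for the instance.
    return sum((claim.vram or {}).values())


if __name__ == "__main__":
    claim = ComputedResourceClaim(
        ram=2 * 1024**3, vram={0: 8 * 1024**3, 1: 8 * 1024**3}
    )
    print(total_vram(claim))  # 17179869184 bytes, i.e. 16 GiB across two GPUs
```

The `or {}` guard mirrors the diff: an instance with no per-GPU VRAM claim contributes zero to the summary rather than raising on a missing mapping.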