You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
170 lines
4.4 KiB
170 lines
4.4 KiB
package service
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"github.com/go-kratos/kratos/v2/log"
|
|
"slices"
|
|
"sort"
|
|
"strings"
|
|
pb "vgpu/api/v1"
|
|
"vgpu/internal/biz"
|
|
"vgpu/internal/database"
|
|
)
|
|
|
|
type CardService struct {
|
|
pb.UnimplementedCardServer
|
|
|
|
node *biz.NodeUsecase
|
|
pod *biz.PodUseCase
|
|
ms *MonitorService
|
|
}
|
|
|
|
func NewCardService(node *biz.NodeUsecase, pod *biz.PodUseCase, ms *MonitorService) *CardService {
|
|
return &CardService{node: node, pod: pod, ms: ms}
|
|
}
|
|
|
|
func (s *CardService) GetAllGPUs(ctx context.Context, req *pb.GetAllGpusReq) (*pb.GPUsReply, error) {
|
|
filters := req.Filters
|
|
deviceInfos, err := s.node.ListAllDevices(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var res = &pb.GPUsReply{List: []*pb.GPUReply{}}
|
|
for _, device := range deviceInfos {
|
|
gpu := &pb.GPUReply{}
|
|
//nodeName := strings.Trim(filters.NodeName, " ")
|
|
//if nodeName != "" && nodeName != device.NodeName {
|
|
// continue
|
|
//}
|
|
//deviceType := strings.Trim(filters.Type, " ")
|
|
//if deviceType != "" && deviceType != device.Type {
|
|
// continue
|
|
//}
|
|
//deviceUid := strings.Trim(filters.Uid, " ")
|
|
//if deviceUid != "" && deviceUid != device.Id {
|
|
// continue
|
|
//}
|
|
|
|
nodeNames := strings.Trim(filters.NodeName, " ")
|
|
if nodeNames != "" {
|
|
names := strings.Split(nodeNames, "|")
|
|
log.Info("GetAllGPUs names: ", names)
|
|
if !slices.Contains(names, device.NodeName) {
|
|
continue
|
|
}
|
|
}
|
|
|
|
deviceTypes := strings.Trim(filters.Type, " ")
|
|
if deviceTypes != "" {
|
|
types := strings.Split(deviceTypes, "|")
|
|
log.Info("GetAllGPUs types: ", types)
|
|
if !slices.Contains(types, device.Type) {
|
|
continue
|
|
}
|
|
}
|
|
deviceUids := strings.Trim(filters.Uid, " ")
|
|
if deviceUids != "" {
|
|
uids := strings.Split(deviceUids, "|")
|
|
log.Info("GetAllGPUs uids: ", uids)
|
|
if !slices.Contains(uids, device.NodeUid) {
|
|
continue
|
|
}
|
|
}
|
|
gpu.Uuid = device.Id
|
|
gpu.NodeName = device.NodeName
|
|
gpu.Type = device.Type
|
|
gpu.VgpuTotal = device.Count
|
|
gpu.CoreTotal = device.Devcore
|
|
gpu.MemoryTotal = device.Devmem
|
|
gpu.NodeUid = device.NodeUid
|
|
gpu.Health = device.Health
|
|
gpu.Mode = device.Mode
|
|
resourcePoolNames, err := database.QueryResourceNamesByNodeName(device.NodeName)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
gpu.ResourcePools = resourcePoolNames
|
|
|
|
vGPU, core, memory, err := s.pod.StatisticsByDeviceId(ctx, device.AliasId)
|
|
if err == nil {
|
|
gpu.VgpuUsed = vGPU
|
|
gpu.CoreUsed = core
|
|
gpu.MemoryUsed = memory
|
|
}
|
|
resp, err := s.ms.QueryInstant(ctx, &pb.QueryInstantRequest{Query: fmt.Sprintf("avg(hami_core_size{deviceuuid=~\"%s\"})", device.Id)})
|
|
if err == nil && len(resp.Data) > 0 {
|
|
gpu.CoreTotal = int32(resp.Data[0].Value)
|
|
}
|
|
resp, err = s.ms.QueryInstant(ctx, &pb.QueryInstantRequest{Query: fmt.Sprintf("avg(hami_memory_size{deviceuuid=~\"%s\"})", device.Id)})
|
|
if err == nil && len(resp.Data) > 0 {
|
|
gpu.MemoryTotal = int32(resp.Data[0].Value)
|
|
}
|
|
res.List = append(res.List, gpu)
|
|
}
|
|
|
|
sort.SliceStable(res.List, func(i, j int) bool {
|
|
return res.List[i].Uuid < res.List[j].Uuid
|
|
})
|
|
return res, nil
|
|
}
|
|
|
|
func (s *CardService) GetAllGPUTypes(ctx context.Context, req *pb.GetAllGpusReq) (*pb.GPUsReply, error) {
|
|
deviceInfos, err := s.node.ListAllDevices(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var res = &pb.GPUsReply{List: []*pb.GPUReply{}}
|
|
seenTypes := make(map[string]struct{})
|
|
|
|
filters := req.Filters
|
|
provider := strings.Trim(filters.Provider, " ")
|
|
for _, device := range deviceInfos {
|
|
if provider != "" && provider != device.Provider {
|
|
continue
|
|
}
|
|
|
|
if _, exists := seenTypes[device.Type]; !exists {
|
|
seenTypes[device.Type] = struct{}{}
|
|
gpu := &pb.GPUReply{}
|
|
gpu.Type = device.Type
|
|
res.List = append(res.List, gpu)
|
|
}
|
|
}
|
|
|
|
return res, nil
|
|
}
|
|
|
|
func (s *CardService) GetGPU(ctx context.Context, req *pb.GetGpuReq) (*pb.GPUReply, error) {
|
|
devices, err := s.node.ListAllDevices(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
gpu := &pb.GPUReply{}
|
|
for _, device := range devices {
|
|
deviceUid := strings.Trim(req.Uid, " ")
|
|
if deviceUid == "" || deviceUid != device.Id {
|
|
continue
|
|
}
|
|
gpu.Uuid = device.Id
|
|
gpu.NodeName = device.NodeName
|
|
gpu.Type = device.Type
|
|
gpu.VgpuTotal = device.Count
|
|
gpu.CoreTotal = device.Devcore
|
|
gpu.MemoryTotal = device.Devmem
|
|
gpu.NodeUid = device.NodeUid
|
|
gpu.Health = device.Health
|
|
gpu.Mode = device.Mode
|
|
|
|
vGPU, core, memory, err := s.pod.StatisticsByDeviceId(ctx, device.AliasId)
|
|
if err == nil {
|
|
gpu.VgpuUsed = vGPU
|
|
gpu.CoreUsed = core
|
|
gpu.MemoryUsed = memory
|
|
}
|
|
return gpu, nil
|
|
}
|
|
return gpu, nil
|
|
}
|