From 351b95503cad3fb717be39dcf0ea0edd9e57fd85 Mon Sep 17 00:00:00 2001 From: youys <1272586223@qq.com> Date: Thu, 7 Aug 2025 17:53:11 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20cpu=E5=92=8C=E5=86=85=E5=AD=98=E5=AD=97?= =?UTF-8?q?=E6=AE=B5=E9=87=8D=E6=96=B0=E5=AE=9A=E4=B9=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- server/api/v1/node.proto | 6 +- server/api/v1/resource_pool.proto | 8 +- server/internal/service/node.go | 8 +- server/internal/service/resource_pool.go | 15 +- server/openapi.yaml | 351 +++++++++++++++++------ 5 files changed, 280 insertions(+), 108 deletions(-) diff --git a/server/api/v1/node.proto b/server/api/v1/node.proto index 2617383..0d7603d 100644 --- a/server/api/v1/node.proto +++ b/server/api/v1/node.proto @@ -109,9 +109,9 @@ message NodeReply { int32 vgpu_used = 5; int32 vgpu_total = 6; int32 core_used = 7; - int64 core_total = 8; + int32 core_total = 8; int32 memory_used = 9; - int64 memory_total = 10; + int32 memory_total = 10; string uid = 11; string name = 12; int32 card_cnt = 13; @@ -125,6 +125,8 @@ message NodeReply { string creation_timestamp = 21; int64 disk_size = 22; repeated string resource_pools = 23; + int64 cpu_cores = 24; + int64 total_memory = 25; } diff --git a/server/api/v1/resource_pool.proto b/server/api/v1/resource_pool.proto index 990b51f..3b18322 100644 --- a/server/api/v1/resource_pool.proto +++ b/server/api/v1/resource_pool.proto @@ -115,9 +115,9 @@ message PoolNodeReply { int32 vgpu_used = 5; int32 vgpu_total = 6; int32 core_used = 7; - int64 core_total = 8; - int64 memory_used = 9; - int64 memory_total = 10; + int32 core_total = 8; + int32 memory_used = 9; + int32 memory_total = 10; string uid = 11; string name = 12; int32 card_cnt = 13; @@ -131,6 +131,8 @@ message PoolNodeReply { string creation_timestamp = 21; int64 disk_size = 22; int64 node_id = 23; + int64 cpu_cores = 24; + int64 total_memory = 25; } message ResourcePoolDetailRequest { diff --git a/server/internal/service/node.go b/server/internal/service/node.go index ea52ef3..bc3c720 100644 --- a/server/internal/service/node.go +++ b/server/internal/service/node.go @@ -150,16 +150,16 @@ func (s *NodeService) buildNodeReply(ctx context.Context, node *biz.Node) (*pb.N KubeProxyVersion: node.KubeProxyVersion, Architecture: node.Architecture, CreationTimestamp: node.CreationTimestamp, - CoreTotal: node.CPUCores, - MemoryTotal: node.TotalMemory, DiskSize: node.DiskTotal, + CpuCores: node.CPUCores, + TotalMemory: node.TotalMemory, } for _, device := range node.Devices { nodeReply.Type = append(nodeReply.Type, device.Type) nodeReply.VgpuTotal += device.Count - nodeReply.CoreTotal += int64(device.Devcore) - nodeReply.MemoryTotal += int64(device.Devmem) + nodeReply.CoreTotal += device.Devcore + nodeReply.MemoryTotal += device.Devmem vGPU, core, memory, err := s.pod.StatisticsByDeviceId(ctx, device.AliasId) if err == nil { nodeReply.VgpuUsed += vGPU diff --git a/server/internal/service/resource_pool.go b/server/internal/service/resource_pool.go index 59522bf..6e9d19f 100644 --- a/server/internal/service/resource_pool.go +++ b/server/internal/service/resource_pool.go @@ -2,7 +2,6 @@ package service import ( "context" - "encoding/json" "errors" "fmt" "github.com/go-kratos/kratos/v2/log" @@ -176,8 +175,6 @@ func (s *ResourcePoolService) GetDetail(ctx context.Context, req *pb.ResourcePoo nodes, err := s.uc.ListAllNodesV2(ctx) for _, poolNode := range poolNodes { - b1, _ := json.MarshalIndent(poolNode, "", " ") - log.Info(string(b1)) node := s.filterNode(poolNode.NodeIp, nodes) if node == nil { continue @@ -253,8 +250,6 @@ func (s *ResourcePoolService) simpleQuery(ctx context.Context, query string) int func (s *ResourcePoolService) filterNode(nodeIp string, nodes []*biz.Node) *biz.Node { for _, node := range nodes { - b, _ := json.MarshalIndent(node, "", " ") - log.Info(string(b)) if node.IP == nodeIp { return node } @@ -278,21 +273,21 @@ func (s *ResourcePoolService) buildNodeReply(ctx context.Context, node *biz.Node KubeProxyVersion: node.KubeProxyVersion, Architecture: node.Architecture, CreationTimestamp: node.CreationTimestamp, - CoreTotal: node.CPUCores, - MemoryTotal: node.TotalMemory, + CpuCores: node.CPUCores, + TotalMemory: node.TotalMemory, DiskSize: node.DiskTotal, } for _, device := range node.Devices { nodeReply.Type = append(nodeReply.Type, device.Type) nodeReply.VgpuTotal += device.Count - nodeReply.CoreTotal += int64(device.Devcore) - nodeReply.MemoryTotal += int64(device.Devmem) + nodeReply.CoreTotal += device.Devcore + nodeReply.MemoryTotal += device.Devmem vGPU, core, memory, err := s.pod.StatisticsByDeviceId(ctx, device.AliasId) if err == nil { nodeReply.VgpuUsed += vGPU nodeReply.CoreUsed += core - nodeReply.MemoryUsed += int64(memory) + nodeReply.MemoryUsed += memory } } diff --git a/server/openapi.yaml b/server/openapi.yaml index c1755ea..0b450b4 100644 --- a/server/openapi.yaml +++ b/server/openapi.yaml @@ -3,50 +3,37 @@ openapi: 3.0.3 info: - title: Container API + title: ResourcePool API version: 0.0.1 paths: - /v1/container: + /v1/available/nodes: get: tags: - - Container - operationId: Container_GetContainer - parameters: - - name: name - in: query - schema: - type: string - - name: podUid - in: query - schema: - type: string - - name: deviceId - in: query - schema: - type: string + - ResourcePool + operationId: ResourcePool_GetAvailableNodes responses: "200": description: OK content: application/json: schema: - $ref: '#/components/schemas/ContainerReply' + $ref: '#/components/schemas/AvailableNodesResponse' default: description: Default error response content: application/json: schema: $ref: '#/components/schemas/Status' - /v1/containers: + /v1/resource/pool/create: post: tags: - - Container - operationId: Container_GetAllContainers + - ResourcePool + operationId: ResourcePool_Create requestBody: content: application/json: schema: - $ref: '#/components/schemas/GetAllContainersReq' + $ref: '#/components/schemas/ResourcePoolCreateRequest' required: true responses: "200": @@ -54,7 +41,121 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/ContainersReply' + $ref: '#/components/schemas/BaseResponse' + default: + description: Default error response + content: + application/json: + schema: + $ref: '#/components/schemas/Status' + /v1/resource/pool/delete: + post: + tags: + - ResourcePool + operationId: ResourcePool_Delete + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ResourcePoolDeleteRequest' + required: true + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/BaseResponse' + default: + description: Default error response + content: + application/json: + schema: + $ref: '#/components/schemas/Status' + /v1/resource/pool/detail: + post: + tags: + - ResourcePool + operationId: ResourcePool_GetDetail + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ResourcePoolDetailRequest' + required: true + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ResourcePoolDetailResponse' + default: + description: Default error response + content: + application/json: + schema: + $ref: '#/components/schemas/Status' + /v1/resource/pool/list: + get: + tags: + - ResourcePool + operationId: ResourcePool_List + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/ResourcePoolListResponse' + default: + description: Default error response + content: + application/json: + schema: + $ref: '#/components/schemas/Status' + /v1/resource/pool/removeNode: + post: + tags: + - ResourcePool + operationId: ResourcePool_RemoveNode + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RemoveNodeRequest' + required: true + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/BaseResponse' + default: + description: Default error response + content: + application/json: + schema: + $ref: '#/components/schemas/Status' + /v1/resource/pool/update: + post: + tags: + - ResourcePool + operationId: ResourcePool_Update + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/ResourcePoolUpdateRequest' + required: true + responses: + "200": + description: OK + content: + application/json: + schema: + $ref: '#/components/schemas/BaseResponse' default: description: Default error response content: @@ -63,119 +164,191 @@ paths: $ref: '#/components/schemas/Status' components: schemas: - ContainerReply: + AvailableNodesInfo: type: object properties: - name: + nodeName: + type: string + cpuCores: + type: string + gpuNum: + type: string + gpuMemory: + type: string + totalMemory: + type: string + diskSize: + type: string + nodeIp: + type: string + AvailableNodesResponse: + type: object + properties: + data: + type: array + items: + $ref: '#/components/schemas/AvailableNodesInfo' + BaseResponse: + type: object + properties: + code: + type: integer + format: int32 + message: type: string - status: + data: + type: object + GoogleProtobufAny: + type: object + properties: + '@type': type: string - appName: + description: The type of the serialized message. + additionalProperties: true + description: Contains an arbitrary serialized message along with a @type that describes the type of the serialized message. + Nodes: + type: object + properties: + nodeIp: type: string nodeName: type: string - allocatedDevices: + PoolNodeReply: + type: object + properties: + ip: + type: string + isSchedulable: + type: boolean + isReady: + type: boolean + type: + type: array + items: + type: string + vgpuUsed: type: integer format: int32 - allocatedCores: + vgpuTotal: type: integer format: int32 - allocatedMem: + coreUsed: type: integer format: int32 - type: + coreTotal: + type: integer + format: int32 + memoryUsed: + type: integer + format: int32 + memoryTotal: + type: integer + format: int32 + uid: type: string - createTime: + name: type: string - startTime: + cardCnt: + type: integer + format: int32 + osImage: type: string - endTime: + operatingSystem: type: string - podUid: + kernelVersion: type: string - nodeUid: + containerRuntimeVersion: type: string - resourcePools: - type: array - items: - type: string - flavor: + kubeletVersion: type: string - priority: + kubeProxyVersion: type: string - namespace: + architecture: type: string - deviceIds: - type: array - items: - type: string - podName: + creationTimestamp: type: string - taskType: + diskSize: type: string - shixunName: + nodeId: type: string - role: + cpuCores: type: string - username: + totalMemory: type: string - requestedCpuCores: - type: number - format: float - requestedMemory: + RemoveNodeRequest: + type: object + properties: + nodeId: type: string - ContainersReply: + ResourcePoolCreateRequest: type: object properties: - items: + poolName: + type: string + nodes: type: array items: - $ref: '#/components/schemas/ContainerReply' - GetAllContainersReq: + $ref: '#/components/schemas/Nodes' + ResourcePoolDeleteRequest: type: object properties: - filters: - $ref: '#/components/schemas/GetAllContainersReq_Filters' - pageSize: - $ref: '#/components/schemas/GetAllContainersReq_PageSize' - GetAllContainersReq_Filters: + poolId: + type: string + ResourcePoolDetailRequest: type: object properties: - name: + poolId: type: string - nodeName: + ResourcePoolDetailResponse: + type: object + properties: + list: + type: array + items: + $ref: '#/components/schemas/PoolNodeReply' + ResourcePoolListData: + type: object + properties: + poolId: type: string - status: + poolName: type: string - deviceId: + cpuCores: type: string - nodeUid: + nodeNum: type: string - resourceGroup: + gpuNum: type: string - priority: + availableMemory: type: string - GetAllContainersReq_PageSize: - type: object - properties: - pageSize: - type: integer - format: int32 - pageNo: - type: integer - format: int32 - sort: + totalMemory: type: string - sortField: + diskSize: type: string - GoogleProtobufAny: + nodeList: + type: array + items: + $ref: '#/components/schemas/Nodes' + linkUrl: + type: string + ResourcePoolListResponse: type: object properties: - '@type': + data: + type: array + items: + $ref: '#/components/schemas/ResourcePoolListData' + ResourcePoolUpdateRequest: + type: object + properties: + poolId: type: string - description: The type of the serialized message. - additionalProperties: true - description: Contains an arbitrary serialized message along with a @type that describes the type of the serialized message. + poolName: + type: string + nodes: + type: array + items: + $ref: '#/components/schemas/Nodes' Status: type: object properties: @@ -193,4 +366,4 @@ components: description: A list of messages that carry the error details. There is a common set of message types for APIs to use. description: 'The `Status` type defines a logical error model that is suitable for different programming environments, including REST APIs and RPC APIs. It is used by [gRPC](https://github.com/grpc). Each `Status` message contains three pieces of data: error code, error message, and error details. You can find out more about this error model and how to work with it in the [API Design Guide](https://cloud.google.com/apis/design/errors).' tags: - - name: Container + - name: ResourcePool