fix(node): 资源池移除节点接口,显卡管理,任务管理

main
youys 5 days ago
parent dbef1be45f
commit 02028e09e1

@ -65,6 +65,7 @@ message GPUReply {
string node_uid = 10;
bool health = 11;
string mode = 12;
repeated string resource_pools = 13;
}
message GPUsReply {

@ -61,11 +61,16 @@ message ContainerReply {
string end_time = 11;
string pod_uid = 12;
string node_uid = 13;
string resource_pool = 14;
repeated string resource_pool = 14;
string flavor = 15;
string priority = 16;
string namespace = 17;
repeated string device_ids = 18;
string pod_name = 19;
string task_type = 20;
string shixun_name = 21;
string role = 22;
string username = 23;
}
message ContainersReply {

@ -41,6 +41,16 @@ service ResourcePool {
};
}
rpc RemoveNode (RemoveNodeRequest) returns (BaseResponse) {
option (google.api.http) = {
post: "/v1/resource/pool/removeNode",
body: "*"
};
option (grpc.gateway.protoc_gen_openapiv2.options.openapiv2_operation) = {
summary: "移除节点";
};
}
rpc List (ResourcePoolListRequest) returns (ResourcePoolListResponse) {
option (google.api.http) = {
get: "/v1/resource/pool/list"
@ -52,7 +62,7 @@ service ResourcePool {
rpc GetDetail (ResourcePoolDetailRequest) returns (ResourcePoolDetailResponse) {
option (google.api.http) = {
get: "/v1/resource/pool/detail"
post: "/v1/resource/pool/detail"
};
option (grpc.gateway.protoc_gen_openapiv2.options.openapiv2_operation) = {
summary: "资源池详情";
@ -67,6 +77,7 @@ service ResourcePool {
summary: "可用节点列表";
};
}
}
message BaseResponse {
@ -118,6 +129,7 @@ message PoolNodeReply {
string architecture = 20;
string creation_timestamp = 21;
int64 disk_size = 22;
int64 node_id = 23;
}
message ResourcePoolDetailRequest {
@ -137,7 +149,7 @@ message ResourcePoolListData{
int64 available_memory = 6;//kb
int64 total_memory = 7; // kb
int64 disk_size = 8;
repeated string node_list = 9;
repeated Nodes node_list = 9;
}
message ResourcePoolListRequest {
@ -167,4 +179,8 @@ message AvailableNodesInfo{
}
// RemoveNodeRequest is the request message of the ResourcePool.RemoveNode RPC.
message RemoveNodeRequest{
// node_id identifies the node to remove.
int64 node_id = 1;
}

@ -34,6 +34,7 @@ func main() {
flag.Parse()
var ctx = context.Background()
database.InitConfigPath(flagconf)
if err := initDatabase(); err != nil {
log.Errorf("数据库初始化失败: %v", err)
os.Exit(1)
@ -71,13 +72,22 @@ func getNodeSelectors(c *conf.Bootstrap) map[string]string {
}
// initDatabase looks up "database.driver" and "database.dataSourceName"
// through database.Get, copies them into a database.DatabaseConfig and passes
// it to database.InitDB. A failed lookup is logged and returned to the caller.
func initDatabase() error {
config, err := database.LoadConfig(flagconf)
log.Infof("config: %+v", config)
driver, err := database.Get("database.driver")
if err != nil {
log.Errorf("Failed to load config: %v", err)
return err
}
database.InitDB(&config.Database)
log.Infof("初始化%s成功", config.Database.Driver)
dataSourceName, err := database.Get("database.dataSourceName")
if err != nil {
log.Errorf("Failed to load config: %v", err)
return err
}
var config = &database.DatabaseConfig{}
// NOTE(review): both type assertions below are unchecked and panic when the
// configured value is not a string (for example a numeric or empty YAML
// value); consider the comma-ok form and returning an error instead.
config.Driver = driver.(string)
config.DataSourceName = dataSourceName.(string)
database.InitDB(config)
log.Infof("初始化%s成功", driver)
return nil
}

@ -15,4 +15,6 @@ node_selectors:
MLU: mlu=on
database:
driver: mysql
dataSourceName: testeducoder:TEST@123@tcp(testeducoder-public.mysql.polardb.rds.aliyuncs.com:3306)/hami?parseTime=true&loc=Local
dataSourceName: testeducoder:TEST@123@tcp(testeducoder-public.mysql.polardb.rds.aliyuncs.com:3306)/hami?parseTime=true&loc=Local
web_domain: http://172.16.100.14
big_model_resource_pool_name: "大模型资源池"

@ -21,6 +21,7 @@ type Container struct {
Priority string
NodeUID string
Namespace string
TpiID string
}
type PodInfo struct {
@ -31,6 +32,7 @@ type PodInfo struct {
Devices PodDevices
CtrIDs []string
Ctrs []*Container
Labels map[string]string
}
type PodRepo interface {

@ -22,15 +22,17 @@ type podRepo struct {
data *Data
podLister listerscorev1.PodLister
pods map[k8stypes.UID]*biz.PodInfo
allPods []*biz.PodInfo
mutex sync.RWMutex
log *log.Helper
}
func NewPodRepo(data *Data, logger log.Logger) biz.PodRepo {
repo := &podRepo{
data: data,
pods: make(map[k8stypes.UID]*biz.PodInfo),
log: log.NewHelper(logger),
data: data,
pods: make(map[k8stypes.UID]*biz.PodInfo),
allPods: []*biz.PodInfo{},
log: log.NewHelper(logger),
}
repo.init()
return repo
@ -91,8 +93,9 @@ func (r *podRepo) addPod(pod *corev1.Pod, nodeID string, devices biz.PodDevices)
r.mutex.Lock()
defer r.mutex.Unlock()
ctrs := r.fetchContainerInfo(pod)
pi := &biz.PodInfo{Name: pod.Name, UID: pod.UID, Namespace: pod.Namespace, NodeID: nodeID, Devices: devices, Ctrs: ctrs}
pi := &biz.PodInfo{Name: pod.Name, UID: pod.UID, Namespace: pod.Namespace, NodeID: nodeID, Devices: devices, Ctrs: ctrs, Labels: pod.Labels}
r.pods[pod.UID] = pi
r.allPods = append(r.allPods, pi)
r.log.Infof("Pod added: Name: %s, UID: %s, Namespace: %s, NodeID: %s", pod.Name, pod.UID, pod.Namespace, nodeID)
}
@ -185,7 +188,11 @@ func (r *podRepo) GetStartTime(pod *corev1.Pod) time.Time {
// ListAll returns the containers of every pod currently held in r.pods.
// Each container is stamped with the value of its pod's "tpi-id" label
// (the empty string when the pod has no such label) before it is returned.
//
// The repository mutex is held for the whole call: addPod mutates r.pods
// under the same mutex, and this method writes to the shared Container
// values, so an exclusive lock (not a read lock) is required to stay
// race-free when several callers list concurrently.
func (r *podRepo) ListAll(context.Context) ([]*biz.Container, error) {
	r.mutex.Lock()
	defer r.mutex.Unlock()

	var containerList []*biz.Container
	for _, pod := range r.pods {
		// Indexing a nil Labels map is safe and yields "".
		tpiID := pod.Labels["tpi-id"]
		for _, container := range pod.Ctrs {
			container.TpiID = tpiID
			containerList = append(containerList, container)
		}
	}
	return containerList, nil
}

@ -1,31 +1,97 @@
package database
import (
"encoding/json"
"fmt"
"github.com/go-kratos/kratos/v2/log"
"gopkg.in/yaml.v3"
"os"
"strings"
"sync"
)
type DatabaseConfig struct {
Driver string `yaml:"driver"`
DataSourceName string `yaml:"dataSourceName"`
}
var (
configData map[string]interface{}
loadOnce sync.Once
loadErr error
configPath string
)
type Config struct {
Database DatabaseConfig `yaml:"database"`
// InitConfigPath sets the path of the configuration file (optional).
// It only stores the value in the package-level configPath variable; nothing
// is read from disk here.
func InitConfigPath(path string) {
configPath = path
}
func LoadConfig(filePath string) (*Config, error) {
// loadYAML reads the YAML file at filePath and decodes it into a
// map[string]interface{}. It returns an error when the file cannot be read,
// when the YAML cannot be decoded, or when the converted value is not a
// map[string]interface{}.
func loadYAML(filePath string) (map[string]interface{}, error) {
yamlFile, err := os.ReadFile(filePath)
if err != nil {
return nil, fmt.Errorf("failed to read config file: %v", err)
}
var config Config
err = yaml.Unmarshal(yamlFile, &config)
var raw map[string]interface{}
err = yaml.Unmarshal(yamlFile, &raw)
if err != nil {
return nil, fmt.Errorf("failed to unmarshal yaml: %v", err)
}
// NOTE(review): the entire decoded configuration is written to the log at
// Info level. If the file holds secrets (e.g. database.dataSourceName with
// credentials) they end up in the logs — confirm this is intended. The
// MarshalIndent error is also discarded.
b, _ := json.MarshalIndent(raw, "", " ")
log.Info("loadYAML: ", string(b))
// Normalize nested map key types before handing the config to callers.
converted, ok := toStringKeyMap(raw).(map[string]interface{})
if !ok {
return nil, fmt.Errorf("failed to convert config to map[string]interface{}")
}
return converted, nil
}
// GetConfig returns the package-wide configuration map together with the
// error produced while loading it. The load runs at most once (guarded by
// loadOnce); later calls return the cached map and error. When no path was
// set beforehand, "config.yaml" is used.
func GetConfig() (map[string]interface{}, error) {
	loadOnce.Do(func() {
		const fallbackPath = "config.yaml"
		if len(configPath) == 0 {
			configPath = fallbackPath
		}
		data, err := loadYAML(configPath)
		configData, loadErr = data, err
	})
	return configData, loadErr
}
// Get returns a nested configuration value addressed by a dot-separated key,
// for example Get("database.driver").
//
// The key is split on "." and each segment is looked up in turn. An
// "invalid path" error is returned when a segment has to be resolved against
// a value that is not a map[string]interface{}; a "key not found" error is
// returned when a segment is missing. Errors from GetConfig are returned too.
func Get(key string) (interface{}, error) {
cfg, err := GetConfig()
if err != nil {
return nil, fmt.Errorf("failed to unmarshal config file: %v", err)
return nil, err
}
keys := strings.Split(key, ".")
// Walk down the config one path segment at a time.
var val interface{} = cfg
for _, k := range keys {
m, ok := val.(map[string]interface{})
if !ok {
return nil, fmt.Errorf("invalid path: %s", key)
}
val, ok = m[k]
if !ok {
return nil, fmt.Errorf("key not found: %s", key)
}
}
return val, nil
}
return &config, nil
// toStringKeyMap recursively normalizes a decoded YAML value so that every
// map inside it is a map[string]interface{}.
//
//   - map[interface{}]interface{} is copied into a new map[string]interface{},
//     with each key formatted via fmt.Sprintf("%v", key).
//   - map[string]interface{} and []interface{} are updated in place so that
//     their elements are normalized as well.
//   - any other value is returned unchanged.
//
// The map[string]interface{} case matters because callers pass a value of
// exactly that type; without it the function would return early and nested
// interface-keyed maps would never be converted.
func toStringKeyMap(i interface{}) interface{} {
	switch x := i.(type) {
	case map[interface{}]interface{}:
		m2 := make(map[string]interface{}, len(x))
		for k, v := range x {
			m2[fmt.Sprintf("%v", k)] = toStringKeyMap(v)
		}
		return m2
	case map[string]interface{}:
		// Overwriting existing keys while ranging over a map is safe in Go.
		for k, v := range x {
			x[k] = toStringKeyMap(v)
		}
		return x
	case []interface{}:
		for idx, v := range x {
			x[idx] = toStringKeyMap(v)
		}
	}
	return i
}

@ -8,6 +8,15 @@ import (
var db *sql.DB
type DatabaseConfig struct {
Driver string `yaml:"driver"`
DataSourceName string `yaml:"dataSourceName"`
}
type Config struct {
Database DatabaseConfig `yaml:"database"`
}
func InitDB(config *DatabaseConfig) {
var err error
switch config.Driver {

@ -180,6 +180,37 @@ func QueryResourceNamesByIp(nodeIp string) ([]string, error) {
return resourcePoolNames, nil
}
// QueryResourceNamesByNodeName returns the pool_name of every resource pool
// whose id appears as pool_id on a nodes row with the given node_name.
// The result is a non-nil (possibly empty) slice; query, scan and iteration
// errors are returned to the caller.
func QueryResourceNamesByNodeName(nodeName string) ([]string, error) {
	const query = "select pool_name from resource_pool where id in (select distinct pool_id from nodes where node_name=?)"

	rows, queryErr := db.Query(query, nodeName)
	if queryErr != nil {
		log.Infof("Query failed: %v", queryErr)
		return nil, queryErr
	}
	defer rows.Close()

	names := []string{}
	for rows.Next() {
		var poolName string
		if scanErr := rows.Scan(&poolName); scanErr != nil {
			log.Infof("Scan failed: %v", scanErr)
			return nil, scanErr
		}
		names = append(names, poolName)
	}

	// Surface any error that interrupted the iteration.
	if iterErr := rows.Err(); iterErr != nil {
		return nil, iterErr
	}
	return names, nil
}
func InsertResourcePool(poolName string) (int64, error) {
querySql := "INSERT INTO resource_pool(pool_name) VALUES (?)"
@ -272,3 +303,18 @@ func DeleteNodesByPoolId(poolId int64) (int64, error) {
return rowsAffected, nil
}
// DeleteNodeById deletes the nodes row whose id equals nodeId and returns the
// number of affected rows (0 means nothing was deleted). Failures of the
// statement or of the row count are wrapped and returned.
func DeleteNodeById(nodeId int64) (int64, error) {
	res, execErr := db.Exec("DELETE FROM nodes WHERE id = ?", nodeId)
	if execErr != nil {
		return 0, fmt.Errorf("delete failed: %w", execErr)
	}

	affected, countErr := res.RowsAffected()
	if countErr != nil {
		return 0, fmt.Errorf("get rows affected failed: %w", countErr)
	}
	return affected, nil
}

@ -7,6 +7,7 @@ import (
"strings"
pb "vgpu/api/v1"
"vgpu/internal/biz"
"vgpu/internal/database"
)
type CardService struct {
@ -51,6 +52,11 @@ func (s *CardService) GetAllGPUs(ctx context.Context, req *pb.GetAllGpusReq) (*p
gpu.NodeUid = device.NodeUid
gpu.Health = device.Health
gpu.Mode = device.Mode
resourcePoolNames, err := database.QueryResourceNamesByNodeName(device.NodeName)
if err != nil {
return nil, err
}
gpu.ResourcePools = resourcePoolNames
vGPU, core, memory, err := s.pod.StatisticsByDeviceId(ctx, device.AliasId)
if err == nil {

@ -2,11 +2,16 @@ package service
import (
	"context"
	"encoding/json"
	"fmt"
	"slices"
	"sort"
	"strings"
	"time"

	"github.com/go-kratos/kratos/v2/log"

	pb "vgpu/api/v1"
	"vgpu/internal/biz"
	"vgpu/internal/database"
	"vgpu/internal/utils"
)
var statusOrder = map[string]int{
@ -86,6 +91,32 @@ func (s *ContainerService) GetAllContainers(ctx context.Context, req *pb.GetAllC
if containerReply.DeviceIds == nil {
continue
}
resourcePoolNames, err := database.QueryResourceNamesByNodeName(container.NodeName)
if err != nil {
return nil, err
}
containerReply.ResourcePool = resourcePoolNames
resourcePoolName, err := database.Get("big_model_resource_pool_name")
if err != nil {
return nil, err
}
if slices.Contains(resourcePoolNames, resourcePoolName.(string)) {
containerReply.TaskType = "big_model"
} else {
containerReply.TaskType = "shixun"
}
if len(container.TpiID) > 0 {
err := s.setShixunData(ctx, containerReply, container.TpiID)
if err != nil {
return nil, err
}
}
containerReply.PodName = container.PodName
containerReply.CreateTime = container.CreateTime.Format(time.RFC3339)
res.Items = append(res.Items, containerReply)
}
@ -127,3 +158,34 @@ func (s *ContainerService) GetContainer(ctx context.Context, req *pb.GetContaine
ctrReply.CreateTime = container.CreateTime.Format(time.RFC3339)
return ctrReply, nil
}
// setShixunData looks up the shixun (training task) information for tpiId
// and copies it into containerReply.
//
// It POSTs {"tpiID": tpiId} as JSON to
// <web_domain>/api/myshixuns/get_shixun_info.json, where web_domain comes
// from the configuration. When the response carries status 0, the
// "shixun_name", "user_identity" and "user_name" entries of its "data" object
// are written to ShixunName, Role and Username; entries that are missing or
// not strings leave the corresponding field empty. Any other status leaves
// containerReply untouched.
//
// An error is returned when the configuration lookup fails or is not a
// string, when the HTTP call fails, when the body is not valid JSON, or when
// the response lacks a numeric "status" or (for status 0) an object "data".
// Malformed responses are reported as errors rather than panicking.
func (s *ContainerService) setShixunData(ctx context.Context, containerReply *pb.ContainerReply, tpiId string) error {
	webDomain, err := database.Get("web_domain")
	if err != nil {
		return err
	}
	domain, ok := webDomain.(string)
	if !ok {
		return fmt.Errorf("config web_domain: want string, got %T", webDomain)
	}

	client := utils.GetDefaultClient()
	url := domain + "/api/myshixuns/get_shixun_info.json"
	log.Info("Get shixun info url: ", url, " tpiId: ", tpiId)
	jsonData := map[string]interface{}{
		"tpiID": tpiId,
	}
	body, httpStatus, err := client.PostJSON(ctx, url, jsonData, nil)
	if err != nil {
		return err
	}
	log.Infof("Get shixun info: %s, status: %d", string(body), httpStatus)

	var respMap map[string]interface{}
	if err := json.Unmarshal(body, &respMap); err != nil {
		return fmt.Errorf("decoding shixun info for tpiID %q: %w", tpiId, err)
	}
	log.Info("Get shixun info: ", respMap, "----", respMap["status"])

	// encoding/json decodes every JSON number into float64 for interface{}.
	status, ok := respMap["status"].(float64)
	if !ok {
		return fmt.Errorf("shixun info for tpiID %q: missing or non-numeric status", tpiId)
	}
	if status != 0 {
		return nil
	}
	data, ok := respMap["data"].(map[string]interface{})
	if !ok {
		return fmt.Errorf("shixun info for tpiID %q: missing or malformed data", tpiId)
	}
	containerReply.ShixunName, _ = data["shixun_name"].(string)
	containerReply.Role, _ = data["user_identity"].(string)
	containerReply.Username, _ = data["user_name"].(string)
	return nil
}

@ -109,6 +109,17 @@ func (s *ResourcePoolService) Delete(ctx context.Context, req *pb.ResourcePoolDe
return &pb.BaseResponse{Code: 200, Message: "成功"}, nil
}
// RemoveNode deletes the node record identified by req.NodeId.
//
// It always returns a nil error; the outcome is carried in the BaseResponse:
// Code 500 when the delete fails, Code 200 otherwise. A failed delete is
// logged with its cause so the reason is not lost, and the number of affected
// rows is logged on success.
func (s *ResourcePoolService) RemoveNode(ctx context.Context, req *pb.RemoveNodeRequest) (*pb.BaseResponse, error) {
	log.Info("RemoveNode called", req)

	nodeId := req.NodeId
	num, err := database.DeleteNodeById(nodeId)
	if err != nil {
		log.Errorf("RemoveNode failed nodeId: %d, err: %v", nodeId, err)
		return &pb.BaseResponse{Code: 500, Message: "移除节点失败"}, nil
	}

	log.Infof("RemoveNode success nodeId: %d, 影响行数: %d", nodeId, num)
	return &pb.BaseResponse{Code: 200, Message: "成功"}, nil
}
func (s *ResourcePoolService) List(ctx context.Context, req *pb.ResourcePoolListRequest) (*pb.ResourcePoolListResponse, error) {
log.Info("GetResourcePoolList", req)
@ -137,7 +148,10 @@ func (s *ResourcePoolService) List(ctx context.Context, req *pb.ResourcePoolList
poolData.TotalMemory = poolData.TotalMemory + node.TotalMemory
poolData.AvailableMemory = poolData.AvailableMemory + node.AvailableMemory
poolData.DiskSize = poolData.DiskSize + node.DiskTotal
poolData.NodeList = append(poolData.NodeList, n.NodeIp)
poolData.NodeList = append(poolData.NodeList, &pb.Nodes{
NodeIp: n.NodeIp,
NodeName: n.NodeName,
})
}
data = append(data, &poolData)
}
@ -165,6 +179,7 @@ func (s *ResourcePoolService) GetDetail(ctx context.Context, req *pb.ResourcePoo
continue
}
nodeReply, err := s.buildNodeReply(ctx, node)
nodeReply.NodeId = poolNode.Id
if err != nil {
return nil, err
}

@ -0,0 +1,110 @@
package utils
import (
"bytes"
"context"
"encoding/json"
"io"
"net/http"
"net/url"
"strings"
"sync"
"time"
)
// HttpClient wraps an *http.Client and offers helpers that return the whole
// response body together with the HTTP status code.
type HttpClient struct {
	client *http.Client
}

// NewHttpClient creates an HttpClient whose underlying http.Client uses the
// given timeout.
func NewHttpClient(timeout time.Duration) *HttpClient {
	return &HttpClient{
		client: &http.Client{
			Timeout: timeout,
		},
	}
}

// send applies headers to req (overriding headers already set on it),
// executes the request and reads the full response body. When the request
// itself fails it returns (nil, 0, err); when reading the body fails it
// returns whatever was read, the status code, and the read error.
func (hc *HttpClient) send(req *http.Request, headers map[string]string) ([]byte, int, error) {
	for k, v := range headers {
		req.Header.Set(k, v)
	}

	resp, err := hc.client.Do(req)
	if err != nil {
		return nil, 0, err
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	return body, resp.StatusCode, err
}

// Get sends a GET request to rawUrl with the given headers and returns the
// response body and status code.
func (hc *HttpClient) Get(ctx context.Context, rawUrl string, headers map[string]string) ([]byte, int, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawUrl, nil)
	if err != nil {
		return nil, 0, err
	}
	return hc.send(req, headers)
}

// PostJSON sends a POST request whose body is data encoded as JSON.
// Content-Type defaults to "application/json"; a Content-Type entry in
// headers overrides it.
func (hc *HttpClient) PostJSON(ctx context.Context, rawUrl string, data interface{}, headers map[string]string) ([]byte, int, error) {
	bodyBytes, err := json.Marshal(data)
	if err != nil {
		return nil, 0, err
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, rawUrl, bytes.NewReader(bodyBytes))
	if err != nil {
		return nil, 0, err
	}
	req.Header.Set("Content-Type", "application/json")
	return hc.send(req, headers)
}

// PostForm sends a POST request whose body is formData URL-encoded.
// Content-Type defaults to "application/x-www-form-urlencoded"; a
// Content-Type entry in headers overrides it.
func (hc *HttpClient) PostForm(ctx context.Context, rawUrl string, formData map[string]string, headers map[string]string) ([]byte, int, error) {
	data := url.Values{}
	for k, v := range formData {
		data.Set(k, v)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, rawUrl, strings.NewReader(data.Encode()))
	if err != nil {
		return nil, 0, err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	return hc.send(req, headers)
}

var (
	defaultClient *HttpClient
	once          sync.Once
)

// GetDefaultClient returns the shared HttpClient, creating it with a
// 10-second timeout on first use.
func GetDefaultClient() *HttpClient {
	once.Do(func() {
		defaultClient = NewHttpClient(10 * time.Second)
	})
	return defaultClient
}

@ -3,42 +3,37 @@
openapi: 3.0.3
info:
title: Node API
title: ResourcePool API
version: 0.0.1
paths:
/v1/node:
/v1/available/nodes:
get:
tags:
- Node
operationId: Node_GetNode
parameters:
- name: uid
in: query
schema:
type: string
- ResourcePool
operationId: ResourcePool_GetAvailableNodes
responses:
"200":
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/NodeReply'
$ref: '#/components/schemas/AvailableNodesResponse'
default:
description: Default error response
content:
application/json:
schema:
$ref: '#/components/schemas/Status'
/v1/node/discovered:
/v1/resource/pool/create:
post:
tags:
- Node
operationId: Node_DiscoveredNode
- ResourcePool
operationId: ResourcePool_Create
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/DiscoveredNodeRequest'
$ref: '#/components/schemas/ResourcePoolCreateRequest'
required: true
responses:
"200":
@ -46,23 +41,23 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/DiscoveredNodeResponse'
$ref: '#/components/schemas/BaseResponse'
default:
description: Default error response
content:
application/json:
schema:
$ref: '#/components/schemas/Status'
/v1/node/join:
/v1/resource/pool/delete:
post:
tags:
- Node
operationId: Node_JoinNode
- ResourcePool
operationId: ResourcePool_Delete
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/JoinNodeRequest'
$ref: '#/components/schemas/ResourcePoolDeleteRequest'
required: true
responses:
"200":
@ -70,47 +65,64 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/JoinNodeResponse'
$ref: '#/components/schemas/BaseResponse'
default:
description: Default error response
content:
application/json:
schema:
$ref: '#/components/schemas/Status'
/v1/node/status/update:
/v1/resource/pool/detail:
post:
tags:
- Node
operationId: Node_UpdateNodeStatus
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/UpdateNodeStatusRequest'
required: true
- ResourcePool
operationId: ResourcePool_GetDetail
parameters:
- name: poolId
in: query
schema:
type: string
responses:
"200":
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/ResourcePoolDetailResponse'
default:
description: Default error response
content:
application/json:
schema:
$ref: '#/components/schemas/Status'
/v1/resource/pool/list:
get:
tags:
- ResourcePool
operationId: ResourcePool_List
responses:
"200":
description: OK
content:
application/json:
schema:
$ref: '#/components/schemas/UpdateNodeStatusResponse'
$ref: '#/components/schemas/ResourcePoolListResponse'
default:
description: Default error response
content:
application/json:
schema:
$ref: '#/components/schemas/Status'
/v1/nodes:
/v1/resource/pool/removeNode:
post:
tags:
- Node
operationId: Node_GetAllNodes
- ResourcePool
operationId: ResourcePool_RemoveNode
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetAllNodesReq'
$ref: '#/components/schemas/RemoveNodeRequest'
required: true
responses:
"200":
@ -118,23 +130,23 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/NodesReply'
$ref: '#/components/schemas/BaseResponse'
default:
description: Default error response
content:
application/json:
schema:
$ref: '#/components/schemas/Status'
/v1/summary:
/v1/resource/pool/update:
post:
tags:
- Node
operationId: Node_GetSummary
- ResourcePool
operationId: ResourcePool_Update
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/GetSummaryReq'
$ref: '#/components/schemas/ResourcePoolUpdateRequest'
required: true
responses:
"200":
@ -142,7 +154,7 @@ paths:
content:
application/json:
schema:
$ref: '#/components/schemas/DeviceSummaryReply'
$ref: '#/components/schemas/BaseResponse'
default:
description: Default error response
content:
@ -151,78 +163,40 @@ paths:
$ref: '#/components/schemas/Status'
components:
schemas:
DeviceSummaryReply:
AvailableNodesInfo:
type: object
properties:
vgpuUsed:
type: integer
format: int32
vgpuTotal:
type: integer
format: int32
coreUsed:
type: integer
format: int32
coreTotal:
type: integer
format: int32
memoryUsed:
type: integer
format: int32
memoryTotal:
type: integer
format: int32
gpuCount:
type: integer
format: int32
nodeCount:
type: integer
format: int32
DiscoveredNodeInfo:
type: object
properties:
nodeIp:
type: string
nodeName:
type: string
DiscoveredNodeRequest:
type: object
properties: {}
DiscoveredNodeResponse:
type: object
properties:
list:
type: array
items:
$ref: '#/components/schemas/DiscoveredNodeInfo'
GetAllNodesReq:
type: object
properties:
filters:
$ref: '#/components/schemas/GetAllNodesReq_Filters'
GetAllNodesReq_Filters:
type: object
properties:
ip:
cpuCores:
type: string
type:
gpuNum:
type: string
isSchedulable:
gpuMemory:
type: string
totalMemory:
type: string
diskSize:
type: string
nodeIp:
type: string
GetSummaryReq:
AvailableNodesResponse:
type: object
properties:
filters:
$ref: '#/components/schemas/GetSummaryReq_Filters'
GetSummaryReq_Filters:
data:
type: array
items:
$ref: '#/components/schemas/AvailableNodesInfo'
BaseResponse:
type: object
properties:
type:
type: string
nodeUid:
type: string
deviceId:
code:
type: integer
format: int32
message:
type: string
data:
type: object
GoogleProtobufAny:
type: object
properties:
@ -231,22 +205,14 @@ components:
description: The type of the serialized message.
additionalProperties: true
description: Contains an arbitrary serialized message along with a @type that describes the type of the serialized message.
JoinNodeRequest:
Nodes:
type: object
properties:
nodeNames:
type: array
items:
type: string
JoinNodeResponse:
type: object
properties:
code:
type: integer
format: int32
message:
nodeIp:
type: string
nodeName:
type: string
NodeReply:
PoolNodeReply:
type: object
properties:
ip:
@ -271,8 +237,7 @@ components:
coreTotal:
type: string
memoryUsed:
type: integer
format: int32
type: string
memoryTotal:
type: string
uid:
@ -300,47 +265,90 @@ components:
type: string
diskSize:
type: string
resourcePools:
nodeId:
type: string
RemoveNodeRequest:
type: object
properties:
nodeId:
type: string
ResourcePoolCreateRequest:
type: object
properties:
poolName:
type: string
nodes:
type: array
items:
type: string
NodesReply:
$ref: '#/components/schemas/Nodes'
ResourcePoolDeleteRequest:
type: object
properties:
poolId:
type: string
ResourcePoolDetailResponse:
type: object
properties:
list:
type: array
items:
$ref: '#/components/schemas/NodeReply'
Status:
$ref: '#/components/schemas/PoolNodeReply'
ResourcePoolListData:
type: object
properties:
code:
type: integer
description: The status code, which should be an enum value of [google.rpc.Code][google.rpc.Code].
format: int32
message:
poolId:
type: string
description: A developer-facing error message, which should be in English. Any user-facing error message should be localized and sent in the [google.rpc.Status.details][google.rpc.Status.details] field, or localized by the client.
details:
poolName:
type: string
cpuCores:
type: string
nodeNum:
type: string
gpuNum:
type: string
availableMemory:
type: string
totalMemory:
type: string
diskSize:
type: string
nodeList:
type: array
items:
$ref: '#/components/schemas/GoogleProtobufAny'
description: A list of messages that carry the error details. There is a common set of message types for APIs to use.
description: 'The `Status` type defines a logical error model that is suitable for different programming environments, including REST APIs and RPC APIs. It is used by [gRPC](https://github.com/grpc). Each `Status` message contains three pieces of data: error code, error message, and error details. You can find out more about this error model and how to work with it in the [API Design Guide](https://cloud.google.com/apis/design/errors).'
UpdateNodeStatusRequest:
$ref: '#/components/schemas/Nodes'
ResourcePoolListResponse:
type: object
properties:
nodeName:
data:
type: array
items:
$ref: '#/components/schemas/ResourcePoolListData'
ResourcePoolUpdateRequest:
type: object
properties:
poolId:
type: string
status:
poolName:
type: string
UpdateNodeStatusResponse:
nodes:
type: array
items:
$ref: '#/components/schemas/Nodes'
Status:
type: object
properties:
code:
type: integer
description: The status code, which should be an enum value of [google.rpc.Code][google.rpc.Code].
format: int32
message:
type: string
description: A developer-facing error message, which should be in English. Any user-facing error message should be localized and sent in the [google.rpc.Status.details][google.rpc.Status.details] field, or localized by the client.
details:
type: array
items:
$ref: '#/components/schemas/GoogleProtobufAny'
description: A list of messages that carry the error details. There is a common set of message types for APIs to use.
description: 'The `Status` type defines a logical error model that is suitable for different programming environments, including REST APIs and RPC APIs. It is used by [gRPC](https://github.com/grpc). Each `Status` message contains three pieces of data: error code, error message, and error details. You can find out more about this error model and how to work with it in the [API Design Guide](https://cloud.google.com/apis/design/errors).'
tags:
- name: Node
- name: ResourcePool

Loading…
Cancel
Save