upgrade v2.6.0

main
czd 2 months ago
parent 5b017e0e74
commit cfe36488a7

@ -42,8 +42,8 @@
在master节点上运行
```
# nerdctl -nk8s.io load -i hami251.tar
# nerdctl -nk8s.io push sealos.hub:5000/projecthami/hami:v2.5.1
# nerdctl -nk8s.io load -i hami260.tar
# nerdctl -nk8s.io push sealos.hub:5000/projecthami/hami:v2.6.0
# nerdctl -nk8s.io load -i scheduler1-28-9.tar
# nerdctl -nk8s.io push sealos.hub:5000/google_containers/kube-scheduler:v1.28.9
# nerdctl -nk8s.io load -i kube-webhook-certgen.tar

Binary file not shown.

@ -1,15 +1,15 @@
apiVersion: v2
appVersion: 2.5.1
appVersion: 2.6.0
description: Heterogeneous AI Computing Virtualization Middleware
keywords:
- vgpu
- gpu
kubeVersion: '>= 1.18.0'
kubeVersion: '>= 1.18.0-0'
maintainers:
- email: limengxuan@4paradigm.com
- email: archlitchi@gmail.com
name: limengxuan
- email: xiaozhang0210@hotmail.com
name: zhangxiao
name: hami
type: application
version: 2.5.1
version: 2.6.0

@ -23,6 +23,17 @@ If release name contains chart name it will be used as a full name.
{{- end -}}
{{- end -}}
{{/*
Allow the release namespace to be overridden for multi-namespace deployments in combined charts
*/}}
{{- define "hami-vgpu.namespace" -}}
{{- if .Values.namespaceOverride -}}
{{- .Values.namespaceOverride -}}
{{- else -}}
{{- .Release.Namespace -}}
{{- end -}}
{{- end -}}
{{/*
The app name for Scheduler
*/}}

@ -2,6 +2,7 @@ apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "hami-vgpu.device-plugin" . }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels:
app.kubernetes.io/component: hami-device-plugin
{{- include "hami-vgpu.labels" . | nindent 4 }}

@ -2,6 +2,7 @@ apiVersion: apps/v1
kind: DaemonSet
metadata:
name: {{ include "hami-vgpu.device-plugin" . }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels:
app.kubernetes.io/component: hami-device-plugin
{{- include "hami-vgpu.labels" . | nindent 4 }}
@ -26,8 +27,11 @@ spec:
app.kubernetes.io/component: hami-device-plugin
hami.io/webhook: ignore
{{- include "hami-vgpu.selectorLabels" . | nindent 8 }}
annotations:
checksum/hami-device-plugin-config: {{ include (print $.Template.BasePath "/device-plugin/configmap.yaml") . | sha256sum }}
checksum/hami-scheduler-device-config: {{ include (print $.Template.BasePath "/scheduler/device-configmap.yaml") . | sha256sum }}
{{- if .Values.devicePlugin.podAnnotations }}
annotations: {{ toYaml .Values.devicePlugin.podAnnotations | nindent 8 }}
{{- toYaml .Values.devicePlugin.podAnnotations | nindent 8 }}
{{- end }}
spec:
{{- if .Values.devicePlugin.runtimeClassName }}
@ -92,7 +96,11 @@ spec:
- name: vgpu-monitor
image: {{ .Values.devicePlugin.image }}:{{ .Values.version }}
imagePullPolicy: {{ .Values.devicePlugin.imagePullPolicy | quote }}
command: ["vGPUmonitor"]
command:
- "vGPUmonitor"
{{- range .Values.devicePlugin.extraArgs }}
- {{ . }}
{{- end }}
securityContext:
allowPrivilegeEscalation: false
capabilities:

@ -13,4 +13,4 @@ roleRef:
subjects:
- kind: ServiceAccount
name: {{ include "hami-vgpu.device-plugin" . }}
namespace: {{ .Release.Namespace | quote }}
namespace: {{ include "hami-vgpu.namespace" . }}

@ -2,22 +2,26 @@ apiVersion: v1
kind: Service
metadata:
name: {{ include "hami-vgpu.device-plugin" . }}-monitor
namespace: {{ include "hami-vgpu.namespace" . }}
labels:
app.kubernetes.io/component: hami-device-plugin
{{- include "hami-vgpu.labels" . | nindent 4 }}
{{- if .Values.scheduler.service.labels }}
{{ toYaml .Values.scheduler.service.labels | indent 4 }}
{{- if .Values.devicePlugin.service.labels }} # Use devicePlugin instead of scheduler
{{ toYaml .Values.devicePlugin.service.labels | indent 4 }}
{{- end }}
{{- if .Values.scheduler.service.annotations }}
annotations: {{ toYaml .Values.scheduler.service.annotations | nindent 4 }}
{{- if .Values.devicePlugin.service.annotations }} # Use devicePlugin instead of scheduler
annotations: {{ toYaml .Values.devicePlugin.service.annotations | nindent 4 }}
{{- end }}
spec:
externalTrafficPolicy: Local
selector:
app.kubernetes.io/component: hami-device-plugin
type: NodePort
type: {{ .Values.devicePlugin.service.type | default "NodePort" }} # Default type is NodePort
ports:
- name: monitorport
port: {{ .Values.devicePlugin.service.httpPort }}
port: {{ .Values.devicePlugin.service.httpPort | default 31992 }} # Default HTTP port is 31992
targetPort: 9394
nodePort: {{ .Values.devicePlugin.service.httpPort }}
{{- if eq (.Values.devicePlugin.service.type | default "NodePort") "NodePort" }} # If type is NodePort, set nodePort
nodePort: {{ .Values.devicePlugin.service.httpPort | default 31992 }}
{{- end }}
protocol: TCP
selector:
app.kubernetes.io/component: hami-device-plugin
{{- include "hami-vgpu.selectorLabels" . | nindent 4 }}

@ -2,7 +2,7 @@ apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "hami-vgpu.device-plugin" . }}
namespace: {{ .Release.Namespace | quote }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels:
app.kubernetes.io/component: "hami-device-plugin"
{{- include "hami-vgpu.labels" . | nindent 4 }}

@ -0,0 +1,9 @@
{{- /*
Optional RuntimeClass for the device plugin.
Rendered only when both devicePlugin.createRuntimeClass and
devicePlugin.runtimeClassName are set; the object takes its name from
devicePlugin.runtimeClassName.
*/ -}}
{{- if and .Values.devicePlugin.createRuntimeClass .Values.devicePlugin.runtimeClassName }}
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: {{ .Values.devicePlugin.runtimeClassName | quote }}
  annotations:
    # Helm hook: applied before the release's regular manifests on install and upgrade.
    helm.sh/hook: pre-install,pre-upgrade
# handler is a top-level RuntimeClass field (there is no spec); fixed to the nvidia handler.
handler: nvidia
{{- end }}

@ -0,0 +1,29 @@
{{- /*
cert-manager resources for the scheduler's TLS secret.
Rendered only when scheduler.certManager.enabled is true. Defines a
self-signed Issuer and a Certificate issued by it; the Certificate is stored
in the secret named by the "hami-vgpu.scheduler.tls" helper.
*/ -}}
{{- if .Values.scheduler.certManager.enabled }}
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: {{ include "hami-vgpu.scheduler" . }}-serving-cert
  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.labels" . | nindent 4 }}
spec:
  # DNS names of the scheduler Service: <service>.<namespace>.svc[.cluster.local]
  dnsNames:
    - {{ include "hami-vgpu.scheduler" . }}.{{ include "hami-vgpu.namespace" . }}.svc
    - {{ include "hami-vgpu.scheduler" . }}.{{ include "hami-vgpu.namespace" . }}.svc.cluster.local
  # Signed by the self-signed Issuer declared in the second document of this file.
  issuerRef:
    kind: Issuer
    name: {{ include "hami-vgpu.scheduler" . }}-selfsigned-issuer
  secretName: {{ include "hami-vgpu.scheduler.tls" . }}
---
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: {{ include "hami-vgpu.scheduler" . }}-selfsigned-issuer
  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.labels" . | nindent 4 }}
spec:
  selfSigned: {}
{{- end }}

@ -3,6 +3,7 @@ apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels:
app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.labels" . | nindent 4 }}
@ -40,6 +41,14 @@ data:
},
{{- end }}
{{- end }}
{{- if .Values.devices.enflame.enabled }}
{{- range .Values.devices.enflame.customresources }}
{
"name": "{{ . }}",
"ignoredByScheduler": true
},
{{- end }}
{{- end }}
{
"name": "{{ .Values.resourceName }}",
"ignoredByScheduler": true
@ -79,6 +88,22 @@ data:
{
"name": "{{ .Values.iluvatarResourceName }}",
"ignoredByScheduler": true
},
{
"name": "metax-tech.com/gpu",
"ignoredByScheduler": true
},
{
"name": "{{ .Values.metaxResourceName }}",
"ignoredByScheduler": true
},
{
"name": "{{ .Values.metaxResourceCore }}",
"ignoredByScheduler": true
},
{
"name": "{{ .Values.metaxResourceMem }}",
"ignoredByScheduler": true
}
],
"ignoreable": false

@ -3,6 +3,7 @@ apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "hami-vgpu.scheduler" . }}-newversion
namespace: {{ include "hami-vgpu.namespace" . }}
labels:
app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.labels" . | nindent 4 }}
@ -49,6 +50,14 @@ data:
ignoredByScheduler: true
- name: {{ .Values.iluvatarResourceName }}
ignoredByScheduler: true
- name: "metax-tech.com/gpu"
ignoredByScheduler: true
- name: {{ .Values.metaxResourceName }}
ignoredByScheduler: true
- name: {{ .Values.metaxResourceCore }}
ignoredByScheduler: true
- name: {{ .Values.metaxResourceMem }}
ignoredByScheduler: true
{{- if .Values.devices.ascend.enabled }}
{{- range .Values.devices.ascend.customresources }}
- name: {{ . }}
@ -61,4 +70,10 @@ data:
ignoredByScheduler: true
{{- end }}
{{- end }}
{{- if .Values.devices.enflame.enabled }}
{{- range .Values.devices.enflame.customresources }}
- name: {{ . }}
ignoredByScheduler: true
{{- end }}
{{- end }}
{{- end }}

@ -2,6 +2,7 @@ apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels:
app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.labels" . | nindent 4 }}
@ -12,7 +13,11 @@ metadata:
annotations: {{ toYaml .Values.global.annotations | nindent 4}}
{{- end }}
spec:
{{- if .Values.scheduler.leaderElect }}
replicas: {{ .Values.scheduler.replicas }}
{{- else }}
replicas: 1
{{- end }}
selector:
matchLabels:
app.kubernetes.io/component: hami-scheduler
@ -23,8 +28,15 @@ spec:
app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.selectorLabels" . | nindent 8 }}
hami.io/webhook: ignore
annotations:
{{- if ge (regexReplaceAll "[^0-9]" .Capabilities.KubeVersion.Minor "" | int) 22 }}
checksum/hami-scheduler-newversion-config: {{ include (print $.Template.BasePath "/scheduler/configmapnew.yaml") . | sha256sum }}
{{- else }}
checksum/hami-scheduler-config: {{ include (print $.Template.BasePath "/scheduler/configmap.yaml") . | sha256sum }}
{{- end }}
checksum/hami-scheduler-device-config: {{ include (print $.Template.BasePath "/scheduler/device-configmap.yaml") . | sha256sum }}
{{- if .Values.scheduler.podAnnotations }}
annotations: {{ toYaml .Values.scheduler.podAnnotations | nindent 8 }}
{{- toYaml .Values.scheduler.podAnnotations | nindent 8 }}
{{- end }}
spec:
{{- include "hami-vgpu.imagePullSecrets" . | nindent 6}}
@ -49,7 +61,7 @@ spec:
{{- end }}
- --leader-elect={{ .Values.scheduler.leaderElect }}
- --leader-elect-resource-name={{ .Values.schedulerName }}
- --leader-elect-resource-namespace={{ .Release.Namespace }}
- --leader-elect-resource-namespace={{ include "hami-vgpu.namespace" . }}
resources:
{{- toYaml .Values.scheduler.kubeScheduler.resources | nindent 12 }}
volumeMounts:

@ -2,6 +2,7 @@ apiVersion: v1
kind: ConfigMap
metadata:
name: {{ include "hami-vgpu.scheduler" . }}-device
namespace: {{ include "hami-vgpu.namespace" . }}
labels:
app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.labels" . | nindent 4 }}
@ -23,6 +24,9 @@ data:
deviceSplitCount: {{ .Values.devicePlugin.deviceSplitCount }}
deviceMemoryScaling: {{ .Values.devicePlugin.deviceMemoryScaling }}
deviceCoreScaling: {{ .Values.devicePlugin.deviceCoreScaling }}
gpuCorePolicy: {{ .Values.devices.nvidia.gpuCorePolicy }}
libCudaLogLevel: {{ .Values.devices.nvidia.libCudaLogLevel }}
runtimeClassName: "{{ .Values.devicePlugin.runtimeClassName }}"
knownMigGeometries:
- models: [ "A30" ]
allowedGeometries:
@ -90,6 +94,12 @@ data:
resourceCoreName: {{ .Values.dcuResourceCores }}
metax:
resourceCountName: "metax-tech.com/gpu"
resourceVCountName: {{ .Values.metaxResourceName }}
resourceVMemoryName: {{ .Values.metaxResourceMem }}
resourceVCoreName: {{ .Values.metaxResourceCore }}
enflame:
resourceCountName: "enflame.com/vgcu"
resourcePercentageName: "enflame.com/vgcu-percentage"
mthreads:
resourceCountName: "mthreads.com/vgpu"
resourceMemoryName: "mthreads.com/sgpu-memory"
@ -119,6 +129,27 @@ data:
- name: vir16
memory: 17476
aiCore: 16
- chipName: 910B2
commonWord: Ascend910B2
resourceName: huawei.com/Ascend910B2
resourceMemoryName: huawei.com/Ascend910B2-memory
memoryAllocatable: 65536
memoryCapacity: 65536
aiCore: 24
aiCPU: 6
templates:
- name: vir03_1c_8g
memory: 8192
aiCore: 3
aiCPU: 1
- name: vir06_1c_16g
memory: 16384
aiCore: 6
aiCPU: 1
- name: vir12_3c_32g
memory: 32768
aiCore: 12
aiCPU: 3
- chipName: 910B3
commonWord: Ascend910B
resourceName: huawei.com/Ascend910B

@ -1,3 +1,4 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
@ -24,3 +25,4 @@ rules:
resourceNames:
- {{ include "hami-vgpu.fullname" . }}-admission
{{- end }}
{{- end }}

@ -1,3 +1,4 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
@ -15,4 +16,5 @@ roleRef:
subjects:
- kind: ServiceAccount
name: {{ include "hami-vgpu.fullname" . }}-admission
namespace: {{ .Release.Namespace | quote }}
namespace: {{ include "hami-vgpu.namespace" . }}
{{- end }}

@ -1,7 +1,9 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "hami-vgpu.fullname" . }}-admission-create
namespace: {{ include "hami-vgpu.namespace" . }}
annotations:
"helm.sh/hook": pre-install,pre-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -41,11 +43,11 @@ spec:
- --cert-name=tls.crt
- --key-name=tls.key
{{- if .Values.scheduler.admissionWebhook.customURL.enabled }}
- --host={{ printf "%s.%s.svc,127.0.0.1,%s" (include "hami-vgpu.scheduler" .) .Release.Namespace .Values.scheduler.admissionWebhook.customURL.host}}
- --host={{ printf "%s.%s.svc,127.0.0.1,%s" (include "hami-vgpu.scheduler" .) (include "hami-vgpu.namespace" .) .Values.scheduler.admissionWebhook.customURL.host}}
{{- else }}
- --host={{ printf "%s.%s.svc,127.0.0.1" (include "hami-vgpu.scheduler" .) .Release.Namespace }}
- --host={{ printf "%s.%s.svc,127.0.0.1" (include "hami-vgpu.scheduler" .) (include "hami-vgpu.namespace" .) }}
{{- end }}
- --namespace={{ .Release.Namespace }}
- --namespace={{ include "hami-vgpu.namespace" . }}
- --secret-name={{ include "hami-vgpu.scheduler.tls" . }}
restartPolicy: OnFailure
serviceAccountName: {{ include "hami-vgpu.fullname" . }}-admission
@ -58,3 +60,4 @@ spec:
securityContext:
runAsNonRoot: true
runAsUser: {{ .Values.scheduler.patch.runAsUser }}
{{- end }}

@ -1,7 +1,9 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: batch/v1
kind: Job
metadata:
name: {{ include "hami-vgpu.fullname" . }}-admission-patch
namespace: {{ include "hami-vgpu.namespace" . }}
annotations:
"helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -39,7 +41,7 @@ spec:
args:
- patch
- --webhook-name={{ include "hami-vgpu.scheduler.webhook" . }}
- --namespace={{ .Release.Namespace }}
- --namespace={{ include "hami-vgpu.namespace" . }}
- --patch-validating=false
- --secret-name={{ include "hami-vgpu.scheduler.tls" . }}
restartPolicy: OnFailure
@ -53,3 +55,4 @@ spec:
securityContext:
runAsNonRoot: true
runAsUser: {{ .Values.scheduler.patch.runAsUser }}
{{- end }}

@ -1,3 +1,4 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
{{- if .Values.podSecurityPolicy.enabled }}
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
@ -34,3 +35,4 @@ spec:
- secret
- downwardAPI
{{- end }}
{{- end }}

@ -1,7 +1,9 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: {{ include "hami-vgpu.fullname" . }}-admission
namespace: {{ include "hami-vgpu.namespace" . }}
annotations:
"helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -16,3 +18,4 @@ rules:
verbs:
- get
- create
{{- end }}

@ -1,7 +1,9 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: {{ include "hami-vgpu.fullname" . }}-admission
namespace: {{ include "hami-vgpu.namespace" . }}
annotations:
"helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -15,4 +17,5 @@ roleRef:
subjects:
- kind: ServiceAccount
name: {{ include "hami-vgpu.fullname" . }}-admission
namespace: {{ .Release.Namespace | quote }}
namespace: {{ include "hami-vgpu.namespace" . }}
{{- end }}

@ -1,10 +1,13 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "hami-vgpu.fullname" . }}-admission
namespace: {{ include "hami-vgpu.namespace" . }}
annotations:
"helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
labels:
{{- include "hami-vgpu.labels" . | nindent 4 }}
app.kubernetes.io/component: admission-webhook
{{- end }}

@ -12,4 +12,4 @@ roleRef:
subjects:
- kind: ServiceAccount
name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ .Release.Namespace | quote }}
namespace: {{ include "hami-vgpu.namespace" . }}

@ -2,6 +2,7 @@ apiVersion: v1
kind: Service
metadata:
name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels:
app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.labels" . | nindent 4 }}
@ -12,19 +13,22 @@ metadata:
annotations: {{ toYaml .Values.scheduler.service.annotations | nindent 4 }}
{{- end }}
spec:
type: NodePort
type: {{ .Values.scheduler.service.type | default "NodePort" }} # Default type is NodePort
ports:
- name: http
port: {{ .Values.scheduler.service.httpPort }}
targetPort: 443
nodePort: {{ .Values.scheduler.service.schedulerPort }}
port: {{ .Values.scheduler.service.httpPort | default 443 }} # Default HTTP port is 443
targetPort: {{ .Values.scheduler.service.httpTargetPort | default 443 }}
{{- if eq (.Values.scheduler.service.type | default "NodePort") "NodePort" }} # If type is NodePort, set nodePort
nodePort: {{ .Values.scheduler.service.schedulerPort | default 31998 }}
{{- end }}
protocol: TCP
- name: monitor
port: {{ .Values.scheduler.service.monitorPort }}
targetPort: {{ (split ":" (printf "%s" .Values.scheduler.metricsBindAddress))._1 }}
nodePort: {{ .Values.scheduler.service.monitorPort }}
port: {{ .Values.scheduler.service.monitorPort | default 31993 }} # Default monitoring port is 31993
targetPort: {{ .Values.scheduler.service.monitorTargetPort | default 9395 }}
{{- if eq (.Values.scheduler.service.type | default "NodePort") "NodePort" }} # If type is NodePort, set nodePort
nodePort: {{ .Values.scheduler.service.monitorPort | default 31993 }}
{{- end }}
protocol: TCP
selector:
app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.selectorLabels" . | nindent 4 }}

@ -2,7 +2,7 @@ apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ .Release.Namespace | quote }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels:
app.kubernetes.io/component: "hami-scheduler"
{{- include "hami-vgpu.labels" . | nindent 4 }}

@ -1,6 +1,10 @@
apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration
metadata:
{{- if .Values.scheduler.certManager.enabled }}
annotations:
cert-manager.io/inject-ca-from: {{ include "hami-vgpu.namespace" . }}/{{ include "hami-vgpu.scheduler" . }}-serving-cert
{{- end }}
name: {{ include "hami-vgpu.scheduler.webhook" . }}
webhooks:
- admissionReviewVersions:
@ -11,7 +15,7 @@ webhooks:
{{- else }}
service:
name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ .Release.Namespace }}
namespace: {{ include "hami-vgpu.namespace" . }}
path: /webhook
port: {{ .Values.scheduler.service.httpPort }}
{{- end }}

@ -2,8 +2,9 @@
nameOverride: ""
fullnameOverride: ""
namespaceOverride: ""
imagePullSecrets: [ ]
version: "v2.5.1"
version: "v2.6.0"
#Nvidia GPU Parameters
resourceName: "nvidia.com/gpu"
@ -27,6 +28,11 @@ iluvatarResourceName: "iluvatar.ai/vgpu"
iluvatarResourceMem: "iluvatar.ai/vcuda-memory"
iluvatarResourceCore: "iluvatar.ai/vcuda-core"
#Metax SGPU Parameters
metaxResourceName: "metax-tech.com/sgpu"
metaxResourceCore: "metax-tech.com/vcore"
metaxResourceMem: "metax-tech.com/vmemory"
schedulerName: "hami-scheduler"
podSecurityPolicy:
@ -55,6 +61,8 @@ scheduler:
metricsBindAddress: ":9395"
livenessProbe: false
leaderElect: true
# replicas takes effect only when leaderElect is true; otherwise the scheduler always runs with 1 replica.
replicas: 1
kubeScheduler:
# @param enabled indicate whether to run kube-scheduler container in the scheduler pod, it's true by default.
enabled: true
@ -109,7 +117,14 @@ scheduler:
# - istio-system
reinvocationPolicy: Never
failurePolicy: Ignore
## TLS Certificate Option 1: Use cert-manager to generate self-signed certificate.
## If enabled, it always takes precedence over option 2.
certManager:
enabled: false
## TLS Certificate Option 2: Use kube-webhook-certgen to generate self-signed certificate.
## If true and certManager.enabled is false, Helm will automatically create a self-signed cert and secret for you.
patch:
enabled: true
image: docker.io/jettech/kube-webhook-certgen:v1.5.2
imageNew: liangjw/kube-webhook-certgen:v1.1.1
imagePullPolicy: IfNotPresent
@ -119,9 +134,11 @@ scheduler:
tolerations: []
runAsUser: 2000
service:
httpPort: 443
schedulerPort: 31998
monitorPort: 31993
type: NodePort # Default type is NodePort, can be changed to ClusterIP
httpPort: 443 # HTTP port
schedulerPort: 31998 # NodePort for HTTP
monitorPort: 31993 # Monitoring port
monitorTargetPort: 9395
labels: {}
annotations: {}
@ -130,10 +147,13 @@ devicePlugin:
monitorimage: "projecthami/hami"
monitorctrPath: /usr/local/vgpu/containers
imagePullPolicy: IfNotPresent
deviceSplitCount: 40
deviceSplitCount: 10
deviceMemoryScaling: 1
deviceCoreScaling: 1
# The runtime class name to be used by the device plugin, and added to the pod.spec.runtimeClassName of applications utilizing NVIDIA GPUs
runtimeClassName: ""
# Whether to create the RuntimeClass; it is created only when runtimeClassName is also set, and is named after it
createRuntimeClass: false
migStrategy: "none"
disablecorelimit: "false"
passDeviceSpecsEnabled: false
@ -141,7 +161,10 @@ devicePlugin:
- -v=4
service:
type: NodePort # Default type is NodePort, can be changed to ClusterIP
httpPort: 31992
labels: {}
annotations: {}
pluginPath: /var/lib/kubelet/device-plugins
libPath: /usr/local/vgpu
@ -181,10 +204,18 @@ devicePlugin:
# memory: 100Mi
devices:
enflame:
enabled: false
customresources:
- enflame.com/vgcu
- enflame.com/vgcu-percentage
mthreads:
enabled: false
customresources:
- mthreads.com/vgpu
nvidia:
gpuCorePolicy: default
libCudaLogLevel: 1
ascend:
enabled: false
image: ""
@ -196,6 +227,8 @@ devices:
customresources:
- huawei.com/Ascend910A
- huawei.com/Ascend910A-memory
- huawei.com/Ascend910B2
- huawei.com/Ascend910B2-memory
- huawei.com/Ascend910B
- huawei.com/Ascend910B-memory
- huawei.com/Ascend910B4

Loading…
Cancel
Save