upgrade v2.6.0

main
czd 2 months ago
parent 5b017e0e74
commit cfe36488a7

@ -42,8 +42,8 @@
在master节点上运行 在master节点上运行
``` ```
# nerdctl -nk8s.io load -i hami251.tar # nerdctl -nk8s.io load -i hami260.tar
# nerdctl -nk8s.io push sealos.hub:5000/projecthami/hami:v2.5.1 # nerdctl -nk8s.io push sealos.hub:5000/projecthami/hami:v2.6.0
# nerdctl -nk8s.io load -i scheduler1-28-9.tar # nerdctl -nk8s.io load -i scheduler1-28-9.tar
# nerdctl -nk8s.io push sealos.hub:5000/google_containers/kube-scheduler:v1.28.9 # nerdctl -nk8s.io push sealos.hub:5000/google_containers/kube-scheduler:v1.28.9
# nerdctl -nk8s.io load -i kube-webhook-certgen.tar # nerdctl -nk8s.io load -i kube-webhook-certgen.tar

Binary file not shown.

@ -1,15 +1,15 @@
apiVersion: v2 apiVersion: v2
appVersion: 2.5.1 appVersion: 2.6.0
description: Heterogeneous AI Computing Virtualization Middleware description: Heterogeneous AI Computing Virtualization Middleware
keywords: keywords:
- vgpu - vgpu
- gpu - gpu
kubeVersion: '>= 1.18.0' kubeVersion: '>= 1.18.0-0'
maintainers: maintainers:
- email: limengxuan@4paradigm.com - email: archlitchi@gmail.com
name: limengxuan name: limengxuan
- email: xiaozhang0210@hotmail.com - email: xiaozhang0210@hotmail.com
name: zhangxiao name: zhangxiao
name: hami name: hami
type: application type: application
version: 2.5.1 version: 2.6.0

@ -23,6 +23,17 @@ If release name contains chart name it will be used as a full name.
{{- end -}} {{- end -}}
{{- end -}} {{- end -}}
{{/*
Namespace that the chart's namespaced resources are rendered into.
Yields .Values.namespaceOverride when it is non-empty and falls back to
.Release.Namespace otherwise (useful when this chart is bundled into a combined chart).
*/}}
{{- define "hami-vgpu.namespace" -}}
{{- default .Release.Namespace .Values.namespaceOverride -}}
{{- end -}}
{{/* {{/*
The app name for Scheduler The app name for Scheduler
*/}} */}}

@ -2,6 +2,7 @@ apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
name: {{ include "hami-vgpu.device-plugin" . }} name: {{ include "hami-vgpu.device-plugin" . }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels: labels:
app.kubernetes.io/component: hami-device-plugin app.kubernetes.io/component: hami-device-plugin
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}

@ -2,6 +2,7 @@ apiVersion: apps/v1
kind: DaemonSet kind: DaemonSet
metadata: metadata:
name: {{ include "hami-vgpu.device-plugin" . }} name: {{ include "hami-vgpu.device-plugin" . }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels: labels:
app.kubernetes.io/component: hami-device-plugin app.kubernetes.io/component: hami-device-plugin
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -26,8 +27,11 @@ spec:
app.kubernetes.io/component: hami-device-plugin app.kubernetes.io/component: hami-device-plugin
hami.io/webhook: ignore hami.io/webhook: ignore
{{- include "hami-vgpu.selectorLabels" . | nindent 8 }} {{- include "hami-vgpu.selectorLabels" . | nindent 8 }}
annotations:
checksum/hami-device-plugin-config: {{ include (print $.Template.BasePath "/device-plugin/configmap.yaml") . | sha256sum }}
checksum/hami-scheduler-device-config: {{ include (print $.Template.BasePath "/scheduler/device-configmap.yaml") . | sha256sum }}
{{- if .Values.devicePlugin.podAnnotations }} {{- if .Values.devicePlugin.podAnnotations }}
annotations: {{ toYaml .Values.devicePlugin.podAnnotations | nindent 8 }} {{- toYaml .Values.devicePlugin.podAnnotations | nindent 8 }}
{{- end }} {{- end }}
spec: spec:
{{- if .Values.devicePlugin.runtimeClassName }} {{- if .Values.devicePlugin.runtimeClassName }}
@ -92,7 +96,11 @@ spec:
- name: vgpu-monitor - name: vgpu-monitor
image: {{ .Values.devicePlugin.image }}:{{ .Values.version }} image: {{ .Values.devicePlugin.image }}:{{ .Values.version }}
imagePullPolicy: {{ .Values.devicePlugin.imagePullPolicy | quote }} imagePullPolicy: {{ .Values.devicePlugin.imagePullPolicy | quote }}
command: ["vGPUmonitor"] command:
- "vGPUmonitor"
{{- range .Values.devicePlugin.extraArgs }}
- {{ . }}
{{- end }}
securityContext: securityContext:
allowPrivilegeEscalation: false allowPrivilegeEscalation: false
capabilities: capabilities:

@ -13,4 +13,4 @@ roleRef:
subjects: subjects:
- kind: ServiceAccount - kind: ServiceAccount
name: {{ include "hami-vgpu.device-plugin" . }} name: {{ include "hami-vgpu.device-plugin" . }}
namespace: {{ .Release.Namespace | quote }} namespace: {{ include "hami-vgpu.namespace" . }}

@ -2,22 +2,26 @@ apiVersion: v1
kind: Service kind: Service
metadata: metadata:
name: {{ include "hami-vgpu.device-plugin" . }}-monitor name: {{ include "hami-vgpu.device-plugin" . }}-monitor
namespace: {{ include "hami-vgpu.namespace" . }}
labels: labels:
app.kubernetes.io/component: hami-device-plugin app.kubernetes.io/component: hami-device-plugin
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}
{{- if .Values.scheduler.service.labels }} {{- if .Values.devicePlugin.service.labels }} # Use devicePlugin instead of scheduler
{{ toYaml .Values.scheduler.service.labels | indent 4 }} {{ toYaml .Values.devicePlugin.service.labels | indent 4 }}
{{- end }} {{- end }}
{{- if .Values.scheduler.service.annotations }} {{- if .Values.devicePlugin.service.annotations }} # Use devicePlugin instead of scheduler
annotations: {{ toYaml .Values.scheduler.service.annotations | nindent 4 }} annotations: {{ toYaml .Values.devicePlugin.service.annotations | nindent 4 }}
{{- end }} {{- end }}
spec: spec:
externalTrafficPolicy: Local type: {{ .Values.devicePlugin.service.type | default "NodePort" }} # Default type is NodePort
selector:
app.kubernetes.io/component: hami-device-plugin
type: NodePort
ports: ports:
- name: monitorport - name: monitorport
port: {{ .Values.devicePlugin.service.httpPort }} port: {{ .Values.devicePlugin.service.httpPort | default 31992 }} # Default HTTP port is 31992
targetPort: 9394 targetPort: 9394
nodePort: {{ .Values.devicePlugin.service.httpPort }} {{- if eq (.Values.devicePlugin.service.type | default "NodePort") "NodePort" }} # If type is NodePort, set nodePort
nodePort: {{ .Values.devicePlugin.service.httpPort | default 31992 }}
{{- end }}
protocol: TCP
selector:
app.kubernetes.io/component: hami-device-plugin
{{- include "hami-vgpu.selectorLabels" . | nindent 4 }}

@ -2,7 +2,7 @@ apiVersion: v1
kind: ServiceAccount kind: ServiceAccount
metadata: metadata:
name: {{ include "hami-vgpu.device-plugin" . }} name: {{ include "hami-vgpu.device-plugin" . }}
namespace: {{ .Release.Namespace | quote }} namespace: {{ include "hami-vgpu.namespace" . }}
labels: labels:
app.kubernetes.io/component: "hami-device-plugin" app.kubernetes.io/component: "hami-device-plugin"
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}

@ -0,0 +1,9 @@
{{- /*
Optional RuntimeClass for the device plugin.
Rendered only when devicePlugin.createRuntimeClass is truthy AND
devicePlugin.runtimeClassName is non-empty; the class takes its name from
devicePlugin.runtimeClassName.
*/ -}}
{{- if and .Values.devicePlugin.createRuntimeClass .Values.devicePlugin.runtimeClassName }}
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  # Quoted so a name that looks like a number or boolean is still emitted as a string.
  name: {{ .Values.devicePlugin.runtimeClassName | quote }}
  annotations:
    # Runs as a Helm hook before install/upgrade of the chart's regular resources.
    "helm.sh/hook": pre-install,pre-upgrade
# `handler` is a top-level RuntimeClass field (a sibling of metadata, not nested under it).
handler: nvidia
{{- end }}

@ -0,0 +1,29 @@
{{- /*
cert-manager based TLS for the scheduler webhook.
Rendered only when scheduler.certManager.enabled is truthy. Produces:
  1. a Certificate whose key pair is stored in the secret named by
     "hami-vgpu.scheduler.tls", valid for the scheduler Service DNS names;
  2. the self-signed Issuer that the Certificate references.
NOTE(review): the kube-webhook-certgen Job in this chart also passes 127.0.0.1 and,
when admissionWebhook.customURL is enabled, the custom host as certificate hosts.
This Certificate lists only the Service DNS names - confirm whether customURL
deployments need additional SANs here.
*/ -}}
{{- if .Values.scheduler.certManager.enabled }}
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: {{ include "hami-vgpu.scheduler" . }}-serving-cert
  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.labels" . | nindent 4 }}
spec:
  # In-cluster DNS names of the scheduler Service (short and fully qualified form).
  dnsNames:
    - {{ include "hami-vgpu.scheduler" . }}.{{ include "hami-vgpu.namespace" . }}.svc
    - {{ include "hami-vgpu.scheduler" . }}.{{ include "hami-vgpu.namespace" . }}.svc.cluster.local
  issuerRef:
    kind: Issuer
    name: {{ include "hami-vgpu.scheduler" . }}-selfsigned-issuer
  secretName: {{ include "hami-vgpu.scheduler.tls" . }}
---
apiVersion: cert-manager.io/v1
kind: Issuer
metadata:
  name: {{ include "hami-vgpu.scheduler" . }}-selfsigned-issuer
  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.labels" . | nindent 4 }}
spec:
  # Self-signed issuer: no external CA is involved.
  selfSigned: {}
{{- end }}

@ -3,6 +3,7 @@ apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
name: {{ include "hami-vgpu.scheduler" . }} name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels: labels:
app.kubernetes.io/component: hami-scheduler app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -40,6 +41,14 @@ data:
}, },
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if .Values.devices.enflame.enabled }}
{{- range .Values.devices.enflame.customresources }}
{
"name": "{{ . }}",
"ignoredByScheduler": true
},
{{- end }}
{{- end }}
{ {
"name": "{{ .Values.resourceName }}", "name": "{{ .Values.resourceName }}",
"ignoredByScheduler": true "ignoredByScheduler": true
@ -79,6 +88,22 @@ data:
{ {
"name": "{{ .Values.iluvatarResourceName }}", "name": "{{ .Values.iluvatarResourceName }}",
"ignoredByScheduler": true "ignoredByScheduler": true
},
{
"name": "metax-tech.com/gpu",
"ignoredByScheduler": true
},
{
"name": "{{ .Values.metaxResourceName }}",
"ignoredByScheduler": true
},
{
"name": "{{ .Values.metaxResourceCore }}",
"ignoredByScheduler": true
},
{
"name": "{{ .Values.metaxResourceMem }}",
"ignoredByScheduler": true
} }
], ],
"ignoreable": false "ignoreable": false

@ -3,6 +3,7 @@ apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
name: {{ include "hami-vgpu.scheduler" . }}-newversion name: {{ include "hami-vgpu.scheduler" . }}-newversion
namespace: {{ include "hami-vgpu.namespace" . }}
labels: labels:
app.kubernetes.io/component: hami-scheduler app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -49,6 +50,14 @@ data:
ignoredByScheduler: true ignoredByScheduler: true
- name: {{ .Values.iluvatarResourceName }} - name: {{ .Values.iluvatarResourceName }}
ignoredByScheduler: true ignoredByScheduler: true
- name: "metax-tech.com/gpu"
ignoredByScheduler: true
- name: {{ .Values.metaxResourceName }}
ignoredByScheduler: true
- name: {{ .Values.metaxResourceCore }}
ignoredByScheduler: true
- name: {{ .Values.metaxResourceMem }}
ignoredByScheduler: true
{{- if .Values.devices.ascend.enabled }} {{- if .Values.devices.ascend.enabled }}
{{- range .Values.devices.ascend.customresources }} {{- range .Values.devices.ascend.customresources }}
- name: {{ . }} - name: {{ . }}
@ -61,4 +70,10 @@ data:
ignoredByScheduler: true ignoredByScheduler: true
{{- end }} {{- end }}
{{- end }} {{- end }}
{{- if .Values.devices.enflame.enabled }}
{{- range .Values.devices.enflame.customresources }}
- name: {{ . }}
ignoredByScheduler: true
{{- end }}
{{- end }}
{{- end }} {{- end }}

@ -2,6 +2,7 @@ apiVersion: apps/v1
kind: Deployment kind: Deployment
metadata: metadata:
name: {{ include "hami-vgpu.scheduler" . }} name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels: labels:
app.kubernetes.io/component: hami-scheduler app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -12,7 +13,11 @@ metadata:
annotations: {{ toYaml .Values.global.annotations | nindent 4}} annotations: {{ toYaml .Values.global.annotations | nindent 4}}
{{- end }} {{- end }}
spec: spec:
{{- if .Values.scheduler.leaderElect }}
replicas: {{ .Values.scheduler.replicas }}
{{- else }}
replicas: 1 replicas: 1
{{- end }}
selector: selector:
matchLabels: matchLabels:
app.kubernetes.io/component: hami-scheduler app.kubernetes.io/component: hami-scheduler
@ -23,8 +28,15 @@ spec:
app.kubernetes.io/component: hami-scheduler app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.selectorLabels" . | nindent 8 }} {{- include "hami-vgpu.selectorLabels" . | nindent 8 }}
hami.io/webhook: ignore hami.io/webhook: ignore
annotations:
{{- if ge (regexReplaceAll "[^0-9]" .Capabilities.KubeVersion.Minor "" | int) 22 }}
checksum/hami-scheduler-newversion-config: {{ include (print $.Template.BasePath "/scheduler/configmapnew.yaml") . | sha256sum }}
{{- else }}
checksum/hami-scheduler-config: {{ include (print $.Template.BasePath "/scheduler/configmap.yaml") . | sha256sum }}
{{- end }}
checksum/hami-scheduler-device-config: {{ include (print $.Template.BasePath "/scheduler/device-configmap.yaml") . | sha256sum }}
{{- if .Values.scheduler.podAnnotations }} {{- if .Values.scheduler.podAnnotations }}
annotations: {{ toYaml .Values.scheduler.podAnnotations | nindent 8 }} {{- toYaml .Values.scheduler.podAnnotations | nindent 8 }}
{{- end }} {{- end }}
spec: spec:
{{- include "hami-vgpu.imagePullSecrets" . | nindent 6}} {{- include "hami-vgpu.imagePullSecrets" . | nindent 6}}
@ -49,7 +61,7 @@ spec:
{{- end }} {{- end }}
- --leader-elect={{ .Values.scheduler.leaderElect }} - --leader-elect={{ .Values.scheduler.leaderElect }}
- --leader-elect-resource-name={{ .Values.schedulerName }} - --leader-elect-resource-name={{ .Values.schedulerName }}
- --leader-elect-resource-namespace={{ .Release.Namespace }} - --leader-elect-resource-namespace={{ include "hami-vgpu.namespace" . }}
resources: resources:
{{- toYaml .Values.scheduler.kubeScheduler.resources | nindent 12 }} {{- toYaml .Values.scheduler.kubeScheduler.resources | nindent 12 }}
volumeMounts: volumeMounts:

@ -2,6 +2,7 @@ apiVersion: v1
kind: ConfigMap kind: ConfigMap
metadata: metadata:
name: {{ include "hami-vgpu.scheduler" . }}-device name: {{ include "hami-vgpu.scheduler" . }}-device
namespace: {{ include "hami-vgpu.namespace" . }}
labels: labels:
app.kubernetes.io/component: hami-scheduler app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -23,6 +24,9 @@ data:
deviceSplitCount: {{ .Values.devicePlugin.deviceSplitCount }} deviceSplitCount: {{ .Values.devicePlugin.deviceSplitCount }}
deviceMemoryScaling: {{ .Values.devicePlugin.deviceMemoryScaling }} deviceMemoryScaling: {{ .Values.devicePlugin.deviceMemoryScaling }}
deviceCoreScaling: {{ .Values.devicePlugin.deviceCoreScaling }} deviceCoreScaling: {{ .Values.devicePlugin.deviceCoreScaling }}
gpuCorePolicy: {{ .Values.devices.nvidia.gpuCorePolicy }}
libCudaLogLevel: {{ .Values.devices.nvidia.libCudaLogLevel }}
runtimeClassName: "{{ .Values.devicePlugin.runtimeClassName }}"
knownMigGeometries: knownMigGeometries:
- models: [ "A30" ] - models: [ "A30" ]
allowedGeometries: allowedGeometries:
@ -90,6 +94,12 @@ data:
resourceCoreName: {{ .Values.dcuResourceCores }} resourceCoreName: {{ .Values.dcuResourceCores }}
metax: metax:
resourceCountName: "metax-tech.com/gpu" resourceCountName: "metax-tech.com/gpu"
resourceVCountName: {{ .Values.metaxResourceName }}
resourceVMemoryName: {{ .Values.metaxResourceMem }}
resourceVCoreName: {{ .Values.metaxResourceCore }}
enflame:
resourceCountName: "enflame.com/vgcu"
resourcePercentageName: "enflame.com/vgcu-percentage"
mthreads: mthreads:
resourceCountName: "mthreads.com/vgpu" resourceCountName: "mthreads.com/vgpu"
resourceMemoryName: "mthreads.com/sgpu-memory" resourceMemoryName: "mthreads.com/sgpu-memory"
@ -119,6 +129,27 @@ data:
- name: vir16 - name: vir16
memory: 17476 memory: 17476
aiCore: 16 aiCore: 16
- chipName: 910B2
commonWord: Ascend910B2
resourceName: huawei.com/Ascend910B2
resourceMemoryName: huawei.com/Ascend910B2-memory
memoryAllocatable: 65536
memoryCapacity: 65536
aiCore: 24
aiCPU: 6
templates:
- name: vir03_1c_8g
memory: 8192
aiCore: 3
aiCPU: 1
- name: vir06_1c_16g
memory: 16384
aiCore: 6
aiCPU: 1
- name: vir12_3c_32g
memory: 32768
aiCore: 12
aiCPU: 3
- chipName: 910B3 - chipName: 910B3
commonWord: Ascend910B commonWord: Ascend910B
resourceName: huawei.com/Ascend910B resourceName: huawei.com/Ascend910B

@ -1,3 +1,4 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1 apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole kind: ClusterRole
metadata: metadata:
@ -24,3 +25,4 @@ rules:
resourceNames: resourceNames:
- {{ include "hami-vgpu.fullname" . }}-admission - {{ include "hami-vgpu.fullname" . }}-admission
{{- end }} {{- end }}
{{- end }}

@ -1,3 +1,4 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1 apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding kind: ClusterRoleBinding
metadata: metadata:
@ -15,4 +16,5 @@ roleRef:
subjects: subjects:
- kind: ServiceAccount - kind: ServiceAccount
name: {{ include "hami-vgpu.fullname" . }}-admission name: {{ include "hami-vgpu.fullname" . }}-admission
namespace: {{ .Release.Namespace | quote }} namespace: {{ include "hami-vgpu.namespace" . }}
{{- end }}

@ -1,7 +1,9 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: batch/v1 apiVersion: batch/v1
kind: Job kind: Job
metadata: metadata:
name: {{ include "hami-vgpu.fullname" . }}-admission-create name: {{ include "hami-vgpu.fullname" . }}-admission-create
namespace: {{ include "hami-vgpu.namespace" . }}
annotations: annotations:
"helm.sh/hook": pre-install,pre-upgrade "helm.sh/hook": pre-install,pre-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -41,11 +43,11 @@ spec:
- --cert-name=tls.crt - --cert-name=tls.crt
- --key-name=tls.key - --key-name=tls.key
{{- if .Values.scheduler.admissionWebhook.customURL.enabled }} {{- if .Values.scheduler.admissionWebhook.customURL.enabled }}
- --host={{ printf "%s.%s.svc,127.0.0.1,%s" (include "hami-vgpu.scheduler" .) .Release.Namespace .Values.scheduler.admissionWebhook.customURL.host}} - --host={{ printf "%s.%s.svc,127.0.0.1,%s" (include "hami-vgpu.scheduler" .) (include "hami-vgpu.namespace" .) .Values.scheduler.admissionWebhook.customURL.host}}
{{- else }} {{- else }}
- --host={{ printf "%s.%s.svc,127.0.0.1" (include "hami-vgpu.scheduler" .) .Release.Namespace }} - --host={{ printf "%s.%s.svc,127.0.0.1" (include "hami-vgpu.scheduler" .) (include "hami-vgpu.namespace" .) }}
{{- end }} {{- end }}
- --namespace={{ .Release.Namespace }} - --namespace={{ include "hami-vgpu.namespace" . }}
- --secret-name={{ include "hami-vgpu.scheduler.tls" . }} - --secret-name={{ include "hami-vgpu.scheduler.tls" . }}
restartPolicy: OnFailure restartPolicy: OnFailure
serviceAccountName: {{ include "hami-vgpu.fullname" . }}-admission serviceAccountName: {{ include "hami-vgpu.fullname" . }}-admission
@ -58,3 +60,4 @@ spec:
securityContext: securityContext:
runAsNonRoot: true runAsNonRoot: true
runAsUser: {{ .Values.scheduler.patch.runAsUser }} runAsUser: {{ .Values.scheduler.patch.runAsUser }}
{{- end }}

@ -1,7 +1,9 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: batch/v1 apiVersion: batch/v1
kind: Job kind: Job
metadata: metadata:
name: {{ include "hami-vgpu.fullname" . }}-admission-patch name: {{ include "hami-vgpu.fullname" . }}-admission-patch
namespace: {{ include "hami-vgpu.namespace" . }}
annotations: annotations:
"helm.sh/hook": post-install,post-upgrade "helm.sh/hook": post-install,post-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -39,7 +41,7 @@ spec:
args: args:
- patch - patch
- --webhook-name={{ include "hami-vgpu.scheduler.webhook" . }} - --webhook-name={{ include "hami-vgpu.scheduler.webhook" . }}
- --namespace={{ .Release.Namespace }} - --namespace={{ include "hami-vgpu.namespace" . }}
- --patch-validating=false - --patch-validating=false
- --secret-name={{ include "hami-vgpu.scheduler.tls" . }} - --secret-name={{ include "hami-vgpu.scheduler.tls" . }}
restartPolicy: OnFailure restartPolicy: OnFailure
@ -53,3 +55,4 @@ spec:
securityContext: securityContext:
runAsNonRoot: true runAsNonRoot: true
runAsUser: {{ .Values.scheduler.patch.runAsUser }} runAsUser: {{ .Values.scheduler.patch.runAsUser }}
{{- end }}

@ -1,3 +1,4 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
{{- if .Values.podSecurityPolicy.enabled }} {{- if .Values.podSecurityPolicy.enabled }}
apiVersion: policy/v1beta1 apiVersion: policy/v1beta1
kind: PodSecurityPolicy kind: PodSecurityPolicy
@ -34,3 +35,4 @@ spec:
- secret - secret
- downwardAPI - downwardAPI
{{- end }} {{- end }}
{{- end }}

@ -1,7 +1,9 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1 apiVersion: rbac.authorization.k8s.io/v1
kind: Role kind: Role
metadata: metadata:
name: {{ include "hami-vgpu.fullname" . }}-admission name: {{ include "hami-vgpu.fullname" . }}-admission
namespace: {{ include "hami-vgpu.namespace" . }}
annotations: annotations:
"helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -16,3 +18,4 @@ rules:
verbs: verbs:
- get - get
- create - create
{{- end }}

@ -1,7 +1,9 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: rbac.authorization.k8s.io/v1 apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding kind: RoleBinding
metadata: metadata:
name: {{ include "hami-vgpu.fullname" . }}-admission name: {{ include "hami-vgpu.fullname" . }}-admission
namespace: {{ include "hami-vgpu.namespace" . }}
annotations: annotations:
"helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -15,4 +17,5 @@ roleRef:
subjects: subjects:
- kind: ServiceAccount - kind: ServiceAccount
name: {{ include "hami-vgpu.fullname" . }}-admission name: {{ include "hami-vgpu.fullname" . }}-admission
namespace: {{ .Release.Namespace | quote }} namespace: {{ include "hami-vgpu.namespace" . }}
{{- end }}

@ -1,10 +1,13 @@
{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
apiVersion: v1 apiVersion: v1
kind: ServiceAccount kind: ServiceAccount
metadata: metadata:
name: {{ include "hami-vgpu.fullname" . }}-admission name: {{ include "hami-vgpu.fullname" . }}-admission
namespace: {{ include "hami-vgpu.namespace" . }}
annotations: annotations:
"helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
"helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
labels: labels:
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}
app.kubernetes.io/component: admission-webhook app.kubernetes.io/component: admission-webhook
{{- end }}

@ -12,4 +12,4 @@ roleRef:
subjects: subjects:
- kind: ServiceAccount - kind: ServiceAccount
name: {{ include "hami-vgpu.scheduler" . }} name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ .Release.Namespace | quote }} namespace: {{ include "hami-vgpu.namespace" . }}

@ -2,6 +2,7 @@ apiVersion: v1
kind: Service kind: Service
metadata: metadata:
name: {{ include "hami-vgpu.scheduler" . }} name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ include "hami-vgpu.namespace" . }}
labels: labels:
app.kubernetes.io/component: hami-scheduler app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -12,19 +13,22 @@ metadata:
annotations: {{ toYaml .Values.scheduler.service.annotations | nindent 4 }} annotations: {{ toYaml .Values.scheduler.service.annotations | nindent 4 }}
{{- end }} {{- end }}
spec: spec:
type: NodePort type: {{ .Values.scheduler.service.type | default "NodePort" }} # Default type is NodePort
ports: ports:
- name: http - name: http
port: {{ .Values.scheduler.service.httpPort }} port: {{ .Values.scheduler.service.httpPort | default 443 }} # Default HTTP port is 443
targetPort: 443 targetPort: {{ .Values.scheduler.service.httpTargetPort | default 443 }}
nodePort: {{ .Values.scheduler.service.schedulerPort }} {{- if eq (.Values.scheduler.service.type | default "NodePort") "NodePort" }} # If type is NodePort, set nodePort
nodePort: {{ .Values.scheduler.service.schedulerPort | default 31998 }}
{{- end }}
protocol: TCP protocol: TCP
- name: monitor - name: monitor
port: {{ .Values.scheduler.service.monitorPort }} port: {{ .Values.scheduler.service.monitorPort | default 31993 }} # Default monitoring port is 31993
targetPort: {{ (split ":" (printf "%s" .Values.scheduler.metricsBindAddress))._1 }} targetPort: {{ .Values.scheduler.service.monitorTargetPort | default 9395 }}
nodePort: {{ .Values.scheduler.service.monitorPort }} {{- if eq (.Values.scheduler.service.type | default "NodePort") "NodePort" }} # If type is NodePort, set nodePort
nodePort: {{ .Values.scheduler.service.monitorPort | default 31993 }}
{{- end }}
protocol: TCP protocol: TCP
selector: selector:
app.kubernetes.io/component: hami-scheduler app.kubernetes.io/component: hami-scheduler
{{- include "hami-vgpu.selectorLabels" . | nindent 4 }} {{- include "hami-vgpu.selectorLabels" . | nindent 4 }}

@ -2,7 +2,7 @@ apiVersion: v1
kind: ServiceAccount kind: ServiceAccount
metadata: metadata:
name: {{ include "hami-vgpu.scheduler" . }} name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ .Release.Namespace | quote }} namespace: {{ include "hami-vgpu.namespace" . }}
labels: labels:
app.kubernetes.io/component: "hami-scheduler" app.kubernetes.io/component: "hami-scheduler"
{{- include "hami-vgpu.labels" . | nindent 4 }} {{- include "hami-vgpu.labels" . | nindent 4 }}

@ -1,6 +1,10 @@
apiVersion: admissionregistration.k8s.io/v1 apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration kind: MutatingWebhookConfiguration
metadata: metadata:
{{- if .Values.scheduler.certManager.enabled }}
annotations:
cert-manager.io/inject-ca-from: {{ include "hami-vgpu.namespace" . }}/{{ include "hami-vgpu.scheduler" . }}-serving-cert
{{- end }}
name: {{ include "hami-vgpu.scheduler.webhook" . }} name: {{ include "hami-vgpu.scheduler.webhook" . }}
webhooks: webhooks:
- admissionReviewVersions: - admissionReviewVersions:
@ -11,7 +15,7 @@ webhooks:
{{- else }} {{- else }}
service: service:
name: {{ include "hami-vgpu.scheduler" . }} name: {{ include "hami-vgpu.scheduler" . }}
namespace: {{ .Release.Namespace }} namespace: {{ include "hami-vgpu.namespace" . }}
path: /webhook path: /webhook
port: {{ .Values.scheduler.service.httpPort }} port: {{ .Values.scheduler.service.httpPort }}
{{- end }} {{- end }}

@ -2,8 +2,9 @@
nameOverride: "" nameOverride: ""
fullnameOverride: "" fullnameOverride: ""
namespaceOverride: ""
imagePullSecrets: [ ] imagePullSecrets: [ ]
version: "v2.5.1" version: "v2.6.0"
#Nvidia GPU Parameters #Nvidia GPU Parameters
resourceName: "nvidia.com/gpu" resourceName: "nvidia.com/gpu"
@ -27,6 +28,11 @@ iluvatarResourceName: "iluvatar.ai/vgpu"
iluvatarResourceMem: "iluvatar.ai/vcuda-memory" iluvatarResourceMem: "iluvatar.ai/vcuda-memory"
iluvatarResourceCore: "iluvatar.ai/vcuda-core" iluvatarResourceCore: "iluvatar.ai/vcuda-core"
#Metax SGPU Parameters
metaxResourceName: "metax-tech.com/sgpu"
metaxResourceCore: "metax-tech.com/vcore"
metaxResourceMem: "metax-tech.com/vmemory"
schedulerName: "hami-scheduler" schedulerName: "hami-scheduler"
podSecurityPolicy: podSecurityPolicy:
@ -55,6 +61,8 @@ scheduler:
metricsBindAddress: ":9395" metricsBindAddress: ":9395"
livenessProbe: false livenessProbe: false
leaderElect: true leaderElect: true
# replicas is honored only when leaderElect is true; otherwise the scheduler is deployed with a single replica.
replicas: 1
kubeScheduler: kubeScheduler:
# @param enabled indicate whether to run kube-scheduler container in the scheduler pod, it's true by default. # @param enabled indicate whether to run kube-scheduler container in the scheduler pod, it's true by default.
enabled: true enabled: true
@ -109,7 +117,14 @@ scheduler:
# - istio-system # - istio-system
reinvocationPolicy: Never reinvocationPolicy: Never
failurePolicy: Ignore failurePolicy: Ignore
## TLS Certificate Option 1: Use cert-manager to generate self-signed certificate.
## If enabled, it always takes precedence over option 2.
certManager:
enabled: false
## TLS Certificate Option 2: Use kube-webhook-certgen to generate self-signed certificate.
## If true and certManager.enabled is false, Helm will automatically create a self-signed cert and secret for you.
patch: patch:
enabled: true
image: docker.io/jettech/kube-webhook-certgen:v1.5.2 image: docker.io/jettech/kube-webhook-certgen:v1.5.2
imageNew: liangjw/kube-webhook-certgen:v1.1.1 imageNew: liangjw/kube-webhook-certgen:v1.1.1
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
@ -119,9 +134,11 @@ scheduler:
tolerations: [] tolerations: []
runAsUser: 2000 runAsUser: 2000
service: service:
httpPort: 443 type: NodePort # Default type is NodePort, can be changed to ClusterIP
schedulerPort: 31998 httpPort: 443 # HTTP port
monitorPort: 31993 schedulerPort: 31998 # NodePort for HTTP
monitorPort: 31993 # Monitoring port
monitorTargetPort: 9395
labels: {} labels: {}
annotations: {} annotations: {}
@ -130,10 +147,13 @@ devicePlugin:
monitorimage: "projecthami/hami" monitorimage: "projecthami/hami"
monitorctrPath: /usr/local/vgpu/containers monitorctrPath: /usr/local/vgpu/containers
imagePullPolicy: IfNotPresent imagePullPolicy: IfNotPresent
deviceSplitCount: 40 deviceSplitCount: 10
deviceMemoryScaling: 1 deviceMemoryScaling: 1
deviceCoreScaling: 1 deviceCoreScaling: 1
# The runtime class name to be used by the device plugin, and added to the pod.spec.runtimeClassName of applications utilizing NVIDIA GPUs
runtimeClassName: "" runtimeClassName: ""
# Whether to create the RuntimeClass; its name is taken from runtimeClassName, so it is only created when runtimeClassName is set
createRuntimeClass: false
migStrategy: "none" migStrategy: "none"
disablecorelimit: "false" disablecorelimit: "false"
passDeviceSpecsEnabled: false passDeviceSpecsEnabled: false
@ -141,7 +161,10 @@ devicePlugin:
- -v=4 - -v=4
service: service:
type: NodePort # Default type is NodePort, can be changed to ClusterIP
httpPort: 31992 httpPort: 31992
labels: {}
annotations: {}
pluginPath: /var/lib/kubelet/device-plugins pluginPath: /var/lib/kubelet/device-plugins
libPath: /usr/local/vgpu libPath: /usr/local/vgpu
@ -181,10 +204,18 @@ devicePlugin:
# memory: 100Mi # memory: 100Mi
devices: devices:
enflame:
enabled: false
customresources:
- enflame.com/vgcu
- enflame.com/vgcu-percentage
mthreads: mthreads:
enabled: false enabled: false
customresources: customresources:
- mthreads.com/vgpu - mthreads.com/vgpu
nvidia:
gpuCorePolicy: default
libCudaLogLevel: 1
ascend: ascend:
enabled: false enabled: false
image: "" image: ""
@ -196,6 +227,8 @@ devices:
customresources: customresources:
- huawei.com/Ascend910A - huawei.com/Ascend910A
- huawei.com/Ascend910A-memory - huawei.com/Ascend910A-memory
- huawei.com/Ascend910B2
- huawei.com/Ascend910B2-memory
- huawei.com/Ascend910B - huawei.com/Ascend910B
- huawei.com/Ascend910B-memory - huawei.com/Ascend910B-memory
- huawei.com/Ascend910B4 - huawei.com/Ascend910B4

Loading…
Cancel
Save