upgrade v2.6.0

2 months ago · cfe36488a7
parent 5b017e0e74
commit cfe36488a7
28 changed files with 252 additions and 45 deletions
--- a/README.md
+++ b/README.md
@ -42,8 +42,8 @@
 在master节点上运行：

 ```
-    # nerdctl -nk8s.io load -i hami251.tar
-    # nerdctl -nk8s.io push sealos.hub:5000/projecthami/hami:v2.5.1
+    # nerdctl -nk8s.io load -i hami260.tar
+    # nerdctl -nk8s.io push sealos.hub:5000/projecthami/hami:v2.6.0
    # nerdctl -nk8s.io load -i scheduler1-28-9.tar
    # nerdctl -nk8s.io push sealos.hub:5000/google_containers/kube-scheduler:v1.28.9
    # nerdctl -nk8s.io load -i kube-webhook-certgen.tar
--- a/hami260.tar
+++ b/hami260.tar
--- a/helm/hami/Chart.yaml
+++ b/helm/hami/Chart.yaml
@ -1,15 +1,15 @@
 apiVersion: v2
-appVersion: 2.5.1
+appVersion: 2.6.0
 description: Heterogeneous AI Computing Virtualization Middleware
 keywords:
 - vgpu
 - gpu
-kubeVersion: '>= 1.18.0'
+kubeVersion: '>= 1.18.0-0'
 maintainers:
- email: limengxuan@4paradigm.com
+- email: archlitchi@gmail.com
  name: limengxuan
 - email: xiaozhang0210@hotmail.com
  name: zhangxiao
 name: hami
 type: application
-version: 2.5.1
+version: 2.6.0
--- a/helm/hami/templates/_helpers.tpl
+++ b/helm/hami/templates/_helpers.tpl
@ -23,6 +23,17 @@ If release name contains chart name it will be used as a full name.
 {{- end -}}
 {{- end -}}

+{{/*
+Resolve the namespace used for this chart's resources.
+A non-empty .Values.namespaceOverride wins; otherwise .Release.Namespace is used,
+so a combined (umbrella) chart can deploy this chart into a namespace of its own.
+*/}}
+{{- define "hami-vgpu.namespace" -}}
+  {{- $override := .Values.namespaceOverride -}}
+  {{- $fallback := .Release.Namespace -}}
+  {{- default $fallback $override -}}
+{{- end -}}
+
 {{/*
 The app name for Scheduler
 */}}
--- a/helm/hami/templates/device-plugin/configmap.yaml
+++ b/helm/hami/templates/device-plugin/configmap.yaml
@ -2,6 +2,7 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
  name: {{ include "hami-vgpu.device-plugin" . }}
+  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-device-plugin
    {{- include "hami-vgpu.labels" . | nindent 4 }}
--- a/helm/hami/templates/device-plugin/daemonsetnvidia.yaml
+++ b/helm/hami/templates/device-plugin/daemonsetnvidia.yaml
@ -2,6 +2,7 @@ apiVersion: apps/v1
 kind: DaemonSet
 metadata:
  name: {{ include "hami-vgpu.device-plugin" . }}
+  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-device-plugin
    {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -26,8 +27,11 @@ spec:
        app.kubernetes.io/component: hami-device-plugin
        hami.io/webhook: ignore
        {{- include "hami-vgpu.selectorLabels" . | nindent 8 }}
+      annotations: 
+        checksum/hami-device-plugin-config: {{ include (print $.Template.BasePath "/device-plugin/configmap.yaml") . | sha256sum }}
+        checksum/hami-scheduler-device-config: {{ include (print $.Template.BasePath "/scheduler/device-configmap.yaml") . | sha256sum }}
      {{- if .Values.devicePlugin.podAnnotations }}
-      annotations: {{ toYaml .Values.devicePlugin.podAnnotations | nindent 8 }}
+        {{- toYaml .Values.devicePlugin.podAnnotations | nindent 8 }}
      {{- end }}
    spec:
      {{- if .Values.devicePlugin.runtimeClassName }}
@ -92,7 +96,11 @@ spec:
        - name: vgpu-monitor
          image: {{ .Values.devicePlugin.image }}:{{ .Values.version }}
          imagePullPolicy: {{ .Values.devicePlugin.imagePullPolicy | quote }}
-          command: ["vGPUmonitor"]
+          command:
+            - "vGPUmonitor"
+            {{- range .Values.devicePlugin.extraArgs }}
+            - {{ . }}
+            {{- end }}
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
--- a/helm/hami/templates/device-plugin/monitorrolebinding.yaml
+++ b/helm/hami/templates/device-plugin/monitorrolebinding.yaml
@ -13,4 +13,4 @@ roleRef:
 subjects:
  - kind: ServiceAccount
    name: {{ include "hami-vgpu.device-plugin" . }}
-    namespace: {{ .Release.Namespace | quote }}
+    namespace: {{ include "hami-vgpu.namespace" . | quote }}
--- a/helm/hami/templates/device-plugin/monitorservice.yaml
+++ b/helm/hami/templates/device-plugin/monitorservice.yaml
@ -2,22 +2,26 @@ apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "hami-vgpu.device-plugin" . }}-monitor
+  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-device-plugin
    {{- include "hami-vgpu.labels" . | nindent 4 }}
-    {{- if .Values.scheduler.service.labels }}
-    {{ toYaml .Values.scheduler.service.labels | indent 4 }}
+    {{- if .Values.devicePlugin.service.labels }}{{/* extra labels for the monitor Service are read from devicePlugin.service, not scheduler.service */}}
+    {{- toYaml .Values.devicePlugin.service.labels | nindent 4 }}
    {{- end }}
-  {{- if .Values.scheduler.service.annotations }}
-  annotations: {{ toYaml .Values.scheduler.service.annotations | nindent 4 }}
+  {{- if .Values.devicePlugin.service.annotations }}  # Use devicePlugin instead of scheduler
+  annotations: {{ toYaml .Values.devicePlugin.service.annotations | nindent 4 }}
  {{- end }}
 spec:
-  externalTrafficPolicy: Local
-  selector:
-    app.kubernetes.io/component: hami-device-plugin
-  type: NodePort
+  type: {{ .Values.devicePlugin.service.type | default "NodePort" }}  # Default type is NodePort
  ports:
    - name: monitorport
-      port: {{ .Values.devicePlugin.service.httpPort }}
+      port: {{ .Values.devicePlugin.service.httpPort | default 31992 }}  # Default HTTP port is 31992
      targetPort: 9394
-      nodePort: {{ .Values.devicePlugin.service.httpPort }}
+      {{- if eq (.Values.devicePlugin.service.type | default "NodePort") "NodePort" }}  # If type is NodePort, set nodePort
+      nodePort: {{ .Values.devicePlugin.service.httpPort | default 31992 }}
+      {{- end }}
+      protocol: TCP
+  selector:
+    app.kubernetes.io/component: hami-device-plugin
+    {{- include "hami-vgpu.selectorLabels" . | nindent 4 }}
--- a/helm/hami/templates/device-plugin/monitorserviceaccount.yaml
+++ b/helm/hami/templates/device-plugin/monitorserviceaccount.yaml
@ -2,7 +2,7 @@ apiVersion: v1
 kind: ServiceAccount
 metadata:
  name: {{ include "hami-vgpu.device-plugin" . }}
-  namespace: {{ .Release.Namespace | quote }}
+  namespace: {{ include "hami-vgpu.namespace" . | quote }}
  labels:
    app.kubernetes.io/component: "hami-device-plugin"
    {{- include "hami-vgpu.labels" . | nindent 4 }}
--- a/helm/hami/templates/device-plugin/runtime-class.yaml
+++ b/helm/hami/templates/device-plugin/runtime-class.yaml
@ -0,0 +1,9 @@
+{{- if and .Values.devicePlugin.createRuntimeClass .Values.devicePlugin.runtimeClassName }}
+apiVersion: node.k8s.io/v1
+kind: RuntimeClass
+metadata:
+  name: {{ .Values.devicePlugin.runtimeClassName | quote }}
+  annotations:
+    helm.sh/hook: pre-install,pre-upgrade
+handler: nvidia
+{{- end }}
--- a/helm/hami/templates/scheduler/certmanager.yaml
+++ b/helm/hami/templates/scheduler/certmanager.yaml
@ -0,0 +1,29 @@
+{{- if .Values.scheduler.certManager.enabled }}
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: {{ include "hami-vgpu.scheduler" . }}-serving-cert
+  namespace: {{ include "hami-vgpu.namespace" . }}
+  labels:
+    app.kubernetes.io/component: hami-scheduler
+    {{- include "hami-vgpu.labels" . | nindent 4 }}
+spec:
+  dnsNames:
+    - {{ include "hami-vgpu.scheduler" . }}.{{ include "hami-vgpu.namespace" . }}.svc
+    - {{ include "hami-vgpu.scheduler" . }}.{{ include "hami-vgpu.namespace" . }}.svc.cluster.local
+  issuerRef:
+    kind: Issuer
+    name: {{ include "hami-vgpu.scheduler" . }}-selfsigned-issuer
+  secretName: {{ include "hami-vgpu.scheduler.tls" . }}
+---
+apiVersion: cert-manager.io/v1
+kind: Issuer
+metadata:
+  name: {{ include "hami-vgpu.scheduler" . }}-selfsigned-issuer
+  namespace: {{ include "hami-vgpu.namespace" . }}
+  labels:
+    app.kubernetes.io/component: hami-scheduler
+    {{- include "hami-vgpu.labels" . | nindent 4 }}
+spec:
+  selfSigned: {}
+{{- end }}
--- a/helm/hami/templates/scheduler/configmap.yaml
+++ b/helm/hami/templates/scheduler/configmap.yaml
@ -3,6 +3,7 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
  name: {{ include "hami-vgpu.scheduler" . }}
+  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -40,6 +41,14 @@ data:
                    },
                    {{- end }}
                    {{- end }}
+                    {{- if .Values.devices.enflame.enabled }}
+                    {{- range .Values.devices.enflame.customresources }}
+                    {
+                      "name": "{{ . }}",
+                      "ignoredByScheduler": true
+                    },
+                    {{- end }}
+                    {{- end }}
                    {
                        "name": "{{ .Values.resourceName }}",
                        "ignoredByScheduler": true
@ -79,6 +88,22 @@ data:
                    {
                        "name": "{{ .Values.iluvatarResourceName }}",
                        "ignoredByScheduler": true
+                    },
+                    {
+                        "name": "metax-tech.com/gpu",
+                        "ignoredByScheduler": true
+                    },
+                    {
+                        "name": "{{ .Values.metaxResourceName }}",
+                        "ignoredByScheduler": true
+                    },
+                    {
+                        "name": "{{ .Values.metaxResourceCore }}",
+                        "ignoredByScheduler": true
+                    },
+                    {
+                        "name": "{{ .Values.metaxResourceMem }}",
+                        "ignoredByScheduler": true
                    }
                ],
                "ignoreable": false
--- a/helm/hami/templates/scheduler/configmapnew.yaml
+++ b/helm/hami/templates/scheduler/configmapnew.yaml
@ -3,6 +3,7 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
  name: {{ include "hami-vgpu.scheduler" . }}-newversion
+  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -49,6 +50,14 @@ data:
        ignoredByScheduler: true
      - name: {{ .Values.iluvatarResourceName }}
        ignoredByScheduler: true
+      - name: "metax-tech.com/gpu"
+        ignoredByScheduler: true
+      - name: {{ .Values.metaxResourceName }}
+        ignoredByScheduler: true
+      - name: {{ .Values.metaxResourceCore }}
+        ignoredByScheduler: true
+      - name: {{ .Values.metaxResourceMem }}
+        ignoredByScheduler: true
      {{- if .Values.devices.ascend.enabled }}
      {{- range .Values.devices.ascend.customresources }}
      - name: {{ . }}
@ -61,4 +70,10 @@ data:
        ignoredByScheduler: true
      {{- end }}
      {{- end }}
+      {{- if .Values.devices.enflame.enabled }}
+      {{- range .Values.devices.enflame.customresources }}
+      - name: {{ . }}
+        ignoredByScheduler: true
+      {{- end }}
+      {{- end }}
 {{- end }}
--- a/helm/hami/templates/scheduler/deployment.yaml
+++ b/helm/hami/templates/scheduler/deployment.yaml
@ -2,6 +2,7 @@ apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ include "hami-vgpu.scheduler" . }}
+  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -12,7 +13,11 @@ metadata:
  annotations: {{ toYaml .Values.global.annotations | nindent 4}}
  {{- end }}
 spec:
+  {{- if .Values.scheduler.leaderElect }}
+  replicas: {{ .Values.scheduler.replicas }}
+  {{- else }}
  replicas: 1
+  {{- end }}
  selector:
    matchLabels:
      app.kubernetes.io/component: hami-scheduler
@ -23,8 +28,15 @@ spec:
        app.kubernetes.io/component: hami-scheduler
        {{- include "hami-vgpu.selectorLabels" . | nindent 8 }}
        hami.io/webhook: ignore
+      annotations:
+        {{- if ge (regexReplaceAll "[^0-9]" .Capabilities.KubeVersion.Minor "" | int) 22 }}
+        checksum/hami-scheduler-newversion-config: {{ include (print $.Template.BasePath "/scheduler/configmapnew.yaml") . | sha256sum }}
+        {{- else }}
+        checksum/hami-scheduler-config: {{ include (print $.Template.BasePath "/scheduler/configmap.yaml") . | sha256sum }}
+        {{- end }}
+        checksum/hami-scheduler-device-config: {{ include (print $.Template.BasePath "/scheduler/device-configmap.yaml") . | sha256sum }}
      {{- if .Values.scheduler.podAnnotations }}
-      annotations: {{ toYaml .Values.scheduler.podAnnotations | nindent 8 }}
+        {{- toYaml .Values.scheduler.podAnnotations | nindent 8 }}
      {{- end }}
    spec:
      {{- include "hami-vgpu.imagePullSecrets" . | nindent 6}}
@ -49,7 +61,7 @@ spec:
            {{- end }}
            - --leader-elect={{ .Values.scheduler.leaderElect }}
            - --leader-elect-resource-name={{ .Values.schedulerName }}
-            - --leader-elect-resource-namespace={{ .Release.Namespace }}
+            - --leader-elect-resource-namespace={{ include "hami-vgpu.namespace" . }}
          resources:
          {{- toYaml .Values.scheduler.kubeScheduler.resources | nindent 12 }}
          volumeMounts:
--- a/helm/hami/templates/scheduler/device-configmap.yaml
+++ b/helm/hami/templates/scheduler/device-configmap.yaml
@ -2,6 +2,7 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
  name: {{ include "hami-vgpu.scheduler" . }}-device
+  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -23,6 +24,9 @@ data:
      deviceSplitCount: {{ .Values.devicePlugin.deviceSplitCount }}
      deviceMemoryScaling: {{ .Values.devicePlugin.deviceMemoryScaling }}
      deviceCoreScaling: {{ .Values.devicePlugin.deviceCoreScaling }}
+      gpuCorePolicy: {{ .Values.devices.nvidia.gpuCorePolicy }}
+      libCudaLogLevel: {{ .Values.devices.nvidia.libCudaLogLevel }}
+      runtimeClassName: "{{ .Values.devicePlugin.runtimeClassName }}"
      knownMigGeometries:
      - models: [ "A30" ]
        allowedGeometries:
@ -90,6 +94,12 @@ data:
      resourceCoreName: {{ .Values.dcuResourceCores }}
    metax:
      resourceCountName: "metax-tech.com/gpu"
+      resourceVCountName: {{ .Values.metaxResourceName }}
+      resourceVMemoryName: {{ .Values.metaxResourceMem }}
+      resourceVCoreName: {{ .Values.metaxResourceCore }}
+    enflame:
+      resourceCountName: "enflame.com/vgcu"
+      resourcePercentageName: "enflame.com/vgcu-percentage"
    mthreads:
      resourceCountName: "mthreads.com/vgpu"
      resourceMemoryName: "mthreads.com/sgpu-memory"
@ -119,6 +129,27 @@ data:
        - name: vir16
          memory: 17476
          aiCore: 16
+    - chipName: 910B2
+      commonWord: Ascend910B2
+      resourceName: huawei.com/Ascend910B2
+      resourceMemoryName: huawei.com/Ascend910B2-memory
+      memoryAllocatable: 65536
+      memoryCapacity: 65536
+      aiCore: 24
+      aiCPU: 6
+      templates:
+        - name: vir03_1c_8g
+          memory: 8192
+          aiCore: 3
+          aiCPU: 1
+        - name: vir06_1c_16g
+          memory: 16384
+          aiCore: 6
+          aiCPU: 1
+        - name: vir12_3c_32g
+          memory: 32768
+          aiCore: 12
+          aiCPU: 3
    - chipName: 910B3
      commonWord: Ascend910B
      resourceName: huawei.com/Ascend910B
--- a/helm/hami/templates/scheduler/job-patch/clusterrole.yaml
+++ b/helm/hami/templates/scheduler/job-patch/clusterrole.yaml
@ -1,3 +1,4 @@
+{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRole
 metadata:
@ -24,3 +25,4 @@ rules:
    resourceNames:
    - {{ include "hami-vgpu.fullname" . }}-admission
 {{- end }}
+{{- end }}
--- a/helm/hami/templates/scheduler/job-patch/clusterrolebinding.yaml
+++ b/helm/hami/templates/scheduler/job-patch/clusterrolebinding.yaml
@ -1,3 +1,4 @@
+{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
 apiVersion: rbac.authorization.k8s.io/v1
 kind: ClusterRoleBinding
 metadata:
@ -15,4 +16,5 @@ roleRef:
 subjects:
  - kind: ServiceAccount
    name: {{ include "hami-vgpu.fullname" . }}-admission
-    namespace: {{ .Release.Namespace | quote }}
+    namespace: {{ include "hami-vgpu.namespace" . | quote }}
+{{- end }}
--- a/helm/hami/templates/scheduler/job-patch/job-createSecret.yaml
+++ b/helm/hami/templates/scheduler/job-patch/job-createSecret.yaml
@ -1,7 +1,9 @@
+{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
 apiVersion: batch/v1
 kind: Job
 metadata:
  name: {{ include "hami-vgpu.fullname" . }}-admission-create
+  namespace: {{ include "hami-vgpu.namespace" . }}
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -41,11 +43,11 @@ spec:
            - --cert-name=tls.crt
            - --key-name=tls.key
            {{- if .Values.scheduler.admissionWebhook.customURL.enabled }}
-            - --host={{ printf "%s.%s.svc,127.0.0.1,%s" (include "hami-vgpu.scheduler" .) .Release.Namespace .Values.scheduler.admissionWebhook.customURL.host}}
+            - --host={{ printf "%s.%s.svc,127.0.0.1,%s" (include "hami-vgpu.scheduler" .) (include "hami-vgpu.namespace" .) .Values.scheduler.admissionWebhook.customURL.host}}
            {{- else }}
-            - --host={{ printf "%s.%s.svc,127.0.0.1" (include "hami-vgpu.scheduler" .) .Release.Namespace }}
+            - --host={{ printf "%s.%s.svc,127.0.0.1" (include "hami-vgpu.scheduler" .) (include "hami-vgpu.namespace" .) }}
            {{- end }}
-            - --namespace={{ .Release.Namespace }}
+            - --namespace={{ include "hami-vgpu.namespace" . }}
            - --secret-name={{ include "hami-vgpu.scheduler.tls" . }}
      restartPolicy: OnFailure
      serviceAccountName: {{ include "hami-vgpu.fullname" . }}-admission
@ -58,3 +60,4 @@ spec:
      securityContext:
        runAsNonRoot: true
        runAsUser: {{ .Values.scheduler.patch.runAsUser }}
+{{- end }}
--- a/helm/hami/templates/scheduler/job-patch/job-patchWebhook.yaml
+++ b/helm/hami/templates/scheduler/job-patch/job-patchWebhook.yaml
@ -1,7 +1,9 @@
+{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
 apiVersion: batch/v1
 kind: Job
 metadata:
  name: {{ include "hami-vgpu.fullname" . }}-admission-patch
+  namespace: {{ include "hami-vgpu.namespace" . }}
  annotations:
    "helm.sh/hook": post-install,post-upgrade
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -39,7 +41,7 @@ spec:
          args:
            - patch
            - --webhook-name={{ include "hami-vgpu.scheduler.webhook" . }}
-            - --namespace={{ .Release.Namespace }}
+            - --namespace={{ include "hami-vgpu.namespace" . }}
            - --patch-validating=false
            - --secret-name={{ include "hami-vgpu.scheduler.tls" . }}
      restartPolicy: OnFailure
@ -53,3 +55,4 @@ spec:
      securityContext:
        runAsNonRoot: true
        runAsUser: {{ .Values.scheduler.patch.runAsUser }}
+{{- end }}
--- a/helm/hami/templates/scheduler/job-patch/psp.yaml
+++ b/helm/hami/templates/scheduler/job-patch/psp.yaml
@ -1,3 +1,4 @@
+{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
 {{- if .Values.podSecurityPolicy.enabled }}
 apiVersion: policy/v1beta1
 kind: PodSecurityPolicy
@ -34,3 +35,4 @@ spec:
  - secret
  - downwardAPI
 {{- end }}
+{{- end }}
--- a/helm/hami/templates/scheduler/job-patch/role.yaml
+++ b/helm/hami/templates/scheduler/job-patch/role.yaml
@ -1,7 +1,9 @@
+{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
 apiVersion: rbac.authorization.k8s.io/v1
 kind: Role
 metadata:
  name:  {{ include "hami-vgpu.fullname" . }}-admission
+  namespace: {{ include "hami-vgpu.namespace" . }}
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -16,3 +18,4 @@ rules:
    verbs:
      - get
      - create
+{{- end }}
--- a/helm/hami/templates/scheduler/job-patch/rolebinding.yaml
+++ b/helm/hami/templates/scheduler/job-patch/rolebinding.yaml
@ -1,7 +1,9 @@
+{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
 apiVersion: rbac.authorization.k8s.io/v1
 kind: RoleBinding
 metadata:
  name: {{ include "hami-vgpu.fullname" . }}-admission
+  namespace: {{ include "hami-vgpu.namespace" . }}
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
@ -15,4 +17,5 @@ roleRef:
 subjects:
  - kind: ServiceAccount
    name: {{ include "hami-vgpu.fullname" . }}-admission
-    namespace: {{ .Release.Namespace | quote }}
+    namespace: {{ include "hami-vgpu.namespace" . | quote }}
+{{- end }}
--- a/helm/hami/templates/scheduler/job-patch/serviceaccount.yaml
+++ b/helm/hami/templates/scheduler/job-patch/serviceaccount.yaml
@ -1,10 +1,13 @@
+{{- if and (.Values.scheduler.patch.enabled) (not .Values.scheduler.certManager.enabled) }}
 apiVersion: v1
 kind: ServiceAccount
 metadata:
  name: {{ include "hami-vgpu.fullname" . }}-admission
+  namespace: {{ include "hami-vgpu.namespace" . }}
  annotations:
    "helm.sh/hook": pre-install,pre-upgrade,post-install,post-upgrade
    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
  labels:
    {{- include "hami-vgpu.labels" . | nindent 4 }}
    app.kubernetes.io/component: admission-webhook
+{{- end }}
--- a/helm/hami/templates/scheduler/rolebinding.yaml
+++ b/helm/hami/templates/scheduler/rolebinding.yaml
@ -12,4 +12,4 @@ roleRef:
 subjects:
  - kind: ServiceAccount
    name: {{ include "hami-vgpu.scheduler" . }}
-    namespace: {{ .Release.Namespace | quote }}
+    namespace: {{ include "hami-vgpu.namespace" . | quote }}
--- a/helm/hami/templates/scheduler/service.yaml
+++ b/helm/hami/templates/scheduler/service.yaml
@ -2,6 +2,7 @@ apiVersion: v1
 kind: Service
 metadata:
  name: {{ include "hami-vgpu.scheduler" . }}
+  namespace: {{ include "hami-vgpu.namespace" . }}
  labels:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.labels" . | nindent 4 }}
@ -12,19 +13,22 @@ metadata:
  annotations: {{ toYaml .Values.scheduler.service.annotations | nindent 4 }}
  {{- end }}
 spec:
-  type: NodePort
+  type: {{ .Values.scheduler.service.type | default "NodePort" }}  # Default type is NodePort
  ports:
    - name: http
-      port: {{ .Values.scheduler.service.httpPort }}
-      targetPort: 443
-      nodePort: {{ .Values.scheduler.service.schedulerPort }}
+      port: {{ .Values.scheduler.service.httpPort | default 443 }}  # Default HTTP port is 443
+      targetPort: {{ .Values.scheduler.service.httpTargetPort | default 443 }}
+      {{- if eq (.Values.scheduler.service.type | default "NodePort") "NodePort" }}  # If type is NodePort, set nodePort
+      nodePort: {{ .Values.scheduler.service.schedulerPort | default 31998 }}
+      {{- end }}
      protocol: TCP
    - name: monitor
-      port: {{ .Values.scheduler.service.monitorPort }}
-      targetPort: {{ (split ":" (printf "%s" .Values.scheduler.metricsBindAddress))._1 }}
-      nodePort: {{ .Values.scheduler.service.monitorPort }}
+      port: {{ .Values.scheduler.service.monitorPort | default 31993 }}  # Default monitoring port is 31993
+      targetPort: {{ .Values.scheduler.service.monitorTargetPort | default 9395 }}
+      {{- if eq (.Values.scheduler.service.type | default "NodePort") "NodePort" }}  # If type is NodePort, set nodePort
+      nodePort: {{ .Values.scheduler.service.monitorPort | default 31993 }}
+      {{- end }}
      protocol: TCP
  selector:
    app.kubernetes.io/component: hami-scheduler
    {{- include "hami-vgpu.selectorLabels" . | nindent 4 }}
-
--- a/helm/hami/templates/scheduler/serviceaccount.yaml
+++ b/helm/hami/templates/scheduler/serviceaccount.yaml
@ -2,7 +2,7 @@ apiVersion: v1
 kind: ServiceAccount
 metadata:
  name: {{ include "hami-vgpu.scheduler" . }}
-  namespace: {{ .Release.Namespace | quote }}
+  namespace: {{ include "hami-vgpu.namespace" . | quote }}
  labels:
    app.kubernetes.io/component: "hami-scheduler"
    {{- include "hami-vgpu.labels" . | nindent 4 }}
--- a/helm/hami/templates/scheduler/webhook.yaml
+++ b/helm/hami/templates/scheduler/webhook.yaml
@ -1,6 +1,10 @@
 apiVersion: admissionregistration.k8s.io/v1
 kind: MutatingWebhookConfiguration
 metadata:
+  {{- if .Values.scheduler.certManager.enabled }}
+  annotations:
+    cert-manager.io/inject-ca-from: {{ include "hami-vgpu.namespace" . }}/{{ include "hami-vgpu.scheduler" . }}-serving-cert
+  {{- end }}
  name: {{ include "hami-vgpu.scheduler.webhook" . }}
 webhooks:
  - admissionReviewVersions:
@ -11,7 +15,7 @@ webhooks:
      {{- else }}
      service:
        name: {{ include "hami-vgpu.scheduler" . }}
-        namespace: {{ .Release.Namespace }}
+        namespace: {{ include "hami-vgpu.namespace" . }}
        path: /webhook
        port: {{ .Values.scheduler.service.httpPort }}
      {{- end }}
--- a/helm/hami/values.yaml
+++ b/helm/hami/values.yaml
@ -2,8 +2,9 @@

 nameOverride: ""
 fullnameOverride: ""
+namespaceOverride: ""
 imagePullSecrets: [ ]
-version: "v2.5.1"
+version: "v2.6.0"

 #Nvidia GPU Parameters
 resourceName: "nvidia.com/gpu"
@ -27,6 +28,11 @@ iluvatarResourceName: "iluvatar.ai/vgpu"
 iluvatarResourceMem: "iluvatar.ai/vcuda-memory"
 iluvatarResourceCore: "iluvatar.ai/vcuda-core"

+#Metax SGPU Parameters
+metaxResourceName: "metax-tech.com/sgpu"
+metaxResourceCore: "metax-tech.com/vcore"
+metaxResourceMem: "metax-tech.com/vmemory"
+
 schedulerName: "hami-scheduler"

 podSecurityPolicy:
@ -55,6 +61,8 @@ scheduler:
  metricsBindAddress: ":9395"
  livenessProbe: false
  leaderElect: true
+  # replicas is only honored when leaderElect is true; otherwise the scheduler always runs a single replica.
+  replicas: 1
  kubeScheduler:
    # @param enabled indicate whether to run kube-scheduler container in the scheduler pod, it's true by default.
    enabled: true
@ -109,7 +117,14 @@ scheduler:
      # - istio-system
    reinvocationPolicy: Never
    failurePolicy: Ignore
+  ## TLS Certificate Option 1: Use cert-manager to generate self-signed certificate.
+  ## If enabled, this always takes precedence over option 2.
+  certManager:
+    enabled: false
+  ## TLS Certificate Option 2: Use kube-webhook-certgen to generate self-signed certificate.
+  ## If true and certManager.enabled is false, Helm will automatically create a self-signed cert and secret for you.
  patch:
+    enabled: true
    image: docker.io/jettech/kube-webhook-certgen:v1.5.2
    imageNew: liangjw/kube-webhook-certgen:v1.1.1
    imagePullPolicy: IfNotPresent
@ -119,9 +134,11 @@ scheduler:
    tolerations: []
    runAsUser: 2000
  service:
-    httpPort: 443
-    schedulerPort: 31998
-    monitorPort: 31993
+    type: NodePort  # Default type is NodePort, can be changed to ClusterIP
+    httpPort: 443   # HTTP port
+    schedulerPort: 31998  # NodePort for HTTP
+    monitorPort: 31993    # Monitoring port
+    monitorTargetPort: 9395
    labels: {}
    annotations: {}

@ -130,10 +147,13 @@ devicePlugin:
  monitorimage: "projecthami/hami"
  monitorctrPath: /usr/local/vgpu/containers
  imagePullPolicy: IfNotPresent
-  deviceSplitCount: 40
+  deviceSplitCount: 10
  deviceMemoryScaling: 1
  deviceCoreScaling: 1
+  # The runtime class name to be used by the device plugin, and added to the pod.spec.runtimeClassName of applications utilizing NVIDIA GPUs
  runtimeClassName: ""
+  # Whether to create a RuntimeClass named after runtimeClassName; has no effect while runtimeClassName is empty
+  createRuntimeClass: false
  migStrategy: "none"
  disablecorelimit: "false"
  passDeviceSpecsEnabled: false
@ -141,7 +161,10 @@ devicePlugin:
    - -v=4
  
  service:
+    type: NodePort  # Default type is NodePort, can be changed to ClusterIP
    httpPort: 31992
+    labels: {}
+    annotations: {}
    
  pluginPath: /var/lib/kubelet/device-plugins
  libPath: /usr/local/vgpu
@ -181,10 +204,18 @@ devicePlugin:
 #        memory: 100Mi

 devices:
+  enflame:
+    enabled: false
+    customresources:
+      - enflame.com/vgcu
+      - enflame.com/vgcu-percentage
  mthreads:
    enabled: false
    customresources:
      - mthreads.com/vgpu
+  nvidia:
+    gpuCorePolicy: default
+    libCudaLogLevel: 1
  ascend:
    enabled: false
    image: ""
@ -196,6 +227,8 @@ devices:
    customresources:
      - huawei.com/Ascend910A
      - huawei.com/Ascend910A-memory
+      - huawei.com/Ascend910B2
+      - huawei.com/Ascend910B2-memory
      - huawei.com/Ascend910B
      - huawei.com/Ascend910B-memory
      - huawei.com/Ascend910B4