diff --git a/charts/kubezero-addons/Chart.yaml b/charts/kubezero-addons/Chart.yaml
index bfa9e105..db466062 100644
--- a/charts/kubezero-addons/Chart.yaml
+++ b/charts/kubezero-addons/Chart.yaml
@@ -2,14 +2,13 @@ apiVersion: v2
 name: kubezero-addons
 description: KubeZero umbrella chart for various optional cluster addons
 type: application
-version: 0.5.0
+version: 0.5.1
 appVersion: v1.22.8
 home: https://kubezero.com
 icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
 keywords:
   - kubezero
   - fuse-device-plugin
-  - k8s-ecr-login-renew
   - aws-node-termination-handler
   - external-dns
 maintainers:
diff --git a/charts/kubezero-addons/README.md b/charts/kubezero-addons/README.md
index 96f64de6..9d963fa2 100644
--- a/charts/kubezero-addons/README.md
+++ b/charts/kubezero-addons/README.md
@@ -25,19 +25,9 @@ Kubernetes: `>= 1.20.0`
 
 # device-plugins
 
-# k8s-ecr-login-renew
+## AWS Neuron
+Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/) - [Inf1 instances](https://aws.amazon.com/ec2/instance-types/inf1/)
 
-## IAM setup
- - Create IAM user for ECR read-only access and attach the following managed policy: `AmazonEC2ContainerRegistryReadOnly`
- - create AWS credentials for the IAM users
-
-## Kubernetes secret
-Create secret with the IAM user credential for ecr-renew to use, using the credentials from the previous step:
-`kubectl create secret -n kube-system generic ecr-renew-cred --from-literal=AWS_REGION= --from-literal=AWS_ACCESS_KEY_ID= --from-literal=AWS_SECRET_ACCESS_KEY=`
-
-## Resources
-- https://github.com/nabsul/k8s-ecr-login-renew
-
 ## Values
 
 | Key | Type | Default | Description |
@@ -65,6 +55,9 @@ Create secret with the IAM user credential for ecr-renew to use, using the crede
 | aws-node-termination-handler.taintNode | bool | `true` | |
 | aws-node-termination-handler.tolerations[0].effect | string | `"NoSchedule"` | |
 | aws-node-termination-handler.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
+| awsNeuron.enabled | bool | `false` | |
+| awsNeuron.image.name | string | `"public.ecr.aws/neuron/neuron-device-plugin"` | |
+| awsNeuron.image.tag | string | `"1.9.0.0"` | |
 | clusterBackup.enabled | bool | `false` | |
 | clusterBackup.extraEnv | list | `[]` | |
 | clusterBackup.image.name | string | `"public.ecr.aws/zero-downtime/kubezero-admin"` | |
@@ -96,4 +89,3 @@ Create secret with the IAM user credential for ecr-renew to use, using the crede
 | forseti.image.name | string | `"public.ecr.aws/zero-downtime/forseti"` | |
 | forseti.image.tag | string | `"v0.1.2"` | |
 | fuseDevicePlugin.enabled | bool | `false` | |
-| k8sEcrLoginRenew.enabled | bool | `false` | |
diff --git a/charts/kubezero-addons/README.md.gotmpl b/charts/kubezero-addons/README.md.gotmpl
index fae2e774..f7ff4638 100644
--- a/charts/kubezero-addons/README.md.gotmpl
+++ b/charts/kubezero-addons/README.md.gotmpl
@@ -17,17 +17,7 @@
 
 # device-plugins
 
-# k8s-ecr-login-renew
+## AWS Neuron
+Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/) - [Inf1 instances](https://aws.amazon.com/ec2/instance-types/inf1/)
 
-## IAM setup
- - Create IAM user for ECR read-only access and attach the following managed policy: `AmazonEC2ContainerRegistryReadOnly`
- - create AWS credentials for the IAM users
-
-## Kubernetes secret
-Create secret with the IAM user credential for ecr-renew to use, using the credentials from the previous step:
-`kubectl create secret -n kube-system generic ecr-renew-cred --from-literal=AWS_REGION= --from-literal=AWS_ACCESS_KEY_ID= --from-literal=AWS_SECRET_ACCESS_KEY=`
-
-## Resources
-- https://github.com/nabsul/k8s-ecr-login-renew
-
 {{ template "chart.valuesSection" . }}
diff --git a/charts/kubezero-addons/templates/device-plugins/aws-neuron-ds.yaml b/charts/kubezero-addons/templates/device-plugins/aws-neuron-ds.yaml
new file mode 100644
index 00000000..f9665395
--- /dev/null
+++ b/charts/kubezero-addons/templates/device-plugins/aws-neuron-ds.yaml
@@ -0,0 +1,82 @@
+{{- if .Values.awsNeuron.enabled }}
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: neuron-device-plugin
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      name: neuron-device-plugin-ds
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      annotations:
+        scheduler.alpha.kubernetes.io/critical-pod: ""
+      labels:
+        name: neuron-device-plugin-ds
+    spec:
+      serviceAccountName: neuron-device-plugin
+      tolerations:
+      - key: CriticalAddonsOnly
+        operator: Exists
+      - key: aws.amazon.com/neuron
+        operator: Exists
+        effect: NoSchedule
+      # Mark this pod as a critical add-on; when enabled, the critical add-on
+      # scheduler reserves resources for critical add-on pods so that they can
+      # be rescheduled after a failure.
+      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
+      priorityClassName: "system-node-critical"
+      # Restrict scheduling to Inf1 instance types. The nodeSelectorTerms are
+      # ORed, so a node matches via either the legacy beta label or the
+      # node.kubernetes.io label; both lists must name the same sizes.
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  - key: "beta.kubernetes.io/instance-type"
+                    operator: In
+                    values:
+                      - inf1.xlarge
+                      - inf1.2xlarge
+                      - inf1.6xlarge
+                      - inf1.24xlarge
+              - matchExpressions:
+                  - key: "node.kubernetes.io/instance-type"
+                    operator: In
+                    values:
+                      - inf1.xlarge
+                      - inf1.2xlarge
+                      - inf1.6xlarge
+                      - inf1.24xlarge
+      containers:
+      - image: "{{ .Values.awsNeuron.image.name }}:{{ .Values.awsNeuron.image.tag }}"
+        imagePullPolicy: IfNotPresent
+        name: neuron-device-plugin
+        env:
+        - name: KUBECONFIG
+          value: /etc/kubernetes/kubelet.conf
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: spec.nodeName
+        securityContext:
+          allowPrivilegeEscalation: false
+          capabilities:
+            drop: ["ALL"]
+        volumeMounts:
+          - name: device-plugin
+            mountPath: /var/lib/kubelet/device-plugins
+          - name: infa-map
+            mountPath: /run
+      volumes:
+        - name: device-plugin
+          hostPath:
+            path: /var/lib/kubelet/device-plugins
+        - name: infa-map
+          hostPath:
+            path: /run
+{{- end }}
diff --git a/charts/kubezero-addons/templates/device-plugins/aws-neuron-rbac.yaml b/charts/kubezero-addons/templates/device-plugins/aws-neuron-rbac.yaml
new file mode 100644
index 00000000..193a82db
--- /dev/null
+++ b/charts/kubezero-addons/templates/device-plugins/aws-neuron-rbac.yaml
@@ -0,0 +1,59 @@
+{{- if .Values.awsNeuron.enabled }}
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: neuron-device-plugin
+rules:
+- apiGroups:
+  - ""
+  resources:
+  - nodes
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - events
+  verbs:
+  - create
+  - patch
+- apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - update
+  - patch
+  - get
+  - list
+  - watch
+- apiGroups:
+  - ""
+  resources:
+  - nodes/status
+  verbs:
+  - patch
+  - update
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: neuron-device-plugin
+  namespace: kube-system
+---
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: neuron-device-plugin
+  namespace: kube-system
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: neuron-device-plugin
+subjects:
+- kind: ServiceAccount
+  name: neuron-device-plugin
+  namespace: kube-system
+{{- end }}
diff --git a/charts/kubezero-addons/templates/k8s-ecr-login-renew/cronjob.yaml b/charts/kubezero-addons/templates/k8s-ecr-login-renew/cronjob.yaml
deleted file mode 100644
index ff048281..00000000
--- a/charts/kubezero-addons/templates/k8s-ecr-login-renew/cronjob.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-{{- if .Values.k8sEcrLoginRenew.enabled }}
-apiVersion: batch/v1beta1
-kind: CronJob
-metadata:
-  namespace: kube-system
-  name: ecr-renew
-  labels:
-    app: ecr-renew
-spec:
-  schedule: "0 */6 * * *"
-  successfulJobsHistoryLimit: 3
-  failedJobsHistoryLimit: 5
-  jobTemplate:
-    spec:
-      template:
-        spec:
-          restartPolicy: OnFailure
-          serviceAccountName: ecr-renew
-          containers:
-            - name: ecr-renew
-              image: nabsul/k8s-ecr-login-renew:v1.4
-              env:
-                - name: DOCKER_SECRET_NAME
-                  value: ecr-login
-                - name: TARGET_NAMESPACE
-                  value: "*"
-                - name: AWS_REGION
-                  valueFrom:
-                    secretKeyRef:
-                      name: ecr-renew-cred
-                      key: AWS_REGION
-                - name: AWS_ACCESS_KEY_ID
-                  valueFrom:
-                    secretKeyRef:
-                      name: ecr-renew-cred
-                      key: AWS_ACCESS_KEY_ID
-                - name: AWS_SECRET_ACCESS_KEY
-                  valueFrom:
-                    secretKeyRef:
-                      name: ecr-renew-cred
-                      key: AWS_SECRET_ACCESS_KEY
-{{- end }}
diff --git a/charts/kubezero-addons/templates/k8s-ecr-login-renew/service-account.yml b/charts/kubezero-addons/templates/k8s-ecr-login-renew/service-account.yml
deleted file mode 100644
index 52804fc9..00000000
--- a/charts/kubezero-addons/templates/k8s-ecr-login-renew/service-account.yml
+++ /dev/null
@@ -1,35 +0,0 @@
-{{- if .Values.k8sEcrLoginRenew.enabled }}
-apiVersion: v1
-kind: ServiceAccount
-metadata:
-  namespace: kube-system
-  name: ecr-renew
----
-
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
-  name: ecr-renew
-rules:
-  - apiGroups: [""]
-    resources: ["secrets"]
-    verbs: ["create", "update", "get", "delete"]
-  - apiGroups: [""]
-    resources: ["namespaces"]
-    verbs: ["get", "list"]
----
-
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
-metadata:
-  namespace: kube-system
-  name: ecr-renew
-roleRef:
-  apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
-  name: ecr-renew
-subjects:
-  - kind: ServiceAccount
-    name: ecr-renew
-    namespace: kube-system
-{{- end }}
diff --git a/charts/kubezero-addons/values.yaml b/charts/kubezero-addons/values.yaml
index 6ebf0dff..4fa6a26b 100644
--- a/charts/kubezero-addons/values.yaml
+++ b/charts/kubezero-addons/values.yaml
@@ -70,9 +70,13 @@ aws-node-termination-handler:
 fuseDevicePlugin:
   enabled: false
 
-k8sEcrLoginRenew:
+awsNeuron:
   enabled: false
 
+  image:
+    name: public.ecr.aws/neuron/neuron-device-plugin
+    tag: 1.9.0.0
+
 external-dns:
   enabled: false
 