Compare commits


439 Commits

Author SHA1 Message Date
Stefan Reimer 40760d4a8e feat: bump ci tools, fix gitea PVC 2024-05-17 11:37:57 +00:00
Stefan Reimer a488b14f97 Squashed '.ci/' changes from 227e39f..2c44e4f
2c44e4f Disable concurrent builds
7144a42 Improve Trivy scanning logic
c1a48a6 Remove auto stash push / pop as being too dangerous
318c19e Add merge comment for subtree
22ed100 Fix custom branch docker tags

git-subtree-dir: .ci
git-subtree-split: 2c44e4fd8550d30fba503a2bcccec8e0bac1c151
2024-05-17 11:36:26 +00:00
Stefan Reimer 7cd1cd0c5e Merge pull request 'chore(deps): update kubezero-argo-dependencies' (#191) from renovate/kubezero-argo-kubezero-argo-dependencies into master
Reviewed-on: #191
2024-05-17 11:35:51 +00:00
Stefan Reimer 48c816b32c Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#208) from renovate/kubezero-ci-kubezero-ci-dependencies into master
Reviewed-on: #208
2024-05-17 11:35:22 +00:00
Renovate Bot e0a24a0af9 chore(deps): update kubezero-argo-dependencies 2024-05-17 11:10:56 +00:00
Renovate Bot d0ed102d57 chore(deps): update kubezero-ci-dependencies 2024-05-17 11:09:41 +00:00
Stefan Reimer f8e7a85d9c fix: minor fixes for CI and Telemetry 2024-04-25 15:36:09 +00:00
Stefan Reimer 8bd713c1c7 feat: first step to migrate the logging pipeline into Telemetry 2024-04-25 15:33:49 +00:00
Stefan Reimer 73d457d1b9 doc: update README 2024-04-25 15:21:55 +00:00
Stefan Reimer 46ccd445e0 Merge pull request 'chore(deps): update helm release fluent-bit to v0.46.2' (#192) from renovate/kubezero-logging-kubezero-logging-dependencies into master
Reviewed-on: #192
2024-04-25 14:44:51 +00:00
Stefan Reimer 3c8a2d7dbd Merge pull request 'chore(deps): update helm release opentelemetry-collector to v0.89.0' (#195) from renovate/kubezero-telemetry-kubezero-telemetry-dependencies into master
Reviewed-on: #195
2024-04-25 14:41:46 +00:00
Stefan Reimer 229f5bc759 Merge pull request 'chore(deps): update helm release jaeger to v3' (#201) from renovate/kubezero-telemetry-major-kubezero-telemetry-dependencies into master
Reviewed-on: #201
2024-04-25 14:41:16 +00:00
Stefan Reimer 0060ec1ed1 chore: version bump CI tools 2024-04-25 14:36:22 +00:00
Stefan Reimer f6b54cde36 Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#197) from renovate/kubezero-ci-kubezero-ci-dependencies into master
Reviewed-on: #197
2024-04-25 11:11:11 +00:00
Stefan Reimer b9ee65d128 feat: update Istio to 1.21.2 2024-04-25 10:37:22 +00:00
Stefan Reimer 76cc875990 Merge pull request 'chore(deps): update kubezero-istio-dependencies' (#196) from renovate/kubezero-istio-kubezero-istio-dependencies into master
Reviewed-on: #196
2024-04-25 09:57:06 +00:00
Stefan Reimer 4a54fde888 Merge pull request 'chore(deps): update helm release gateway to v1.21.2' (#203) from renovate/kubezero-istio-gateway-kubezero-istio-gateway-dependencies into master
Reviewed-on: #203
2024-04-25 09:56:47 +00:00
Renovate Bot 2957b898d9 chore(deps): update kubezero-ci-dependencies 2024-04-25 03:06:42 +00:00
Renovate Bot 42d5000fe0 chore(deps): update helm release jaeger to v3 2024-04-24 03:07:05 +00:00
Stefan Reimer e7a66a584b Merge pull request 'chore(deps): update helm release opensearch-operator to v2.6.0' (#204) from renovate/kubezero-operators-kubezero-operators-dependencies into master
Reviewed-on: #204
2024-04-23 11:34:49 +00:00
Renovate Bot 8994289608 chore(deps): update helm release opensearch-operator to v2.6.0 2024-04-23 03:11:32 +00:00
Renovate Bot c93b4c8b52 chore(deps): update kubezero-istio-dependencies 2024-04-23 03:11:13 +00:00
Renovate Bot 8d27fc22a0 chore(deps): update helm release gateway to v1.21.2 2024-04-23 03:09:56 +00:00
Stefan Reimer 7eba80b54d fix: latest nvidia-tooling 2024-04-22 10:51:45 +00:00
Renovate Bot d66cdb42b8 chore(deps): update helm release opentelemetry-collector to v0.89.0 2024-04-20 03:08:15 +00:00
Stefan Reimer 9cfeaec3a8 Merge pull request 'chore(deps): update helm release nvidia-device-plugin to v0.15.0' (#200) from renovate/kubezero-addons-kubezero-addons-dependencies into master
Reviewed-on: #200
2024-04-19 12:24:08 +00:00
Renovate Bot 7bac355303 chore(deps): update helm release fluent-bit to v0.46.2 2024-04-19 03:07:14 +00:00
Renovate Bot dedfd1f7a3 chore(deps): update helm release nvidia-device-plugin to v0.15.0 2024-04-18 03:07:41 +00:00
Stefan Reimer 193967f600 security: release 1.28.9 to follow upstream security patches 2024-04-17 10:26:01 +00:00
Stefan Reimer 5be0f7087e Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#177) from renovate/kubezero-ci-kubezero-ci-dependencies into master
Reviewed-on: #177
2024-04-15 13:46:13 +00:00
Stefan Reimer b9e52bc2d9 fix: make Jaeger work again 2024-04-15 13:25:01 +00:00
Stefan Reimer 2cdb30b178 Merge pull request 'chore(deps): update kubezero-telemetry-dependencies' (#189) from renovate/kubezero-telemetry-kubezero-telemetry-dependencies into master
Reviewed-on: #189
2024-04-15 13:05:22 +00:00
Renovate Bot 828c467d37 chore(deps): update kubezero-telemetry-dependencies 2024-04-15 03:05:29 +00:00
Renovate Bot dbd1ade98c chore(deps): update kubezero-ci-dependencies 2024-04-15 03:05:17 +00:00
Stefan Reimer 730020b329 fix: remove legacy argocd resources properly 2024-04-11 14:42:15 +01:00
Stefan Reimer 1caa01b28b docs: some more details for v1.28 2024-04-09 15:15:44 +00:00
Stefan Reimer c91d570857 Chore: various version bumps 2024-04-09 15:13:16 +00:00
Stefan Reimer da0c33b02b chore: metrics version bump 2024-04-09 14:56:16 +00:00
Stefan Reimer 5c6fd9bd2c security: Istio version bump 2024-04-09 14:56:16 +00:00
Stefan Reimer 995d159d3e Merge pull request 'chore(deps): update kubezero-metrics-dependencies' (#184) from renovate/kubezero-metrics-kubezero-metrics-dependencies into master
Reviewed-on: #184
2024-04-09 14:53:50 +00:00
Renovate Bot c9dc123eff chore(deps): update kubezero-metrics-dependencies 2024-04-09 14:51:51 +00:00
Stefan Reimer 5237b002b4 Merge pull request 'chore(deps): update kubezero-addons-dependencies' (#175) from renovate/kubezero-addons-kubezero-addons-dependencies into master
Reviewed-on: #175
2024-04-09 14:47:57 +00:00
Stefan Reimer 61d373af7a Merge pull request 'chore(deps): update helm release aws-efs-csi-driver to v2.5.7' (#182) from renovate/kubezero-storage-kubezero-storage-dependencies into master
Reviewed-on: #182
2024-04-09 14:47:04 +00:00
Stefan Reimer 012c26d3d6 Merge pull request 'chore(deps): update helm release argo-cd to v6.7.10' (#181) from renovate/kubezero-argo-kubezero-argo-dependencies into master
Reviewed-on: #181
2024-04-09 14:44:51 +00:00
Stefan Reimer 945642a551 Merge pull request 'chore(deps): update helm release opentelemetry-collector to v0.86.2' (#183) from renovate/kubezero-telemetry-kubezero-telemetry-dependencies into master
Reviewed-on: #183
2024-04-09 14:44:30 +00:00
Stefan Reimer 9d835e4385 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v58' (#185) from renovate/kubezero-metrics-major-kubezero-metrics-dependencies into master
Reviewed-on: #185
2024-04-09 14:42:30 +00:00
Stefan Reimer c057f35547 Merge pull request 'chore(deps): update helm release gateway to v1.21.1' (#186) from renovate/kubezero-istio-gateway-kubezero-istio-gateway-dependencies into master
Reviewed-on: #186
2024-04-09 14:41:49 +00:00
Stefan Reimer b29774d6d5 Merge pull request 'chore(deps): update kubezero-istio-dependencies to v1.21.1' (#187) from renovate/kubezero-istio-kubezero-istio-dependencies into master
Reviewed-on: #187
2024-04-09 14:41:34 +00:00
Renovate Bot e748303864 chore(deps): update kubezero-istio-dependencies to v1.21.1 2024-04-09 03:09:02 +00:00
Renovate Bot 3f8a2c929c chore(deps): update helm release gateway to v1.21.1 2024-04-09 03:08:15 +00:00
Stefan Reimer 7a80650d9c fix: disable feature flag for now 2024-04-08 18:09:22 +00:00
Stefan Reimer 75fc295066 fix: upgrade flow tweaks 2024-04-08 19:08:45 +01:00
Stefan Reimer 705f36f9aa feat: logging module version bumps 2024-04-08 12:30:01 +00:00
Renovate Bot aa597a4970 chore(deps): update helm release kube-prometheus-stack to v58 2024-04-07 03:04:53 +00:00
Renovate Bot 0e4ed20972 chore(deps): update kubezero-addons-dependencies 2024-04-06 03:07:00 +00:00
Renovate Bot 773f968d90 chore(deps): update helm release opentelemetry-collector to v0.86.2 2024-04-06 03:06:56 +00:00
Renovate Bot c54c9d78c4 chore(deps): update helm release argo-cd to v6.7.10 2024-04-06 03:06:38 +00:00
Stefan Reimer 8b7b1ec8fa Merge pull request 'chore(deps): update kubezero-logging-dependencies' (#160) from renovate/kubezero-logging-kubezero-logging-dependencies into master
Reviewed-on: #160
2024-04-04 13:41:31 +00:00
Stefan Reimer e2770079eb feat: version upgrades for kubezero-metrics 2024-04-04 13:39:36 +00:00
Renovate Bot b2d8a11854 chore(deps): update kubezero-logging-dependencies 2024-04-04 03:10:31 +00:00
Stefan Reimer 1bdbb7c538 feat: version upgrades for opensearch and operators 2024-04-03 14:36:59 +00:00
Stefan Reimer 1350500f7f Merge pull request 'chore(deps): update kubezero-metrics-dependencies' (#158) from renovate/kubezero-metrics-kubezero-metrics-dependencies into master
Reviewed-on: #158
2024-04-03 14:35:48 +00:00
Stefan Reimer 1cb0ff2c0d Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v57' (#149) from renovate/kubezero-metrics-major-kubezero-metrics-dependencies into master
Reviewed-on: #149
2024-04-03 14:35:31 +00:00
Stefan Reimer 734f19010f Merge pull request 'chore(deps): update helm release eck-operator to v2.12.1' (#180) from renovate/kubezero-operators-kubezero-operators-dependencies into master
Reviewed-on: #180
2024-04-03 13:18:24 +00:00
Stefan Reimer 3013c39061 Merge pull request 'chore(deps): update helm release jaeger to v2' (#173) from renovate/kubezero-telemetry-major-kubezero-telemetry-dependencies into master
Reviewed-on: #173
2024-04-03 13:11:11 +00:00
Stefan Reimer ca14178e94 feat: Falco version upgrade 2024-04-03 13:11:07 +00:00
Stefan Reimer 4b4431919a Merge pull request 'chore(deps): update helm release falco to v4' (#163) from renovate/kubezero-falco-major-kubezero-falco-dependencies into master
Reviewed-on: #163
2024-04-03 11:49:53 +00:00
Stefan Reimer 32e71b4129 feat: Istio upgrade to 1.21 2024-04-03 11:49:07 +00:00
Stefan Reimer 6b7746d3df Merge pull request 'chore(deps): update kubezero-istio-dependencies' (#137) from renovate/kubezero-istio-kubezero-istio-dependencies into master
Reviewed-on: #137
2024-04-02 17:39:38 +00:00
Stefan Reimer 52de70a4a8 Merge pull request 'chore(deps): update helm release gateway to v1.21.0' (#135) from renovate/kubezero-istio-gateway-kubezero-istio-gateway-dependencies into master
Reviewed-on: #135
2024-04-02 17:39:22 +00:00
Renovate Bot f8605e4b07 chore(deps): update helm release aws-efs-csi-driver to v2.5.7 2024-03-30 03:05:51 +00:00
Renovate Bot e8204779a5 chore(deps): update helm release kube-prometheus-stack to v57 2024-03-28 03:07:08 +00:00
Renovate Bot 9a56c99ee5 chore(deps): update helm release eck-operator to v2.12.1 2024-03-28 03:06:41 +00:00
Stefan Reimer 5116e52bc9 chore: typo 2024-03-27 22:51:24 +00:00
Stefan Reimer 26d59f63da chore: typo 2024-03-27 22:49:26 +00:00
Stefan Reimer 8c2ef9cf2c feat: v1.28 version upgrade argoCD incl. move into argo umbrella chart 2024-03-27 22:48:02 +00:00
Stefan Reimer 9fed97db49 docs: update support timeline 2024-03-27 13:58:32 +00:00
Stefan Reimer 588e50f56e Merge pull request 'chore(deps): update helm release aws-ebs-csi-driver to v2.29.1' (#178) from renovate/kubezero-storage-kubezero-storage-dependencies into master
Reviewed-on: #178
2024-03-27 13:58:10 +00:00
Stefan Reimer 908055bd36 Merge pull request 'chore(deps): update kubezero-network-dependencies' (#179) from renovate/kubezero-network-kubezero-network-dependencies into master
Reviewed-on: #179
2024-03-27 13:57:48 +00:00
Renovate Bot a05e6286cc chore(deps): update kubezero-istio-dependencies 2024-03-27 03:08:54 +00:00
Renovate Bot 7b153ac7cc chore(deps): update kubezero-network-dependencies 2024-03-27 03:08:32 +00:00
Renovate Bot 3e1d8e9c3e chore(deps): update helm release aws-ebs-csi-driver to v2.29.1 2024-03-27 03:06:52 +00:00
Stefan Reimer 78639b623a feat: version bump cert-manager, gitea and Jenkins 2024-03-24 18:49:08 +00:00
Stefan Reimer 4e9c147b7e Merge pull request 'chore(deps): update helm release argo-events to v2.4.4' (#176) from renovate/kubezero-argo-kubezero-argo-dependencies into master
Reviewed-on: #176
2024-03-24 17:48:11 +00:00
Stefan Reimer 64d76c283a Merge pull request 'chore(deps): update kubezero-argocd-dependencies (major)' (#166) from renovate/kubezero-argocd-major-kubezero-argocd-dependencies into master
Reviewed-on: #166
2024-03-24 17:13:42 +00:00
Renovate Bot 71f909e49e chore(deps): update kubezero-argocd-dependencies 2024-03-24 17:12:41 +00:00
Stefan Reimer ed4a47dcec Merge pull request 'chore(deps): update kubezero-argocd-dependencies' (#148) from renovate/kubezero-argocd-kubezero-argocd-dependencies into master
Reviewed-on: #148
2024-03-24 17:09:31 +00:00
Stefan Reimer 3ab37e7a7b Merge pull request 'chore(deps): update helm release cert-manager to v1.14.4' (#152) from renovate/kubezero-cert-manager-kubezero-cert-manager-dependencies into master
Reviewed-on: #152
2024-03-24 17:03:22 +00:00
Stefan Reimer 798c3cba57 Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#170) from renovate/kubezero-ci-kubezero-ci-dependencies into master
Reviewed-on: #170
2024-03-24 16:18:10 +00:00
Renovate Bot 3b536f7c44 chore(deps): update kubezero-ci-dependencies 2024-03-24 03:03:45 +00:00
Renovate Bot 69e132c857 chore(deps): update helm release argo-events to v2.4.4 2024-03-24 03:03:28 +00:00
Stefan Reimer 53f0bbffb6 feat: upgrade addons, storage and network module as part of v1.28 2024-03-22 17:04:41 +00:00
Stefan Reimer b0a6326a09 chore: cleanup upgrade script 2024-03-22 16:58:47 +00:00
Stefan Reimer 358042d38b Merge pull request 'chore(deps): update kubezero-storage-dependencies' (#150) from renovate/kubezero-storage-kubezero-storage-dependencies into master
Reviewed-on: #150
2024-03-22 14:24:05 +00:00
Stefan Reimer 22b774c939 fix: final fixes for cli tools of the v1.27 cycle 2024-03-22 12:21:55 +00:00
Renovate Bot 71061475c8 chore(deps): update kubezero-storage-dependencies 2024-03-22 03:06:38 +00:00
Stefan Reimer 3ea16b311b Merge pull request 'chore(deps): update twinproduction/aws-eks-asg-rolling-update-handler docker tag to v1.8.3' (#168) from renovate/twinproduction-aws-eks-asg-rolling-update-handler-1.x into master
Reviewed-on: #168
2024-03-21 16:29:43 +00:00
Stefan Reimer 46e115e4f5 Merge pull request 'chore(deps): update kubezero-addons-dependencies' (#136) from renovate/kubezero-addons-kubezero-addons-dependencies into master
Reviewed-on: #136
2024-03-21 16:25:40 +00:00
Stefan Reimer e55f986de8 Merge pull request 'chore(deps): update kubezero-network-dependencies' (#154) from renovate/kubezero-network-kubezero-network-dependencies into master
Reviewed-on: #154
2024-03-21 13:09:34 +00:00
Stefan Reimer 9ed2dbca96 Feat: first WIP of v1.28 2024-03-21 13:00:50 +00:00
Renovate Bot fcd2192cb4 chore(deps): update kubezero-argocd-dependencies 2024-03-21 03:05:18 +00:00
Renovate Bot 8aa50e4129 chore(deps): update kubezero-addons-dependencies 2024-03-20 19:58:07 +00:00
Renovate Bot d9146abf72 chore(deps): update kubezero-metrics-dependencies 2024-03-20 19:56:58 +00:00
Renovate Bot 7d354402d6 chore(deps): update helm release jaeger to v2 2024-03-15 03:23:54 +00:00
Renovate Bot 91a0034b26 chore(deps): update helm release falco to v4 2024-03-15 03:23:44 +00:00
Renovate Bot 48e381cb0f chore(deps): update kubezero-network-dependencies 2024-03-14 03:21:44 +00:00
Renovate Bot b98dc98e81 chore(deps): update helm release gateway to v1.21.0 2024-03-14 03:19:12 +00:00
Stefan Reimer 16fab2e0a0 chore: version bumps for all things CI/CD 2024-03-12 16:17:40 +00:00
Stefan Reimer 3b2f83c124 Merge pull request 'chore(deps): update keycloak docker tag to v18.7.1' (#162) from renovate/kubezero-auth-kubezero-auth-dependencies into master
Reviewed-on: #162
2024-03-12 15:49:45 +00:00
Stefan Reimer e36b096a46 doc: argo default values 2024-03-12 15:23:22 +00:00
Stefan Reimer 7628debe0c Merge pull request 'chore(deps): update helm release jenkins to v5' (#164) from renovate/kubezero-ci-major-kubezero-ci-dependencies into master
Reviewed-on: #164
2024-03-12 15:22:24 +00:00
Stefan Reimer 72c585b8ef Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#161) from renovate/kubezero-ci-kubezero-ci-dependencies into master
Reviewed-on: #161
2024-03-12 15:21:59 +00:00
Stefan Reimer d8a73bbb73 Merge pull request 'chore(deps): update docker.io/alpine docker tag to v3.19' (#151) from renovate/docker.io-alpine-3.x into master
Reviewed-on: #151
2024-03-12 15:21:32 +00:00
Stefan Reimer 21e5417331 Merge pull request 'chore(deps): update helm release falco to v3.8.7' (#127) from renovate/kubezero-falco-kubezero-falco-dependencies into master
Reviewed-on: #127
2024-03-12 15:20:42 +00:00
Renovate Bot 2dc58765e7 chore(deps): update kubezero-ci-dependencies 2024-03-12 03:23:58 +00:00
Renovate Bot cfda9b6a92 chore(deps): update helm release cert-manager to v1.14.4 2024-03-09 03:24:29 +00:00
Renovate Bot 48b1d08cc6 chore(deps): update helm release jenkins to v5 2024-03-08 03:19:41 +00:00
Stefan Reimer 18b75af746 Fix: make argo-events work with latest NATS 2024-03-04 12:58:33 +00:00
Stefan Reimer d4c67997ae feat: kubezero-argo initial commit 2024-02-28 20:55:00 +00:00
Renovate Bot 4628d1e1e7 chore(deps): update keycloak docker tag to v18.7.1 2024-02-23 03:16:31 +00:00
Renovate Bot 1a0bd7f312 chore(deps): update twinproduction/aws-eks-asg-rolling-update-handler docker tag to v1.8.3 2024-02-19 03:09:51 +00:00
Stefan Reimer 81c2f24156 Feat: KubeZero-Telemetry module incl. Jaeger Collector/UI and OpenSearch 2024-02-09 16:24:37 +00:00
Stefan Reimer 18a4f3e517 Merge pull request 'chore(deps): update helm release opentelemetry-collector to v0.80.1' (#131) from renovate/kubezero-telemetry-kubezero-telemetry-dependencies into master
Reviewed-on: #131
2024-02-07 10:22:37 +00:00
Renovate Bot 8fde71babb chore(deps): update helm release opentelemetry-collector to v0.80.1 2024-02-07 03:12:18 +00:00
Stefan Reimer ccf8a0788d chore: kubezero-operators version bump 2024-02-06 16:01:54 +00:00
Stefan Reimer c96b4a4ab9 ClamAV Version bump. Enabled verbose by default 2024-02-06 15:54:02 +00:00
Stefan Reimer 46a4435fad Merge pull request 'chore(deps): update helm release jaeger to v1' (#165) from renovate/kubezero-telemetry-major-kubezero-telemetry-dependencies into master
Reviewed-on: #165
2024-02-06 15:52:50 +00:00
Stefan Reimer 81f599dbb1 Merge pull request 'chore(deps): update kubezero-operators-dependencies' (#134) from renovate/kubezero-operators-kubezero-operators-dependencies into master
Reviewed-on: #134
2024-02-06 15:52:06 +00:00
Renovate Bot 2b0ab96344 chore(deps): update helm release jaeger to v1 2024-02-06 03:10:41 +00:00
Renovate Bot 27009c7926 chore(deps): update kubezero-operators-dependencies 2024-02-03 03:10:04 +00:00
Stefan Reimer 444888ad12 ci and auth version bumps 2024-01-29 14:02:50 +00:00
Stefan Reimer b34c7bd392 Merge pull request 'chore(deps): update keycloak docker tag to v18' (#159) from renovate/kubezero-auth-major-kubezero-auth-dependencies into master
Reviewed-on: #159
2024-01-29 13:40:55 +00:00
Renovate Bot c66cf3bde2 chore(deps): update keycloak docker tag to v18 2024-01-29 13:39:43 +00:00
Stefan Reimer 7b3f1fe40e Merge pull request 'chore(deps): update keycloak docker tag to v17.3.6' (#130) from renovate/kubezero-auth-kubezero-auth-dependencies into master
Reviewed-on: #130
2024-01-29 13:12:47 +00:00
Stefan Reimer 61454581c4 Merge pull request 'chore(deps): update helm release gitea to v10' (#156) from renovate/kubezero-ci-major-kubezero-ci-dependencies into master
Reviewed-on: #156
2024-01-29 12:48:28 +00:00
Stefan Reimer c2a495a6a6 Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#155) from renovate/kubezero-ci-kubezero-ci-dependencies into master
Reviewed-on: #155
2024-01-29 12:48:08 +00:00
Renovate Bot 0051451e19 chore(deps): update kubezero-ci-dependencies 2024-01-28 03:06:59 +00:00
Renovate Bot c73a3b7007 chore(deps): update helm release gitea to v10 2024-01-23 03:09:52 +00:00
Renovate Bot 14030824b1 chore(deps): update helm release falco to v3.8.7 2023-12-19 03:07:08 +00:00
Stefan Reimer 5224947818 Merge pull request 'chore(deps): update kubezero-mq-dependencies' (#140) from renovate/kubezero-mq-kubezero-mq-dependencies into master
Reviewed-on: #140
2023-12-18 11:39:16 +00:00
Renovate Bot 2850ce02d8 chore(deps): update kubezero-mq-dependencies 2023-12-18 03:03:47 +00:00
Stefan Reimer a04b26b851 chore: ci and clamav version bumps 2023-12-14 22:05:05 +00:00
Stefan Reimer cf3d5726e2 chore: fix typos, cleanup 2023-12-14 12:37:05 +00:00
Stefan Reimer a56a88f622 Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#147) from renovate/kubezero-ci-kubezero-ci-dependencies into master
Reviewed-on: #147
2023-12-14 12:35:44 +00:00
Renovate Bot 8e3331e257 chore(deps): update kubezero-ci-dependencies 2023-12-14 12:34:09 +00:00
Stefan Reimer f0cb8e6cc7 fix: various minor fixes, istio ingress hardening configurable 2023-12-14 12:18:00 +00:00
Stefan Reimer a39542e387 chore: fix upstream repo URL 2023-12-13 19:57:02 +00:00
Renovate Bot 8b54524c58 chore(deps): update docker.io/alpine docker tag to v3.19 2023-12-08 03:05:30 +00:00
Renovate Bot fdcf82065f chore(deps): update keycloak docker tag to v17.3.6 2023-12-08 03:04:42 +00:00
Stefan Reimer 683b7623f4 docs: notes from control-plane expand upgrade 2023-12-05 18:20:36 +00:00
Stefan Reimer ee7161651e docs: split upgrade from version docs 2023-12-04 14:32:42 +00:00
Stefan Reimer 011fbc3062 Fix: Disable MemoryQos due to increased OOMKills 2023-12-04 14:09:16 +00:00
Stefan Reimer ccaebf4dad Adjust RabbitMQ resources 2023-12-01 18:00:08 +00:00
Stefan Reimer 3fb4567ad6 More request tuning 2023-12-01 17:24:39 +00:00
Stefan Reimer b2cf56367d Resource request tuning 2023-12-01 16:56:56 +00:00
Stefan Reimer 8a7ff1f8a1 Tweak ES logging memory behaviour 2023-12-01 16:56:21 +00:00
Stefan Reimer 3dfd8bd4e4 Doc updates 2023-11-30 21:13:27 +00:00
Stefan Reimer 674c3cbc7d More minor fixes and doc updates 2023-11-30 21:07:42 +00:00
Stefan Reimer c1df995447 Bug fix for legacy cert-manager CRDs and disable CM edit for now 2023-11-30 20:04:27 +00:00
Stefan Reimer 0005b7fdac Derp 2023-11-30 17:59:55 +00:00
Stefan Reimer 96bf297d78 Minor upgrade tweaks 2023-11-30 17:59:37 +00:00
Stefan Reimer 20ab3bc8c7 More cleanup and tweaks 2023-11-30 14:38:04 +00:00
Stefan Reimer a569a6f6b0 Various minor fixes 2023-11-29 11:38:21 +00:00
Stefan Reimer 1c076eab61 Logging version bump, metrics fixes 2023-11-28 18:42:00 +00:00
Stefan Reimer 39db495adc Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#128) from renovate/kubezero-ci-kubezero-ci-dependencies into master
Reviewed-on: #128
2023-11-28 18:39:57 +00:00
Renovate Bot cb45553d64 chore(deps): update kubezero-ci-dependencies 2023-11-28 03:03:19 +00:00
Stefan Reimer 248d2e6ffa Merge pull request 'chore(deps): update kubezero-logging-dependencies' (#138) from renovate/kubezero-logging-kubezero-logging-dependencies into master
Reviewed-on: #138
2023-11-27 12:51:27 +00:00
Stefan Reimer 42df583323 ArgoCD version bump 2023-11-27 12:50:50 +00:00
Stefan Reimer a65b515f8c Metrics module version bump 2023-11-27 12:45:11 +00:00
Stefan Reimer f6336e5df5 Merge pull request 'chore(deps): update helm release argo-cd to v5.51.4' (#132) from renovate/kubezero-argocd-kubezero-argocd-dependencies into master
Reviewed-on: #132
2023-11-27 12:44:25 +00:00
Stefan Reimer ac0760d21a Merge pull request 'chore(deps): update kubezero-metrics-dependencies' (#139) from renovate/kubezero-metrics-kubezero-metrics-dependencies into master
Reviewed-on: #139
2023-11-27 12:25:01 +00:00
Stefan Reimer ea2d531719 cert-manager version bump, updated dashboards 2023-11-27 12:24:10 +00:00
Renovate Bot d2ae34792d chore(deps): update kubezero-metrics-dependencies 2023-11-25 03:02:57 +00:00
Stefan Reimer 1db90d4e28 Update kubezero-storage components 2023-11-24 16:43:30 +00:00
Stefan Reimer d25a76b526 Merge pull request 'chore(deps): update helm release cert-manager to v1.13.2' (#133) from renovate/kubezero-cert-manager-kubezero-cert-manager-dependencies into master
Reviewed-on: #133
2023-11-24 16:42:27 +00:00
Stefan Reimer 915d6c4afb Bug fix and latest dashboard for Istio 2023-11-24 13:56:18 +00:00
Stefan Reimer fccc78fd30 Merge pull request 'chore(deps): update kubezero-storage-dependencies' (#142) from renovate/kubezero-storage-kubezero-storage-dependencies into master
Reviewed-on: #142
2023-11-24 13:55:39 +00:00
Stefan Reimer 9466cc983e Fix AppArmor for Jenkins Agents 2023-11-24 13:05:33 +00:00
Stefan Reimer 53bc8b4d88 Latest CI fixes 2023-11-23 18:35:24 +00:00
Renovate Bot fae3dd10ea chore(deps): update kubezero-storage-dependencies 2023-11-22 18:39:23 +00:00
Renovate Bot 64c6056e50 chore(deps): update kubezero-logging-dependencies 2023-11-22 18:38:58 +00:00
Renovate Bot f8138fba5f chore(deps): update helm release cert-manager to v1.13.2 2023-11-22 18:38:15 +00:00
Renovate Bot 4d93bf817a chore(deps): update helm release argo-cd to v5.51.4 2023-11-22 18:38:02 +00:00
Stefan Reimer 3b32f29f90 Latest kubezero-ci incl. Gitea theming 2023-11-22 17:51:09 +00:00
Stefan Reimer b08ee586e6 Merge pull request 'chore(deps): update helm release renovate to v37' (#115) from renovate/kubezero-ci-major-parentdir-dependencies into master
Reviewed-on: #115
2023-11-22 15:54:55 +00:00
Stefan Reimer 60c454b534 Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#121) from renovate/kubezero-ci-parentdir-dependencies into master
Reviewed-on: #121
2023-11-22 15:46:58 +00:00
Stefan Reimer be1a727fe8 Various bugfixes 2023-11-22 13:19:22 +00:00
Stefan Reimer cb0b167437 First v1.27 docs, upgrade tweaks 2023-11-22 11:55:34 +00:00
Renovate Bot 1ab604df95 chore(deps): update helm release renovate to v37 2023-11-22 03:34:35 +00:00
Stefan Reimer 528c425d38 Move eck-operator to new module 2023-11-21 20:01:07 +00:00
Stefan Reimer 907d46e6db Move eck-operator to new module 2023-11-21 19:06:28 +00:00
Stefan Reimer d8b4c720c8 More cleanup 2023-11-20 20:44:50 +00:00
Stefan Reimer f4f51c082d Merge bootstrap into dev_apply, cleanup 2023-11-20 20:44:37 +00:00
Renovate Bot ead3e680a5 chore(deps): update kubezero-ci-dependencies 2023-11-17 03:13:04 +00:00
Stefan Reimer b5a90a6227 Kube v1.27.8, Falco module 2023-11-15 22:35:53 +00:00
Stefan Reimer ffcd94679b Merge pull request 'chore(deps): update kubezero-addons-dependencies' (#114) from renovate/kubezero-addons-parentdir-dependencies into master
Reviewed-on: #114
2023-11-15 16:40:17 +00:00
Stefan Reimer 5ce1b42483 V1.27: Istio bump to 1.19.4 incl. hardening settings, latest network CNI, initial operators and telemetry chart 2023-11-15 14:39:02 +00:00
Renovate Bot f33a01a188 chore(deps): update kubezero-addons-dependencies 2023-11-15 03:13:46 +00:00
Stefan Reimer f2e14f1d5d Merge pull request 'chore(deps): update helm release gateway to v1.19.4' (#108) from renovate/kubezero-istio-gateway-parentdir-dependencies into master
Reviewed-on: #108
2023-11-14 13:15:31 +00:00
Stefan Reimer 54d382625a Merge pull request 'chore(deps): update kubezero-istio-dependencies' (#103) from renovate/kubezero-istio-parentdir-dependencies into master
Reviewed-on: #103
2023-11-14 13:15:12 +00:00
Renovate Bot f936d17c4e chore(deps): update kubezero-istio-dependencies 2023-11-14 03:16:39 +00:00
Renovate Bot 6d60bedae3 chore(deps): update helm release gateway to v1.19.4 2023-11-14 03:16:24 +00:00
Stefan Reimer ae2a6fa146 Merge pull request 'chore(deps): update kubezero-mq-dependencies' (#73) from renovate/kubezero-mq-parentdir-dependencies into master
Reviewed-on: #73
2023-11-09 15:20:52 +00:00
Renovate Bot 025df84873 chore(deps): update kubezero-mq-dependencies 2023-11-09 03:11:02 +00:00
Stefan Reimer 950be88780 Minor doc tweaks for postgres upgrades 2023-11-08 16:12:53 +00:00
Stefan Reimer c0abdf2db0 Keycloak major version bump of postgres 2023-11-07 15:13:26 +00:00
Stefan Reimer 9ecf804a90 Merge pull request 'chore(deps): update keycloak docker tag to v17.3.0' (#116) from renovate/kubezero-auth-parentdir-dependencies into master
Reviewed-on: #116
2023-11-07 14:35:16 +00:00
Renovate Bot 9cf6922727 chore(deps): update keycloak docker tag to v17.3.0 2023-11-07 03:13:19 +00:00
Stefan Reimer 267505d840 Merge pull request 'chore(deps): update keycloak docker tag to v17' (#112) from renovate/kubezero-auth-major-parentdir-dependencies into master
Reviewed-on: #112
2023-11-02 19:03:47 +00:00
Renovate Bot c22ea88f64 chore(deps): update keycloak docker tag to v17 2023-11-01 03:13:25 +00:00
Stefan Reimer aa903d9e92 Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#113) from renovate/kubezero-ci-parentdir-dependencies into master
Reviewed-on: #113
2023-10-27 12:07:56 +00:00
Renovate Bot 6364d38dc7 chore(deps): update kubezero-ci-dependencies 2023-10-24 03:06:52 +00:00
Stefan Reimer cdb2848413 First fixes for alpha 1.27 2023-10-19 13:01:28 +00:00
Stefan Reimer 5b32381120 First stab at 1.27 2023-10-18 21:23:43 +00:00
Stefan Reimer 72bca2e77e Various updates and tweaks 2023-10-02 12:57:25 +00:00
Stefan Reimer 55dd52d5ab Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#110) from renovate/kubezero-ci-parentdir-dependencies into master
Reviewed-on: #110
2023-10-02 10:58:21 +00:00
Renovate Bot 20875794cd chore(deps): update kubezero-ci-dependencies 2023-10-02 03:04:22 +00:00
Stefan Reimer 617d8fbac4 Merge pull request 'chore(deps): update kubezero-addons-dependencies' (#97) from renovate/kubezero-addons-parentdir-dependencies into master
Reviewed-on: #97
2023-09-27 18:06:41 +00:00
Renovate Bot 4291231a8a chore(deps): update kubezero-addons-dependencies 2023-09-27 03:04:29 +00:00
Stefan Reimer 5cffc80159 Merge pull request 'chore(deps): update kubezero-ci-dependencies' (#93) from renovate/kubezero-ci-parentdir-dependencies into master
Reviewed-on: #93
2023-09-14 09:02:15 +00:00
Renovate Bot 50c9236e92 chore(deps): update kubezero-ci-dependencies 2023-09-14 03:04:00 +00:00
Stefan Reimer c139218e1b Remove versions from Jenkins 2023-09-13 19:50:45 +01:00
Stefan Reimer a6e37ab0c9 Updated Kubezero-auth module using Bitnami for KeyCloak 2023-09-06 10:43:10 +00:00
Stefan Reimer 2789fe5be1 Update arch docs 2023-09-05 12:09:44 +01:00
Stefan Reimer 1546c3976a Docs update 2023-09-05 12:01:00 +01:00
Stefan Reimer 7eb86a88fd Minor fixes for 1.26 2023-08-29 16:30:43 +00:00
Stefan Reimer ec70307115 Minor tweak to detect versions 2023-08-24 15:12:30 +01:00
Stefan Reimer f18d255640 kubernetes version bump to 1.26.8, fix metrics ingress 2023-08-24 09:07:57 +00:00
Stefan Reimer b65293ac3e Make kubezero-network multus template work with renovate 2023-08-24 09:05:59 +00:00
Stefan Reimer ab7873dbbf Minor argocd fixes 2023-08-23 12:33:46 +00:00
Stefan Reimer c0cb525f63 Merge pull request 'chore(deps): update helm release argo-cd to v5.43.5' (#98) from renovate/kubezero-argocd-parentdir-dependencies into master
Reviewed-on: #98
2023-08-23 12:24:32 +00:00
Stefan Reimer 8f89c3ce14 Version bump logging module for 1.26 2023-08-23 12:21:00 +00:00
Stefan Reimer 0966f77d51 Merge pull request 'chore(deps): update kubezero-redis-dependencies' (#75) from renovate/kubezero-redis-parentdir-dependencies into master
Reviewed-on: #75
2023-08-23 12:19:56 +00:00
Stefan Reimer 7fd6fe638c Merge pull request 'chore(deps): update kubezero-logging-dependencies' (#99) from renovate/kubezero-logging-parentdir-dependencies into master
Reviewed-on: #99
2023-08-23 12:00:25 +00:00
Renovate Bot 741068d7c4 chore(deps): update kubezero-logging-dependencies 2023-08-23 11:55:11 +00:00
Renovate Bot 1746a0579b chore(deps): update helm release argo-cd to v5.43.5 2023-08-23 11:54:34 +00:00
Stefan Reimer 49b052e8d4 Prepare logging module for renovate, use update lib 2023-08-23 11:51:34 +00:00
Stefan Reimer 6e08d8a474 Improve tooling for chart patching, fix for storage efs driver 2023-08-23 11:37:57 +00:00
Stefan Reimer 5845ca08b7 Workaround for large CRDs with ArgoCD 2023-08-22 13:42:24 +00:00
Stefan Reimer 3fb9fc21cc Merge pull request 'chore(deps): update kubezero-metrics-dependencies' (#96) from renovate/kubezero-metrics-parentdir-dependencies into master
Reviewed-on: #96
2023-08-22 13:11:05 +00:00
Renovate Bot 10be7a8754 chore(deps): update kubezero-metrics-dependencies 2023-08-22 13:10:13 +00:00
Stefan Reimer d709d0e9e4 Merge pull request 'chore(deps): update helm release kube-prometheus-stack to v48' (#95) from renovate/kubezero-metrics-major-parentdir-dependencies into master
Reviewed-on: #95
2023-08-22 13:07:43 +00:00
Renovate Bot 6aff67dae0 chore(deps): update helm release kube-prometheus-stack to v48 2023-08-22 13:06:43 +00:00
Stefan Reimer bc5e6295d3 Prepare metrics module for renovate 2023-08-22 13:02:40 +00:00
Stefan Reimer 156f41fec6 Istio version bump for 1.26 2023-08-22 12:48:33 +00:00
Stefan Reimer 6a72988f52 Remove legacy master tolerations, cleanup 2023-08-22 12:29:23 +00:00
Stefan Reimer 5a7e926202 Merge pull request 'chore(deps): update helm release gateway to v1.18.2' (#88) from renovate/kubezero-istio-gateway-parentdir-dependencies into master
Reviewed-on: #88
2023-08-22 12:26:49 +00:00
Stefan Reimer a66dfe7302 Enable snapshotcontroller if any capable driver is enabled 2023-08-22 12:17:02 +00:00
Stefan Reimer b918c2d145 storage CRD cleanup 2023-08-21 19:23:36 +00:00
Stefan Reimer e0cea3b90e storage module version bumps for 1.26 2023-08-21 17:24:01 +00:00
Stefan Reimer 62ed59426f Merge pull request 'chore(deps): update kubezero-istio-dependencies' (#71) from renovate/kubezero-istio-parentdir-dependencies into master
Reviewed-on: #71
2023-08-21 17:23:17 +00:00
Stefan Reimer daa5e72343 Merge pull request 'chore(deps): update kubezero-storage-dependencies' (#89) from renovate/kubezero-storage-parentdir-dependencies into master
Reviewed-on: #89
2023-08-21 12:07:00 +00:00
Renovate Bot 8e4b0ac7c9 chore(deps): update kubezero-storage-dependencies 2023-08-21 12:00:27 +00:00
Stefan Reimer 17ad966742 Update cert-manager for 1.26 2023-08-21 11:56:56 +00:00
Stefan Reimer ebcd56ff06 Prepare storage module for renovate, use new update-lib 2023-08-21 11:56:30 +00:00
Stefan Reimer 49a88ba8fa Use latest addons for 1.26 2023-08-21 09:50:01 +00:00
Stefan Reimer 88ee121198 Merge pull request 'chore(deps): update helm release cert-manager to v1.12.3' (#87) from renovate/kubezero-cert-manager-parentdir-dependencies into master
Reviewed-on: #87
2023-08-21 09:49:20 +00:00
Stefan Reimer e03e2b7538 Update kubezero-addons for 1.26 2023-08-20 17:06:25 +00:00
Stefan Reimer 9f0b976b26 Merge pull request 'chore(deps): update kubezero-addons-dependencies' (#68) from renovate/kubezero-addons-parentdir-dependencies into master
Reviewed-on: #68
2023-08-20 17:01:53 +00:00
Stefan Reimer df9b85c1cf Enable helm-values, adjust addons values.yaml 2023-08-20 16:55:23 +00:00
Stefan Reimer a4fabccc5a Bump some version in addons 2023-08-20 16:46:05 +00:00
Renovate Bot 84b7c28db4 chore(deps): update kubezero-addons-dependencies 2023-08-20 16:26:54 +00:00
Stefan Reimer 883295aae2 Switch to public-ecr for nth chart 2023-08-20 16:24:31 +00:00
Stefan Reimer 14e00f09f4 Migrate addons to new update-lib 2023-08-20 15:38:51 +00:00
Stefan Reimer 004c53d8a5 Bump Kubezero chart to incl. latest Argo 2023-08-18 16:40:37 +00:00
Stefan Reimer 1de82faf28 Merge pull request 'chore(deps): update helm release renovate to v36.51.3' (#70) from renovate/kubezero-ci-parentdir-dependencies into master
Reviewed-on: #70
2023-08-18 15:12:38 +00:00
Renovate Bot 967eaeb38d chore(deps): update helm release renovate to v36.51.3 2023-08-18 12:03:58 +00:00
Stefan Reimer 1b414be803 Merge pull request 'chore(deps): update kubezero-argocd-dependencies' (#69) from renovate/kubezero-argocd-parentdir-dependencies into master
Reviewed-on: #69
2023-08-16 18:15:53 +00:00
Renovate Bot 9be7819091 chore(deps): update kubezero-redis-dependencies 2023-08-16 18:07:04 +00:00
Renovate Bot 238a4becfa chore(deps): update kubezero-istio-dependencies 2023-08-16 18:06:53 +00:00
Renovate Bot 9ac9f51997 chore(deps): update kubezero-argocd-dependencies 2023-08-16 18:06:43 +00:00
Renovate Bot c2e70af850 chore(deps): update helm release gateway to v1.18.2 2023-08-16 18:06:08 +00:00
Renovate Bot 97c5e871be chore(deps): update helm release cert-manager to v1.12.3 2023-08-16 18:06:04 +00:00
Stefan Reimer 2abaad1904 Derp 2023-08-16 18:04:05 +00:00
Stefan Reimer bd54440210 Further tweak renovate 2023-08-16 18:00:50 +00:00
Stefan Reimer 4827fec336 Use KubeZero Chart version for admin image tag 2023-08-16 12:08:31 +01:00
Stefan Reimer febf0e4efb Merge commit '60aa548d2a2accaa5c36984c5b6ec4e62d7be66a' 2023-08-16 12:04:32 +01:00
Stefan Reimer 60aa548d2a Squashed '.ci/' changes from 38a9cda..227e39f
227e39f Allow custom GIT_TAG

git-subtree-dir: .ci
git-subtree-split: 227e39fd929165c37b33b3f891fa20bfc7ce22b1
2023-08-16 12:04:32 +01:00
Stefan Reimer 391bbfe6d5 Squashed '.ci/' changes from 79eebe4..38a9cda
38a9cda Debug CI pipeline
3efcc81 Debug CI pipeline
5023473 Make branch detection work for tagged commits
cdc32e0 Improve cleanup flow
8df60af Fix derp
748a4bd Migrate to :: to allow custom make steps, add generic stubs
955afa7 Apply pep8
5819ded Improve ECR public lifecycle handling via python script
5d4e4ad Make rm-remote-untagged less noisy
f00e541 Add cleanup step to remove untagged images by default
0821e91 Ensure tag names are valid for remote branches like PRs

git-subtree-dir: .ci
git-subtree-split: 38a9cda825c6f0de518782a9a7e98254d62c44ce
2023-08-16 11:50:28 +01:00
Stefan Reimer 57690b1f1e Merge pull request 'merge WIP v1.26 to start use Renovate' (#91) from v1.26 into master
Reviewed-on: #91
2023-08-16 10:40:16 +00:00
Stefan Reimer 1e8244ff8d Renovate remove PR limit 2023-08-16 10:28:50 +00:00
Stefan Reimer e0f9af604c Renovate ignore subcharts 2023-08-16 10:27:40 +00:00
Stefan Reimer 9b663e4f22 First trial of Renovate 2023-08-16 10:18:28 +00:00
Stefan Reimer 413d5be90d WIP: 1.26.7 2023-08-16 10:17:39 +00:00
Stefan Reimer b50a66ab79 Update ClamAV version 2023-08-16 10:15:32 +00:00
Stefan Reimer 11889208c7 Merge pull request 'Configure Renovate' (#53) from renovate/configure into master
Reviewed-on: #53
2023-08-16 10:14:16 +00:00
Renovate Bot 1477f77a6d chore(deps): add renovate.json 2023-08-07 13:39:42 +00:00
Stefan Reimer 0cb16de099 feature: latest RabbitMQ 2023-08-03 15:19:09 +00:00
Stefan Reimer a6fd7a7d04 feature: kubezero-ci drop GoCD, latest Gitea, add Renovate 2023-08-03 15:18:01 +00:00
Stefan Reimer 8f30aec867 Merge commit '317cab85225ba1930d2da61d860abaec9ba253ae' into v1.26 2023-07-07 12:16:59 +00:00
Stefan Reimer 317cab8522 Squashed '.ci/' changes from aea1ccc..79eebe4
79eebe4 add ARCH support for tests

git-subtree-dir: .ci
git-subtree-split: 79eebe4d3de843d921994db20e20dda801272934
2023-07-07 12:16:59 +00:00
Stefan Reimer 1e927c95fc feat: remove legacy taint master 2023-07-04 15:28:12 +00:00
Stefan Reimer 322f4341d9 v1.26.6-rc2 2023-07-04 15:12:16 +00:00
Stefan Reimer c979b7ad71 feat: First stab at 1.26.6 2023-06-23 17:00:26 +00:00
Stefan Reimer d5510984e4 feat: support custom EC2 filters for metrics 2023-06-23 16:48:42 +00:00
Stefan Reimer 870b851864 kubezero-ci version bumps 2023-06-23 16:48:00 +00:00
Stefan Reimer 5bcf945213 Merge pull request 'Release v1.25' (#52) from v1.25 into master
Reviewed-on: #52
2023-05-26 17:38:29 +00:00
Stefan Reimer 5036298e3b Fix indent 2023-05-26 17:38:13 +00:00
Stefan Reimer 8489d77a03 Add default values for custom data tree 2023-05-26 17:34:26 +00:00
Stefan Reimer 9121234d40 Minor doc updates 2023-05-26 17:03:22 +00:00
Stefan Reimer 36de253216 Some doc updates 2023-05-25 22:32:13 +01:00
Stefan Reimer d76cd279bf Various metrics improvements, v1.25.8-2 2023-05-25 20:44:02 +00:00
Stefan Reimer fdb3ad0ef2 Integrate metrics changes into 1.25 patch release 2023-05-17 19:05:14 +00:00
Stefan Reimer 35356b3412 Hotfix for the bitbucket issue 2023-05-17 08:42:44 +00:00
Stefan Reimer 1d97c3e338 Update bitbucket keys, version bumps and metrics tweaks 2023-05-17 08:34:12 +00:00
Stefan Reimer 7cf5be2a75 Latest metrics incl. support for cluster external node-exporter 2023-05-13 08:38:33 +00:00
Stefan Reimer 5008420349 Add some docs for verdaccio auth 2023-05-11 17:10:09 +01:00
Stefan Reimer 1455c8b800 Fix for snapshot controller, other tweaks 2023-05-04 17:00:09 +00:00
Stefan Reimer 5aaf6c7537 Squashed '.ci/' changes from a5875db..aea1ccc
aea1ccc Only add branch name to tags, if not part of actual tag

git-subtree-dir: .ci
git-subtree-split: aea1cccfff35de2ef2eca138379a599c8bde39e0
2023-05-04 09:12:43 +00:00
Stefan Reimer 10b3332633 Merge commit '5aaf6c7537b6e813aaeea51a9fe6aea4dc781094' into v1.25 2023-05-04 09:12:43 +00:00
Stefan Reimer dd650e775b Add CAPABILITIES for poddisruptionbudget/v1 2023-05-04 09:53:35 +01:00
Stefan Reimer 2e1eadcdf6 Add cluster backup after upgrade 2023-05-04 09:41:45 +01:00
Stefan Reimer 6145802c38 Improve etcd upgrade flow 2023-05-03 17:33:04 +00:00
Stefan Reimer 1633187ac5 Minor bugfixes 2023-05-03 13:26:54 +00:00
Stefan Reimer 4ef373436f Finally squash EBS test bug 2023-05-02 10:27:36 +00:00
Stefan Reimer be099ffcd3 Update helm docs 2023-05-02 10:15:47 +00:00
Stefan Reimer 7acc22bcc5 Some version bumps 2023-05-02 10:07:08 +00:00
Stefan Reimer 4e86623ffd Add metrics to network module 2023-05-02 10:07:00 +00:00
Stefan Reimer a748448c3e Fix storage tests, add taint handling to CA, first docs draft 2023-04-26 10:26:30 +00:00
Stefan Reimer 855bd9aece Squashed '.ci/' changes from 63421d1..a5875db
a5875db Make EXTRA_TAGS work again

git-subtree-dir: .ci
git-subtree-split: a5875db03eff65f381ae1abc44acd5fb34c92c06
2023-04-25 12:06:32 +00:00
Stefan Reimer 17b1e20cb6 Merge commit '855bd9aece5740b957efa385fadb99fb0a683402' into v1.25 2023-04-25 12:06:32 +00:00
Stefan Reimer d3e18df5f1 Minor bug fixes 2023-04-25 11:01:46 +00:00
Stefan Reimer 14b0e7cdeb Spec alpine image fully 2023-04-25 11:57:31 +01:00
Stefan Reimer f1ad018695 Squashed '.ci/' changes from 8fb40c7..63421d1
63421d1 fix: prevent branch_name equals tag
47140c2 feat: append branch into tags if not main
4b62ada chore: improve messaging
a49cc0c chore: improve messaging
194afb4 chore: get ci working again
8ec9769 chore: get ci working again
fef4968 fix: do NOT push PRs to registry, other fixes
50a6d67 feat: ensure ARCH is only set to defined values

git-subtree-dir: .ci
git-subtree-split: 63421d1fab0d2546de343697cbd1424914db6c31
2023-04-25 11:49:36 +01:00
Stefan Reimer 016054b97f Merge commit 'f1ad0186951bb334704484b6d4ef0138dc6c99f8' into v1.25 2023-04-25 11:49:36 +01:00
Stefan Reimer d44d612494 Minor SQL tweaks 2023-04-25 10:13:11 +00:00
Stefan Reimer ff07bf3af1 Bugfix for storage EBS version bump 2023-04-21 13:54:30 +00:00
Stefan Reimer 2a017e38f9 CI tooling version bumps 2023-04-17 14:31:04 +00:00
Stefan Reimer 7c09355bbd Logging version bump 2023-04-14 14:40:51 +00:00
Stefan Reimer 84345e4e53 Latest Prometheus stack 2023-04-14 10:44:57 +00:00
Stefan Reimer 3ba73ca850 Istio version bump 2023-04-13 11:55:48 +00:00
Stefan Reimer fc2866814d ArgoCD and cert-manager version bump 2023-04-13 11:21:44 +00:00
Stefan Reimer c1a8df235a Fixes, addons version bump 2023-04-12 16:13:01 +00:00
Stefan Reimer af5db75461 First draft of v1.25.8 2023-04-12 11:14:31 +00:00
Stefan Reimer f979c02fd7 Merge pull request 'v1.24' (#51) from v1.24 into master
Reviewed-on: #51
2023-01-22 16:28:45 +00:00
Stefan Reimer bbe16f7bc0 chore: merge 1.24 values to resolve conflicts 2023-01-22 16:28:23 +00:00
Stefan Reimer 75051fa7fc chore: some CI updates,etc 2023-01-22 16:24:58 +00:00
Stefan Reimer 8c2f3c4c73 chore: minor doc fixes 2023-01-15 14:33:49 +00:00
Stefan Reimer 8790101d32 Minor tweaks and doc updates 2023-01-15 14:31:17 +00:00
Stefan Reimer 99c1d54790 Latest RabbitMQ tweaks 2023-01-12 21:58:32 +00:00
Stefan Reimer 74a1515d3c V1.24 beta, metrics updates, minor tweaks and fixes 2023-01-11 12:08:18 +00:00
Stefan Reimer 46bb9382fd feat: CI module new Jenkins plugins, updates 2022-12-22 16:17:20 +00:00
Stefan Reimer d41e82ff08 fix: shorten kubezero-lib names, tweaks 2022-12-15 21:51:31 +00:00
Stefan Reimer 2b0597125f feat: make snapshotController optional 2022-12-15 14:27:57 +00:00
Stefan Reimer 141ee23642 feat: v1.24.9, istio 1.16 2022-12-13 12:13:33 +00:00
Stefan Reimer 21a81a4e97 chore: metrics cleanup 2022-12-06 20:49:54 +00:00
Stefan Reimer 7faf1cb487 feat: latest prometheus versions 2022-12-06 20:43:11 +00:00
Stefan Reimer c5a414b136 feat: latest aws-nth and auth, latest 1.24 upgrad flow 2022-12-06 15:56:25 +00:00
Stefan Reimer 6e9d3a2a4b fix: no more alpine images upstream for uptime-kuma 2022-12-06 15:44:19 +00:00
Stefan Reimer ac8d2634cb feat: latest ArgoCD, integrated optional image-updater 2022-12-06 15:43:21 +00:00
Stefan Reimer 16d402d912 feat: add option mqtt support, rabbitmq-operator trial 2022-12-06 15:42:18 +00:00
Stefan Reimer 5eb89ecc1a chore: misc version bumps 2022-12-06 15:41:17 +00:00
Stefan Reimer d2fe8b4bf9 feat: turn admin script into cluster ds 2022-12-05 16:12:13 +00:00
Stefan Reimer 731fd6674c feat: add emergency cgroup cleanup script 2022-12-05 15:29:06 +00:00
Stefan Reimer 7f8028a533 chore: Update roadmap 2022-11-24 21:24:20 +01:00
Stefan Reimer f11272fd40 chore: Update roadmap 2022-11-24 21:19:07 +01:00
Stefan Reimer 053c529f8d chore: Update roadmap 2022-11-24 21:17:45 +01:00
Stefan Reimer a34bf48fd7 feat: Argo-cd version bump to 2.5 incl. branding 2022-11-21 12:14:03 +01:00
Stefan Reimer cfb00af612 Squashed '.ci/' changes from 38cf7ab..8fb40c7
8fb40c7 fix: adjust trivy call to local podman
7378ea9 fix: fix trivy scan task to match new flow, add BRANCH env to Makefile

git-subtree-dir: .ci
git-subtree-split: 8fb40c7f153e86df15baf1477e29370e1693f4bd
2022-11-18 20:59:09 +01:00
Stefan Reimer 4186a4416d Merge commit 'cfb00af6127b7be9ef91e4fc214fbdbace1950c2' into v1.24 2022-11-18 20:59:09 +01:00
Stefan Reimer 09f30db097 Squashed '.ci/' changes from aece7fc..38cf7ab
38cf7ab fix: more podman/buildah cleanups

git-subtree-dir: .ci
git-subtree-split: 38cf7ab7d3c89456eef2713eaae50c1735499d97
2022-11-15 13:03:20 +01:00
Stefan Reimer d81a274182 Merge commit '09f30db0979eea012bb341bad19607ddb788ef2a' into v1.24 2022-11-15 13:03:20 +01:00
Stefan Reimer e9eeb8be8d feat: add delete_module to admin, various cleanups 2022-11-14 14:14:11 +01:00
Stefan Reimer 989846e69d Squashed '.ci/' changes from 80dabc2..aece7fc
aece7fc fix: Improve multi-arch manifest handling

git-subtree-dir: .ci
git-subtree-split: aece7fc1a4778a04f16313ae73c8f2b1e4c3a9fd
2022-11-14 14:12:04 +01:00
Stefan Reimer f8588d8938 Merge commit '989846e69d8d41dcaf4310a54e2ea44a7badb3a7' into v1.24 2022-11-14 14:12:04 +01:00
Stefan Reimer a1c6e400c6 feat: split Grafana logging dashboards into ES/Fluent and install accordingly 2022-11-14 14:11:46 +01:00
Stefan Reimer 39799bf13d feat: version bump kubezero-storage drivers 2022-11-14 13:15:15 +01:00
Stefan Reimer cccc327e30 feature: remove calico, re-enable multus, set cilium as default, addons version bumps 2022-11-11 15:59:18 +01:00
Stefan Reimer 66a4d1f851 fix: several upgrade flow fixes 2022-11-10 19:54:26 +01:00
Stefan Reimer 1171d13e86 feat: first try of 1.24 upgrade flow 2022-11-09 17:08:22 +01:00
Stefan Reimer 7e76c78e53 Fix: fix latest Istio chart version for 1.23 2022-11-09 12:54:06 +01:00
Stefan Reimer 6891ae41a7 feat: version bump for RabbitMQ, within the MQ module, secret handling changes 2022-11-09 11:40:46 +01:00
Stefan Reimer 5492e10a08 feat: uptime-kuma version bump 2022-11-08 21:13:22 +01:00
Stefan Reimer f04b7726bb fix: disable JVM metrics, prepare for metrics SPI 2022-11-03 15:02:21 +01:00
Stefan Reimer 4e533e5703 feat: add metrics support to keycloak, block access to /metrics from ingress 2022-11-03 14:41:46 +01:00
Stefan Reimer ae7962358b chore: Jenkins and plugins version bumps 2022-11-03 13:21:51 +01:00
Stefan Reimer 5762823c28 feat: version bump keycloak to 20.X, add PDB, support HA deployment 2022-11-03 13:20:50 +01:00
Stefan Reimer 9ece3ca388 Feat: introduce Percona XtraDB operator to SQL module 2022-11-03 13:19:57 +01:00
Stefan Reimer 50764341e8 feat: v1.24 alpha 2022-10-27 14:27:42 +02:00
Stefan Reimer f5b546ffc5 ci: merge upstream ci-lib 2022-10-27 14:17:19 +02:00
Stefan Reimer fa5628ce62 Squashed '.ci/' changes from 01df38b..80dabc2
80dabc2 feat: remove implicit dependencies, add help target, cleanup
5554972 feat: multi-arch container images
da15d68 feat: handle nothing to cleanup gracefully

git-subtree-dir: .ci
git-subtree-split: 80dabc23d16b1ff834bb4cb2e396e841c648156e
2022-10-27 14:16:35 +02:00
Stefan Reimer 928c951101 fix: update Istio Grafana dashboards to match version 2022-10-14 15:05:32 +02:00
Stefan Reimer 820fc93aef fix: update EFS/EBS drivers, adjust tolerations to allow EFS on Nvidia, remove EBS nodes from controllers 2022-10-12 22:58:15 +02:00
Stefan Reimer 9bb33c0b14 docs: upgrade docs for 1.23 2022-09-29 20:54:55 +02:00
Stefan Reimer c79ba2a003 fix: final fixes for 1.23 2022-09-28 17:41:30 +02:00
Stefan Reimer f540172694 fix: rc round of fixed for the upgrade flow of 1.23 2022-09-23 12:54:27 +02:00
Stefan Reimer ab97b8f1ca fix: various upgrade fixes 2022-09-20 14:54:33 +02:00
Stefan Reimer 992cf72cd3 feat: fix for metrics, latest 1.23.11 upgrade 2022-09-20 13:50:19 +02:00
Stefan Reimer 991d9ad814 feat: another round of upgrade to 1.23.11 2022-09-16 11:21:38 +02:00
Stefan Reimer 616e9dbba0 Update mariadb-galera chart to follow upstream 2022-09-15 15:07:02 +02:00
Stefan Reimer cdcb979bbe feat: run upgrade with -e, set argo app yaml file as parameter 2022-09-15 14:58:08 +02:00
Stefan Reimer e3be5ec420 fix: another round of upgrade tweaks 2022-09-15 13:05:38 +02:00
Stefan Reimer 74a81a8964 fix: various upgrade fixes 2022-09-15 11:37:21 +02:00
Stefan Reimer 1cb75d60bb Squashed '.ci/' changes from ea9c914..01df38b
01df38b feat: move ecr-login into its own task

git-subtree-dir: .ci
git-subtree-split: 01df38b42484ca4cfd0bc2cc0a927e2c2f322a91
2022-09-15 11:34:27 +02:00
Stefan Reimer 5a14fd92b2 Merge commit '1cb75d60bb0fcff217cf88387c5c2ba2d514efa1' 2022-09-15 11:34:27 +02:00
Stefan Reimer 6276a3fcca Squashed '.ci/' changes from a47929d..ea9c914
ea9c914 chore: test mermaid
19a782e chore: test mermaid

git-subtree-dir: .ci
git-subtree-split: ea9c914626963302cac64f7bd6567abf284eb585
2022-09-15 11:30:15 +02:00
Stefan Reimer 10ca4b3b8c Merge commit '6276a3fccaf274ad18190c20dddb644244520d52' 2022-09-15 11:30:15 +02:00
Stefan Reimer a49b139e54 feat: latest 1.23 cluster upgrade config 2022-09-14 19:08:14 +02:00
Stefan Reimer 9f389d5c13 feat: bootstrap / upgrade reorg as part of 1.23 2022-09-11 13:54:56 +02:00
Stefan Reimer 7727729038 feat: version bump of kubezero-auth / keycloak 2022-09-02 11:46:53 +02:00
Stefan Reimer dcc584f737 feat: first alpha release of v1.23.10 2022-08-24 17:13:39 +02:00
Stefan Reimer 25f9ae6106 feat: use worker registries.conf for jenkins-agent-pod 2022-08-23 16:32:33 +02:00
Stefan Reimer de9f9eb095 feat: add secrets support to Jenkins 2022-08-10 14:01:26 +00:00
Stefan Reimer 06cd876034 feat: new wrapper chart for manticoresearch 2022-08-10 14:01:00 +00:00
Stefan Reimer b5103f3cc3 feat: latest jenkins and trivy 2022-08-05 23:06:34 +00:00
Stefan Reimer 6f45f38fc3 doc: adjust dates 2022-08-01 05:24:07 +00:00
Stefan Reimer 449f892062 chore: test mermaid theme 2022-08-01 04:50:34 +00:00
Stefan Reimer 98bed863f8 doc: replace table with mermaid gantt 2022-07-31 23:45:27 +00:00
Stefan Reimer 3e4119de23 feat: CI version bump, some 1.23 cleanups, basic Cilium config 2022-07-31 22:52:00 +00:00
Stefan Reimer dfbb4d48e6 feat: new network module incl. optional cilium as second CNI support, multus update and fixes 2022-07-14 14:59:06 +00:00
Stefan Reimer 915857dbbc feat: First stab at 1.23 2022-07-12 15:00:37 +00:00
Stefan Reimer 8844ff02f0 Squashed '.ci/' changes from 49ea8c8..a47929d
a47929d feat: switch to latest trivy cli syntax
cb5faca feat: add create-repo task to ease bootstrapping new project

git-subtree-dir: .ci
git-subtree-split: a47929da1c1cb926f444766d592e24910122fc0e
2022-07-12 13:13:20 +00:00
Stefan Reimer 1380df8bdf Merge commit '8844ff02f072b123396b733366ad48c90fa0df6f' 2022-07-12 13:13:20 +00:00
Stefan Reimer bc17b9bd65 feat: use latest podman-jenkins build agent 2022-07-11 13:48:51 +00:00
Stefan Reimer f9268fe385 fix: ensure perms for dynamic Jenkins volumes 2022-07-08 10:53:08 +02:00
Stefan Reimer 9d661c28e0 feat: Version bump Jenkins in the CI module 2022-07-08 09:46:40 +02:00
Stefan Reimer 10f9fde087 feat: 1.22.8-10 and other modules latest vesions 2022-06-21 20:35:35 +02:00
Stefan Reimer 70c094e55f fix: also bump istio image tag 2022-06-10 11:12:06 +02:00
Stefan Reimer 614513938b chore: Istio version bump for CVEs 2022-06-10 11:06:50 +02:00
Stefan Reimer d55d61d55a feat: Major Redis version bumps 2022-06-03 13:11:11 +02:00
Stefan Reimer 2f64e816d0 feat: nth version bump 2022-06-01 10:11:31 +02:00
Stefan Reimer 3f2464393a doc: Update Upgrade README 2022-05-19 22:39:17 +02:00
Stefan Reimer 08a8d141a6 feat: aws-node-termination-handler version bump 2022-05-19 00:27:10 +02:00
Stefan Reimer 39615fb350 docs: fix ArgoCD version 2022-05-17 15:53:31 +02:00
Stefan Reimer 78b0c8c9a5 docs: remove fixed known issues 2022-05-17 15:51:20 +02:00
Stefan Reimer 735af8188a fix: use latest minor version Kubezero image 2022-05-16 17:59:18 +02:00
Stefan Reimer f28370845d feat: tag kubezero image with major version, minor tweaks 2022-05-16 17:55:49 +02:00
Stefan Reimer ed7d37e290 Squashed '.ci/' changes from dc2c208..49ea8c8
49ea8c8 feat: Add support for custom EXTRA_TAGS

git-subtree-dir: .ci
git-subtree-split: 49ea8c8779fc5939576f80b9a2e25485b10d9cba
2022-05-16 17:54:59 +02:00
Stefan Reimer 26457377da Merge commit 'ed7d37e290a8fb7b2f37f059f69b0169f4f5df2d' 2022-05-16 17:54:59 +02:00
Stefan Reimer d7a4483b1c fix: use actual cluster version for backups 2022-05-16 13:23:01 +02:00
Stefan Reimer 3db728dd69 fix: Release 1.22.8, minor upgrade fixes, bump prometheus max. memory 2022-05-16 10:15:41 +02:00
Stefan Reimer 89c7f91f21 fix: allow cert-manager without metrics module 2022-05-16 10:14:32 +02:00
Stefan Reimer 6560f3be85 feat: aws-node-termination-handler version bump, use providerID rather than nodename 2022-05-16 10:14:02 +02:00
Stefan Reimer f2b6325781 Squashed '.ci/' changes from bc72735..dc2c208
dc2c208 fix: use absolute image URLs for some tasks

git-subtree-dir: .ci
git-subtree-split: dc2c2082c0a9d6f2f75190f5957c92c3c3507344
2022-05-13 14:10:47 +02:00
Stefan Reimer f8f4e43d1c Merge commit 'f2b63257815d3a2a9c450400f156d614f5259e40' 2022-05-13 14:10:47 +02:00
Stefan Reimer 118c8bb12e feat: Make Keycloak work behind Istio 2022-05-11 16:31:37 +02:00
Stefan Reimer 8a3761b813 feat: Auth Keycloak operator working 2022-05-11 13:08:56 +02:00
Stefan Reimer 68aa21da4d feat: new KubeZero Auth module, starting with Keycloak 2022-05-11 12:50:05 +02:00
Stefan Reimer 39cb28f4f6 ci: Integrate ci-tools-lib 2022-05-11 12:00:53 +02:00
763 changed files with 92772 additions and 53680 deletions

.ci/ecr_public_lifecycle.py (new executable file, 63 lines)

@@ -0,0 +1,63 @@
#!/usr/bin/env python3
import argparse
import boto3

parser = argparse.ArgumentParser(
    description='Implement basic public ECR lifecycle policy')
parser.add_argument('--repo', dest='repositoryName', action='store', required=True,
                    help='Name of the public ECR repository')
parser.add_argument('--keep', dest='keep', action='store', default=10, type=int,
                    help='number of tagged images to keep, default 10')
parser.add_argument('--dev', dest='delete_dev', action='store_true',
                    help='also delete in-development images only having tags like v0.1.1-commitNr-githash')

args = parser.parse_args()

client = boto3.client('ecr-public', region_name='us-east-1')

images = client.describe_images(repositoryName=args.repositoryName)[
    "imageDetails"]

untagged = []
kept = 0

# actual Image
# imageManifestMediaType: 'application/vnd.oci.image.manifest.v1+json'
# image Index
# imageManifestMediaType: 'application/vnd.oci.image.index.v1+json'

# Sort by date uploaded
for image in sorted(images, key=lambda d: d['imagePushedAt'], reverse=True):
    # Remove all untagged
    # if registry uses image index all actual images will be untagged anyways
    if 'imageTags' not in image:
        untagged.append({"imageDigest": image['imageDigest']})
        # print("Delete untagged image {}".format(image["imageDigest"]))
        continue

    # check for dev tags
    if args.delete_dev:
        _delete = True
        for tag in image["imageTags"]:
            # Look for at least one tag NOT being a SemVer dev tag
            if "-" not in tag:
                _delete = False
        if _delete:
            print("Deleting development image {}".format(image["imageTags"]))
            untagged.append({"imageDigest": image['imageDigest']})
            continue

    if kept < args.keep:
        kept = kept + 1
        print("Keeping tagged image {}".format(image["imageTags"]))
        continue
    else:
        print("Deleting tagged image {}".format(image["imageTags"]))
        untagged.append({"imageDigest": image['imageDigest']})

deleted_images = client.batch_delete_image(
    repositoryName=args.repositoryName, imageIds=untagged)

if deleted_images["imageIds"]:
    print("Deleted images: {}".format(deleted_images["imageIds"]))

View File

@ -1,61 +1,84 @@
# Parse version from latest git semver tag
GTAG=$(shell git describe --tags --match v*.*.* 2>/dev/null || git rev-parse --short HEAD 2>/dev/null)
TAG ?= $(shell echo $(GTAG) | awk -F '-' '{ print $$1 "-" $$2 }' | sed -e 's/-$$//')
GIT_TAG ?= $(shell git describe --tags --match v*.*.* 2>/dev/null || git rev-parse --short HEAD 2>/dev/null)
GIT_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
ifeq ($(TRIVY_REMOTE),)
TRIVY_OPTS := image
else
TRIVY_OPTS := client --remote ${TRIVY_REMOTE}
TAG ::= $(GIT_TAG)
# append branch name to tag if NOT main nor master
ifeq (,$(filter main master, $(GIT_BRANCH)))
# If branch is substring of tag, omit branch name
ifeq ($(findstring $(GIT_BRANCH), $(GIT_TAG)),)
# only append branch name if not equal tag
ifneq ($(GIT_TAG), $(GIT_BRANCH))
# Sanitize GIT_BRANCH to allowed Docker tag character set
TAG = $(GIT_TAG)-$(shell echo $$GIT_BRANCH | sed -e 's/[^a-zA-Z0-9]/-/g')
endif
endif
endif
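# Illustrative example: GIT_TAG=v1.2.3-8-gabcdef0 built from branch "feature/foo"
# yields TAG=v1.2.3-8-gabcdef0-feature-foo, while the same tag built from main or master stays v1.2.3-8-gabcdef0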
.PHONY: build test scan push clean
ARCH ::= amd64
ALL_ARCHS ::= amd64 arm64
_ARCH = $(or $(filter $(ARCH),$(ALL_ARCHS)),$(error $$ARCH [$(ARCH)] must be exactly one of "$(ALL_ARCHS)"))
all: test
ifneq ($(TRIVY_REMOTE),)
TRIVY_OPTS ::= --server $(TRIVY_REMOTE)
endif
.SILENT: ; # no need for @
.ONESHELL: ; # recipes execute in same shell
.NOTPARALLEL: ; # wait for this target to finish
.EXPORT_ALL_VARIABLES: ; # send all vars to shell
.PHONY: all # All targets are accessible for user
.DEFAULT: help # Running Make will run the help target
build:
@docker image exists $(IMAGE):$(TAG) || \
docker build --rm -t $(IMAGE):$(TAG) --build-arg TAG=$(TAG) .
help: ## Show Help
grep -E '^[a-zA-Z_-]+:.*?## .*$$' .ci/podman.mk | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
test: build rm-test-image
@test -f Dockerfile.test && \
{ docker build --rm -t $(IMAGE):$(TAG)-test --from=$(IMAGE):$(TAG) -f Dockerfile.test . && \
docker run --rm --env-host -t $(IMAGE):$(TAG)-test; } || \
echo "No Dockerfile.test found, skipping test"
prepare:: ## custom step on the build agent before building
scan: build
@echo "Scanning $(IMAGE):$(TAG) using Trivy"
@trivy $(TRIVY_OPTS) $(IMAGE):$(TAG)
fmt:: ## auto format source
push: build
@aws ecr-public get-login-password --region $(REGION) | docker login --username AWS --password-stdin $(REGISTRY)
@docker tag $(IMAGE):$(TAG) $(REGISTRY)/$(IMAGE):$(TAG) $(REGISTRY)/$(IMAGE):latest
docker push $(REGISTRY)/$(IMAGE):$(TAG)
docker push $(REGISTRY)/$(IMAGE):latest
lint:: ## Lint source
clean: rm-test-image rm-image
build: ## Build the app
buildah build --rm --layers -t $(IMAGE):$(TAG)-$(_ARCH) --build-arg TAG=$(TAG) --build-arg ARCH=$(_ARCH) --platform linux/$(_ARCH) .
# Delete all untagged images
.PHONY: rm-remote-untagged
rm-remote-untagged:
@echo "Removing all untagged images from $(IMAGE) in $(REGION)"
@aws ecr-public batch-delete-image --repository-name $(IMAGE) --region $(REGION) --image-ids $$(for image in $$(aws ecr-public describe-images --repository-name $(IMAGE) --region $(REGION) --output json | jq -r '.imageDetails[] | select(.imageTags | not ).imageDigest'); do echo -n "imageDigest=$$image "; done)
test:: ## test built artifacts
scan: ## Scan image using trivy
echo "Scanning $(IMAGE):$(TAG)-$(_ARCH) using Trivy $(TRIVY_REMOTE)"
trivy image $(TRIVY_OPTS) --quiet --no-progress localhost/$(IMAGE):$(TAG)-$(_ARCH)
# first tag and push all actual images
# create new manifest for each tag and add all available TAG-ARCH before pushing
push: ecr-login ## push images to registry
for t in $(TAG) latest $(EXTRA_TAGS); do \
echo "Tagging image with $(REGISTRY)/$(IMAGE):$${t}-$(ARCH)"
buildah tag $(IMAGE):$(TAG)-$(_ARCH) $(REGISTRY)/$(IMAGE):$${t}-$(_ARCH); \
buildah manifest rm $(IMAGE):$$t || true; \
buildah manifest create $(IMAGE):$$t; \
for a in $(ALL_ARCHS); do \
buildah manifest add $(IMAGE):$$t $(REGISTRY)/$(IMAGE):$(TAG)-$$a; \
done; \
echo "Pushing manifest $(IMAGE):$$t"
buildah manifest push --all $(IMAGE):$$t docker://$(REGISTRY)/$(IMAGE):$$t; \
done
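# Illustrative result for TAG=v1.2.3 and ALL_ARCHS="amd64 arm64": the pushed manifests
# $(REGISTRY)/$(IMAGE):v1.2.3 and :latest each reference the arch images :v1.2.3-amd64 and :v1.2.3-arm64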
ecr-login: ## log into AWS ECR public
aws ecr-public get-login-password --region $(REGION) | podman login --username AWS --password-stdin $(REGISTRY)
rm-remote-untagged: ## delete all remote untagged and in-dev images, keep 10 tagged
echo "Removing all untagged and in-dev images from $(IMAGE) in $(REGION)"
.ci/ecr_public_lifecycle.py --repo $(IMAGE) --dev
clean:: ## clean up source folder
.PHONY: rm-image
rm-image:
@test -z "$$(docker image ls -q $(IMAGE):$(TAG))" || docker image rm -f $(IMAGE):$(TAG) > /dev/null
@test -z "$$(docker image ls -q $(IMAGE):$(TAG))" || echo "Error: Removing image failed"
test -z "$$(podman image ls -q $(IMAGE):$(TAG)-$(_ARCH))" || podman image rm -f $(IMAGE):$(TAG)-$(_ARCH) > /dev/null
test -z "$$(podman image ls -q $(IMAGE):$(TAG)-$(_ARCH))" || echo "Error: Removing image failed"
# Ensure we run the tests by removing any previous runs
.PHONY: rm-test-image
rm-test-image:
@test -z "$$(docker image ls -q $(IMAGE):$(TAG)-test)" || docker image rm -f $(IMAGE):$(TAG)-test > /dev/null
@test -z "$$(docker image ls -q $(IMAGE):$(TAG)-test)" || echo "Error: Removing test image failed"
## some useful tasks during development
ci-pull-upstream: ## pull latest shared .ci subtree
git subtree pull --prefix .ci ssh://git@git.zero-downtime.net/ZeroDownTime/ci-tools-lib.git master --squash -m "Merge latest ci-tools-lib"
# Convenience task during dev of downstream projects
.PHONY: ci-pull-upstream
ci-pull-upstream:
git stash && git subtree pull --prefix .ci ssh://git@git.zero-downtime.net/ZeroDownTime/ci-tools-lib.git master --squash && git stash pop
.DEFAULT:
@echo "$@ not implemented. NOOP"
create-repo: ## create new AWS ECR public repository
aws ecr-public create-repository --repository-name $(IMAGE) --region $(REGION)

View File

@ -2,24 +2,33 @@
def call(Map config=[:]) {
pipeline {
options {
disableConcurrentBuilds()
}
agent {
node {
label 'podman-aws-trivy'
}
}
stages {
stage('Prepare') {
// get tags
steps {
sh 'git fetch -q --tags ${GIT_URL} +refs/heads/${BRANCH_NAME}:refs/remotes/origin/${BRANCH_NAME}'
sh 'mkdir -p reports'
// we set pull tags as project adv. options
// pull tags
//withCredentials([gitUsernamePassword(credentialsId: 'gitea-jenkins-user')]) {
// sh 'git fetch -q --tags ${GIT_URL}'
//}
// Optional project specific preparations
sh 'make prepare'
}
}
// Build using rootless podman
stage('Build') {
steps {
sh 'make build'
sh 'make build GIT_BRANCH=$GIT_BRANCH'
}
}
@ -31,13 +40,13 @@ def call(Map config=[:]) {
// Scan via trivy
stage('Scan') {
environment {
TRIVY_FORMAT = "template"
TRIVY_OUTPUT = "reports/trivy.html"
}
steps {
sh 'mkdir -p reports'
sh 'make scan'
// we always scan and create the full json report
sh 'TRIVY_FORMAT=json TRIVY_OUTPUT="reports/trivy.json" make scan'
// render custom full html report
sh 'trivy convert -f template -t @/home/jenkins/html.tpl -o reports/trivy.html reports/trivy.json'
publishHTML target: [
allowMissing: true,
alwaysLinkToLastBuild: true,
@ -47,25 +56,33 @@ def call(Map config=[:]) {
reportName: 'TrivyScan',
reportTitles: 'TrivyScan'
]
sh 'echo "Trivy report at: $BUILD_URL/TrivyScan"'
// Scan again and fail on CRITICAL vulns, if not overridden
// fail build if issues found above trivy threshold
script {
if (config.trivyFail == 'NONE') {
echo 'trivyFail == NONE, review Trivy report manually. Proceeding ...'
} else {
sh "TRIVY_EXIT_CODE=1 TRIVY_SEVERITY=${config.trivyFail} make scan"
if ( config.trivyFail ) {
sh "TRIVY_SEVERITY=${config.trivyFail} trivy convert --report summary --exit-code 1 reports/trivy.json"
}
}
}
}
// Push to ECR
// Push to container registry if not PR
// incl. basic registry retention removing any untagged images
stage('Push') {
when { not { changeRequest() } }
steps {
sh 'make push'
sh 'make rm-remote-untagged'
}
}
// generic clean
stage('cleanup') {
steps {
sh 'make clean'
}
}
}
}
}

View File

@ -1,28 +1,38 @@
ARG ALPINE_VERSION
ARG ALPINE_VERSION=3.19
FROM alpine:${ALPINE_VERSION}
FROM docker.io/alpine:${ALPINE_VERSION}
ARG ALPINE_VERSION
ARG KUBE_VERSION
ARG KUBE_VERSION=1.28.9
RUN cd /etc/apk/keys && \
wget "https://cdn.zero-downtime.net/alpine/stefan@zero-downtime.net-61bb6bfb.rsa.pub" && \
echo "@kubezero https://cdn.zero-downtime.net/alpine/v${ALPINE_VERSION}/kubezero" >> /etc/apk/repositories && \
echo "@testing http://dl-cdn.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories && \
echo "@edge-testing http://dl-cdn.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories && \
echo "@edge-community http://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories && \
apk upgrade -U -a --no-cache && \
apk --no-cache add \
jq \
yq \
diffutils \
bash \
python3 \
py3-yaml \
restic \
helm \
cri-tools@kubezero \
kubeadm@kubezero~=${KUBE_VERSION} \
kubectl@kubezero~=${KUBE_VERSION} \
etcd-ctl@testing \
restic@testing \
helm@testing
etcdhelper@kubezero \
etcd-ctl@edge-testing
RUN helm repo add kubezero https://cdn.zero-downtime.net/charts && \
mkdir -p /var/lib/kubezero
ADD admin/kubezero.sh admin/libhelm.sh admin/migrate_argo_values.py /usr/bin
ADD admin/libhelm.sh /var/lib/kubezero
ADD releases/v${KUBE_VERSION}/kubezero.sh /usr/bin
ADD charts/kubeadm /charts/kubeadm
ADD charts/kubezero-addons /charts/kubezero-addons
ADD charts/kubezero-network /charts/kubezero-network
ADD charts/kubezero /charts/kubezero
ENTRYPOINT ["kubezero.sh"]

View File

@ -1,28 +1,14 @@
VERSION ?= 1.22.8
ALPINE_VERSION ?= 3.15
REGISTRY := public.ecr.aws/zero-downtime
REPOSITORY := kubezero-admin
TAG := $(REPOSITORY):v$(VERSION)
KUBE_VERSION := $(shell echo $(VERSION) | sed -e 's/\.[[:digit:]]*$$//')
IMAGE := kubezero-admin
REGION := us-east-1
.PHONY: build push clean scan
# Use KubeZero chart version rather than git tag for admin image
GIT_TAG = v$(shell yq .version < charts/kubezero/Chart.yaml)
all: build push
# Also tag as Kubernetes major version
EXTRA_TAGS = $(shell echo $(GIT_TAG) | awk -F '.' '{ print $$1 "." $$2 }')
build:
podman build --rm --build-arg KUBE_VERSION=$(KUBE_VERSION) --build-arg ALPINE_VERSION=$(ALPINE_VERSION) -t $(TAG) .
push:
aws ecr-public get-login-password --region us-east-1 | podman login --username AWS --password-stdin $(REGISTRY)
podman tag $(TAG) $(REGISTRY)/$(TAG)
podman push $(REGISTRY)/$(TAG)
clean:
podman image prune -f
scan:
podman system service&
sleep 5; trivy $(TAG)
include .ci/podman.mk
update-charts:
./scripts/update_helm.sh

View File

@ -4,12 +4,12 @@ KubeZero is a Kubernetes distribution providing an integrated container platform
# Design philosophy
- Cloud provider agnostic, bare-metal/self-hosted
- Focus on security and simplicity before feature bloat
- No vendor lock in, most components are optional and could be exchanged
- Organic Open Source / open and permissive licenses over closed-source solutions
- Focus on security and simplicity over feature creep
- No vendor lock in, most components are optional and could be easily changed as needed
- No premium services / subscriptions required
- Staying and contributing back to upstream projects as much as possible
- Staying up to date and contributing back to upstream projects, like alpine-cloud-images and others
- Cloud provider agnostic, bare-metal/self-hosted
- Organic Open Source / open and permissive licenses over closed-source solutions
- Corgi approved :dog:
@ -18,33 +18,46 @@ KubeZero is a Kubernetes distribution providing an integrated container platform
# Version / Support Matrix
KubeZero releases track the same *minor* version of Kubernetes.
Any 1.21.X-Y release of Kubezero supports any Kubernetes cluster 1.21.X.
KubeZero releases track the same *minor* version of Kubernetes.
Any 1.26.X-Y release of Kubezero supports any Kubernetes cluster 1.26.X.
KubeZero is distributed as a collection of versioned Helm charts, allowing custom upgrade schedules and module versions as needed.
| KubeZero Version | Kubernetes Version | EOL |
|------------------|---------------------|-------------|
| v1.23.X-Y | v1.23.X | Alpha |
| v1.22.8-Y | v1.22.8 | 30 Sep 2022 |
| v1.21.9-Y | v1.21.9 | 31 May 2022 |
| v1.20.8 | v1.20 | Feb 2022 |
| v1.19 | v1.19 | Jul 2021 |
| v1.18 | v1.18 | Apr 2021 |
```mermaid
%%{init: {'theme':'dark'}}%%
gantt
title KubeZero Support Timeline
dateFormat YYYY-MM-DD
section 1.27
beta :127b, 2023-09-01, 2023-09-30
release :after 127b, 2024-04-30
section 1.28
beta :128b, 2024-03-01, 2024-04-30
release :after 128b, 2024-08-31
section 1.29
beta :129b, 2024-07-01, 2024-08-30
release :after 129b, 2024-11-30
```
[Upstream release policy](https://kubernetes.io/releases/)
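As all modules are packaged as versioned Helm charts, available versions can be listed straight from the chart repository; a minimal sketch (repo URL as used elsewhere in this repo):

```bash
helm repo add kubezero https://cdn.zero-downtime.net/charts
helm search repo kubezero/kubezero --versions --devel
```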
# Components
## OS
- all nodes are based on Alpine V3.15
- 2 GB encrypted root filesystem
- no 3rd party dependencies at boot ( other than container registries )
- all compute nodes are running on Alpine V3.19
- 1 or 2 GB encrypted root file system
- no external dependencies at boot time, apart from container registries
- minimal attack surface
- extremely small memory footprint / overhead
- cri-o container runtime incl. AppArmor support
## Container runtime
- cri-o rather than Docker for improved security and performance
## GitOps
- cli / cmd line install
- optional full ArgoCD support and integration
## Featured workloads
- rootless CI/CD build platform to build containers as part of a CI pipeline, using podman / fuse device plugin support
- containerized AI models via integrated out of the box support for Nvidia GPU workers as well as AWS Neuron
## Control plane
- all Kubernetes components compiled against Alpine OS using `buildmode=pie`
@ -52,11 +65,6 @@ KubeZero is distributed as a collection of versioned Helm charts, allowing custo
- access to control plane from within the VPC only by default ( VPN access required for Admin tasks )
- controller nodes are used for various platform admin controllers / operators to reduce costs and noise on worker nodes
## GitOps
- cli / cmd line install
- optional full ArgoCD support and integration
- fuse device plugin support to build containers as part of a CI pipeline leveraging rootless podman build agents
## AWS integrations
- IAM roles for service accounts allowing each pod to assume individual IAM roles
- access to meta-data services is blocked for all workload containers on all nodes
@ -66,10 +74,8 @@ KubeZero is distributed as a collection of versioned Helm charts, allowing custo
- support for [Inf1 instances](https://aws.amazon.com/ec2/instance-types/inf1/) part of [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/).
## Network
- Cilium using Geneve encapsulation, incl. increased MTU allowing flexible / more containers per worker node compared to eg. AWS VPC CNI
- Multus support for multiple network interfaces per pod, eg. additional AWS CNI
- Calico using VxLAN incl. increased MTU
allows flexible / more containers per worker node compared to eg. AWS VPC CNI
- isolates container traffic from VPC by using VxLAN overlay
- no restrictions on IP space / sizing from the underlying VPC architecture
## Storage
@ -79,16 +85,16 @@ allows flexible / more containers per worker node compared to eg. AWS VPC CNI
- CSI Snapshot controller and Gemini snapshot groups and retention
## Ingress
- AWS Network Loadbalancer and Istio Ingress controllers
- AWS Network Loadbalancer and Istio Ingress controllers
- no additional costs per exposed service
- real client source IP available to workloads via HTTP header and access logs
- ACME SSL Certificate handling via cert-manager incl. renewal etc.
- support for TCP services
- optional rate limiting support
- optional rate limiting support
- optional full service mesh
## Metrics
- Prometheus support for all components
- Prometheus support for all components, incl. out of cluster EC2 instances (node_exporter)
- automated service discovery allowing instant access to common workload metrics
- pre-configured Grafana dashboards and alerts
- Alertmanager events via SNSAlertHub to Slack, Google, Matrix, etc.

12
admin/README.md Normal file
View File

@ -0,0 +1,12 @@
# Cluster upgrade flow
## During 1.23 upgrade
- create a new kubezero-values CM if it does not exist yet, by merging parts of the legacy /etc/kubernetes/kubeadm-values.yaml with any existing values from the kubezero ArgoCD app
# General flow
- No ArgoCD -> use the kubezero-values CM directly (see the sketch below)
- ArgoCD -> update kubezero-values CM with current values from ArgoCD app values
- Apply any upgrades / migrations
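A minimal sketch for reading the current values in either case, mirroring `get_kubezero_values` from `libhelm.sh`:

```bash
# with ArgoCD
kubectl get application kubezero -n argocd -o yaml | yq .spec.source.helm.values
# without ArgoCD
kubectl get cm -n kube-system kubezero-values -o yaml | yq '.data."values.yaml"'
```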

84
admin/dev_apply.sh Executable file
View File

@ -0,0 +1,84 @@
#!/bin/bash
#set -eEx
#set -o pipefail
set -x
ARTIFACTS=($(echo $1 | tr "," "\n"))
ACTION=${2:-apply}
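# Usage (illustrative): ./dev_apply.sh cert-manager,istio apply
#                       ./dev_apply.sh all delete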
#VERSION="latest"
KUBE_VERSION="$(kubectl version -o json | jq -r .serverVersion.gitVersion)"
WORKDIR=$(mktemp -p /tmp -d kubezero.XXX)
[ -z "$DEBUG" ] && trap 'rm -rf $WORKDIR' ERR EXIT
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# shellcheck disable=SC1091
. "$SCRIPT_DIR"/libhelm.sh
CHARTS="$(dirname $SCRIPT_DIR)/charts"
### Various hooks for modules
################
# cert-manager #
################
function cert-manager-post() {
# If any error occurs, wait for initial webhook deployment and try again
# see: https://cert-manager.io/docs/concepts/webhook/#webhook-connection-problems-shortly-after-cert-manager-installation
if [ $rc -ne 0 ]; then
wait_for "kubectl get deployment -n $namespace cert-manager-webhook"
kubectl rollout status deployment -n $namespace cert-manager-webhook
wait_for 'kubectl get validatingwebhookconfigurations -o yaml | grep "caBundle: LS0"'
apply
fi
wait_for "kubectl get ClusterIssuer -n $namespace kubezero-local-ca-issuer"
kubectl wait --timeout=180s --for=condition=Ready -n $namespace ClusterIssuer/kubezero-local-ca-issuer
}
###########
# ArgoCD #
###########
function argocd-pre() {
for f in $CLUSTER/secrets/argocd-*.yaml; do
kubectl apply -f $f
done
}
###########
# Metrics #
###########
# Cleanup patch jobs from previous runs , ArgoCD does this automatically
function metrics-pre() {
kubectl delete jobs --field-selector status.successful=1 -n monitoring
}
### Main
get_kubezero_values
# Always use embedded kubezero chart
helm template $CHARTS/kubezero -f $WORKDIR/kubezero-values.yaml --kube-version $KUBE_VERSION --version ~$KUBE_VERSION --devel --output-dir $WORKDIR
# Resolve all enabled artifacts
if [ ${ARTIFACTS[0]} == "all" ]; then
ARTIFACTS=($(ls $WORKDIR/kubezero/templates | sed -e 's/.yaml//g'))
fi
if [ $ACTION == "apply" -o $ACTION == "crds" ]; then
for t in ${ARTIFACTS[@]}; do
_helm $ACTION $t || true
done
# Delete in reverse order, continue even if errors
elif [ $ACTION == "delete" ]; then
set +e
for (( idx=${#ARTIFACTS[@]}-1 ; idx>=0 ; idx-- )) ; do
_helm delete ${ARTIFACTS[idx]} || true
done
fi

View File

@ -1,15 +1,19 @@
#!/bin/sh
#!/bin/bash -e
if [ -n "$DEBUG" ]; then
set -x
LOG="--v=5"
fi
# include helm lib
. /var/lib/kubezero/libhelm.sh
# Export vars to ease use in debug_shell etc
export WORKDIR=/tmp/kubezero
export HOSTFS=/host
export CHARTS=/charts
export VERSION=v1.22
export KUBE_VERSION=$(kubeadm version -o json | jq -r .clientVersion.gitVersion)
export KUBE_VERSION_MINOR=$(echo $KUBE_VERSION | sed -e 's/\.[0-9]*$//')
export KUBECONFIG="${HOSTFS}/root/.kube/config"
@ -43,7 +47,7 @@ _kubeadm() {
# Render cluster config
render_kubeadm() {
helm template $CHARTS/kubeadm --output-dir ${WORKDIR} -f ${HOSTFS}/etc/kubernetes/kubezero.yaml
helm template $CHARTS/kubeadm --output-dir ${WORKDIR} -f ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml
# Assemble kubeadm config
cat /dev/null > ${HOSTFS}/etc/kubernetes/kubeadm.yaml
@ -52,7 +56,7 @@ render_kubeadm() {
cat ${WORKDIR}/kubeadm/templates/${f}Configuration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm.yaml
done
# hack to "uncloack" the json patches after they go processed by helm
# "uncloak" the json patches after they got processed by helm
for s in apiserver controller-manager scheduler; do
yq eval '.json' ${WORKDIR}/kubeadm/templates/patches/kube-${s}1\+json.yaml > /tmp/_tmp.yaml && \
mv /tmp/_tmp.yaml ${WORKDIR}/kubeadm/templates/patches/kube-${s}1\+json.yaml
@ -61,15 +65,12 @@ render_kubeadm() {
parse_kubezero() {
[ -f ${HOSTFS}/etc/kubernetes/kubezero.yaml ] || { echo "Missing /etc/kubernetes/kubezero.yaml!"; return 1; }
export KUBE_VERSION=$(kubeadm version -o yaml | yq eval .clientVersion.gitVersion -)
export CLUSTERNAME=$(yq eval '.clusterName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
export ETCD_NODENAME=$(yq eval '.etcd.nodeName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
export NODENAME=$(yq eval '.nodeName' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
export AWS_IAM_AUTH=$(yq eval '.api.awsIamAuth.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
export AWS_NTH=$(yq eval '.addons.aws-node-termination-handler.enabled' ${HOSTFS}/etc/kubernetes/kubezero.yaml)
export CLUSTERNAME=$(yq eval '.global.clusterName // .clusterName' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
export HIGHAVAILABLE=$(yq eval '.global.highAvailable // .highAvailable // "false"' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
export ETCD_NODENAME=$(yq eval '.etcd.nodeName' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
export NODENAME=$(yq eval '.nodeName' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
export PROVIDER_ID=$(yq eval '.providerID // ""' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
export AWS_IAM_AUTH=$(yq eval '.api.awsIamAuth.enabled // "false"' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
# From here on bail out, allows debug_shell even in error cases
set -e
@ -114,17 +115,30 @@ post_kubeadm() {
}
# First parse kubezero.yaml
parse_kubezero
kubeadm_upgrade() {
# pre upgrade hook
if [ "$1" == 'upgrade' ]; then
### PRE 1.22 specific
#####################
# get current values, argo app over cm
get_kubezero_values
kubectl delete runtimeclass crio
# tumble new config through migrate.py
migrate_argo_values.py < "$WORKDIR"/kubezero-values.yaml > "$WORKDIR"/new-kubezero-values.yaml
#####################
# Update kubezero-values CM
kubectl get cm -n kube-system kubezero-values -o=yaml | \
yq e '.data."values.yaml" |= load_str("/tmp/kubezero/new-kubezero-values.yaml")' | \
kubectl replace -f -
# update argo app
kubectl get application kubezero -n argocd -o yaml | \
kubezero_chart_version=$(yq .version /charts/kubezero/Chart.yaml) \
yq '.spec.source.helm.values |= load_str("/tmp/kubezero/new-kubezero-values.yaml") | .spec.source.targetRevision = strenv(kubezero_chart_version)' | \
kubectl apply -f -
# finally remove annotation to allow argo to sync again
kubectl patch app kubezero -n argocd --type json -p='[{"op": "remove", "path": "/metadata/annotations"}]'
# Local node upgrade
render_kubeadm
pre_kubeadm
@ -139,36 +153,13 @@ if [ "$1" == 'upgrade' ]; then
cp ${HOSTFS}/etc/kubernetes/admin.conf ${HOSTFS}/root/.kube/config
fi
### POST 1.22 specific
# Remove all remaining kiam
helm repo add uswitch https://uswitch.github.io/kiam-helm-charts/charts/
helm repo update
helm template uswitch/kiam --name-template kiam --set server.deployment.enabled=true --set server.prometheus.servicemonitor.enabled=true --set agent.prometheus.servicemonitor.enabled=true | kubectl delete --namespace kube-system -f - || true
######################
# network
yq eval '.network // ""' ${HOSTFS}/etc/kubernetes/kubezero.yaml > _values.yaml
helm template $CHARTS/kubezero-network --namespace kube-system --include-crds --name-template network \
-f _values.yaml --kube-version $KUBE_VERSION | kubectl apply --namespace kube-system -f - $LOG
# addons
yq eval '.addons // ""' ${HOSTFS}/etc/kubernetes/kubezero.yaml > _values.yaml
helm template $CHARTS/kubezero-addons --namespace kube-system --include-crds --name-template addons \
-f _values.yaml --kube-version $KUBE_VERSION | kubectl apply --namespace kube-system -f - $LOG
######################
# Could be removed with 1.23 as we now have persistent etcd
# Execute cluster backup to allow new controllers to join
kubectl create job backup-cluster-now --from=cronjob/kubezero-backup -n kube-system
# That might take a while as the backup pod needs the CNIs to come online etc.
retry 10 30 40 kubectl wait --for=condition=complete job/backup-cluster-now -n kube-system && kubectl delete job backup-cluster-now -n kube-system
# post upgrade hook
[ -f /var/lib/kubezero/post-upgrade.sh ] && . /var/lib/kubezero/post-upgrade.sh
# Cleanup after kubeadm on the host
rm -rf ${HOSTFS}/etc/kubernetes/tmp
echo "Successfully upgraded cluster."
echo "Successfully upgraded kubeadm control plane."
# TODO
# Send Notification currently done via CloudBender -> SNS -> Slack
@ -176,25 +167,21 @@ if [ "$1" == 'upgrade' ]; then
# Removed:
# - update oidc do we need that ?
}
elif [[ "$1" == 'node-upgrade' ]]; then
echo "Starting node upgrade ..."
echo "All done."
elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
control_plane_node() {
CMD=$1
render_kubeadm
# Esnure clean slate if bootstrap, restore PKI otherwise
if [[ "$1" =~ "^(bootstrap)$" ]]; then
# Ensure clean slate if bootstrap, restore PKI otherwise
if [[ "$CMD" =~ ^(bootstrap)$ ]]; then
rm -rf ${HOSTFS}/var/lib/etcd/member
else
# Todo: 1.23
# Workaround for 1.22 as the final backup is still tagged with the previous verion from the cronjob
#retry 10 60 30 restic restore latest --no-lock -t / --tag $VERSION
retry 10 60 30 restic restore latest --no-lock -t /
# restore latest backup
retry 10 60 30 restic restore latest --no-lock -t / # --tag $KUBE_VERSION_MINOR
# Make last etcd snapshot available
cp ${WORKDIR}/etcd_snapshot ${HOSTFS}/etc/kubernetes
@ -205,9 +192,8 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
# Always use kubeadm kubectl config to never run into chicken egg with custom auth hooks
cp ${WORKDIR}/admin.conf ${HOSTFS}/root/.kube/config
# etcd needs to resync during join
if [[ "$1" =~ "^(restore)$" ]]; then
# Only restore etcd data set if none exists already
# Only restore etcd data during "restore" and none exists already
if [[ "$CMD" =~ ^(restore)$ ]]; then
if [ ! -d ${HOSTFS}/var/lib/etcd/member ]; then
etcdctl snapshot restore ${HOSTFS}/etc/kubernetes/etcd_snapshot \
--name $ETCD_NODENAME \
@ -234,7 +220,12 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
_kubeadm init phase preflight
_kubeadm init phase kubeconfig all
if [[ "$1" =~ "^(join)$" ]]; then
if [[ "$CMD" =~ ^(join)$ ]]; then
# Delete any former self in case forseti did not delete yet
kubectl delete node ${NODENAME} --wait=true || true
# Wait for all pods to be deleted, otherwise we end up with stale pods, eg. kube-proxy
kubectl delete pods -n kube-system --field-selector spec.nodeName=${NODENAME}
# get current running etcd pods for etcdctl commands
while true; do
etcd_endpoints=$(kubectl get pods -n kube-system -l component=etcd -o yaml | \
@ -243,39 +234,22 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
sleep 3
done
# if we are NOT member already, flush etcd to be able to join
# see if we are a former member and remove our former self if so
MY_ID=$(etcdctl member list --endpoints=$etcd_endpoints | grep $ETCD_NODENAME | awk '{print $1}' | sed -e 's/,$//')
[ -n "$MY_ID" ] && retry 12 5 5 etcdctl member remove $MY_ID --endpoints=$etcd_endpoints
# Failsafe / etcd on ephemeral storage: we were a member but our dataset is missing
# -> remove former self so we can re-join
if [ -n "$MY_ID" -a ! -d ${HOSTFS}/var/lib/etcd/member ]; then
# Remove former self first
[ -n "$MY_ID" ] && retry 12 5 5 etcdctl member remove $MY_ID --endpoints=$etcd_endpoints
MY_ID=""
fi
# flush etcd data directory as joining with previous storage seems flaky, especially during etcd version upgrades
rm -rf ${HOSTFS}/var/lib/etcd/member
# Announce new etcd member and capture ETCD_INITIAL_CLUSTER, retry needed in case another node joining causes temp quorum loss
ETCD_ENVS=$(retry 12 5 5 etcdctl member add $ETCD_NODENAME --peer-urls="https://${ETCD_NODENAME}:2380" --endpoints=$etcd_endpoints)
export $(echo "$ETCD_ENVS" | grep ETCD_INITIAL_CLUSTER= | sed -e 's/"//g')
if [ -z "$MY_ID" ]; then
# flush etcd data directory from restore
rm -rf ${HOSTFS}/var/lib/etcd/member
# Announce new etcd member and capture ETCD_INITIAL_CLUSTER, retry needed in case another node joining causes temp quorum loss
ETCD_ENVS=$(retry 12 5 5 etcdctl member add $ETCD_NODENAME --peer-urls="https://${ETCD_NODENAME}:2380" --endpoints=$etcd_endpoints)
export $(echo "$ETCD_ENVS" | grep ETCD_INITIAL_CLUSTER= | sed -e 's/"//g')
else
# build initial_cluster string from running cluster
_cluster=$(etcdctl member list --endpoints=$etcd_endpoints -w json | jq -r '.members[] | "\(.name)=\(.peerURLs[]),"')
export ETCD_INITIAL_CLUSTER=$(echo ${_cluster%%,} | sed -e 's/ //g')
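# e.g. ETCD_INITIAL_CLUSTER=etcd-main-0=https://etcd-main-0:2380,etcd-main-1=https://etcd-main-1:2380 (node names illustrative)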
fi
# Patch kubezero.yaml and re-render to get etcd manifest patched
# Patch kubeadm-values.yaml and re-render to get etcd manifest patched
yq eval -i '.etcd.state = "existing"
| .etcd.initialCluster = strenv(ETCD_INITIAL_CLUSTER)
' ${HOSTFS}/etc/kubernetes/kubezero.yaml
' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml
render_kubeadm
# Delete any former self in case forseti did not delete yet
kubectl delete node ${NODENAME} --wait=true || true
fi
# Generate our custom etcd yaml
@ -290,7 +264,19 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
echo "Waiting for Kubernetes API to be online ..."
retry 0 5 30 kubectl cluster-info --request-timeout 3 >/dev/null
if [[ ! "$1" =~ "^(join)$" ]]; then
# Update providerID as underlying VM changed during restore
if [[ "$CMD" =~ ^(restore)$ ]]; then
if [ -n "$PROVIDER_ID" ]; then
etcdhelper \
-cacert ${HOSTFS}/etc/kubernetes/pki/etcd/ca.crt \
-cert ${HOSTFS}/etc/kubernetes/pki/etcd/server.crt \
-key ${HOSTFS}/etc/kubernetes/pki/etcd/server.key \
-endpoint https://${ETCD_NODENAME}:2379 \
change-provider-id ${NODENAME} $PROVIDER_ID
fi
fi
if [[ "$CMD" =~ ^(bootstrap|restore)$ ]]; then
_kubeadm init phase upload-config all
_kubeadm init phase upload-certs --skip-certificate-key-print
@ -301,7 +287,7 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
_kubeadm init phase mark-control-plane
_kubeadm init phase kubelet-finalize all
if [[ ! "$1" =~ "^(join)$" ]]; then
if [[ "$CMD" =~ ^(bootstrap|restore)$ ]]; then
_kubeadm init phase addon all
fi
@ -316,28 +302,61 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
yq eval -M ".clusters[0].cluster.certificate-authority-data = \"$(cat ${HOSTFS}/etc/kubernetes/pki/ca.crt | base64 -w0)\"" ${WORKDIR}/kubeadm/templates/admin-aws-iam.yaml > ${HOSTFS}/etc/kubernetes/admin-aws-iam.yaml
fi
# install / update network and addons
if [[ "$1" =~ "^(bootstrap|join)$" ]]; then
# network
yq eval '.network // ""' ${HOSTFS}/etc/kubernetes/kubezero.yaml > _values.yaml
helm template $CHARTS/kubezero-network --namespace kube-system --include-crds --name-template network \
-f _values.yaml --kube-version $KUBE_VERSION | kubectl apply --namespace kube-system -f - $LOG
# addons
yq eval '.addons // ""' ${HOSTFS}/etc/kubernetes/kubezero.yaml > _values.yaml
helm template $CHARTS/kubezero-addons --namespace kube-system --include-crds --name-template addons \
-f _values.yaml --kube-version $KUBE_VERSION | kubectl apply --namespace kube-system -f - $LOG
fi
post_kubeadm
echo "${1} cluster $CLUSTERNAME successfull."
}
# Since 1.21 we only need to backup etcd + /etc/kubernetes/pki !
elif [ "$1" == 'backup' ]; then
apply_module() {
MODULES=$1
get_kubezero_values
# Always use embedded kubezero chart
helm template $CHARTS/kubezero -f $WORKDIR/kubezero-values.yaml --version ~$KUBE_VERSION --devel --output-dir $WORKDIR
# CRDs first
for t in $MODULES; do
_helm crds $t
done
for t in $MODULES; do
_helm apply $t
done
echo "Applied KubeZero modules: $MODULES"
}
delete_module() {
MODULES=$1
get_kubezero_values
# Always use embedded kubezero chart
helm template $CHARTS/kubezero -f $WORKDIR/kubezero-values.yaml --version ~$KUBE_VERSION --devel --output-dir $WORKDIR
for t in $MODULES; do
_helm delete $t
done
echo "Deleted KubeZero modules: $MODULES. Potential CRDs must be removed manually."
}
# backup etcd + /etc/kubernetes/pki
backup() {
# Display all ENVs, careful this exposes the password !
[ -n "$DEBUG" ] && env
restic snapshots || restic init || exit 1
CV=$(kubectl version -o json | jq .serverVersion.minor -r)
let PCV=$CV-1
CLUSTER_VERSION="v1.$CV"
PREVIOUS_VERSION="v1.$PCV"
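# e.g. serverVersion.minor "28" -> CLUSTER_VERSION=v1.28, PREVIOUS_VERSION=v1.27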
etcdctl --endpoints=https://${ETCD_NODENAME}:2379 snapshot save ${WORKDIR}/etcd_snapshot
# pki & cluster-admin access
@ -345,29 +364,43 @@ elif [ "$1" == 'backup' ]; then
cp -r ${HOSTFS}/etc/kubernetes/admin.conf ${WORKDIR}
# Backup via restic
restic snapshots || restic init
restic backup ${WORKDIR} -H $CLUSTERNAME --tag $VERSION
restic backup ${WORKDIR} -H $CLUSTERNAME --tag $CLUSTER_VERSION
echo "Backup complete."
# Remove backups from previous versions
restic forget --keep-tag $VERSION --prune
# Remove backups from pre-previous versions
restic forget --keep-tag $CLUSTER_VERSION --keep-tag $PREVIOUS_VERSION --prune
# Regular retention
restic forget --keep-hourly 24 --keep-daily ${RESTIC_RETENTION:-7} --prune
# Defrag etcd backend
etcdctl --endpoints=https://${ETCD_NODENAME}:2379 defrag
}
elif [ "$1" == 'debug_shell' ]; then
debug_shell() {
echo "Entering debug shell"
printf "For manual etcdctl commands use:\n # export ETCDCTL_ENDPOINTS=$ETCD_NODENAME:2379\n"
/bin/sh
/bin/bash
}
else
echo "Unknown command!"
exit 1
fi
# First parse kubeadm-values.yaml
parse_kubezero
# Execute tasks
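# e.g. "kubezero.sh bootstrap" or "kubezero.sh apply_network apply_addons backup" (illustrative task lists)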
for t in $@; do
case "$t" in
kubeadm_upgrade) kubeadm_upgrade;;
bootstrap) control_plane_node bootstrap;;
join) control_plane_node join;;
restore) control_plane_node restore;;
apply_*) apply_module "${t##apply_}";;
delete_*) delete_module "${t##delete_}";;
backup) backup;;
debug_shell) debug_shell;;
*) echo "Unknown command: '$t'";;
esac
done

309
admin/libhelm.sh Normal file
View File

@ -0,0 +1,309 @@
#!/bin/bash
# Simulate well-known CRDs being available
API_VERSIONS="-a monitoring.coreos.com/v1 -a snapshot.storage.k8s.io/v1 -a policy/v1/PodDisruptionBudget"
#VERSION="latest"
VERSION="v1.28"
# Waits for max 300s and retries
function wait_for() {
local TRIES=0
while true; do
eval " $@" && break
[ $TRIES -eq 100 ] && return 1
let TRIES=$TRIES+1
sleep 3
done
}
function chart_location() {
echo "$1 --repo https://cdn.zero-downtime.net/charts"
}
function argo_used() {
kubectl get application kubezero -n argocd >/dev/null && rc=$? || rc=$?
return $rc
}
# get kubezero-values from ArgoCD if available or use in-cluster CM without Argo
function get_kubezero_values() {
argo_used && \
{ kubectl get application kubezero -n argocd -o yaml | yq .spec.source.helm.values > ${WORKDIR}/kubezero-values.yaml; } || \
{ kubectl get configmap -n kube-system kubezero-values -o yaml | yq '.data."values.yaml"' > ${WORKDIR}/kubezero-values.yaml ;}
}
function disable_argo() {
cat > _argoapp_patch.yaml <<EOF
spec:
syncWindows:
- kind: deny
schedule: '0 * * * *'
duration: 24h
namespaces:
- '*'
EOF
kubectl patch appproject kubezero -n argocd --patch-file _argoapp_patch.yaml --type=merge && rm _argoapp_patch.yaml
echo "Enabled service window for ArgoCD project kubezero"
}
function enable_argo() {
kubectl patch appproject kubezero -n argocd --type json -p='[{"op": "remove", "path": "/spec/syncWindows"}]' || true
echo "Removed service window for ArgoCD project kubezero"
}
function cntFailedPods() {
NS=$1
NR=$(kubectl get pods -n $NS --field-selector="status.phase!=Succeeded,status.phase!=Running" -o custom-columns="POD:metadata.name" -o json | jq '.items | length')
echo $NR
}
function waitSystemPodsRunning() {
while true; do
[ "$(cntFailedPods kube-system)" -eq 0 ] && break
sleep 3
done
}
function argo_app_synced() {
APP=$1
# Ensure we are synced otherwise bail out
status=$(kubectl get application $APP -n argocd -o yaml | yq .status.sync.status)
if [ "$status" != "Synced" ]; then
echo "ArgoCD Application $APP not 'Synced'!"
return 1
fi
return 0
}
# make sure namespace exists prior to calling helm as the create-namespace option doesn't work
function create_ns() {
local namespace=$1
if [ "$namespace" != "kube-system" ]; then
kubectl get ns $namespace || kubectl create ns $namespace
fi
}
# delete non kube-system ns
function delete_ns() {
local namespace=$1
[ "$namespace" != "kube-system" ] && kubectl delete ns $namespace
}
# Extract crds via helm calls and apply delta=crds only
function _crds() {
helm template $(chart_location $chart) -n $namespace --name-template $module $targetRevision --skip-crds --set ${module}.installCRDs=false -f $WORKDIR/values.yaml $API_VERSIONS --kube-version $KUBE_VERSION > $WORKDIR/helm-no-crds.yaml
helm template $(chart_location $chart) -n $namespace --name-template $module $targetRevision --include-crds --set ${module}.installCRDs=true -f $WORKDIR/values.yaml $API_VERSIONS --kube-version $KUBE_VERSION > $WORKDIR/helm-crds.yaml
diff -e $WORKDIR/helm-no-crds.yaml $WORKDIR/helm-crds.yaml | head -n-1 | tail -n+2 > $WORKDIR/crds.yaml
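# diff -e emits an ed script; dropping its first (append command) and last (terminator) line keeps only the
# CRD documents, assuming they show up as one contiguous inserted block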
# Only apply if there are actually any crds
if [ -s $WORKDIR/crds.yaml ]; then
[ -n "$DEBUG" ] && cat $WORKDIR/crds.yaml
kubectl apply -f $WORKDIR/crds.yaml --server-side --force-conflicts
fi
}
# helm template | kubectl apply -f -
# confine to one namespace if possible
function render() {
helm template $(chart_location $chart) -n $namespace --name-template $module $targetRevision --skip-crds -f $WORKDIR/values.yaml $API_VERSIONS --kube-version $KUBE_VERSION $@ \
| python3 -c '
#!/usr/bin/python3
import yaml
import sys
for manifest in yaml.safe_load_all(sys.stdin):
if manifest:
if "metadata" in manifest and "namespace" not in manifest["metadata"]:
manifest["metadata"]["namespace"] = sys.argv[1]
print("---")
print(yaml.dump(manifest))' $namespace > $WORKDIR/helm.yaml
}
function _helm() {
local action=$1
local module=$2
# check if module is even enabled and return if not
[ ! -f $WORKDIR/kubezero/templates/${module}.yaml ] && { echo "Module $module disabled. No-op."; return 0; }
local chart="$(yq eval '.spec.source.chart' $WORKDIR/kubezero/templates/${module}.yaml)"
local namespace="$(yq eval '.spec.destination.namespace' $WORKDIR/kubezero/templates/${module}.yaml)"
targetRevision=""
_version="$(yq eval '.spec.source.targetRevision' $WORKDIR/kubezero/templates/${module}.yaml)"
[ -n "$_version" ] && targetRevision="--version $_version"
yq eval '.spec.source.helm.values' $WORKDIR/kubezero/templates/${module}.yaml > $WORKDIR/values.yaml
echo "using values to $action of module $module: "
cat $WORKDIR/values.yaml
if [ $action == "crds" ]; then
# Allow custom CRD handling
declare -F ${module}-crds && ${module}-crds || _crds
elif [ $action == "apply" ]; then
# namespace must exist prior to apply
create_ns $namespace
# Optional pre hook
declare -F ${module}-pre && ${module}-pre
render
kubectl $action -f $WORKDIR/helm.yaml --server-side --force-conflicts && rc=$? || rc=$?
# Try again without server-side, review with 1.26, required for cert-manager during 1.25
[ $rc -ne 0 ] && kubectl $action -f $WORKDIR/helm.yaml && rc=$? || rc=$?
# Optional post hook
declare -F ${module}-post && ${module}-post
elif [ $action == "delete" ]; then
render
kubectl $action -f $WORKDIR/helm.yaml && rc=$? || rc=$?
# Delete dedicated namespace if not kube-system
[ -n "$DELETE_NS" ] && delete_ns $namespace
fi
return 0
}
function all_nodes_upgrade() {
CMD="$1"
echo "Deploy all node upgrade daemonSet(busybox)"
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kubezero-all-nodes-upgrade
namespace: kube-system
labels:
app: kubezero-upgrade
spec:
selector:
matchLabels:
name: kubezero-all-nodes-upgrade
template:
metadata:
labels:
name: kubezero-all-nodes-upgrade
spec:
hostNetwork: true
hostIPC: true
hostPID: true
tolerations:
- operator: Exists
effect: NoSchedule
initContainers:
- name: node-upgrade
image: busybox
command: ["/bin/sh"]
args: ["-x", "-c", "$CMD" ]
volumeMounts:
- name: host
mountPath: /host
- name: hostproc
mountPath: /hostproc
securityContext:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
containers:
- name: node-upgrade-wait
image: busybox
command: ["sleep", "3600"]
volumes:
- name: host
hostPath:
path: /
type: Directory
- name: hostproc
hostPath:
path: /proc
type: Directory
EOF
kubectl rollout status daemonset -n kube-system kubezero-all-nodes-upgrade --timeout 300s
kubectl delete ds kubezero-all-nodes-upgrade -n kube-system
}
function control_plane_upgrade() {
TASKS="$1"
echo "Deploy cluster admin task: $TASKS"
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
name: kubezero-upgrade
namespace: kube-system
labels:
app: kubezero-upgrade
spec:
hostNetwork: true
hostIPC: true
hostPID: true
containers:
- name: kubezero-admin
image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
imagePullPolicy: Always
command: ["kubezero.sh"]
args: [$TASKS]
env:
- name: DEBUG
value: "$DEBUG"
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
volumeMounts:
- name: host
mountPath: /host
- name: workdir
mountPath: /tmp
securityContext:
capabilities:
add: ["SYS_CHROOT"]
volumes:
- name: host
hostPath:
path: /
type: Directory
- name: workdir
emptyDir: {}
nodeSelector:
node-role.kubernetes.io/control-plane: ""
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
restartPolicy: Never
EOF
kubectl wait pod kubezero-upgrade -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
kubectl logs kubezero-upgrade -n kube-system -f 2>/dev/null && break
sleep 3
done
kubectl delete pod kubezero-upgrade -n kube-system
}

88
admin/migrate_argo_values.py Executable file
View File

@ -0,0 +1,88 @@
#!/usr/bin/env python3
import sys
import argparse
import io
import yaml
def migrate(values):
"""Actual changes here"""
# argoCD moves to argo module
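# e.g. {"argocd": {"enabled": True, "server": {}}} becomes
#      {"argo": {"enabled": True, "argo-cd": {"enabled": True, "server": {}}}}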
try:
if values["argocd"]["enabled"]:
values["argo"] = { "enabled": True, "argo-cd": values["argocd"] }
values.pop("argocd")
except KeyError:
pass
return values
def deleteKey(values, key):
"""Delete key from dictionary if exists"""
try:
values.pop(key)
except KeyError:
pass
return values
class MyDumper(yaml.Dumper):
"""
Required to add additional indent for arrays to match yq behaviour to reduce noise in diffs
"""
def increase_indent(self, flow=False, indentless=False):
return super(MyDumper, self).increase_indent(flow, False)
def str_presenter(dumper, data):
if len(data.splitlines()) > 1: # check for multiline string
return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
return dumper.represent_scalar("tag:yaml.org,2002:str", data)
def rec_sort(d):
if isinstance(d, dict):
res = dict()
# Always have "enabled" first if present
if "enabled" in d.keys():
res["enabled"] = rec_sort(d["enabled"])
d.pop("enabled")
# next is "name" if present
if "name" in d.keys():
res["name"] = rec_sort(d["name"])
d.pop("name")
for k in sorted(d.keys()):
res[k] = rec_sort(d[k])
return res
if isinstance(d, list):
for idx, elem in enumerate(d):
d[idx] = rec_sort(elem)
return d
yaml.add_representer(str, str_presenter)
# to use with safe_dump:
yaml.representer.SafeRepresenter.add_representer(str, str_presenter)
# Read values
values = yaml.safe_load(sys.stdin)
# Output new values
buffer = io.StringIO()
yaml.dump(
rec_sort(migrate(values)),
sys.stdout,
default_flow_style=False,
indent=2,
sort_keys=False,
Dumper=MyDumper,
)

65
admin/upgrade_cluster.sh Executable file
View File

@ -0,0 +1,65 @@
#!/bin/bash
set -eE
set -o pipefail
ARGO_APP=${1:-/tmp/new-kubezero-argoapp.yaml}
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# shellcheck disable=SC1091
[ -n "$DEBUG" ] && set -x
. "$SCRIPT_DIR"/libhelm.sh
echo "Checking that all pods in kube-system are running ..."
waitSystemPodsRunning
argo_used && disable_argo
#all_nodes_upgrade ""
control_plane_upgrade kubeadm_upgrade
#echo "Adjust kubezero values as needed:"
# shellcheck disable=SC2015
#argo_used && kubectl edit app kubezero -n argocd || kubectl edit cm kubezero-values -n kube-system
### v1.28
# - remove old argocd app, all resources will be taken over by argo.argo-cd
argo_used && rc=$? || rc=$?
if [ $rc -eq 0 ]; then
kubectl patch app argocd -n argocd \
--type json \
--patch='[ { "op": "remove", "path": "/metadata/finalizers" } ]' && \
kubectl delete app argocd -n argocd || true
# remove legacy argocd app resources, but NOT kubezero-git-sync nor the appproject
kubectl api-resources --verbs=list --namespaced -o name | grep -ve 'app.*argoproj' | xargs -n 1 kubectl delete --ignore-not-found -l argocd.argoproj.io/instance=argocd -n argocd
fi
# upgrade modules
control_plane_upgrade "apply_network, apply_addons, apply_storage, apply_operators"
echo "Checking that all pods in kube-system are running ..."
waitSystemPodsRunning
echo "Applying remaining KubeZero modules..."
control_plane_upgrade "apply_cert-manager, apply_istio, apply_istio-ingress, apply_istio-private-ingress, apply_logging, apply_metrics, apply_telemetry, apply_argo"
# Trigger backup of upgraded cluster state
kubectl create job --from=cronjob/kubezero-backup kubezero-backup-$VERSION -n kube-system
while true; do
kubectl wait --for=condition=complete job/kubezero-backup-$VERSION -n kube-system 2>/dev/null && kubectl delete job kubezero-backup-$VERSION -n kube-system && break
sleep 1
done
# Final step is to commit the new argocd kubezero app
kubectl get app kubezero -n argocd -o yaml | yq 'del(.status) | del(.metadata) | del(.operation) | .metadata.name="kubezero" | .metadata.namespace="argocd"' | yq 'sort_keys(..) | .spec.source.helm.values |= (from_yaml | to_yaml)' > $ARGO_APP
echo "Please commit $ARGO_APP as the updated kubezero/application.yaml for your cluster."
echo "Then head over to ArgoCD for this cluster and sync all KubeZero modules to apply remaining upgrades."
echo "<Return> to continue and re-enable ArgoCD:"
read -r
argo_used && enable_argo

View File

@ -1,18 +1,19 @@
apiVersion: v2
name: clamav
description: Chart for deploying a ClamavD on kubernetes as statfulSet
description: Chart for deploying a ClamAVd on Kubernetes as a StatefulSet
type: application
version: 0.1.1
appVersion: 0.104.0
version: "0.3.1"
appVersion: "1.2.1"
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
- kubezero
- clamav
maintainers:
- name: Quarky9
- name: Stefan Reimer
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: ">= 0.1.4"
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
kubeVersion: ">= 1.18.0"
kubeVersion: ">= 1.26.0"

View File

@ -1,8 +1,8 @@
# clamav
![Version: 0.1.1](https://img.shields.io/badge/Version-0.1.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 0.104.0](https://img.shields.io/badge/AppVersion-0.104.0-informational?style=flat-square)
![Version: 0.3.1](https://img.shields.io/badge/Version-0.3.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 1.2.1](https://img.shields.io/badge/AppVersion-1.2.1-informational?style=flat-square)
Chart for deploying a ClamavD on kubernetes as statfulSet
Chart for deploying a ClamAVd on Kubernetes as a StatefulSet
**Homepage:** <https://kubezero.com>
@ -10,33 +10,32 @@ Chart for deploying a ClamavD on kubernetes as statfulSet
| Name | Email | Url |
| ---- | ------ | --- |
| Quarky9 | | |
| Stefan Reimer | <stefan@zero-downtime.net> | |
## Requirements
Kubernetes: `>= 1.18.0`
Kubernetes: `>= 1.26.0`
| Repository | Name | Version |
|------------|------|---------|
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.4 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| clamav.freshclam.mirrors | string | `"database.clamav.net"` | A list of clamav mirrors to be used by the clamav service |
| clamav.image | string | `"clamav/clamav"` | The clamav docker image |
| clamav.limits.connectionQueueLength | int | `100` | Maximum length the queue of pending connections may grow to |
| clamav.limits.fileSize | int | `20` | The largest file size scanable by clamav, in MB |
| clamav.limits.maxThreads | int | `4` | Maximum number of threads running at the same time. |
| clamav.limits.scanSize | int | `100` | The largest scan size permitted in clamav, in MB |
| clamav.limits.sendBufTimeout | int | `500` | |
| clamav.replicaCount | int | `1` | |
| clamav.resources | object | `{"requests":{"cpu":"300m","memory":"1300M"}}` | The resource requests and limits for the clamav service |
| clamav.version | string | `"unstable"` | The clamav docker image version - defaults to .Chart.appVersion |
| freshclam.mirrors | string | `"database.clamav.net"` | A list of clamav mirrors to be used by the clamav service |
| fullnameOverride | string | `""` | override the full name of the clamav chart |
| image | object | `{"repository":"clamav/clamav","type":"base"}` | The clamav docker image |
| limits.connectionQueueLength | int | `100` | Maximum length the queue of pending connections may grow to |
| limits.fileSize | int | `25` | The largest file size scannable by clamav, in MB |
| limits.maxThreads | int | `4` | Maximum number of threads running at the same time. |
| limits.scanSize | int | `100` | The largest scan size permitted in clamav, in MB |
| limits.sendBufTimeout | int | `500` | |
| nameOverride | string | `""` | override the name of the clamav chart |
| replicaCount | int | `1` | |
| resources | object | `{"requests":{"cpu":"300m","memory":"2000M"}}` | The resource requests and limits for the clamav service |
| service.port | int | `3310` | The port to be used by the clamav service |
----------------------------------------------
Autogenerated from chart metadata using [helm-docs v1.9.1](https://github.com/norwoodj/helm-docs/releases/v1.9.1)
Autogenerated from chart metadata using [helm-docs v1.11.0](https://github.com/norwoodj/helm-docs/releases/v1.11.0)

View File

@ -1,7 +0,0 @@
#!/bin/bash
release=clamav
namespace=clamav
helm template . --namespace $namespace --name-template $release > clamav.yaml
kubectl apply --namespace $namespace -f clamav.yaml

View File

@ -10,7 +10,7 @@ data:
LogTime yes
LogClean yes
LogSyslog no
LogVerbose no
LogVerbose yes
LogFileMaxSize 0
LogFile /dev/stdout
DatabaseDirectory /var/lib/clamav
@ -19,28 +19,28 @@ data:
User clamav
ExitOnOOM yes
Foreground yes
MaxScanSize {{.Values.clamav.limits.scanSize}}M
MaxFileSize {{.Values.clamav.limits.fileSize}}M
MaxScanSize {{.Values.limits.scanSize}}M
MaxFileSize {{.Values.limits.fileSize}}M
# Close the connection when the data size limit is exceeded.
# The value should match your MTA's limit for a maximum attachment size.
# Default: 25M
StreamMaxLength {{.Values.clamav.limits.scanSize}}M
StreamMaxLength {{.Values.limits.scanSize}}M
# Maximum length the queue of pending connections may grow to.
# Default: 200
MaxConnectionQueueLength {{.Values.clamav.limits.connectionQueueLength}}
MaxConnectionQueueLength {{.Values.limits.connectionQueueLength}}
# Maximum number of threads running at the same time.
# Default: 10
MaxThreads {{.Values.clamav.limits.maxThreads}}
MaxThreads {{.Values.limits.maxThreads}}
# This option specifies how long to wait (in milliseconds) if the send buffer
# is full.
# Keep this value low to prevent clamd hanging.
#
# Default: 500
SendBufTimeout {{.Values.clamav.limits.sendBufTimeout}}
SendBufTimeout {{.Values.limits.sendBufTimeout}}
freshclam.conf: |
LogTime yes
@ -49,4 +49,4 @@ data:
Checks 24
LogSyslog no
DatabaseOwner root
DatabaseMirror {{ .Values.clamav.freshclam.mirrors }}
DatabaseMirror {{ .Values.freshclam.mirrors }}

View File

@ -6,7 +6,7 @@ metadata:
labels:
{{- include "kubezero-lib.labels" . | nindent 4 }}
spec:
replicas: {{ .Values.clamav.replicaCount }}
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
{{- include "kubezero-lib.selectorLabels" . | nindent 6 }}
@ -20,7 +20,7 @@ spec:
spec:
containers:
- name: clamav
image: "{{ .Values.clamav.image }}:{{ default .Chart.AppVersion .Values.clamav.version }}_base"
image: "{{ .Values.image.repository }}:{{ default .Chart.AppVersion .Values.image.tag }}_{{ .Values.image.type }}"
ports:
- containerPort: 3310
name: clamav
@ -41,7 +41,7 @@ spec:
successThreshold: 1
timeoutSeconds: 3
resources:
{{- toYaml .Values.clamav.resources | nindent 10 }}
{{- toYaml .Values.resources | nindent 10 }}
volumeMounts:
- mountPath: /var/lib/clamav
name: signatures
@ -53,15 +53,15 @@ spec:
- name: config-volume
configMap:
name: {{ include "kubezero-lib.fullname" . }}
{{- with .Values.clamav.nodeSelector }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.clamav.affinity }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.clamav.tolerations }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
@ -70,7 +70,7 @@ spec:
name: signatures
spec:
accessModes: [ "ReadWriteOnce" ]
{{- with .Values.clamav.storageClassName }}
{{- with .Values.storageClassName }}
storageClassName: {{ . }}
{{- end }}
resources:

9
charts/clamav/update.sh Executable file
View File

@ -0,0 +1,9 @@
#!/bin/bash
set -ex
. ../../scripts/lib-update.sh
update_helm
update_docs

View File

@ -1,46 +1,41 @@
# Default values for clamav.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# nameOverride -- override the name of the clamav chart
nameOverride: ""
# fullnameOverride -- override the full name of the clamav chart
fullnameOverride: ""
# image -- The clamav docker image
image:
repository: clamav/clamav
# version: "latest"
type: base
replicaCount: 1
freshclam:
# freshclam.mirrors -- A list of clamav mirrors to be used by the clamav service
mirrors: database.clamav.net
limits:
# limits.fileSize -- The largest file size scannable by clamav, in MB
fileSize: 25
# limits.scanSize -- The largest scan size permitted in clamav, in MB
scanSize: 100
# limits.connectionQueueLength -- Maximum length the queue of pending connections may grow to
connectionQueueLength: 100
# limits.maxThreads -- Maximum number of threads running at the same time.
maxThreads: 4
# limits.sendBufTimeout -- This option specifies how long to wait (in milliseconds) if the send buffer is full, keep low to avoid clamd hanging
sendBufTimeout: 500
service:
# service.port -- The port to be used by the clamav service
port: 3310
clamav:
# clamav.image -- The clamav docker image
image: clamav/clamav
# clamav.version -- The clamav docker image version - defaults to .Chart.appVersion
version: "unstable"
replicaCount: 1
freshclam:
# clamav.freshclam.mirrors -- A list of clamav mirrors to be used by the clamav service
mirrors: database.clamav.net
limits:
# clamav.limits.fileSize -- The largest file size scanable by clamav, in MB
fileSize: 20
# clamav.limits.scanSize -- The largest scan size permitted in clamav, in MB
scanSize: 100
# clamav.limits.connectionQueueLength -- Maximum length the queue of pending connections may grow to
connectionQueueLength: 100
# clamav.limits.maxThreads --Maximum number of threads running at the same time.
maxThreads: 4
# clamav.sendBufTimeout -- This option specifies how long to wait (in milliseconds) if the send buffer is full, keep low to avoid clamd hanging
sendBufTimeout: 500
resources:
# clamav.resources -- The resource requests and limits for the clamav service
requests:
cpu: 300m
memory: 1300M
#limits:
# cpu: 1500m
# memory: 2000M
resources:
# resources -- The resource requests and limits for the clamav service
requests:
cpu: 300m
memory: 2000M
#limits:
# cpu: 2
# memory: 4000M

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubeadm
description: KubeZero Kubeadm cluster config
type: application
version: 1.22.8
version: 1.28.9
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -11,4 +11,4 @@ keywords:
maintainers:
- name: Stefan Reimer
email: stefan@zero-downtime.net
kubeVersion: ">= 1.20.0"
kubeVersion: ">= 1.26.0"

View File

@ -1,6 +1,6 @@
# kubeadm
![Version: 1.22.8](https://img.shields.io/badge/Version-1.22.8-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 1.25.8](https://img.shields.io/badge/Version-1.25.8-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Kubeadm cluster config
@ -14,18 +14,12 @@ KubeZero Kubeadm cluster config
## Requirements
Kubernetes: `>= 1.20.0`
Kubernetes: `>= 1.25.0`
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| addons.aws-node-termination-handler.enabled | bool | `false` | |
| addons.aws-node-termination-handler.queueURL | string | `""` | arn:aws:sqs:${REGION}:${AWS_ACCOUNT_ID}:${CLUSTERNAME}_Nth |
| addons.clusterBackup.enabled | bool | `false` | |
| addons.clusterBackup.passwordFile | string | `""` | /etc/cloudbender/clusterBackup.passphrase |
| addons.clusterBackup.repository | string | `""` | s3:https://s3.amazonaws.com/${CFN[ConfigBucket]}/k8s/${CLUSTERNAME}/clusterBackup |
| addons.external-dns.enabled | bool | `false` | |
| api.apiAudiences | string | `"istio-ca"` | |
| api.awsIamAuth.enabled | bool | `false` | |
| api.awsIamAuth.kubeAdminRole | string | `"arn:aws:iam::000000000000:role/KubernetesNode"` | |
@ -36,17 +30,13 @@ Kubernetes: `>= 1.20.0`
| api.listenPort | int | `6443` | |
| api.oidcEndpoint | string | `""` | s3://${CFN[ConfigBucket]}/k8s/$CLUSTERNAME |
| api.serviceAccountIssuer | string | `""` | https://s3.${REGION}.amazonaws.com/${CFN[ConfigBucket]}/k8s/$CLUSTERNAME |
| clusterName | string | `"pleasechangeme"` | |
| domain | string | `"changeme.org"` | |
| etcd.extraArgs | object | `{}` | |
| etcd.nodeName | string | `"etcd"` | |
| etcd.state | string | `"new"` | |
| highAvailable | bool | `false` | |
| global.clusterName | string | `"pleasechangeme"` | |
| global.highAvailable | bool | `false` | |
| listenAddress | string | `"0.0.0.0"` | Needs to be set to primary node IP |
| network.calico.enabled | bool | `false` | |
| network.cilium.enabled | bool | `false` | |
| network.multus.enabled | bool | `false` | |
| network.multus.tag | string | `"v3.8"` | |
| nodeName | string | `"kubezero-node"` | set to $HOSTNAME |
| protectKernelDefaults | bool | `false` | |
| systemd | bool | `false` | Set to false for openrc, eg. on Gentoo or Alpine |
@ -54,7 +44,7 @@ Kubernetes: `>= 1.20.0`
## Resources
- https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/troubleshooting-kubeadm/
- https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta2
- https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3
- https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3
- https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/kubelet/config/v1beta1/types.go
- https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/control-plane-flags/

View File

@ -18,7 +18,7 @@
## Resources
- https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/troubleshooting-kubeadm/
- https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta2
- https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3
- https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3
- https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/kubelet/config/v1beta1/types.go
- https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/control-plane-flags/

View File

@ -0,0 +1,159 @@
#!/bin/sh
function createMasterAuditPolicy() {
path="templates/apiserver/audit-policy.yaml"
known_apis='
- group: "" # core
- group: "admissionregistration.k8s.io"
- group: "apiextensions.k8s.io"
- group: "apiregistration.k8s.io"
- group: "apps"
- group: "authentication.k8s.io"
- group: "authorization.k8s.io"
- group: "autoscaling"
- group: "batch"
- group: "certificates.k8s.io"
- group: "extensions"
- group: "metrics.k8s.io"
- group: "networking.k8s.io"
- group: "node.k8s.io"
- group: "policy"
- group: "rbac.authorization.k8s.io"
- group: "scheduling.k8s.io"
- group: "storage.k8s.io"'
cat <<EOF >"${path}"
apiVersion: audit.k8s.io/v1
kind: Policy
rules:
# The following requests were manually identified as high-volume and low-risk,
# so drop them.
- level: None
users: ["system:kube-proxy"]
verbs: ["watch"]
resources:
- group: "" # core
resources: ["endpoints", "services", "services/status"]
- level: None
# Ingress controller reads 'configmaps/ingress-uid' through the unsecured port.
# TODO(#46983): Change this to the ingress controller service account.
users: ["system:unsecured"]
namespaces: ["kube-system"]
verbs: ["get"]
resources:
- group: "" # core
resources: ["configmaps"]
- level: None
users: ["kubelet"] # legacy kubelet identity
verbs: ["get"]
resources:
- group: "" # core
resources: ["nodes", "nodes/status"]
- level: None
userGroups: ["system:nodes"]
verbs: ["get"]
resources:
- group: "" # core
resources: ["nodes", "nodes/status"]
- level: None
users:
- system:kube-controller-manager
- system:cloud-controller-manager
- system:kube-scheduler
- system:serviceaccount:kube-system:endpoint-controller
verbs: ["get", "update"]
namespaces: ["kube-system"]
resources:
- group: "" # core
resources: ["endpoints"]
- level: None
users: ["system:apiserver"]
verbs: ["get"]
resources:
- group: "" # core
resources: ["namespaces", "namespaces/status", "namespaces/finalize"]
- level: None
users: ["cluster-autoscaler"]
verbs: ["get", "update"]
namespaces: ["kube-system"]
resources:
- group: "" # core
resources: ["configmaps", "endpoints"]
# Don't log HPA fetching metrics.
- level: None
users:
- system:kube-controller-manager
- system:cloud-controller-manager
verbs: ["get", "list"]
resources:
- group: "metrics.k8s.io"
# Don't log these read-only URLs.
- level: None
nonResourceURLs:
- /healthz*
- /version
- /swagger*
- /readyz
# Don't log events requests because of performance impact.
- level: None
resources:
- group: "" # core
resources: ["events"]
# node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes
- level: Request
users: ["kubelet", "system:node-problem-detector", "system:serviceaccount:kube-system:node-problem-detector"]
verbs: ["update","patch"]
resources:
- group: "" # core
resources: ["nodes/status", "pods/status"]
omitStages:
- "RequestReceived"
- level: Request
userGroups: ["system:nodes"]
verbs: ["update","patch"]
resources:
- group: "" # core
resources: ["nodes/status", "pods/status"]
omitStages:
- "RequestReceived"
# deletecollection calls can be large, don't log responses for expected namespace deletions
- level: Request
users: ["system:serviceaccount:kube-system:namespace-controller"]
verbs: ["deletecollection"]
omitStages:
- "RequestReceived"
# Secrets, ConfigMaps, TokenRequest and TokenReviews can contain sensitive & binary data,
# so only log at the Metadata level.
- level: Metadata
resources:
- group: "" # core
resources: ["secrets", "configmaps", "serviceaccounts/token"]
- group: authentication.k8s.io
resources: ["tokenreviews"]
omitStages:
- "RequestReceived"
# Get responses can be large; skip them.
- level: Request
verbs: ["get", "list", "watch"]
resources: ${known_apis}
omitStages:
- "RequestReceived"
# Default level for known APIs
- level: RequestResponse
resources: ${known_apis}
omitStages:
- "RequestReceived"
# Default level for all other requests.
- level: Metadata
omitStages:
- "RequestReceived"
EOF
}
createMasterAuditPolicy
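The generator simply expands the `known_apis` list into the chart's audit-policy template; a hedged usage sketch, assuming the script is saved as `create_audit_policy.sh` in the chart root (the real filename and location are not shown here):

```shell
# Hypothetical script name; it writes templates/apiserver/audit-policy.yaml
# relative to the current directory, so run it from the chart root.
./create_audit_policy.sh
# Quick sanity check that the generated policy parses and contains rules
# (yq is an assumption; any YAML parser works).
yq eval '.rules | length' templates/apiserver/audit-policy.yaml
```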

View File

@ -1,23 +1,25 @@
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: {{ .Chart.Version }}
clusterName: {{ .Values.clusterName }}
clusterName: {{ .Values.global.clusterName }}
featureGates:
EtcdLearnerMode: true # becomes beta in 1.29
# NonGracefulFailover: true
controlPlaneEndpoint: {{ .Values.api.endpoint }}
networking:
podSubnet: 10.244.0.0/16
etcd:
local:
# As 3.5 is not recommended stick with 3.4.13 till 1.23
imageTag: 3.4.13-0
# imageTag: 3.5.12-0
extraArgs:
### DNS discovery
#discovery-srv: {{ .Values.domain }}
#discovery-srv-name: {{ .Values.clusterName }}
#discovery-srv-name: {{ .Values.global.clusterName }}
advertise-client-urls: https://{{ .Values.etcd.nodeName }}:2379
initial-advertise-peer-urls: https://{{ .Values.etcd.nodeName }}:2380
initial-cluster: {{ include "kubeadm.etcd.initialCluster" .Values.etcd | quote }}
initial-cluster-state: {{ .Values.etcd.state }}
initial-cluster-token: etcd-{{ .Values.clusterName }}
initial-cluster-token: etcd-{{ .Values.global.clusterName }}
name: {{ .Values.etcd.nodeName }}
listen-peer-urls: https://{{ .Values.listenAddress }}:2380
listen-client-urls: https://{{ .Values.listenAddress }}:2379
@ -39,13 +41,13 @@ controllerManager:
extraArgs:
profiling: "false"
terminated-pod-gc-threshold: "300"
leader-elect: {{ .Values.highAvailable | quote }}
leader-elect: {{ .Values.global.highAvailable | quote }}
logging-format: json
feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }}
scheduler:
extraArgs:
profiling: "false"
leader-elect: {{ .Values.highAvailable | quote }}
leader-elect: {{ .Values.global.highAvailable | quote }}
logging-format: json
feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }}
apiServer:
@ -58,8 +60,11 @@ apiServer:
audit-policy-file: /etc/kubernetes/apiserver/audit-policy.yaml
audit-log-maxage: "7"
audit-log-maxsize: "100"
audit-log-maxbackup: "3"
audit-log-maxbackup: "1"
audit-log-compress: "true"
{{- if .Values.api.falco.enabled }}
audit-webhook-config-file: /etc/kubernetes/apiserver/audit-webhook.yaml
{{- end }}
tls-cipher-suites: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384"
admission-control-config-file: /etc/kubernetes/apiserver/admission-configuration.yaml
api-audiences: {{ .Values.api.apiAudiences }}
@ -69,12 +74,13 @@ apiServer:
{{- end }}
{{- if .Values.api.awsIamAuth.enabled }}
authentication-token-webhook-config-file: /etc/kubernetes/apiserver/aws-iam-authenticator.yaml
authentication-token-webhook-cache-ttl: 3600s
{{- end }}
feature-gates: {{ include "kubeadm.featuregates" ( dict "return" "csv" ) | trimSuffix "," | quote }}
enable-admission-plugins: DenyServiceExternalIPs,NodeRestriction,EventRateLimit
# {{- if .Values.highAvailable }}
# goaway-chance: ".001"
# {{- end }}
enable-admission-plugins: DenyServiceExternalIPs,NodeRestriction,EventRateLimit,ExtendedResourceToleration
{{- if .Values.global.highAvailable }}
goaway-chance: ".001"
{{- end }}
logging-format: json
{{- with .Values.api.extraArgs }}
{{- toYaml . | nindent 4 }}
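With the move to `.Values.global`, cluster-wide settings are set once and shared across templates; a hedged render sketch, assuming the chart path `charts/kubeadm` and the template filename `templates/ClusterConfiguration.yaml` (neither is shown above):

```shell
# Render only the ClusterConfiguration to inspect the generated kubeadm config.
helm template kubezero charts/kubeadm \
  --set global.clusterName=mycluster \
  --set global.highAvailable=true \
  --set api.endpoint=kube.mycluster.example.com:6443 \
  --set api.falco.enabled=false \
  --set etcd.nodeName=etcd0 \
  --show-only templates/ClusterConfiguration.yaml
```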

View File

@ -2,4 +2,6 @@ apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
# kube-proxy doesn't really support setting a dynamic bind-address via config; it will be replaced by cilium long-term anyway
metricsBindAddress: "0.0.0.0:10249"
mode: "ipvs"
mode: "iptables"
logging:
format: json
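The switch from ipvs back to iptables only takes effect once kube-proxy restarts with the regenerated config; a hedged check against the kubeadm-managed ConfigMap (the default name `kube-proxy` in `kube-system` is assumed):

```shell
# Inspect the live kube-proxy configuration for the proxier mode and metrics address.
kubectl -n kube-system get configmap kube-proxy -o yaml | grep -E 'mode:|metricsBindAddress:'
```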

View File

@ -4,9 +4,9 @@ kind: KubeletConfiguration
failSwapOn: false
cgroupDriver: cgroupfs
logging:
# We already have syslog-ng logging json
# format: json
format: json
hairpinMode: hairpin-veth
containerRuntimeEndpoint: "unix:///var/run/crio/crio.sock"
{{- if .Values.systemd }}
resolvConf: /run/systemd/resolve/resolv.conf
{{- end }}
@ -18,8 +18,8 @@ protectKernelDefaults: {{ .Values.protectKernelDefaults }}
tlsCipherSuites: [TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256]
featureGates:
{{- include "kubeadm.featuregates" ( dict "return" "map" ) | nindent 2 }}
# Minimal unit is 50m per pod
podsPerCore: 20
# Minimal unit is 40m per pod
podsPerCore: 25
# cpuCFSQuotaPeriod: 10ms
# Basic OS incl. crio
systemReserved:
@ -33,4 +33,5 @@ kubeReserved:
#evictionHard:
# memory.available: "484Mi"
imageGCLowThresholdPercent: 70
# kernelMemcgNotification: true
serializeImagePulls: false
maxParallelImagePulls: 4
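A hedged way to verify a node picked up the new `podsPerCore` and parallel image pull settings is the kubelet's `configz` proxy endpoint (the node name is a placeholder and `jq` is an assumption):

```shell
NODE=kubezero-node   # placeholder node name
kubectl get --raw "/api/v1/nodes/${NODE}/proxy/configz" \
  | jq '.kubeletconfig | {podsPerCore, serializeImagePulls, maxParallelImagePulls}'
```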

View File

@ -1,6 +1,11 @@
{{- /* Feature gates for all control plane components */ -}}
{{- /* Issues: MemoryQoS */ -}}
{{- /* v1.28: PodAndContainerStatsFromCRI still not working */ -}}
{{- /* v1.28: UnknownVersionInteroperabilityProxy requires StorageVersionAPI which is still alpha in 1.30 */ -}}
{{- /* v1.29: remove/beta SidecarContainers */ -}}
{{- /* v1.30: remove/beta KubeProxyDrainingTerminatingNodes */ -}}
{{- define "kubeadm.featuregates" }}
{{- $gates := list "CustomCPUCFSQuotaPeriod" "GenericEphemeralVolume" "KubeletCredentialProviders"}}
{{- $gates := list "CustomCPUCFSQuotaPeriod" "SidecarContainers" "KubeProxyDrainingTerminatingNodes" }}
{{- if eq .return "csv" }}
{{- range $key := $gates }}
{{- $key }}=true,

View File

@ -4,24 +4,24 @@ kind: Config
clusters:
- cluster:
server: https://{{ .Values.api.endpoint }}
name: {{ .Values.clusterName }}
name: {{ .Values.global.clusterName }}
contexts:
- context:
cluster: {{ .Values.clusterName }}
cluster: {{ .Values.global.clusterName }}
user: kubernetes-admin
name: kubernetes-admin@{{ .Values.clusterName }}
current-context: kubernetes-admin@{{ .Values.clusterName }}
name: kubernetes-admin@{{ .Values.global.clusterName }}
current-context: kubernetes-admin@{{ .Values.global.clusterName }}
preferences: {}
users:
- name: kubernetes-admin
user:
exec:
apiVersion: client.authentication.k8s.io/v1alpha1
apiVersion: client.authentication.k8s.io/v1beta1
command: aws-iam-authenticator
args:
- "token"
- "-i"
- "{{ .Values.clusterName }}"
- "{{ .Values.global.clusterName }}"
- "-r"
- "{{ .Values.api.awsIamAuth.kubeAdminRole }}"
{{- end }}
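The exec plugin above simply shells out to `aws-iam-authenticator`; the same token can be requested by hand to verify the role mapping, using the chart's placeholder defaults:

```shell
# Matches the args the kubeconfig passes to the exec plugin (placeholder values).
aws-iam-authenticator token \
  -i pleasechangeme \
  -r arn:aws:iam::000000000000:role/KubernetesNode
```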

View File

@ -0,0 +1,7 @@
# Don't Log anything, but audit policy enabled
apiVersion: audit.k8s.io/v1
kind: Policy
metadata:
name: kubezero-auditpolicy
rules:
- level: None

View File

@ -1,7 +1,164 @@
# Don't Log anything, but audit policy enabled
apiVersion: audit.k8s.io/v1
kind: Policy
metadata:
name: kubezero-auditpolicy
rules:
- level: None
# The following requests were manually identified as high-volume and low-risk,
# so drop them.
- level: None
users: ["system:kube-proxy"]
verbs: ["watch"]
resources:
- group: "" # core
resources: ["endpoints", "services", "services/status"]
- level: None
# Ingress controller reads 'configmaps/ingress-uid' through the unsecured port.
# TODO(#46983): Change this to the ingress controller service account.
users: ["system:unsecured"]
namespaces: ["kube-system"]
verbs: ["get"]
resources:
- group: "" # core
resources: ["configmaps"]
- level: None
users: ["kubelet"] # legacy kubelet identity
verbs: ["get"]
resources:
- group: "" # core
resources: ["nodes", "nodes/status"]
- level: None
userGroups: ["system:nodes"]
verbs: ["get"]
resources:
- group: "" # core
resources: ["nodes", "nodes/status"]
- level: None
users:
- system:kube-controller-manager
- system:cloud-controller-manager
- system:kube-scheduler
- system:serviceaccount:kube-system:endpoint-controller
verbs: ["get", "update"]
namespaces: ["kube-system"]
resources:
- group: "" # core
resources: ["endpoints"]
- level: None
users: ["system:apiserver"]
verbs: ["get"]
resources:
- group: "" # core
resources: ["namespaces", "namespaces/status", "namespaces/finalize"]
- level: None
users: ["cluster-autoscaler"]
verbs: ["get", "update"]
namespaces: ["kube-system"]
resources:
- group: "" # core
resources: ["configmaps", "endpoints"]
# Don't log HPA fetching metrics.
- level: None
users:
- system:kube-controller-manager
- system:cloud-controller-manager
verbs: ["get", "list"]
resources:
- group: "metrics.k8s.io"
# Don't log these read-only URLs.
- level: None
nonResourceURLs:
- /healthz*
- /version
- /swagger*
# Don't log events requests because of performance impact.
- level: None
resources:
- group: "" # core
resources: ["events"]
# node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes
- level: Request
users: ["kubelet", "system:node-problem-detector", "system:serviceaccount:kube-system:node-problem-detector"]
verbs: ["update","patch"]
resources:
- group: "" # core
resources: ["nodes/status", "pods/status"]
omitStages:
- "RequestReceived"
- level: Request
userGroups: ["system:nodes"]
verbs: ["update","patch"]
resources:
- group: "" # core
resources: ["nodes/status", "pods/status"]
omitStages:
- "RequestReceived"
# deletecollection calls can be large, don't log responses for expected namespace deletions
- level: Request
users: ["system:serviceaccount:kube-system:namespace-controller"]
verbs: ["deletecollection"]
omitStages:
- "RequestReceived"
# Secrets, ConfigMaps, TokenRequest and TokenReviews can contain sensitive & binary data,
# so only log at the Metadata level.
- level: Metadata
resources:
- group: "" # core
resources: ["secrets", "configmaps", "serviceaccounts/token"]
- group: authentication.k8s.io
resources: ["tokenreviews"]
omitStages:
- "RequestReceived"
# Get responses can be large; skip them.
- level: Request
verbs: ["get", "list", "watch"]
resources:
- group: "" # core
- group: "admissionregistration.k8s.io"
- group: "apiextensions.k8s.io"
- group: "apiregistration.k8s.io"
- group: "apps"
- group: "authentication.k8s.io"
- group: "authorization.k8s.io"
- group: "autoscaling"
- group: "batch"
- group: "certificates.k8s.io"
- group: "extensions"
- group: "metrics.k8s.io"
- group: "networking.k8s.io"
- group: "node.k8s.io"
- group: "policy"
- group: "rbac.authorization.k8s.io"
- group: "scheduling.k8s.io"
- group: "storage.k8s.io"
omitStages:
- "RequestReceived"
# Default level for known APIs
- level: RequestResponse
resources:
- group: "" # core
- group: "admissionregistration.k8s.io"
- group: "apiextensions.k8s.io"
- group: "apiregistration.k8s.io"
- group: "apps"
- group: "authentication.k8s.io"
- group: "authorization.k8s.io"
- group: "autoscaling"
- group: "batch"
- group: "certificates.k8s.io"
- group: "extensions"
- group: "metrics.k8s.io"
- group: "networking.k8s.io"
- group: "node.k8s.io"
- group: "policy"
- group: "rbac.authorization.k8s.io"
- group: "scheduling.k8s.io"
- group: "storage.k8s.io"
omitStages:
- "RequestReceived"
# Default level for all other requests.
- level: Metadata
omitStages:
- "RequestReceived"

View File

@ -0,0 +1,14 @@
apiVersion: v1
kind: Config
clusters:
- name: falco
cluster:
server: http://falco-k8saudit-webhook:9765/k8s-audit
contexts:
- context:
cluster: falco
user: ""
name: default-context
current-context: default-context
preferences: {}
users: []
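A hedged reachability probe for the falco k8saudit webhook this config forwards audit events to; run it in the namespace where the `falco-k8saudit-webhook` Service lives, which is not shown here:

```shell
# One-off curl pod posting an empty JSON body to the webhook endpoint.
kubectl run audit-probe --rm -i --restart=Never --image=curlimages/curl -- \
  curl -s -o /dev/null -w '%{http_code}\n' \
  -X POST http://falco-k8saudit-webhook:9765/k8s-audit \
  -H 'Content-Type: application/json' -d '{}'
```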

View File

@ -1,17 +0,0 @@
apiVersion: kubelet.config.k8s.io/v1alpha1
kind: CredentialProviderConfig
providers:
- name: amazon-ecr-credential-helper
matchImages:
- "*.dkr.ecr.*.amazonaws.com"
- "*.dkr.ecr.*.amazonaws.cn"
- "*.dkr.ecr-fips.*.amazonaws.com"
- "*.dkr.ecr.us-iso-east-1.c2s.ic.gov"
- "*.dkr.ecr.us-isob-east-1.sc2s.sgov.gov"
defaultCacheDuration: "12h"
apiVersion: credentialprovider.kubelet.k8s.io/v1alpha1
args:
- get
#env:
# - name: AWS_PROFILE
# value: example_profile

View File

@ -1,5 +1,5 @@
spec:
replicas: {{ ternary 3 1 .Values.highAvailable }}
replicas: {{ ternary 3 1 .Values.global.highAvailable }}
template:
spec:
containers:
@ -12,13 +12,3 @@ spec:
memory: 128Mi
nodeSelector:
node-role.kubernetes.io/control-plane: ""
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: k8s-app
operator: In
values:
- kube-dns
topologyKey: "kubernetes.io/hostname"

View File

@ -3,6 +3,6 @@ spec:
- name: etcd
resources:
requests:
cpu: 200m
memory: 192Mi
cpu: 50m
memory: 256Mi
#ephemeral-storage: 1Gi

View File

@ -1,7 +1,8 @@
spec:
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: kube-apiserver
resources:
requests:
cpu: 200m
memory: 1Gi
cpu: 250m
memory: 1268Mi

View File

@ -3,5 +3,5 @@ spec:
- name: kube-controller-manager
resources:
requests:
cpu: 100m
memory: 128Mi
cpu: 50m
memory: 192Mi

View File

@ -3,5 +3,5 @@ spec:
- name: kube-scheduler
resources:
requests:
cpu: 100m
memory: 64Mi
cpu: 50m
memory: 96Mi

View File

@ -1,6 +1,6 @@
{{- if .Values.api.awsIamAuth.enabled }}
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: aws-iam-authenticator
rules:
@ -51,8 +51,8 @@ metadata:
namespace: kube-system
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: aws-iam-authenticator
namespace: kube-system
@ -75,7 +75,7 @@ metadata:
k8s-app: aws-iam-authenticator
data:
config.yaml: |
clusterID: {{ .Values.clusterName }}
clusterID: {{ .Values.global.clusterName }}
---
apiVersion: apps/v1
@ -85,8 +85,6 @@ metadata:
name: aws-iam-authenticator
labels:
k8s-app: aws-iam-authenticator
annotations:
seccomp.security.alpha.kubernetes.io/pod: runtime/default
spec:
selector:
matchLabels:
@ -95,11 +93,15 @@ spec:
type: RollingUpdate
template:
metadata:
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ""
labels:
k8s-app: aws-iam-authenticator
spec:
securityContext:
seccompProfile:
type: RuntimeDefault
priorityClassName: system-cluster-critical
# use service account with access to
serviceAccountName: aws-iam-authenticator
@ -111,11 +113,11 @@ spec:
node-role.kubernetes.io/control-plane: ""
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
key: node-role.kubernetes.io/control-plane
containers:
- name: aws-iam-authenticator
image: public.ecr.aws/zero-downtime/aws-iam-authenticator:v0.5.7
image: public.ecr.aws/zero-downtime/aws-iam-authenticator:v0.6.14
args:
- server
- --backend-mode=CRD,MountedFile
@ -131,7 +133,7 @@ spec:
resources:
requests:
memory: 20Mi
memory: 32Mi
cpu: 10m
limits:
memory: 64Mi

View File

@ -1,4 +1,3 @@
{{- if index .Values "addons" "external-dns" "enabled" }}
apiVersion: v1
kind: Service
metadata:
@ -13,4 +12,3 @@ spec:
selector:
component: kube-apiserver
tier: control-plane
{{- end }}

View File

@ -1,4 +1,7 @@
clusterName: pleasechangeme
global:
clusterName: pleasechangeme
highAvailable: false
# -- set to $HOSTNAME
nodeName: kubezero-node
domain: changeme.org
@ -22,32 +25,8 @@ api:
workerNodeRole: "arn:aws:iam::000000000000:role/KubernetesNode"
kubeAdminRole: "arn:aws:iam::000000000000:role/KubernetesNode"
addons:
aws-node-termination-handler:
falco:
enabled: false
# -- arn:aws:sqs:${REGION}:${AWS_ACCOUNT_ID}:${CLUSTERNAME}_Nth
queueURL: ""
clusterBackup:
enabled: false
# -- s3:https://s3.amazonaws.com/${CFN[ConfigBucket]}/k8s/${CLUSTERNAME}/clusterBackup
repository: ""
# -- /etc/cloudbender/clusterBackup.passphrase
passwordFile: ""
external-dns:
enabled: false
network:
multus:
enabled: false
tag: "v3.8"
cilium:
enabled: false
calico:
enabled: false
highAvailable: false
etcd:
nodeName: etcd

View File

@ -2,25 +2,46 @@ apiVersion: v2
name: kubezero-addons
description: KubeZero umbrella chart for various optional cluster addons
type: application
version: 0.5.2
appVersion: v1.22.8
version: 0.8.7
appVersion: v1.28
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
- kubezero
- fuse-device-plugin
- aws-node-termination-handler
- neuron-device-plugin
- nvidia-device-plugin
- cluster-autoscaler
- sealed-secrets
- external-dns
- aws-node-termination-handler
maintainers:
- name: Stefan Reimer
email: stefan@zero-downtime.net
dependencies:
- name: aws-node-termination-handler
version: 0.18.0
# repository: https://aws.github.io/eks-charts
condition: aws-node-termination-handler.enabled
- name: external-dns
version: 1.7.1
version: 1.14.4
repository: https://kubernetes-sigs.github.io/external-dns/
condition: external-dns.enabled
kubeVersion: ">= 1.20.0"
- name: cluster-autoscaler
version: 9.36.0
repository: https://kubernetes.github.io/autoscaler
condition: cluster-autoscaler.enabled
- name: nvidia-device-plugin
version: 0.15.0
# https://github.com/NVIDIA/k8s-device-plugin
repository: https://nvidia.github.io/k8s-device-plugin
condition: nvidia-device-plugin.enabled
- name: sealed-secrets
version: 2.15.3
repository: https://bitnami-labs.github.io/sealed-secrets
condition: sealed-secrets.enabled
- name: aws-node-termination-handler
version: 0.23.0
repository: "oci://public.ecr.aws/aws-ec2/helm"
condition: aws-node-termination-handler.enabled
- name: aws-eks-asg-rolling-update-handler
version: 1.5.0
repository: https://twin.github.io/helm-charts
condition: aws-eks-asg-rolling-update-handler.enabled
kubeVersion: ">= 1.26.0"
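Every dependency is gated by its `*.enabled` condition, so a rendered manifest only contains the sub-charts that are switched on; a hedged sketch, assuming the chart path `charts/kubezero-addons` and placeholder names:

```shell
# Pull the sub-charts, then render with only the desired addons enabled.
helm dependency build charts/kubezero-addons
helm template addons charts/kubezero-addons \
  --set sealed-secrets.enabled=true \
  --set cluster-autoscaler.enabled=true \
  --set cluster-autoscaler.autoDiscovery.clusterName=my-cluster
```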

View File

@ -1,6 +1,6 @@
# kubezero-addons
![Version: 0.5.2](https://img.shields.io/badge/Version-0.5.2-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.22.8](https://img.shields.io/badge/AppVersion-v1.22.8-informational?style=flat-square)
![Version: 0.8.7](https://img.shields.io/badge/Version-0.8.7-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.28](https://img.shields.io/badge/AppVersion-v1.28-informational?style=flat-square)
KubeZero umbrella chart for various optional cluster addons
@ -14,12 +14,16 @@ KubeZero umbrella chart for various optional cluster addons
## Requirements
Kubernetes: `>= 1.20.0`
Kubernetes: `>= 1.26.0`
| Repository | Name | Version |
|------------|------|---------|
| | aws-node-termination-handler | 0.18.0 |
| https://kubernetes-sigs.github.io/external-dns/ | external-dns | 1.7.1 |
| https://bitnami-labs.github.io/sealed-secrets | sealed-secrets | 2.15.3 |
| https://kubernetes-sigs.github.io/external-dns/ | external-dns | 1.14.4 |
| https://kubernetes.github.io/autoscaler | cluster-autoscaler | 9.36.0 |
| https://nvidia.github.io/k8s-device-plugin | nvidia-device-plugin | 0.15.0 |
| https://twin.github.io/helm-charts | aws-eks-asg-rolling-update-handler | 1.5.0 |
| oci://public.ecr.aws/aws-ec2/helm | aws-node-termination-handler | 0.23.0 |
# MetalLB
@ -28,14 +32,52 @@ Kubernetes: `>= 1.20.0`
## AWS Neuron
Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/) - [Inf1 instances](https://aws.amazon.com/ec2/instance-types/inf1/)
## Nvidia
## Cluster AutoScaler
- https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| aws-eks-asg-rolling-update-handler.containerSecurityContext.allowPrivilegeEscalation | bool | `false` | |
| aws-eks-asg-rolling-update-handler.containerSecurityContext.capabilities.drop[0] | string | `"ALL"` | |
| aws-eks-asg-rolling-update-handler.enabled | bool | `false` | |
| aws-eks-asg-rolling-update-handler.environmentVars[0].name | string | `"CLUSTER_NAME"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[0].value | string | `""` | |
| aws-eks-asg-rolling-update-handler.environmentVars[1].name | string | `"AWS_REGION"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[1].value | string | `"us-west-2"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[2].name | string | `"EXECUTION_INTERVAL"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[2].value | string | `"60"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[3].name | string | `"METRICS"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[3].value | string | `"true"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[4].name | string | `"EAGER_CORDONING"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[4].value | string | `"true"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[5].name | string | `"SLOW_MODE"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[5].value | string | `"true"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[6].name | string | `"AWS_ROLE_ARN"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[6].value | string | `""` | |
| aws-eks-asg-rolling-update-handler.environmentVars[7].name | string | `"AWS_WEB_IDENTITY_TOKEN_FILE"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[7].value | string | `"/var/run/secrets/sts.amazonaws.com/serviceaccount/token"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[8].name | string | `"AWS_STS_REGIONAL_ENDPOINTS"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[8].value | string | `"regional"` | |
| aws-eks-asg-rolling-update-handler.image.repository | string | `"twinproduction/aws-eks-asg-rolling-update-handler"` | |
| aws-eks-asg-rolling-update-handler.image.tag | string | `"v1.8.3"` | |
| aws-eks-asg-rolling-update-handler.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| aws-eks-asg-rolling-update-handler.resources.limits.memory | string | `"128Mi"` | |
| aws-eks-asg-rolling-update-handler.resources.requests.cpu | string | `"10m"` | |
| aws-eks-asg-rolling-update-handler.resources.requests.memory | string | `"32Mi"` | |
| aws-eks-asg-rolling-update-handler.securityContext.runAsNonRoot | bool | `true` | |
| aws-eks-asg-rolling-update-handler.securityContext.runAsUser | int | `1001` | |
| aws-eks-asg-rolling-update-handler.securityContext.seccompProfile.type | string | `"RuntimeDefault"` | |
| aws-eks-asg-rolling-update-handler.tolerations[0].effect | string | `"NoSchedule"` | |
| aws-eks-asg-rolling-update-handler.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
| aws-node-termination-handler.deleteLocalData | bool | `true` | |
| aws-node-termination-handler.emitKubernetesEvents | bool | `true` | |
| aws-node-termination-handler.enableProbesServer | bool | `true` | |
| aws-node-termination-handler.enablePrometheusServer | bool | `false` | |
| aws-node-termination-handler.enableSpotInterruptionDraining | bool | `false` | |
| aws-node-termination-handler.enableSqsTerminationDraining | bool | `true` | |
| aws-node-termination-handler.enabled | bool | `false` | |
| aws-node-termination-handler.extraEnv[0] | object | `{"name":"AWS_ROLE_ARN","value":""}` | "arn:aws:iam::${AWS::AccountId}:role/${AWS::Region}.${ClusterName}.awsNth" |
@ -46,7 +88,8 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| aws-node-termination-handler.fullnameOverride | string | `"aws-node-termination-handler"` | |
| aws-node-termination-handler.ignoreDaemonSets | bool | `true` | |
| aws-node-termination-handler.jsonLogging | bool | `true` | |
| aws-node-termination-handler.managedAsgTag | string | `"aws-node-termination-handler/managed"` | "aws-node-termination-handler/${ClusterName}" |
| aws-node-termination-handler.logFormatVersion | int | `2` | |
| aws-node-termination-handler.managedTag | string | `"zdt:kubezero:nth:${ClusterName}"` | "zdt:kubezero:nth:${ClusterName}" |
| aws-node-termination-handler.metadataTries | int | `0` | |
| aws-node-termination-handler.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| aws-node-termination-handler.podMonitor.create | bool | `false` | |
@ -54,34 +97,40 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| aws-node-termination-handler.rbac.pspEnabled | bool | `false` | |
| aws-node-termination-handler.taintNode | bool | `true` | |
| aws-node-termination-handler.tolerations[0].effect | string | `"NoSchedule"` | |
| aws-node-termination-handler.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| aws-node-termination-handler.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
| aws-node-termination-handler.useProviderId | bool | `true` | |
| awsNeuron.enabled | bool | `false` | |
| awsNeuron.image.name | string | `"public.ecr.aws/neuron/neuron-device-plugin"` | |
| awsNeuron.image.tag | string | `"1.9.0.0"` | |
| awsNeuron.image.tag | string | `"2.19.16.0"` | |
| cluster-autoscaler.autoDiscovery.clusterName | string | `""` | |
| cluster-autoscaler.awsRegion | string | `"us-west-2"` | |
| cluster-autoscaler.enabled | bool | `false` | |
| cluster-autoscaler.extraArgs.balance-similar-node-groups | bool | `true` | |
| cluster-autoscaler.extraArgs.ignore-taint | string | `"node.cilium.io/agent-not-ready"` | |
| cluster-autoscaler.extraArgs.scan-interval | string | `"30s"` | |
| cluster-autoscaler.extraArgs.skip-nodes-with-local-storage | bool | `false` | |
| cluster-autoscaler.image.repository | string | `"registry.k8s.io/autoscaling/cluster-autoscaler"` | |
| cluster-autoscaler.image.tag | string | `"v1.28.2"` | |
| cluster-autoscaler.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| cluster-autoscaler.podDisruptionBudget | bool | `false` | |
| cluster-autoscaler.prometheusRule.enabled | bool | `false` | |
| cluster-autoscaler.prometheusRule.interval | string | `"30"` | |
| cluster-autoscaler.serviceMonitor.enabled | bool | `false` | |
| cluster-autoscaler.serviceMonitor.interval | string | `"30s"` | |
| cluster-autoscaler.tolerations[0].effect | string | `"NoSchedule"` | |
| cluster-autoscaler.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
| clusterBackup.enabled | bool | `false` | |
| clusterBackup.extraEnv | list | `[]` | |
| clusterBackup.image.name | string | `"public.ecr.aws/zero-downtime/kubezero-admin"` | |
| clusterBackup.password | string | `""` | |
| clusterBackup.repository | string | `""` | |
| clusterBackup.password | string | `""` | /etc/cloudbender/clusterBackup.passphrase |
| clusterBackup.repository | string | `""` | s3:https://s3.amazonaws.com/${CFN[ConfigBucket]}/k8s/${CLUSTERNAME}/clusterBackup |
| external-dns.enabled | bool | `false` | |
| external-dns.env[0] | object | `{"name":"AWS_ROLE_ARN","value":""}` | "arn:aws:iam::${AWS::AccountId}:role/${AWS::Region}.${ClusterName}.externalDNS" |
| external-dns.env[1].name | string | `"AWS_WEB_IDENTITY_TOKEN_FILE"` | |
| external-dns.env[1].value | string | `"/var/run/secrets/sts.amazonaws.com/serviceaccount/token"` | |
| external-dns.env[2].name | string | `"AWS_STS_REGIONAL_ENDPOINTS"` | |
| external-dns.env[2].value | string | `"regional"` | |
| external-dns.extraVolumeMounts[0].mountPath | string | `"/var/run/secrets/sts.amazonaws.com/serviceaccount/"` | |
| external-dns.extraVolumeMounts[0].name | string | `"aws-token"` | |
| external-dns.extraVolumeMounts[0].readOnly | bool | `true` | |
| external-dns.extraVolumes[0].name | string | `"aws-token"` | |
| external-dns.extraVolumes[0].projected.sources[0].serviceAccountToken.audience | string | `"sts.amazonaws.com"` | |
| external-dns.extraVolumes[0].projected.sources[0].serviceAccountToken.expirationSeconds | int | `86400` | |
| external-dns.extraVolumes[0].projected.sources[0].serviceAccountToken.path | string | `"token"` | |
| external-dns.interval | string | `"3m"` | |
| external-dns.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| external-dns.provider | string | `"inmemory"` | |
| external-dns.sources[0] | string | `"service"` | |
| external-dns.tolerations[0].effect | string | `"NoSchedule"` | |
| external-dns.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| external-dns.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
| external-dns.triggerLoopOnEvent | bool | `true` | |
| forseti.aws.iamRoleArn | string | `""` | "arn:aws:iam::${AWS::AccountId}:role/${AWS::Region}.${ClusterName}.kubezeroForseti" |
| forseti.aws.region | string | `""` | |
@ -89,3 +138,38 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| forseti.image.name | string | `"public.ecr.aws/zero-downtime/forseti"` | |
| forseti.image.tag | string | `"v0.1.2"` | |
| fuseDevicePlugin.enabled | bool | `false` | |
| fuseDevicePlugin.image.name | string | `"public.ecr.aws/zero-downtime/fuse-device-plugin"` | |
| fuseDevicePlugin.image.tag | string | `"v1.2.0"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].key | string | `"node.kubernetes.io/instance-type"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].operator | string | `"In"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[0] | string | `"g5.xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[10] | string | `"g4dn.4xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[11] | string | `"g4dn.8xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[12] | string | `"g4dn.12xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[13] | string | `"g4dn.16xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[1] | string | `"g5.2xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[2] | string | `"g5.4xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[3] | string | `"g5.8xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[4] | string | `"g5.12xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[5] | string | `"g5.16xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[6] | string | `"g5.24xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[7] | string | `"g5.48xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[8] | string | `"g4dn.xlarge"` | |
| nvidia-device-plugin.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution.nodeSelectorTerms[0].matchExpressions[0].values[9] | string | `"g4dn.2xlarge"` | |
| nvidia-device-plugin.enabled | bool | `false` | |
| nvidia-device-plugin.tolerations[0].effect | string | `"NoSchedule"` | |
| nvidia-device-plugin.tolerations[0].key | string | `"nvidia.com/gpu"` | |
| nvidia-device-plugin.tolerations[0].operator | string | `"Exists"` | |
| nvidia-device-plugin.tolerations[1].effect | string | `"NoSchedule"` | |
| nvidia-device-plugin.tolerations[1].key | string | `"kubezero-workergroup"` | |
| nvidia-device-plugin.tolerations[1].operator | string | `"Exists"` | |
| sealed-secrets.enabled | bool | `false` | |
| sealed-secrets.fullnameOverride | string | `"sealed-secrets-controller"` | |
| sealed-secrets.keyrenewperiod | string | `"0"` | |
| sealed-secrets.metrics.serviceMonitor.enabled | bool | `false` | |
| sealed-secrets.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| sealed-secrets.resources.limits.memory | string | `"128Mi"` | |
| sealed-secrets.resources.requests.cpu | string | `"10m"` | |
| sealed-secrets.resources.requests.memory | string | `"24Mi"` | |
| sealed-secrets.tolerations[0].effect | string | `"NoSchedule"` | |
| sealed-secrets.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |

View File

@ -20,4 +20,9 @@
## AWS Neuron
Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/) - [Inf1 instances](https://aws.amazon.com/ec2/instance-types/inf1/)
## Nvidia
## Cluster AutoScaler
- https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md
{{ template "chart.valuesSection" . }}

View File

@ -0,0 +1,30 @@
diff -tuNr charts/aws-eks-asg-rolling-update-handler.orig/templates/deployment.yaml charts/aws-eks-asg-rolling-update-handler/templates/deployment.yaml
--- charts/aws-eks-asg-rolling-update-handler.orig/templates/deployment.yaml 2023-04-12 15:49:08.744242462 +0000
+++ charts/aws-eks-asg-rolling-update-handler/templates/deployment.yaml 2023-04-12 15:55:44.399489809 +0000
@@ -34,6 +34,26 @@
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
+ volumeMounts:
+ - name: aws-token
+ mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
+ readOnly: true
+ volumes:
+ - name: aws-token
+ projected:
+ sources:
+ - serviceAccountToken:
+ path: token
+ expirationSeconds: 86400
+ audience: "sts.amazonaws.com"
+ {{- with .Values.nodeSelector }}
+ nodeSelector:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.tolerations }}
+ tolerations:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}

View File

@ -1,19 +1,6 @@
diff -tuNr charts/aws-node-termination-handler.orig/templates/deployment.yaml charts/aws-node-termination-handler/templates/deployment.yaml
--- charts/aws-node-termination-handler.orig/templates/deployment.yaml 2022-01-26 18:01:36.123482217 +0100
+++ charts/aws-node-termination-handler/templates/deployment.yaml 2022-01-26 18:08:21.464304621 +0100
@@ -161,9 +161,9 @@
{{- toYaml . | nindent 12 }}
{{- end }}
ports:
- - name: liveness-probe
- protocol: TCP
- containerPort: {{ .Values.probes.httpGet.port }}
+ - name: liveness-probe
+ protocol: TCP
+ containerPort: {{ .Values.probes.httpGet.port }}
{{- if .Values.enablePrometheusServer }}
- name: http-metrics
protocol: TCP
@@ -175,13 +175,23 @@
resources:
{{- toYaml . | nindent 12 }}

View File

@ -0,0 +1,8 @@
apiVersion: v2
description: Handles rolling upgrades for AWS ASGs for EKS by replacing outdated nodes
by new nodes.
home: https://github.com/TwiN/aws-eks-asg-rolling-update-handler
maintainers:
- name: TwiN
name: aws-eks-asg-rolling-update-handler
version: 1.5.0

View File

@ -0,0 +1,16 @@
# aws-eks-asg-rolling-update-handler
## Configuration
The following table lists the configurable parameters of the aws-eks-asg-rolling-update-handler chart and their default values; an install sketch follows the table.
| Parameters | Description | Required | Default |
|:-----------|:------------|:---------|:------------|
| environmentVars | environment variables for aws-eks-asg-rolling-update-handler container, available variables are listed [here](https://github.com/TwiN/aws-eks-asg-rolling-update-handler/blob/master/README.md#usage) | yes |`[{"name":"CLUSTER_NAME","value":"cluster-name"}]`|
| replicaCount | Number of aws-eks-asg-rolling-update-handler replicas | yes |`1` |
| image.repository | Image repository | yes | `twinproduction/aws-eks-asg-rolling-update-handler` |
| image.tag | image tag | yes | `v1.4.3` |
| image.pullPolicy | Image pull policy | yes | `IfNotPresent` |
| resources | CPU/memory resource requests/limits | no | `{}` |
| podAnnotations | Annotations to add to the aws-eks-asg-rolling-update-handler pod configuration | no | `{}` |
| podLabels | Labels to add to the aws-eks-asg-rolling-update-handler pod configuration | no | `{}` |
| securityContext | Pod security context | no | `{}` |
| containerSecurityContext | Container security context | no | `{}` |
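A hedged install sketch using the upstream repository referenced in the kubezero-addons dependencies; release name, namespace and cluster name are placeholders, and only `CLUSTER_NAME` is strictly required:

```shell
helm repo add twin https://twin.github.io/helm-charts
helm upgrade --install aws-eks-asg-rolling-update-handler twin/aws-eks-asg-rolling-update-handler \
  --namespace kube-system \
  --set 'environmentVars[0].name=CLUSTER_NAME' \
  --set 'environmentVars[0].value=my-eks-cluster'
```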

View File

@ -0,0 +1,31 @@
{{/*
Create a default app name.
*/}}
{{- define "aws-eks-asg-rolling-update-handler.name" -}}
{{- .Chart.Name -}}
{{- end -}}
{{/*
Create a default namespace.
*/}}
{{- define "aws-eks-asg-rolling-update-handler.namespace" -}}
{{- .Release.Namespace -}}
{{- end -}}
{{/*
Common labels
*/}}
{{- define "aws-eks-asg-rolling-update-handler.labels" -}}
app.kubernetes.io/name: {{ include "aws-eks-asg-rolling-update-handler.name" . }}
{{- end -}}
{{/*
Create the name of the service account to use.
*/}}
{{- define "aws-eks-asg-rolling-update-handler.serviceAccountName" -}}
{{- if .Values.serviceAccount.create -}}
{{ default (include "aws-eks-asg-rolling-update-handler.name" .) .Values.serviceAccount.name }}
{{- else -}}
{{ default "default" .Values.serviceAccount.name }}
{{- end -}}
{{- end -}}

View File

@ -0,0 +1,15 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ template "aws-eks-asg-rolling-update-handler.name" . }}
labels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 4 }}
roleRef:
kind: ClusterRole
name: {{ template "aws-eks-asg-rolling-update-handler.name" . }}
apiGroup: rbac.authorization.k8s.io
subjects:
- kind: ServiceAccount
name: {{ template "aws-eks-asg-rolling-update-handler.serviceAccountName" . }}
namespace: {{ template "aws-eks-asg-rolling-update-handler.namespace" . }}

View File

@ -0,0 +1,41 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ template "aws-eks-asg-rolling-update-handler.name" . }}
labels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 4 }}
rules:
- apiGroups:
- "*"
resources:
- "*"
verbs:
- get
- list
- watch
- apiGroups:
- "*"
resources:
- nodes
verbs:
- get
- list
- watch
- update
- patch
- apiGroups:
- "*"
resources:
- pods/eviction
verbs:
- get
- list
- create
- apiGroups:
- "*"
resources:
- pods
verbs:
- get
- list

View File

@ -0,0 +1,71 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "aws-eks-asg-rolling-update-handler.name" . }}
namespace: {{ template "aws-eks-asg-rolling-update-handler.namespace" . }}
labels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 4 }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 6 }}
template:
metadata:
labels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 8 }}
{{- with .Values.podLabels }}
{{- toYaml . | nindent 8 }}
{{- end }}
annotations:
{{- with .Values.podAnnotations }}
{{- toYaml . | nindent 8 }}
{{- end }}
spec:
{{- if .Values.securityContext }}
securityContext:
{{ toYaml .Values.securityContext | nindent 8 | trim }}
{{- end }}
automountServiceAccountToken: true
serviceAccountName: {{ template "aws-eks-asg-rolling-update-handler.serviceAccountName" . }}
restartPolicy: Always
dnsPolicy: Default
containers:
- name: {{ template "aws-eks-asg-rolling-update-handler.name" . }}
image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
{{- if .Values.containerSecurityContext }}
securityContext:
{{ toYaml .Values.containerSecurityContext | nindent 12 | trim }}
{{- end }}
env:
{{- toYaml .Values.environmentVars | nindent 12 }}
{{- with .Values.resources }}
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
volumeMounts:
- name: aws-token
mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
readOnly: true
volumes:
- name: aws-token
projected:
sources:
- serviceAccountToken:
path: token
expirationSeconds: 86400
audience: "sts.amazonaws.com"
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}

View File

@ -0,0 +1,13 @@
{{ if .Values.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ template "aws-eks-asg-rolling-update-handler.serviceAccountName" . }}
namespace: {{ template "aws-eks-asg-rolling-update-handler.namespace" . }}
labels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
{{ end }}

View File

@ -0,0 +1,51 @@
replicaCount: 1
image:
repository: twinproduction/aws-eks-asg-rolling-update-handler
tag: v1.7.0
pullPolicy: IfNotPresent
#imagePullSecrets:
#- imagePullSecret
environmentVars:
- name: CLUSTER_NAME
value: "cluster-name" # REPLACE THIS WITH THE NAME OF YOUR EKS CLUSTER
#- name: AUTO_SCALING_GROUP_NAMES
# value: "asg-1,asg-2,asg-3" # REPLACE THESE VALUES FOR THE NAMES OF THE ASGs, if CLUSTER_NAME is provided, this is ignored
#- name: IGNORE_DAEMON_SETS
# value: "true"
#- name: DELETE_LOCAL_DATA
# value: "true"
#- name: AWS_REGION
# value: us-west-2
#- name: ENVIRONMENT
# value: ""
resources: {}
# limits:
# cpu: 0.3
# memory: 100Mi
# requests:
# cpu: 0.1
# memory: 50Mi
podAnnotations: {}
# prometheus.io/port: "8080"
# prometheus.io/scrape: "true"
podLabels: {}
serviceAccount:
create: true
#name: aws-eks-asg-rolling-update-handler
annotations: {}
securityContext: {}
# runAsNonRoot: true
# runAsUser: 1001
# seccompProfile:
# type: RuntimeDefault
containerSecurityContext: {}
# allowPrivilegeEscalation: false
# capabilities:
# drop:
# - ALL

View File

@ -1,7 +1,7 @@
apiVersion: v2
appVersion: 1.16.0
appVersion: 1.21.0
description: A Helm chart for the AWS Node Termination Handler.
home: https://github.com/aws/eks-charts
home: https://github.com/aws/aws-node-termination-handler/
icon: https://raw.githubusercontent.com/aws/eks-charts/master/docs/logo/aws.png
keywords:
- aws
@ -20,6 +20,5 @@ maintainers:
name: aws-node-termination-handler
sources:
- https://github.com/aws/aws-node-termination-handler/
- https://github.com/aws/eks-charts/
type: application
version: 0.18.0
version: 0.23.0

View File

@ -8,22 +8,24 @@ AWS Node Termination Handler Helm chart for Kubernetes. For more information on
## Installing the Chart
Before you can install the chart you will need to add the `aws` repo to [Helm](https://helm.sh/).
Before you can install the chart you will need to authenticate your Helm client.
```shell
helm repo add eks https://aws.github.io/eks-charts/
aws ecr-public get-login-password \
--region us-east-1 | helm registry login \
--username AWS \
--password-stdin public.ecr.aws
```
After you've installed the repo you can install the chart, the following command will install the chart with the release name `aws-node-termination-handler` and the default configuration to the `kube-system` namespace.
Once the helm registry login succeeds, use the following command to install the chart with the release name `aws-node-termination-handler` and the default configuration into the `kube-system` namespace, replacing `$CHART_VERSION` with the chart version you want to install.
```shell
helm upgrade --install --namespace kube-system aws-node-termination-handler eks/aws-node-termination-handler
helm upgrade --install --namespace kube-system aws-node-termination-handler oci://public.ecr.aws/aws-ec2/helm/aws-node-termination-handler --version $CHART_VERSION
```
To install the chart on an EKS cluster where the AWS Node Termination Handler is already installed, you can run the following command.
```shell
helm upgrade --install --namespace kube-system aws-node-termination-handler eks/aws-node-termination-handler --recreate-pods --force
helm upgrade --install --namespace kube-system aws-node-termination-handler oci://public.ecr.aws/aws-ec2/helm/aws-node-termination-handler --version $CHART_VERSION --recreate-pods --force
```
If you receive an error similar to the one below, simply rerun the above command.
@ -33,7 +35,7 @@ If you receive an error similar to the one below simply rerun the above command.
To uninstall the `aws-node-termination-handler` chart installation from the `kube-system` namespace run the following command.
```shell
helm delete --namespace kube-system aws-node-termination-handler
helm uninstall --namespace kube-system aws-node-termination-handler
```
## Configuration
@ -56,7 +58,7 @@ The configuration in this table applies to all AWS Node Termination Handler mode
| `serviceAccount.name` | Service account to be used. If not set and `serviceAccount.create` is `true`, a name is generated using the full name template. | `nil` |
| `serviceAccount.annotations` | Annotations to add to the service account. | `{}` |
| `rbac.create` | If `true`, create the RBAC resources. | `true` |
| `rbac.pspEnabled` | If `true`, create a pod security policy resource. | `true` |
| `rbac.pspEnabled` | If `true`, create a pod security policy resource. Note: `PodSecurityPolicy`s will not be created when Kubernetes version is 1.25 or later. | `true` |
| `customLabels` | Labels to add to all resource metadata. | `{}` |
| `podLabels` | Labels to add to the pod. | `{}` |
| `podAnnotations` | Annotations to add to the pod. | `{}` |
@ -70,6 +72,7 @@ The configuration in this table applies to all AWS Node Termination Handler mode
| `extraEnv` | Additional environment variables for the _aws-node-termination-handler_ container. | `[]` |
| `probes` | The Kubernetes liveness probe configuration. | _See values.yaml_ |
| `logLevel` | Sets the log level (`info`,`debug`, or `error`) | `info` |
| `logFormatVersion` | Sets the log format version. Available versions: 1, 2. Version 1 refers to the format that has been used through v1.17.3. Version 2 offers more detail for the "event kind" and "reason", especially when operating in Queue Processor mode. | `1` |
| `jsonLogging` | If `true`, use JSON-formatted logs instead of human readable logs. | `false` |
| `enablePrometheusServer` | If `true`, start an http server exposing `/metrics` endpoint for _Prometheus_. | `false` |
| `prometheusServerPort` | Replaces the default HTTP port for exposing _Prometheus_ metrics. | `9092` |
@ -82,6 +85,7 @@ The configuration in this table applies to all AWS Node Termination Handler mode
| `podTerminationGracePeriod` | The time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used, which defaults to 30 seconds if not specified for the pod. | `-1` |
| `nodeTerminationGracePeriod` | Period of time in seconds given to each node to terminate gracefully. Node draining will be scheduled based on this value to optimize the amount of compute time, but still safely drain the node before an event. | `120` |
| `emitKubernetesEvents` | If `true`, Kubernetes events will be emitted when interruption events are received and when actions are taken on Kubernetes nodes. In IMDS Processor mode a default set of annotations with all the node metadata gathered from IMDS will be attached to each event. More information [here](https://github.com/aws/aws-node-termination-handler/blob/main/docs/kubernetes_events.md). | `false` |
| `completeLifecycleActionDelaySeconds` | Pause after draining the node before completing the EC2 Autoscaling lifecycle action. This may be helpful if Pods on the node have Persistent Volume Claims. | -1 |
| `kubernetesEventsExtraAnnotations` | A comma-separated list of `key=value` extra annotations to attach to all emitted Kubernetes events (e.g. `first=annotation,sample.annotation/number=two"`). | `""` |
| `webhookURL` | Posts event data to URL upon instance interruption action. | `""` |
| `webhookURLSecretName` | Pass the webhook URL as a Secret using the key `webhookurl`. | `""` |
@ -110,16 +114,18 @@ The configuration in this table applies to AWS Node Termination Handler in queue
| `awsRegion` | If specified, use the AWS region for AWS API calls, else NTH will try to find the region through the `AWS_REGION` environment variable, IMDS, or the specified queue URL. | `""` |
| `queueURL` | Listens for messages on the specified SQS queue URL. | `""` |
| `workers` | The maximum amount of parallel event processors to handle concurrent events. | `10` |
| `checkASGTagBeforeDraining` | If `true`, check that the instance is tagged with the `managedAsgTag` before draining the node. | `true` |
| `managedAsgTag` | The node tag to check if `checkASGTagBeforeDraining` is `true`. | `aws-node-termination-handler/managed` |
| `assumeAsgTagPropagation` | If `true`, assume that ASG tags will be appear on the ASG's instances. | `false` |
| `checkTagBeforeDraining` | If `true`, check that the instance is tagged with the `managedTag` before draining the node. | `true` |
| `managedTag` | The node tag to check if `checkTagBeforeDraining` is `true`. | `aws-node-termination-handler/managed` |
| `checkASGTagBeforeDraining` | [DEPRECATED](Use `checkTagBeforeDraining` instead) If `true`, check that the instance is tagged with the `managedAsgTag` before draining the node. If `false`, disables calls ASG API. | `true` |
| `managedAsgTag` | [DEPRECATED](Use `managedTag` instead) The node tag to check if `checkASGTagBeforeDraining` is `true`.
| `useProviderId` | If `true`, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname. | `false` |
| `topologySpreadConstraints` | [Topology Spread Constraints](https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/) for pod scheduling. Useful with a highly available deployment to reduce the risk of running multiple replicas on the same Node | `[]` |
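As a hedged illustration, a queue-processor deployment using the renamed `managedTag`/`checkTagBeforeDraining` options could be configured as follows (queue URL, tag value and chart version are placeholders):

```shell
helm upgrade --install --namespace kube-system aws-node-termination-handler \
  oci://public.ecr.aws/aws-ec2/helm/aws-node-termination-handler --version $CHART_VERSION \
  --set enableSqsTerminationDraining=true \
  --set queueURL="https://sqs.us-west-2.amazonaws.com/000000000000/my-cluster_Nth" \
  --set checkTagBeforeDraining=true \
  --set managedTag="aws-node-termination-handler/managed" \
  --set useProviderId=true
```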
### IMDS Mode Configuration
The configuration in this table applies to AWS Node Termination Handler in IMDS mode.
| Parameter | Description | Default |
| -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------- |
| -------------------------------- |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|
| `targetNodeOs` | Space separated list of node OS's to target (e.g. `"linux"`, `"windows"`, `"linux windows"`). Windows support is **EXPERIMENTAL**. | `"linux"` |
| `linuxPodLabels` | Labels to add to each Linux pod. | `{}` |
| `windowsPodLabels` | Labels to add to each Windows pod. | `{}` |
@ -134,7 +140,7 @@ The configuration in this table applies to AWS Node Termination Handler in IMDS
| `podMonitor.sampleLimit` | Number of scraped samples accepted. | `5000` |
| `useHostNetwork` | If `true`, enables `hostNetwork` for the Linux DaemonSet. NOTE: setting this to `false` may cause issues accessing IMDSv2 if your account is not configured with an IP hop count of 2 see [Metrics Endpoint Considerations](#metrics-endpoint-considerations) | `true` |
| `dnsPolicy` | If specified, this overrides `linuxDnsPolicy` and `windowsDnsPolicy` with a single policy. | `""` |
| `dnsConfig` | If specified, this sets the dnsConfig: https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config | `{}` |
| `dnsConfig` | If specified, this sets the dnsConfig: https://kubernetes.io/docs/concepts/services-networking/dns-pod-service/#pod-dns-config | `{}` |
| `linuxDnsPolicy` | DNS policy for the Linux DaemonSet. | `""` |
| `windowsDnsPolicy` | DNS policy for the Windows DaemonSet. | `""` |
| `daemonsetNodeSelector`          | Expressions to select a node by its labels for DaemonSet pod assignment. For backwards compatibility the `nodeSelector` value has priority over this but shouldn't be used. | `{}` |
@ -148,10 +154,11 @@ The configuration in this table applies to AWS Node Termination Handler in IMDS
| `windowsTolerations`            | Override `daemonsetTolerations` for the Windows DaemonSet. | `[]` |
| `enableProbesServer` | If `true`, start an http server exposing `/healthz` endpoint for probes. | `false` |
| `metadataTries` | The number of times to try requesting metadata. | `3` |
| `enableSpotInterruptionDraining` | If `true`, drain nodes when the spot interruption termination notice is received. | `true` |
| `enableScheduledEventDraining` | If `true`, drain nodes before the maintenance window starts for an EC2 instance scheduled event. This is **EXPERIMENTAL**. | `false` |
| `enableRebalanceMonitoring` | If `true`, cordon nodes when the rebalance recommendation notice is received. If you'd like to drain the node in addition to cordoning, then also set `enableRebalanceDraining`. | `false` |
| `enableRebalanceDraining` | If `true`, drain nodes when the rebalance recommendation notice is received. | `false` |
| `enableSpotInterruptionDraining` | If `true`, drain nodes when the spot interruption termination notice is received. Only used in IMDS mode. | `true` |
| `enableScheduledEventDraining` | If `true`, drain nodes before the maintenance window starts for an EC2 instance scheduled event. Only used in IMDS mode. | `true` |
| `enableRebalanceMonitoring` | If `true`, cordon nodes when the rebalance recommendation notice is received. If you'd like to drain the node in addition to cordoning, then also set `enableRebalanceDraining`. Only used in IMDS mode. | `false` |
| `enableRebalanceDraining` | If `true`, drain nodes when the rebalance recommendation notice is received. Only used in IMDS mode. | `false` |
| `deleteSqsMsgIfNodeNotFound` | If `true`, delete the SQS Message from the SQS Queue if the targeted node is not found. Only used in Queue Processor mode. | `false` |
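The same flags expressed as a hedged values sketch for IMDS mode, mirroring the defaults documented above:
```
# Hypothetical values fragment for IMDS mode, matching the documented defaults
enableSpotInterruptionDraining: true
enableScheduledEventDraining: true
enableRebalanceMonitoring: false
enableRebalanceDraining: false
enableProbesServer: false
metadataTries: 3
```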
### Testing Configuration
@ -167,6 +174,6 @@ The configuration in this table applies to AWS Node Termination Handler testing
## Metrics Endpoint Considerations
AWS Node Termination HAndler in IMDS mode runs as a DaemonSet with `useHostNetwork: true` by default. If the Prometheus server is enabled with `enablePrometheusServer: true` nothing else will be able to bind to the configured port (by default `prometheusServerPort: 9092`) in the root network namespace. Therefore, it will need to have a firewall/security group configured on the nodes to block access to the `/metrics` endpoint.
AWS Node Termination Handler in IMDS mode runs as a DaemonSet with `useHostNetwork: true` by default. If the Prometheus server is enabled with `enablePrometheusServer: true` nothing else will be able to bind to the configured port (by default `prometheusServerPort: 9092`) in the root network namespace. Therefore, it will need to have a firewall/security group configured on the nodes to block access to the `/metrics` endpoint.
You can switch NTH in IMDS mode to run with `useHostNetwork: false`, but you will need to make sure that IMDSv1 is enabled or that the IMDSv2 hop limit is increased to 2 (see the [IMDSv2 documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html)).
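If you prefer to keep host networking off, one way to raise the hop limit is sketched below with the AWS CLI for a single instance (the instance ID is a placeholder; in practice this is usually set in the launch template instead):
```
# Hypothetical one-off example: allow IMDSv2 access through one extra network hop
aws ec2 modify-instance-metadata-options \
  --instance-id i-0123456789abcdef0 \
  --http-tokens required \
  --http-put-response-hop-limit 2 \
  --http-endpoint enabled
```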

View File

@ -48,7 +48,6 @@ Common labels
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/component: {{ .Release.Name }}
app.kubernetes.io/part-of: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
helm.sh/chart: {{ include "aws-node-termination-handler.chart" . }}
@ -57,6 +56,22 @@ helm.sh/chart: {{ include "aws-node-termination-handler.chart" . }}
{{- end }}
{{- end -}}
{{/*
Deployment labels
*/}}
{{- define "aws-node-termination-handler.labelsDeployment" -}}
{{ include "aws-node-termination-handler.labels" . }}
app.kubernetes.io/component: deployment
{{- end -}}
{{/*
Daemonset labels
*/}}
{{- define "aws-node-termination-handler.labelsDaemonset" -}}
{{ include "aws-node-termination-handler.labels" . }}
app.kubernetes.io/component: daemonset
{{- end -}}
{{/*
Selector labels
*/}}

View File

@ -5,7 +5,7 @@ metadata:
name: {{ include "aws-node-termination-handler.fullname" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "aws-node-termination-handler.labels" . | nindent 4 }}
{{- include "aws-node-termination-handler.labelsDaemonset" . | nindent 4 }}
spec:
{{- with .Values.updateStrategy }}
updateStrategy:
@ -81,6 +81,8 @@ spec:
value: {{ .Values.logLevel | quote }}
- name: JSON_LOGGING
value: {{ .Values.jsonLogging | quote }}
- name: LOG_FORMAT_VERSION
value: {{ .Values.logFormatVersion | quote }}
- name: ENABLE_PROMETHEUS_SERVER
value: {{ .Values.enablePrometheusServer | quote }}
- name: PROMETHEUS_SERVER_PORT
@ -151,6 +153,9 @@ spec:
value: "false"
- name: UPTIME_FROM_FILE
value: {{ .Values.procUptimeFile | quote }}
{{- with .Values.extraEnv }}
{{- toYaml . | nindent 12 }}
{{- end }}
{{- if or .Values.enablePrometheusServer .Values.enableProbesServer }}
ports:
{{- if .Values.enableProbesServer }}

View File

@ -5,7 +5,7 @@ metadata:
name: {{ include "aws-node-termination-handler.fullnameWindows" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "aws-node-termination-handler.labels" . | nindent 4 }}
{{- include "aws-node-termination-handler.labelsDaemonset" . | nindent 4 }}
spec:
{{- with .Values.updateStrategy }}
updateStrategy:
@ -52,7 +52,7 @@ spec:
{{- end }}
containers:
- name: aws-node-termination-handler
{{- with .Values.securityContext }}
{{- with unset .Values.securityContext "runAsUser" }}
securityContext:
{{- toYaml . | nindent 12 }}
{{- end }}
@ -81,6 +81,8 @@ spec:
value: {{ .Values.logLevel | quote }}
- name: JSON_LOGGING
value: {{ .Values.jsonLogging | quote }}
- name: LOG_FORMAT_VERSION
value: {{ .Values.logFormatVersion | quote }}
- name: ENABLE_PROMETHEUS_SERVER
value: {{ .Values.enablePrometheusServer | quote }}
- name: PROMETHEUS_SERVER_PORT
@ -149,6 +151,9 @@ spec:
value: {{ .Values.enableRebalanceDraining | quote }}
- name: ENABLE_SQS_TERMINATION_DRAINING
value: "false"
{{- with .Values.extraEnv }}
{{- toYaml . | nindent 12 }}
{{- end }}
{{- if or .Values.enablePrometheusServer .Values.enableProbesServer }}
ports:
{{- if .Values.enableProbesServer }}

View File

@ -5,7 +5,7 @@ metadata:
name: {{ include "aws-node-termination-handler.fullname" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "aws-node-termination-handler.labels" . | nindent 4 }}
{{- include "aws-node-termination-handler.labelsDeployment" . | nindent 4 }}
spec:
replicas: {{ .Values.replicas }}
{{- with .Values.strategy }}
@ -78,16 +78,24 @@ spec:
value: {{ .Values.logLevel | quote }}
- name: JSON_LOGGING
value: {{ .Values.jsonLogging | quote }}
- name: LOG_FORMAT_VERSION
value: {{ .Values.logFormatVersion | quote }}
- name: ENABLE_PROMETHEUS_SERVER
value: {{ .Values.enablePrometheusServer | quote }}
- name: PROMETHEUS_SERVER_PORT
value: {{ .Values.prometheusServerPort | quote }}
# [DEPRECATED] Use CHECK_TAG_BEFORE_DRAINING instead
- name: CHECK_ASG_TAG_BEFORE_DRAINING
value: {{ .Values.checkASGTagBeforeDraining | quote }}
- name: CHECK_TAG_BEFORE_DRAINING
value: {{ .Values.checkTagBeforeDraining | quote }}
# [DEPRECATED] Use MANAGED_TAG instead
- name: MANAGED_ASG_TAG
value: {{ .Values.managedAsgTag | quote }}
- name: ASSUME_ASG_TAG_PROPAGATION
value: {{ .Values.assumeAsgTagPropagation | quote }}
- name: MANAGED_TAG
value: {{ .Values.managedTag | quote }}
- name: USE_PROVIDER_ID
value: {{ .Values.useProviderId | quote }}
- name: DRY_RUN
value: {{ .Values.dryRun | quote }}
- name: CORDON_ONLY
@ -106,6 +114,8 @@ spec:
value: {{ .Values.nodeTerminationGracePeriod | quote }}
- name: EMIT_KUBERNETES_EVENTS
value: {{ .Values.emitKubernetesEvents | quote }}
- name: COMPLETE_LIFECYCLE_ACTION_DELAY_SECONDS
value: {{ .Values.completeLifecycleActionDelaySeconds | quote }}
{{- with .Values.kubernetesEventsExtraAnnotations }}
- name: KUBERNETES_EVENTS_EXTRA_ANNOTATIONS
value: {{ . | quote }}
@ -136,14 +146,6 @@ spec:
- name: WEBHOOK_TEMPLATE
value: {{ .Values.webhookTemplate | quote }}
{{- end }}
- name: ENABLE_SPOT_INTERRUPTION_DRAINING
value: "false"
- name: ENABLE_SCHEDULED_EVENT_DRAINING
value: "false"
- name: ENABLE_REBALANCE_MONITORING
value: "false"
- name: ENABLE_REBALANCE_DRAINING
value: "false"
- name: ENABLE_SQS_TERMINATION_DRAINING
value: "true"
{{- with .Values.awsRegion }}
@ -162,15 +164,17 @@ spec:
{{- end }}
- name: QUEUE_URL
value: {{ .Values.queueURL | quote }}
- name: DELETE_SQS_MSG_IF_NODE_NOT_FOUND
value: {{ .Values.deleteSqsMsgIfNodeNotFound | quote }}
- name: WORKERS
value: {{ .Values.workers | quote }}
{{- with .Values.extraEnv }}
{{- toYaml . | nindent 12 }}
{{- end }}
ports:
- name: liveness-probe
protocol: TCP
containerPort: {{ .Values.probes.httpGet.port }}
- name: liveness-probe
protocol: TCP
containerPort: {{ .Values.probes.httpGet.port }}
{{- if .Values.enablePrometheusServer }}
- name: http-metrics
protocol: TCP
@ -216,4 +220,8 @@ spec:
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.topologySpreadConstraints }}
topologySpreadConstraints:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@ -1,4 +1,4 @@
{{- if .Values.rbac.pspEnabled }}
{{- if and (.Values.rbac.pspEnabled) (semverCompare "<1.25-0" .Capabilities.KubeVersion.GitVersion) }}
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:

View File

@ -5,7 +5,7 @@ metadata:
name: {{ include "aws-node-termination-handler.fullname" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "aws-node-termination-handler.labels" . | nindent 4 }}
{{- include "aws-node-termination-handler.labelsDeployment" . | nindent 4 }}
spec:
type: ClusterIP
selector:

View File

@ -23,7 +23,7 @@ serviceAccount:
rbac:
# Specifies whether RBAC resources should be created
create: true
# Specifies if PodSecurityPolicy resources should be created
# Specifies if PodSecurityPolicy resources should be created. PodSecurityPolicy will not be created when Kubernetes version is 1.25 or later.
pspEnabled: true
customLabels: {}
@ -52,6 +52,8 @@ affinity: {}
tolerations: []
topologySpreadConstraints: []
# Extra environment variables
extraEnv: []
@ -66,6 +68,9 @@ probes:
# Set the log level
logLevel: info
# Set the log format version
logFormatVersion: 1
# Log messages in JSON format
jsonLogging: false
@ -100,6 +105,9 @@ nodeTerminationGracePeriod: 120
# emitKubernetesEvents If true, Kubernetes events will be emitted when interruption events are received and when actions are taken on Kubernetes nodes. In IMDS Processor mode a default set of annotations with all the node metadata gathered from IMDS will be attached to each event
emitKubernetesEvents: false
# completeLifecycleActionDelaySeconds will pause for the configured duration after draining the node before completing the EC2 Autoscaling lifecycle action. This may be helpful if Pods on the node have Persistent Volume Claims.
completeLifecycleActionDelaySeconds: -1
# kubernetesEventsExtraAnnotations A comma-separated list of key=value extra annotations to attach to all emitted Kubernetes events
# Example: "first=annotation,sample.annotation/number=two"
kubernetesEventsExtraAnnotations: ""
@ -170,14 +178,20 @@ queueURL: ""
# The maximum amount of parallel event processors to handle concurrent events
workers: 10
# If true, check that the instance is tagged with "aws-node-termination-handler/managed" as the key before draining the node
# [DEPRECATED] Use checkTagBeforeDraining instead
checkASGTagBeforeDraining: true
# The tag to ensure is on a node if checkASGTagBeforeDraining is true
# If true, check that the instance is tagged with "aws-node-termination-handler/managed" as the key before draining the node
checkTagBeforeDraining: true
# [DEPRECATED] Use managedTag instead
managedAsgTag: "aws-node-termination-handler/managed"
# If true, assume that ASG tags will appear on the ASG's instances
assumeAsgTagPropagation: false
# The tag to ensure is on a node if checkTagBeforeDraining is true
managedTag: "aws-node-termination-handler/managed"
# If true, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname.
useProviderId: false
# ---------------------------------------------------------------------------------------------------------------------
# IMDS Mode
@ -247,24 +261,27 @@ daemonsetTolerations:
linuxTolerations: []
windowsTolerations: []
# If the probes server is running for the Daemonset
# If the probes server is running.
enableProbesServer: false
# Total number of times to try making the metadata request before failing.
metadataTries: 3
# enableSpotInterruptionDraining If false, do not drain nodes when the spot interruption termination notice is received
# enableSpotInterruptionDraining If false, do not drain nodes when the spot interruption termination notice is received. Only used in IMDS mode.
enableSpotInterruptionDraining: true
# enableScheduledEventDraining [EXPERIMENTAL] If true, drain nodes before the maintenance window starts for an EC2 instance scheduled event
enableScheduledEventDraining: false
# enableScheduledEventDraining If false, do not drain nodes before the maintenance window starts for an EC2 instance scheduled event. Only used in IMDS mode.
enableScheduledEventDraining: true
# enableRebalanceMonitoring If true, cordon nodes when the rebalance recommendation notice is received
# enableRebalanceMonitoring If true, cordon nodes when the rebalance recommendation notice is received. Only used in IMDS mode.
enableRebalanceMonitoring: false
# enableRebalanceDraining If true, drain nodes when the rebalance recommendation notice is received
# enableRebalanceDraining If true, drain nodes when the rebalance recommendation notice is received. Only used in IMDS mode.
enableRebalanceDraining: false
# deleteSqsMsgIfNodeNotFound If true, delete the SQS Message from the SQS Queue if the targeted node(s) are not found. Only used in Queue Processor mode.
deleteSqsMsgIfNodeNotFound: false
# ---------------------------------------------------------------------------------------------------------------------
# Testing
# ---------------------------------------------------------------------------------------------------------------------

View File

@ -6,6 +6,7 @@ metadata:
namespace: kube-system
spec:
schedule: "0 * * * *"
concurrencyPolicy: "Replace"
jobTemplate:
spec:
backoffLimit: 1
@ -23,21 +24,22 @@ spec:
volumeMounts:
- name: host
mountPath: /host
#readOnly: true
- name: workdir
mountPath: /tmp
env:
env:
- name: DEBUG
value: "1"
value: ""
- name: RESTIC_REPOSITORY
valueFrom:
secretKeyRef:
valueFrom:
secretKeyRef:
name: kubezero-backup-restic
key: repository
- name: RESTIC_PASSWORD
valueFrom:
secretKeyRef:
valueFrom:
secretKeyRef:
name: kubezero-backup-restic
key: password
key: password
{{- with .Values.clusterBackup.extraEnv }}
{{- toYaml . | nindent 12 }}
{{- end }}
@ -53,8 +55,9 @@ spec:
emptyDir: {}
nodeSelector:
node-role.kubernetes.io/control-plane: ""
priorityClassName: system-cluster-critical
tolerations:
- key: node-role.kubernetes.io/master
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
restartPolicy: Never
{{- end }}

View File

@ -1,4 +1,4 @@
{{- if and .Values.clusterBackup.enabled .Values.clusterBackup.repository }}
{{- if and .Values.clusterBackup.enabled .Values.clusterBackup.repository .Values.clusterBackup.password }}
apiVersion: v1
kind: Secret
metadata:

View File

@ -42,6 +42,14 @@ spec:
- inf1.2xlarge
- inf1.6xlarge
- inf1.24xlarge
- inf2.xlarge
- inf2.4xlarge
- inf2.8xlarge
- inf2.24xlarge
- inf2.48xlarge
- trn1.2xlarge
- trn1.32xlarge
- trn1n.32xlarge
containers:
- image: "{{ .Values.awsNeuron.image.name }}:{{ .Values.awsNeuron.image.tag }}"
imagePullPolicy: IfNotPresent

View File

@ -13,9 +13,15 @@ spec:
labels:
name: fuse-device-plugin
spec:
hostNetwork: true
priorityClassName: system-node-critical
tolerations:
- key: CriticalAddonsOnly
operator: Exists
- key: kubezero-workergroup
effect: NoSchedule
operator: Exists
containers:
- image: public.ecr.aws/zero-downtime/fuse-device-plugin:v1.1.0
- image: "{{ .Values.fuseDevicePlugin.image.name }}:{{ .Values.fuseDevicePlugin.image.tag }}"
# imagePullPolicy: Always
name: fuse-device-plugin
securityContext:

View File

@ -69,8 +69,9 @@ spec:
nodeSelector:
node-role.kubernetes.io/control-plane: ""
tolerations:
- key: node-role.kubernetes.io/master
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
priorityClassName: system-cluster-critical
volumes:
- name: aws-token
projected:

View File

@ -1,10 +1,12 @@
#!/bin/bash
set -ex
NTH_VERSION=$(yq eval '.dependencies[] | select(.name=="aws-node-termination-handler") | .version' Chart.yaml)
. ../../scripts/lib-update.sh
rm -rf charts/aws-node-termination-handler
helm pull eks/aws-node-termination-handler --untar --untardir charts --version $NTH_VERSION
login_ecr_public
update_helm
# diff -tuNr charts/aws-node-termination-handler.orig charts/aws-node-termination-handler > nth.patch
patch -p0 -i nth.patch --no-backup-if-mismatch
patch_chart aws-node-termination-handler
patch_chart aws-eks-asg-rolling-update-handler
update_docs

View File

@ -5,8 +5,11 @@ clusterBackup:
name: public.ecr.aws/zero-downtime/kubezero-admin
# tag: v1.22.8
# -- s3:https://s3.amazonaws.com/${CFN[ConfigBucket]}/k8s/${CLUSTERNAME}/clusterBackup
repository: ""
# -- /etc/cloudbender/clusterBackup.passphrase
password: ""
extraEnv: []
forseti:
@ -21,18 +24,96 @@ forseti:
# -- "arn:aws:iam::${AWS::AccountId}:role/${AWS::Region}.${ClusterName}.kubezeroForseti"
iamRoleArn: ""
sealed-secrets:
enabled: false
# ensure kubeseal default values match
fullnameOverride: sealed-secrets-controller
# Disable auto keyrotation for now
keyrenewperiod: "0"
resources:
requests:
cpu: 10m
memory: 24Mi
limits:
memory: 128Mi
metrics:
serviceMonitor:
enabled: false
nodeSelector:
node-role.kubernetes.io/control-plane: ""
tolerations:
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
aws-eks-asg-rolling-update-handler:
enabled: false
image:
repository: twinproduction/aws-eks-asg-rolling-update-handler
tag: v1.8.3
environmentVars:
- name: CLUSTER_NAME
value: ""
- name: AWS_REGION
value: us-west-2
- name: EXECUTION_INTERVAL
value: "60"
- name: METRICS
value: "true"
- name: EAGER_CORDONING
value: "true"
# Only disable if all services have PDBs across AZs
- name: SLOW_MODE
value: "true"
- name: AWS_ROLE_ARN
value: ""
- name: AWS_WEB_IDENTITY_TOKEN_FILE
value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
- name: AWS_STS_REGIONAL_ENDPOINTS
value: "regional"
securityContext:
runAsNonRoot: true
runAsUser: 1001
seccompProfile:
type: RuntimeDefault
containerSecurityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
resources:
requests:
cpu: 10m
memory: 32Mi
limits:
memory: 128Mi
nodeSelector:
node-role.kubernetes.io/control-plane: ""
tolerations:
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
aws-node-termination-handler:
enabled: false
fullnameOverride: "aws-node-termination-handler"
#image:
# tag: v1.14.1
# -- "aws-node-termination-handler/${ClusterName}"
managedAsgTag: "aws-node-termination-handler/managed"
# -- "zdt:kubezero:nth:${ClusterName}"
managedTag: "zdt:kubezero:nth:${ClusterName}"
useProviderId: true
enableSqsTerminationDraining: true
# otherwise pods fail trying to reach IMDS
enableSpotInterruptionDraining: false
enableProbesServer: true
deleteLocalData: true
ignoreDaemonSets: true
@ -57,9 +138,10 @@ aws-node-termination-handler:
create: false
jsonLogging: true
logFormatVersion: 2
tolerations:
- key: node-role.kubernetes.io/master
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
nodeSelector:
node-role.kubernetes.io/control-plane: ""
@ -69,13 +151,106 @@ aws-node-termination-handler:
fuseDevicePlugin:
enabled: false
image:
name: public.ecr.aws/zero-downtime/fuse-device-plugin
tag: v1.2.0
awsNeuron:
enabled: false
image:
name: public.ecr.aws/neuron/neuron-device-plugin
tag: 1.9.0.0
tag: 2.19.16.0
nvidia-device-plugin:
enabled: false
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
- key: kubezero-workergroup
effect: NoSchedule
operator: Exists
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: "node.kubernetes.io/instance-type"
operator: In
values:
- g5.xlarge
- g5.2xlarge
- g5.4xlarge
- g5.8xlarge
- g5.12xlarge
- g5.16xlarge
- g5.24xlarge
- g5.48xlarge
- g4dn.xlarge
- g4dn.2xlarge
- g4dn.4xlarge
- g4dn.8xlarge
- g4dn.12xlarge
- g4dn.16xlarge
cluster-autoscaler:
enabled: false
image:
repository: registry.k8s.io/autoscaling/cluster-autoscaler
tag: v1.28.2
autoDiscovery:
clusterName: ""
awsRegion: "us-west-2"
serviceMonitor:
enabled: false
interval: 30s
prometheusRule:
enabled: false
interval: "30"
# Disable pdb for now
podDisruptionBudget: false
extraArgs:
scan-interval: 30s
skip-nodes-with-local-storage: false
balance-similar-node-groups: true
ignore-taint: "node.cilium.io/agent-not-ready"
#securityContext:
# runAsNonRoot: true
nodeSelector:
node-role.kubernetes.io/control-plane: ""
tolerations:
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
# On AWS enable Projected Service Accounts to assume IAM role
#extraEnv:
# AWS_ROLE_ARN: <IamArn>
# AWS_WEB_IDENTITY_TOKEN_FILE: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
# AWS_STS_REGIONAL_ENDPOINTS: "regional"
#extraVolumes:
#- name: aws-token
# projected:
# sources:
# - serviceAccountToken:
# path: token
# expirationSeconds: 86400
# audience: "sts.amazonaws.com"
#extraVolumeMounts:
#- name: aws-token
# mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
# readOnly: true
external-dns:
enabled: false
@ -84,35 +259,14 @@ external-dns:
triggerLoopOnEvent: true
tolerations:
- key: node-role.kubernetes.io/master
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
nodeSelector:
node-role.kubernetes.io/control-plane: ""
#logLevel: debug
sources:
- service
- service
#- istio-gateway
provider: inmemory
extraVolumes:
- name: aws-token
projected:
sources:
- serviceAccountToken:
path: token
expirationSeconds: 86400
audience: "sts.amazonaws.com"
extraVolumeMounts:
- name: aws-token
mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
readOnly: true
env:
# -- "arn:aws:iam::${AWS::AccountId}:role/${AWS::Region}.${ClusterName}.externalDNS"
- name: AWS_ROLE_ARN
value: ""
- name: AWS_WEB_IDENTITY_TOKEN_FILE
value: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
- name: AWS_STS_REGIONAL_ENDPOINTS
value: "regional"

View File

@ -0,0 +1,36 @@
apiVersion: v2
description: KubeZero Argo - Events, Workflow, CD
name: kubezero-argo
version: 0.2.2
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
- kubezero
- argocd
- argo-events
- argo-workflow
maintainers:
- name: Stefan Reimer
email: stefan@zero-downtime.net
# Url: https://github.com/argoproj/argo-helm/tree/main/charts
dependencies:
- name: kubezero-lib
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: argo-events
version: 2.4.4
repository: https://argoproj.github.io/argo-helm
condition: argo-events.enabled
- name: argo-cd
version: 6.9.2
repository: https://argoproj.github.io/argo-helm
condition: argo-cd.enabled
- name: argocd-apps
version: 2.0.0
repository: https://argoproj.github.io/argo-helm
condition: argo-cd.enabled
- name: argocd-image-updater
version: 0.10.0
repository: https://argoproj.github.io/argo-helm
condition: argocd-image-updater.enabled
kubeVersion: ">= 1.26.0"

View File

@ -0,0 +1,92 @@
# kubezero-argo
![Version: 0.2.1](https://img.shields.io/badge/Version-0.2.1-informational?style=flat-square)
KubeZero Argo - Events, Workflow, CD
**Homepage:** <https://kubezero.com>
## Maintainers
| Name | Email | Url |
| ---- | ------ | --- |
| Stefan Reimer | <stefan@zero-downtime.net> | |
## Requirements
Kubernetes: `>= 1.26.0`
| Repository | Name | Version |
|------------|------|---------|
| https://argoproj.github.io/argo-helm | argo-cd | 6.7.10 |
| https://argoproj.github.io/argo-helm | argo-events | 2.4.4 |
| https://argoproj.github.io/argo-helm | argocd-apps | 2.0.0 |
| https://argoproj.github.io/argo-helm | argocd-image-updater | 0.9.6 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| argo-cd.applicationSet.enabled | bool | `false` | |
| argo-cd.configs.cm."resource.customizations" | string | `"cert-manager.io/Certificate:\n # Lua script for customizing the health status assessment\n health.lua: |\n hs = {}\n if obj.status ~= nil then\n if obj.status.conditions ~= nil then\n for i, condition in ipairs(obj.status.conditions) do\n if condition.type == \"Ready\" and condition.status == \"False\" then\n hs.status = \"Degraded\"\n hs.message = condition.message\n return hs\n end\n if condition.type == \"Ready\" and condition.status == \"True\" then\n hs.status = \"Healthy\"\n hs.message = condition.message\n return hs\n end\n end\n end\n end\n hs.status = \"Progressing\"\n hs.message = \"Waiting for certificate\"\n return hs\n"` | |
| argo-cd.configs.cm."timeout.reconciliation" | string | `"300s"` | |
| argo-cd.configs.cm."ui.bannercontent" | string | `"KubeZero v1.27 - Release notes"` | |
| argo-cd.configs.cm."ui.bannerpermanent" | string | `"true"` | |
| argo-cd.configs.cm."ui.bannerposition" | string | `"bottom"` | |
| argo-cd.configs.cm."ui.bannerurl" | string | `"https://kubezero.com/releases/v1.27"` | |
| argo-cd.configs.cm.url | string | `"https://argocd.example.com"` | |
| argo-cd.configs.params."controller.operation.processors" | string | `"5"` | |
| argo-cd.configs.params."controller.status.processors" | string | `"10"` | |
| argo-cd.configs.params."server.enable.gzip" | bool | `true` | |
| argo-cd.configs.params."server.insecure" | bool | `true` | |
| argo-cd.configs.secret.createSecret | bool | `false` | |
| argo-cd.configs.ssh.extraHosts | string | `"git.zero-downtime.net ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8YdJ4YcOK7A0K7qOWsRjCS+wHTStXRcwBe7gjG43HPSNijiCKoGf/c+tfNsRhyouawg7Law6M6ahmS/jKWBpznRIM+OdOFVSuhnK/nr6h6wG3/ZfdLicyAPvx1/STGY/Fc6/zXA88i/9PV+g84gSVmhf3fGY92wokiASiu9DU4T9dT1gIkdyOX6fbMi1/mMKLSrHnAQcjyasYDvw9ISCJ95EoSwbj7O4c+7jo9fxYvdCfZZZAEZGozTRLAAO0AnjVcRah7bZV/jfHJuhOipV/TB7UVAhlVv1dfGV7hoTp9UKtKZFJF4cjIrSGxqQA/mdhSdLgkepK7yc4Jp2xGnaarhY29DfqsQqop+ugFpTbj7Xy5Rco07mXc6XssbAZhI1xtCOX20N4PufBuYippCK5AE6AiAyVtJmvfGQk4HP+TjOyhFo7PZm3wc9Hym7IBBVC0Sl30K8ddufkAgHwNGvvu1ZmD9ZWaMOXJDHBCZGMMr16QREZwVtZTwMEQalc7/yqmuqMhmcJIfs/GA2Lt91y+pq9C8XyeUL0VFPch0vkcLSRe3ghMZpRFJ/ht307xPcLzgTJqN6oQtNNDzSQglSEjwhge2K4GyWcIh+oGsWxWz5dHyk1iJmw90Y976BZIl/mYVgbTtZAJ81oGe/0k5rAe+LDL+Yq6tG28QFOg0QmiQ=="` | |
| argo-cd.configs.styles | string | `".sidebar__logo img { content: url(https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png); }\n.sidebar__logo__text-logo { height: 0em; }\n.sidebar { background: linear-gradient(to bottom, #6A4D79, #493558, #2D1B30, #0D0711); }\n"` | |
| argo-cd.controller.metrics.enabled | bool | `false` | |
| argo-cd.controller.metrics.serviceMonitor.enabled | bool | `true` | |
| argo-cd.controller.resources.limits.memory | string | `"2048Mi"` | |
| argo-cd.controller.resources.requests.cpu | string | `"100m"` | |
| argo-cd.controller.resources.requests.memory | string | `"512Mi"` | |
| argo-cd.dex.enabled | bool | `false` | |
| argo-cd.enabled | bool | `false` | |
| argo-cd.global.logging.format | string | `"json"` | |
| argo-cd.istio.enabled | bool | `false` | |
| argo-cd.istio.gateway | string | `"istio-ingress/ingressgateway"` | |
| argo-cd.istio.ipBlocks | list | `[]` | |
| argo-cd.notifications.enabled | bool | `false` | |
| argo-cd.repoServer.metrics.enabled | bool | `false` | |
| argo-cd.repoServer.metrics.serviceMonitor.enabled | bool | `true` | |
| argo-cd.server.metrics.enabled | bool | `false` | |
| argo-cd.server.metrics.serviceMonitor.enabled | bool | `true` | |
| argo-cd.server.service.servicePortHttpsName | string | `"grpc"` | |
| argo-events.configs.jetstream.settings.maxFileStore | int | `-1` | Maximum size of the file storage (e.g. 20G) |
| argo-events.configs.jetstream.settings.maxMemoryStore | int | `-1` | Maximum size of the memory storage (e.g. 1G) |
| argo-events.configs.jetstream.streamConfig.duplicates | string | `"300s"` | Not documented at the moment |
| argo-events.configs.jetstream.streamConfig.maxAge | string | `"72h"` | Maximum age of existing messages, i.e. “72h”, “4h35m” |
| argo-events.configs.jetstream.streamConfig.maxBytes | string | `"1GB"` | |
| argo-events.configs.jetstream.streamConfig.maxMsgs | int | `1000000` | Maximum number of messages before expiring oldest message |
| argo-events.configs.jetstream.streamConfig.replicas | int | `1` | Number of replicas, defaults to 3 and requires minimal 3 |
| argo-events.configs.jetstream.versions[0].configReloaderImage | string | `"natsio/nats-server-config-reloader:0.14.1"` | |
| argo-events.configs.jetstream.versions[0].metricsExporterImage | string | `"natsio/prometheus-nats-exporter:0.14.0"` | |
| argo-events.configs.jetstream.versions[0].natsImage | string | `"nats:2.10.11-scratch"` | |
| argo-events.configs.jetstream.versions[0].startCommand | string | `"/nats-server"` | |
| argo-events.configs.jetstream.versions[0].version | string | `"2.10.11"` | |
| argo-events.enabled | bool | `false` | |
| argocd-apps.applications | object | `{}` | |
| argocd-apps.enabled | bool | `false` | |
| argocd-apps.projects | object | `{}` | |
| argocd-image-updater.authScripts.enabled | bool | `true` | |
| argocd-image-updater.authScripts.scripts."ecr-login.sh" | string | `"#!/bin/sh\naws ecr --region $AWS_REGION get-authorization-token --output text --query 'authorizationData[].authorizationToken' | base64 -d\n"` | |
| argocd-image-updater.authScripts.scripts."ecr-public-login.sh" | string | `"#!/bin/sh\naws ecr-public --region us-east-1 get-authorization-token --output text --query 'authorizationData.authorizationToken' | base64 -d\n"` | |
| argocd-image-updater.config.argocd.plaintext | bool | `true` | |
| argocd-image-updater.enabled | bool | `false` | |
| argocd-image-updater.fullnameOverride | string | `"argocd-image-updater"` | |
| argocd-image-updater.metrics.enabled | bool | `false` | |
| argocd-image-updater.metrics.serviceMonitor.enabled | bool | `true` | |
| argocd-image-updater.sshConfig.config | string | `"Host *\n PubkeyAcceptedAlgorithms +ssh-rsa\n HostkeyAlgorithms +ssh-rsa\n"` | |
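As a minimal sketch of how the Istio-related values above fit together (the hostname and CIDR are placeholders), restricting the exposed ArgoCD URL to a set of source IPs:
```
# Hypothetical values fragment for the kubezero-argo chart
argo-cd:
  enabled: true
  configs:
    cm:
      url: https://argocd.example.com   # consumed by the VirtualService and AuthorizationPolicy
  istio:
    enabled: true
    gateway: istio-ingress/ingressgateway
    ipBlocks:
      - 203.0.113.0/24   # placeholder CIDR; all other sources are denied
```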
## Resources
- https://argoproj.github.io/argo-cd/operator-manual/metrics/
- https://raw.githubusercontent.com/argoproj/argo-cd/master/examples/dashboard.json

View File

@ -18,3 +18,4 @@
## Resources
- https://argoproj.github.io/argo-cd/operator-manual/metrics/
- https://raw.githubusercontent.com/argoproj/argo-cd/master/examples/dashboard.json

View File

@ -0,0 +1,28 @@
{{- if index .Values "argo-cd" "istio" "enabled" }}
{{- if index .Values "argo-cd" "istio" "ipBlocks" }}
apiVersion: security.istio.io/v1beta1
kind: AuthorizationPolicy
metadata:
name: argocd-deny-not-in-ipblocks
namespace: istio-system
labels:
{{- include "kubezero-lib.labels" . | nindent 4 }}
spec:
selector:
matchLabels:
app: istio-ingressgateway
action: DENY
rules:
- from:
- source:
notIpBlocks:
{{- toYaml (index .Values "argo-cd" "istio" "ipBlocks") | nindent 8 }}
to:
- operation:
hosts: [{{ index .Values "argo-cd" "configs" "cm" "url" | quote }}]
when:
- key: connection.sni
values:
- '*'
{{- end }}
{{- end }}

View File

@ -1,4 +1,4 @@
{{- if .Values.istio.enabled }}
{{- if index .Values "argo-cd" "istio" "enabled" }}
apiVersion: networking.istio.io/v1alpha3
kind: VirtualService
metadata:
@ -8,9 +8,9 @@ metadata:
{{- include "kubezero-lib.labels" . | nindent 4 }}
spec:
gateways:
- {{ .Values.istio.gateway }}
- {{ index .Values "argo-cd" "istio" "gateway" }}
hosts:
- {{ index .Values "argo-cd" "server" "config" "url" }}
- {{ get (urlParse (index .Values "argo-cd" "configs" "cm" "url")) "host" }}
http:
- name: grpc
match:
@ -19,13 +19,13 @@ spec:
prefix: argocd-client
route:
- destination:
host: argocd-server
host: argo-argocd-server
port:
number: 443
- name: http
route:
- destination:
host: argocd-server
host: argo-argocd-server
port:
number: 80
{{- end }}

charts/kubezero-argo/update.sh Executable file
View File

@ -0,0 +1,10 @@
#!/bin/bash
. ../../scripts/lib-update.sh
update_helm
# Create ZDT dashboard configmap
../kubezero-metrics/sync_grafana_dashboards.py dashboards.yaml templates/argo-cd/grafana-dashboards.yaml
update_docs

View File

@ -0,0 +1,186 @@
argo-events:
enabled: false
configs:
jetstream:
# Default JetStream settings, could be overridden by EventBus JetStream spec
# Ref: https://docs.nats.io/running-a-nats-service/configuration#jetstream
settings:
# -- Maximum size of the memory storage (e.g. 1G)
maxMemoryStore: -1
# -- Maximum size of the file storage (e.g. 20G)
maxFileStore: -1
streamConfig:
# -- Maximum number of messages before expiring oldest message
maxMsgs: 1000000
# -- Maximum age of existing messages, i.e. “72h”, “4h35m”
maxAge: 72h
# Total size of messages before expiring oldest message, 0 means unlimited.
maxBytes: 1GB
# -- Number of replicas, defaults to 3 and requires minimal 3
replicas: 1
# -- Not documented at the moment
duplicates: 300s
# Supported versions of JetStream eventbus
# see: https://github.com/nats-io/k8s/blob/main/helm/charts/nats/values.yaml
# do NOT use -alpine tag as the entrypoint differs
versions:
- version: 2.10.11
natsImage: nats:2.10.11-scratch
metricsExporterImage: natsio/prometheus-nats-exporter:0.14.0
configReloaderImage: natsio/nats-server-config-reloader:0.14.1
startCommand: /nats-server
argocd-apps:
enabled: false
projects: {}
applications: {}
argo-cd:
enabled: false
#configs:
# secret:
# `htpasswd -nbBC 10 "" $ARGO_PWD | tr -d ':\n' | sed 's/$2y/$2a/'`
# argocdServerAdminPassword: "$2a$10$ivKzaXVxMqdeDSfS3nqi1Od3iDbnL7oXrixzDfZFRHlXHnAG6LydG"
# argocdServerAdminPasswordMtime: "2020-04-24T15:33:09BST"
global:
logging:
format: json
# image:
# tag: v2.1.6
configs:
styles: |
.sidebar__logo img { content: url(https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png); }
.sidebar__logo__text-logo { height: 0em; }
.sidebar { background: linear-gradient(to bottom, #6A4D79, #493558, #2D1B30, #0D0711); }
cm:
ui.bannercontent: "KubeZero v1.28 - Release notes"
ui.bannerurl: "https://kubezero.com/releases/v1.28"
ui.bannerpermanent: "true"
ui.bannerposition: "bottom"
# argo-cd.server.config.url -- ArgoCD URL being exposed via Istio
url: https://argocd.example.com
timeout.reconciliation: 300s
resource.customizations: |
cert-manager.io/Certificate:
# Lua script for customizing the health status assessment
health.lua: |
hs = {}
if obj.status ~= nil then
if obj.status.conditions ~= nil then
for i, condition in ipairs(obj.status.conditions) do
if condition.type == "Ready" and condition.status == "False" then
hs.status = "Degraded"
hs.message = condition.message
return hs
end
if condition.type == "Ready" and condition.status == "True" then
hs.status = "Healthy"
hs.message = condition.message
return hs
end
end
end
end
hs.status = "Progressing"
hs.message = "Waiting for certificate"
return hs
secret:
createSecret: false
ssh:
extraHosts: "git.zero-downtime.net ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8YdJ4YcOK7A0K7qOWsRjCS+wHTStXRcwBe7gjG43HPSNijiCKoGf/c+tfNsRhyouawg7Law6M6ahmS/jKWBpznRIM+OdOFVSuhnK/nr6h6wG3/ZfdLicyAPvx1/STGY/Fc6/zXA88i/9PV+g84gSVmhf3fGY92wokiASiu9DU4T9dT1gIkdyOX6fbMi1/mMKLSrHnAQcjyasYDvw9ISCJ95EoSwbj7O4c+7jo9fxYvdCfZZZAEZGozTRLAAO0AnjVcRah7bZV/jfHJuhOipV/TB7UVAhlVv1dfGV7hoTp9UKtKZFJF4cjIrSGxqQA/mdhSdLgkepK7yc4Jp2xGnaarhY29DfqsQqop+ugFpTbj7Xy5Rco07mXc6XssbAZhI1xtCOX20N4PufBuYippCK5AE6AiAyVtJmvfGQk4HP+TjOyhFo7PZm3wc9Hym7IBBVC0Sl30K8ddufkAgHwNGvvu1ZmD9ZWaMOXJDHBCZGMMr16QREZwVtZTwMEQalc7/yqmuqMhmcJIfs/GA2Lt91y+pq9C8XyeUL0VFPch0vkcLSRe3ghMZpRFJ/ht307xPcLzgTJqN6oQtNNDzSQglSEjwhge2K4GyWcIh+oGsWxWz5dHyk1iJmw90Y976BZIl/mYVgbTtZAJ81oGe/0k5rAe+LDL+Yq6tG28QFOg0QmiQ=="
params:
controller.status.processors: "10"
controller.operation.processors: "5"
server.insecure: true
server.enable.gzip: true
controller:
metrics:
enabled: false
serviceMonitor:
enabled: true
resources:
limits:
# cpu: 500m
memory: 2048Mi
requests:
cpu: 100m
memory: 512Mi
repoServer:
metrics:
enabled: false
serviceMonitor:
enabled: true
server:
# Rename former https port to grpc, works with istio + insecure
service:
servicePortHttpsName: grpc
metrics:
enabled: false
serviceMonitor:
enabled: true
# redis:
# We might want to try to keep redis close to the controller
# affinity:
dex:
enabled: false
applicationSet:
enabled: false
notifications:
enabled: false
# Support for Istio Ingress for ArgoCD
istio:
# istio.enabled -- Deploy Istio VirtualService to expose ArgoCD
enabled: false
# istio.gateway -- Name of the Istio gateway to add the VirtualService to
gateway: istio-ingress/ingressgateway
ipBlocks: []
argocd-image-updater:
enabled: false
# Unify all ArgoCD pieces under the same argocd namespace
fullnameOverride: argocd-image-updater
config:
argocd:
plaintext: true
metrics:
enabled: false
serviceMonitor:
enabled: true
authScripts:
enabled: true
scripts:
ecr-login.sh: |
#!/bin/sh
aws ecr --region $AWS_REGION get-authorization-token --output text --query 'authorizationData[].authorizationToken' | base64 -d
ecr-public-login.sh: |
#!/bin/sh
aws ecr-public --region us-east-1 get-authorization-token --output text --query 'authorizationData.authorizationToken' | base64 -d
sshConfig:
config: |
Host *
PubkeyAcceptedAlgorithms +ssh-rsa
HostkeyAlgorithms +ssh-rsa

View File

@ -1,21 +0,0 @@
apiVersion: v2
description: KubeZero ArgoCD Helm chart to install ArgoCD itself and the KubeZero ArgoCD Application
name: kubezero-argocd
version: 0.10.1
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
- kubezero
- argocd
- gitops
maintainers:
- name: Stefan Reimer
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: ">= 0.1.4"
repository: https://cdn.zero-downtime.net/charts/
- name: argo-cd
version: 4.5.4
repository: https://argoproj.github.io/argo-helm
kubeVersion: ">= 1.20.0"

View File

@ -1,59 +0,0 @@
# kubezero-argocd
![Version: 0.10.1](https://img.shields.io/badge/Version-0.10.1-informational?style=flat-square)
KubeZero ArgoCD Helm chart to install ArgoCD itself and the KubeZero ArgoCD Application
**Homepage:** <https://kubezero.com>
## Maintainers
| Name | Email | Url |
| ---- | ------ | --- |
| Stefan Reimer | <stefan@zero-downtime.net> | |
## Requirements
Kubernetes: `>= 1.20.0`
| Repository | Name | Version |
|------------|------|---------|
| https://argoproj.github.io/argo-helm | argo-cd | 4.5.4 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.4 |
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| argo-cd.applicationSet.enabled | bool | `false` | |
| argo-cd.configs.knownHosts.data.ssh_known_hosts | string | `"bitbucket.org ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAubiN81eDcafrgMeLzaFPsw2kNvEcqTKl/VqLat/MaB33pZy0y3rJZtnqwR2qOOvbwKZYKiEO1O6VqNEBxKvJJelCq0dTXWT5pbO2gDXC6h6QDXCaHo6pOHGPUy+YBaGQRGuSusMEASYiWunYN0vCAI8QaXnWMXNMdFP3jHAJH0eDsoiGnLPBlBp4TNm6rYI74nMzgz3B9IikW4WVK+dc8KZJZWYjAuORU3jc1c/NPskD2ASinf8v3xnfXeukU0sJ5N6m5E8VLjObPEO+mN2t/FZTMZLiFqPWc/ALSqnMnnhwrNi2rbfg/rd/IpL8Le3pSBne8+seeFVBoGqzHM9yXw==\ngithub.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=\ngithub.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl\ngithub.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==\ngitlab.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFSMqzJeV9rUzU4kWitGjeR4PWSa29SPqJ1fVkhtj3Hw9xjLVXVYrU9QlYWrOLXBpQ6KWjbjTDTdDkoohFzgbEY=\ngitlab.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAfuCHKVTjquxvt6CM6tdG4SLp1Btn/nOeHHE5UOzRdf\ngitlab.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCsj2bNKTBSpIYDEGk9KxsGh3mySTRgMtXL583qmBpzeQ+jqCMRgBqB98u3z++J1sKlXHWfM9dyhSevkMwSbhoR8XIq/U0tCNyokEi/ueaBMCvbcTHhO7FcwzY92WK4Yt0aGROY5qX2UKSeOvuP4D6TPqKF1onrSzH9bx9XUf2lEdWT/ia1NEKjunUqu1xOB/StKDHMoX4/OKyIzuS0q/T1zOATthvasJFoPrAjkohTyaDUz2LN5JoH839hViyEG82yB+MjcFV5MU3N1l1QL3cVUCh93xSaua1N85qivl+siMkPGbO5xR/En4iEY6K2XPASUEMaieWVNTRCtJ4S8H+9\ngit.zero-downtime.net ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8YdJ4YcOK7A0K7qOWsRjCS+wHTStXRcwBe7gjG43HPSNijiCKoGf/c+tfNsRhyouawg7Law6M6ahmS/jKWBpznRIM+OdOFVSuhnK/nr6h6wG3/ZfdLicyAPvx1/STGY/Fc6/zXA88i/9PV+g84gSVmhf3fGY92wokiASiu9DU4T9dT1gIkdyOX6fbMi1/mMKLSrHnAQcjyasYDvw9ISCJ95EoSwbj7O4c+7jo9fxYvdCfZZZAEZGozTRLAAO0AnjVcRah7bZV/jfHJuhOipV/TB7UVAhlVv1dfGV7hoTp9UKtKZFJF4cjIrSGxqQA/mdhSdLgkepK7yc4Jp2xGnaarhY29DfqsQqop+ugFpTbj7Xy5Rco07mXc6XssbAZhI1xtCOX20N4PufBuYippCK5AE6AiAyVtJmvfGQk4HP+TjOyhFo7PZm3wc9Hym7IBBVC0Sl30K8ddufkAgHwNGvvu1ZmD9ZWaMOXJDHBCZGMMr16QREZwVtZTwMEQalc7/yqmuqMhmcJIfs/GA2Lt91y+pq9C8XyeUL0VFPch0vkcLSRe3ghMZpRFJ/ht307xPcLzgTJqN6oQtNNDzSQglSEjwhge2K4GyWcIh+oGsWxWz5dHyk1iJmw90Y976BZIl/mYVgbTtZAJ81oGe/0k5rAe+LDL+Yq6tG28QFOg0QmiQ==\n"` | |
| argo-cd.configs.secret.createSecret | bool | `false` | |
| argo-cd.controller.args.appResyncPeriod | string | `"300"` | |
| argo-cd.controller.args.operationProcessors | string | `"4"` | |
| argo-cd.controller.args.statusProcessors | string | `"8"` | |
| argo-cd.controller.logFormat | string | `"json"` | |
| argo-cd.controller.metrics.enabled | bool | `false` | |
| argo-cd.controller.metrics.serviceMonitor.enabled | bool | `true` | |
| argo-cd.controller.resources.requests.cpu | string | `"100m"` | |
| argo-cd.controller.resources.requests.memory | string | `"256Mi"` | |
| argo-cd.dex.enabled | bool | `false` | |
| argo-cd.global | string | `nil` | |
| argo-cd.installCRDs | bool | `false` | |
| argo-cd.notifications.enabled | bool | `false` | |
| argo-cd.repoServer.logFormat | string | `"json"` | |
| argo-cd.repoServer.metrics.enabled | bool | `false` | |
| argo-cd.repoServer.metrics.serviceMonitor.enabled | bool | `true` | |
| argo-cd.server.config."resource.customizations" | string | `"cert-manager.io/Certificate:\n # Lua script for customizing the health status assessment\n health.lua: |\n hs = {}\n if obj.status ~= nil then\n if obj.status.conditions ~= nil then\n for i, condition in ipairs(obj.status.conditions) do\n if condition.type == \"Ready\" and condition.status == \"False\" then\n hs.status = \"Degraded\"\n hs.message = condition.message\n return hs\n end\n if condition.type == \"Ready\" and condition.status == \"True\" then\n hs.status = \"Healthy\"\n hs.message = condition.message\n return hs\n end\n end\n end\n end\n hs.status = \"Progressing\"\n hs.message = \"Waiting for certificate\"\n return hs\n"` | |
| argo-cd.server.config.url | string | `"argocd.example.com"` | ArgoCD hostname to be exposed via Istio |
| argo-cd.server.extraArgs[0] | string | `"--insecure"` | |
| argo-cd.server.logFormat | string | `"json"` | |
| argo-cd.server.metrics.enabled | bool | `false` | |
| argo-cd.server.metrics.serviceMonitor.enabled | bool | `true` | |
| argo-cd.server.service.servicePortHttpsName | string | `"grpc"` | |
| istio.enabled | bool | `false` | Deploy Istio VirtualService to expose ArgoCD |
| istio.gateway | string | `"istio-ingress/ingressgateway"` | Name of the Istio gateway to add the VirtualService to |
| istio.ipBlocks | list | `[]` | |
## Resources
- https://argoproj.github.io/argo-cd/operator-manual/metrics/
- https://raw.githubusercontent.com/argoproj/argo-cd/master/examples/dashboard.json

View File

@ -1,124 +0,0 @@
# Support for Istio Ingress for ArgoCD
istio:
# istio.enabled -- Deploy Istio VirtualService to expose ArgoCD
enabled: false
# istio.gateway -- Name of the Istio gateway to add the VirtualService to
gateway: istio-ingress/ingressgateway
ipBlocks: []
argo-cd:
installCRDs: false
#configs:
# secret:
# `htpasswd -nbBC 10 "" $ARGO_PWD | tr -d ':\n' | sed 's/$2y/$2a/'`
# argocdServerAdminPassword: "$2a$10$ivKzaXVxMqdeDSfS3nqi1Od3iDbnL7oXrixzDfZFRHlXHnAG6LydG"
# argocdServerAdminPasswordMtime: "2020-04-24T15:33:09BST"
global:
# image:
# tag: v2.1.6
configs:
secret:
createSecret: false
knownHosts:
data:
ssh_known_hosts: |
bitbucket.org ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAubiN81eDcafrgMeLzaFPsw2kNvEcqTKl/VqLat/MaB33pZy0y3rJZtnqwR2qOOvbwKZYKiEO1O6VqNEBxKvJJelCq0dTXWT5pbO2gDXC6h6QDXCaHo6pOHGPUy+YBaGQRGuSusMEASYiWunYN0vCAI8QaXnWMXNMdFP3jHAJH0eDsoiGnLPBlBp4TNm6rYI74nMzgz3B9IikW4WVK+dc8KZJZWYjAuORU3jc1c/NPskD2ASinf8v3xnfXeukU0sJ5N6m5E8VLjObPEO+mN2t/FZTMZLiFqPWc/ALSqnMnnhwrNi2rbfg/rd/IpL8Le3pSBne8+seeFVBoGqzHM9yXw==
github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=
github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl
github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==
gitlab.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFSMqzJeV9rUzU4kWitGjeR4PWSa29SPqJ1fVkhtj3Hw9xjLVXVYrU9QlYWrOLXBpQ6KWjbjTDTdDkoohFzgbEY=
gitlab.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAfuCHKVTjquxvt6CM6tdG4SLp1Btn/nOeHHE5UOzRdf
gitlab.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCsj2bNKTBSpIYDEGk9KxsGh3mySTRgMtXL583qmBpzeQ+jqCMRgBqB98u3z++J1sKlXHWfM9dyhSevkMwSbhoR8XIq/U0tCNyokEi/ueaBMCvbcTHhO7FcwzY92WK4Yt0aGROY5qX2UKSeOvuP4D6TPqKF1onrSzH9bx9XUf2lEdWT/ia1NEKjunUqu1xOB/StKDHMoX4/OKyIzuS0q/T1zOATthvasJFoPrAjkohTyaDUz2LN5JoH839hViyEG82yB+MjcFV5MU3N1l1QL3cVUCh93xSaua1N85qivl+siMkPGbO5xR/En4iEY6K2XPASUEMaieWVNTRCtJ4S8H+9
git.zero-downtime.net ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC8YdJ4YcOK7A0K7qOWsRjCS+wHTStXRcwBe7gjG43HPSNijiCKoGf/c+tfNsRhyouawg7Law6M6ahmS/jKWBpznRIM+OdOFVSuhnK/nr6h6wG3/ZfdLicyAPvx1/STGY/Fc6/zXA88i/9PV+g84gSVmhf3fGY92wokiASiu9DU4T9dT1gIkdyOX6fbMi1/mMKLSrHnAQcjyasYDvw9ISCJ95EoSwbj7O4c+7jo9fxYvdCfZZZAEZGozTRLAAO0AnjVcRah7bZV/jfHJuhOipV/TB7UVAhlVv1dfGV7hoTp9UKtKZFJF4cjIrSGxqQA/mdhSdLgkepK7yc4Jp2xGnaarhY29DfqsQqop+ugFpTbj7Xy5Rco07mXc6XssbAZhI1xtCOX20N4PufBuYippCK5AE6AiAyVtJmvfGQk4HP+TjOyhFo7PZm3wc9Hym7IBBVC0Sl30K8ddufkAgHwNGvvu1ZmD9ZWaMOXJDHBCZGMMr16QREZwVtZTwMEQalc7/yqmuqMhmcJIfs/GA2Lt91y+pq9C8XyeUL0VFPch0vkcLSRe3ghMZpRFJ/ht307xPcLzgTJqN6oQtNNDzSQglSEjwhge2K4GyWcIh+oGsWxWz5dHyk1iJmw90Y976BZIl/mYVgbTtZAJ81oGe/0k5rAe+LDL+Yq6tG28QFOg0QmiQ==
controller:
args:
statusProcessors: "8"
operationProcessors: "4"
appResyncPeriod: "300"
logFormat: json
metrics:
enabled: false
serviceMonitor:
enabled: true
resources:
# limits:
# cpu: 500m
# memory: 2048Mi
requests:
cpu: 100m
memory: 256Mi
repoServer:
logFormat: json
metrics:
enabled: false
serviceMonitor:
enabled: true
server:
logFormat: json
config:
#ui.bannercontent: "KubeZero Release 1.20.8-12 incl. ArgoCD 2.1 -> Release notes"
#ui.bannerurl: "https://blog.argoproj.io/argo-cd-v2-1-first-release-candidate-is-ready-c1aab7795638"
# argo-cd.server.config.url -- ArgoCD hostname to be exposed via Istio
url: argocd.example.com
#repositories: |
# - url: https://cdn.zero-downtime.net/charts
resource.customizations: |
cert-manager.io/Certificate:
# Lua script for customizing the health status assessment
health.lua: |
hs = {}
if obj.status ~= nil then
if obj.status.conditions ~= nil then
for i, condition in ipairs(obj.status.conditions) do
if condition.type == "Ready" and condition.status == "False" then
hs.status = "Degraded"
hs.message = condition.message
return hs
end
if condition.type == "Ready" and condition.status == "True" then
hs.status = "Healthy"
hs.message = condition.message
return hs
end
end
end
end
hs.status = "Progressing"
hs.message = "Waiting for certificate"
return hs
# Rename former https port to grpc, works with istio + insecure
service:
servicePortHttpsName: grpc
metrics:
enabled: false
serviceMonitor:
enabled: true
extraArgs:
- --insecure
# redis:
# We might want to try to keep redis close to the controller
# affinity:
dex:
enabled: false
applicationSet:
enabled: false
notifications:
enabled: false

View File

@ -0,0 +1,23 @@
apiVersion: v2
name: kubezero-auth
description: KubeZero umbrella chart for all things Authentication and Identity management
type: application
version: 0.4.6
appVersion: 22.0.5
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
- kubezero
- keycloak
maintainers:
- name: Stefan Reimer
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: keycloak
version: 18.7.1
repository: "oci://registry-1.docker.io/bitnamicharts"
condition: keycloak.enabled
kubeVersion: ">= 1.26.0"

View File

@ -0,0 +1,63 @@
# kubezero-auth
![Version: 0.4.5](https://img.shields.io/badge/Version-0.4.5-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 22.0.5](https://img.shields.io/badge/AppVersion-22.0.5-informational?style=flat-square)
KubeZero umbrella chart for all things Authentication and Identity management
**Homepage:** <https://kubezero.com>
## Maintainers
| Name | Email | Url |
| ---- | ------ | --- |
| Stefan Reimer | <stefan@zero-downtime.net> | |
## Requirements
Kubernetes: `>= 1.26.0`
| Repository | Name | Version |
|------------|------|---------|
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
| oci://registry-1.docker.io/bitnamicharts | keycloak | 18.3.2 |
# Keycloak
## Operator
https://www.keycloak.org/operator/installation
https://github.com/keycloak/keycloak/tree/main/operator
https://github.com/aerogear/keycloak-metrics-spi
https://github.com/keycloak/keycloak-benchmark/tree/main/provision/minikube/keycloak/templates
## Resources
- https://github.com/bitnami/charts/tree/main/bitnami/keycloak
## Values
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| keycloak.auth.adminUser | string | `"admin"` | |
| keycloak.auth.existingSecret | string | `"kubezero-auth"` | |
| keycloak.auth.passwordSecretKey | string | `"admin-password"` | |
| keycloak.enabled | bool | `false` | |
| keycloak.istio.admin.enabled | bool | `false` | |
| keycloak.istio.admin.gateway | string | `"istio-ingress/private-ingressgateway"` | |
| keycloak.istio.admin.url | string | `""` | |
| keycloak.istio.auth.enabled | bool | `false` | |
| keycloak.istio.auth.gateway | string | `"istio-ingress/ingressgateway"` | |
| keycloak.istio.auth.url | string | `""` | |
| keycloak.metrics.enabled | bool | `false` | |
| keycloak.metrics.serviceMonitor.enabled | bool | `true` | |
| keycloak.pdb.create | bool | `false` | |
| keycloak.pdb.minAvailable | int | `1` | |
| keycloak.postgresql.auth.database | string | `"keycloak"` | |
| keycloak.postgresql.auth.existingSecret | string | `"kubezero-auth"` | |
| keycloak.postgresql.auth.username | string | `"keycloak"` | |
| keycloak.postgresql.primary.persistence.size | string | `"1Gi"` | |
| keycloak.postgresql.readReplicas.replicaCount | int | `0` | |
| keycloak.production | bool | `true` | |
| keycloak.proxy | string | `"edge"` | |
| keycloak.replicaCount | int | `1` | |
| keycloak.resources.requests.cpu | string | `"100m"` | |
| keycloak.resources.requests.memory | string | `"512Mi"` | |

View File

@ -0,0 +1,28 @@
{{ template "chart.header" . }}
{{ template "chart.deprecationWarning" . }}
{{ template "chart.versionBadge" . }}{{ template "chart.typeBadge" . }}{{ template "chart.appVersionBadge" . }}
{{ template "chart.description" . }}
{{ template "chart.homepageLine" . }}
{{ template "chart.maintainersSection" . }}
{{ template "chart.sourcesSection" . }}
{{ template "chart.requirementsSection" . }}
# Keycloak
## Operator
https://www.keycloak.org/operator/installation
https://github.com/keycloak/keycloak/tree/main/operator
https://github.com/aerogear/keycloak-metrics-spi
https://github.com/keycloak/keycloak-benchmark/tree/main/provision/minikube/keycloak/templates
## Resources
- https://github.com/bitnami/charts/tree/main/bitnami/keycloak
{{ template "chart.valuesSection" . }}

View File

@ -0,0 +1,9 @@
configmap: grafana-dashboards
condition: '.Values.keycloak.metrics.enabled'
gzip: true
# folder:
dashboards:
- name: keycloak
# url: https://grafana.com/api/dashboards/10441/revisions/2/download
url: https://grafana.com/api/dashboards/17878/revisions/1/download
tags: ['Keycloak', 'Auth']

View File

@ -0,0 +1,14 @@
# Abstract
## IdP
### AWS
Get client descriptor for your realm and client via:
`wget https://<auth-endpoint>/realms/<realm>/protocol/saml/descriptor`
# Resources
## AWS
- https://aws.amazon.com/blogs/business-intelligence/federate-amazon-quicksight-access-with-open-source-identity-provider-keycloak/
- https://docs.aws.amazon.com/singlesignon/latest/userguide/troubleshooting.html#issue8

View File

@ -0,0 +1,49 @@
# Upgrade Postgres major version
## backup
- shell into the running postgres-auth pod (see the exec sketch below)
```
export PGPASSWORD="<postgres_password from secret>"
cd /bitnami/postgresql
pg_dumpall > backup
```
- store backup off-site
```
kubectl cp keycloak/kubezero-auth-postgresql-0:/bitnami/postgresql/backup postgres-backup
```
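A sketch of the shell-in step above, reusing the pod name and namespace from the `kubectl cp` example:
```
# open a shell inside the running PostgreSQL pod
kubectl exec -it -n keycloak kubezero-auth-postgresql-0 -- bash
```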
## upgrade
- upgrade auth chart
- set replica of the keycloak statefulSet to 0
- set replica of the postgres-auth statefulSet to 0
- delete the postgres-auth PVC and pod to flush the old DB (see the sketch below)
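A sketch of the scale-down and flush steps; the StatefulSet and PVC names are assumptions derived from the pod name used above:
```
# scale Keycloak and PostgreSQL down to zero replicas (names are assumed)
kubectl scale statefulset -n keycloak kubezero-auth-keycloak --replicas=0
kubectl scale statefulset -n keycloak kubezero-auth-postgresql --replicas=0
# flush the old database by removing its PVC (name assumed to follow the data-<pod> pattern)
kubectl delete pvc -n keycloak data-kubezero-auth-postgresql-0
```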
## restore
- restore replica of postgres-auth statefulSet
- copy backup to new PVC
```
kubectl cp postgres-backup keycloak/kubezero-auth-postgresql-0:/bitnami/postgresql/backup
```
- log into psql as admin ( shell on running pod )
```
psql -U postgres
```
- drop database `keycloak` in case the keycloak instances connected early
```
DROP DATABASE keycloak;
```
- actual restore
```
psql -U postgres -d postgres -f backup
```
- reset the replica count of the keycloak statefulSet or force an ArgoCD sync
success.

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,37 @@
{{- if and .Values.keycloak.enabled .Values.keycloak.istio.admin.enabled }}
apiVersion: security.istio.io/v1beta1
kind: AuthorizationPolicy
metadata:
name: {{ .Release.Name }}-keycloak-admin-deny-not-in-ipblocks
namespace: istio-system
labels:
{{- include "kubezero-lib.labels" $ | nindent 4 }}
spec:
selector:
matchLabels:
app: istio-ingressgateway
action: DENY
rules:
# block access to metrics via Ingress
- to:
- operation:
hosts: ["{{ .Values.keycloak.istio.admin.url }}"]
paths: ["/metrics", "/realms/*/metrics"]
when:
- key: connection.sni
values:
- '*'
{{- if .Values.keycloak.istio.admin.ipBlocks }}
- from:
- source:
notIpBlocks:
{{- toYaml .Values.keycloak.istio.admin.ipBlocks | nindent 8 }}
to:
- operation:
hosts: ["{{ .Values.keycloak.istio.admin.url }}"]
when:
- key: connection.sni
values:
- '*'
{{- end }}
{{- end }}
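A hypothetical values fragment that feeds the template above, exposing the admin endpoint on a placeholder hostname and restricting it to one example CIDR via `ipBlocks` (every value below is illustrative):
```
cat > auth-values.yaml <<'EOF'
keycloak:
  enabled: true
  istio:
    admin:
      enabled: true
      gateway: istio-ingress/private-ingressgateway
      url: keycloak-admin.example.com   # placeholder hostname
      ipBlocks:
        - 203.0.113.0/24                # example CIDR to allow
EOF
```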

View File

@ -0,0 +1,44 @@
{{- if and .Values.keycloak.enabled .Values.keycloak.istio.admin.enabled .Values.keycloak.istio.admin.url }}
# Admin endpoint / all URLs allowed
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
name: {{ template "kubezero-lib.fullname" $ }}-admin
namespace: {{ .Release.Namespace }}
labels:
{{- include "kubezero-lib.labels" $ | nindent 4 }}
spec:
gateways:
- {{ .Values.keycloak.istio.admin.gateway }}
hosts:
- {{ .Values.keycloak.istio.admin.url }}
http:
- route:
- destination:
host: {{ template "kubezero-lib.fullname" $ }}-keycloak
{{- end }}
---
{{- if and .Values.keycloak.enabled .Values.keycloak.istio.auth.enabled .Values.keycloak.istio.auth.url }}
# auth endpoint - only expose minimal URLs
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
name: {{ template "kubezero-lib.fullname" $ }}-auth
namespace: {{ .Release.Namespace }}
labels:
{{- include "kubezero-lib.labels" $ | nindent 4 }}
spec:
gateways:
- {{ .Values.keycloak.istio.auth.gateway }}
hosts:
- {{ .Values.keycloak.istio.auth.url }}
http:
- match:
- uri:
regex: ^/(js/|realms/|resources/|robots.txt).*
route:
- destination:
host: {{ template "kubezero-lib.fullname" $ }}-keycloak
{{- end }}
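A quick smoke test against the two (hypothetical) hostnames: the admin VirtualService routes every path, while the auth VirtualService only routes paths matching the regex above, so anything else should fall through to the gateway's 404:
```
curl -sI https://keycloak-admin.example.com/admin/ | head -n1
curl -sI https://auth.example.com/realms/master/.well-known/openid-configuration | head -n1
# path outside the allowed regex - expect a 404 from the ingress gateway
curl -sI https://auth.example.com/admin/ | head -n1
```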

charts/kubezero-auth/update.sh (new executable file, 12 lines)
View File

@ -0,0 +1,12 @@
#!/bin/bash
set -ex
. ../../scripts/lib-update.sh
login_ecr_public
update_helm
# Fetch dashboards
../kubezero-metrics/sync_grafana_dashboards.py dashboards-keycloak.yaml templates/keycloak/grafana-dashboards.yaml
update_docs
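The script resolves everything via relative paths (lib-update.sh two levels up, the dashboard sync script in the sibling kubezero-metrics chart), so it is presumably meant to be run from inside the chart directory, with the usual repo tooling (helm, AWS credentials for the public ECR login) available:
```
cd charts/kubezero-auth && ./update.sh
```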

View File

@ -0,0 +1,48 @@
keycloak:
enabled: false
proxy: edge
production: true
auth:
adminUser: admin
existingSecret: kubezero-auth
passwordSecretKey: admin-password
replicaCount: 1
pdb:
create: false
minAvailable: 1
metrics:
enabled: false
serviceMonitor:
enabled: true
resources:
requests:
cpu: 100m
memory: 512Mi
postgresql:
auth:
existingSecret: kubezero-auth
username: keycloak
database: keycloak
primary:
persistence:
size: 1Gi
readReplicas:
replicaCount: 0
istio:
admin:
enabled: false
gateway: istio-ingress/private-ingressgateway
url: ""
auth:
enabled: false
gateway: istio-ingress/ingressgateway
url: ""

View File

@ -2,7 +2,7 @@ apiVersion: v2
name: kubezero-cert-manager
description: KubeZero Umbrella Chart for cert-manager
type: application
version: 0.9.0
version: 0.9.7
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -13,9 +13,9 @@ maintainers:
email: stefan@zero-downtime.net
dependencies:
- name: kubezero-lib
version: ">= 0.1.4"
version: ">= 0.1.6"
repository: https://cdn.zero-downtime.net/charts/
- name: cert-manager
version: 1.8.0
version: v1.14.4
repository: https://charts.jetstack.io
kubeVersion: ">= 1.20.0"
kubeVersion: ">= 1.26.0"

View File

@ -1,6 +1,6 @@
# kubezero-cert-manager
![Version: 0.9.0](https://img.shields.io/badge/Version-0.9.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 0.9.7](https://img.shields.io/badge/Version-0.9.7-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Umbrella Chart for cert-manager
@ -14,12 +14,12 @@ KubeZero Umbrella Chart for cert-manager
## Requirements
Kubernetes: `>= 1.20.0`
Kubernetes: `>= 1.26.0`
| Repository | Name | Version |
|------------|------|---------|
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.4 |
| https://charts.jetstack.io | cert-manager | 1.8.0 |
| https://cdn.zero-downtime.net/charts/ | kubezero-lib | >= 0.1.6 |
| https://charts.jetstack.io | cert-manager | v1.14.4 |
## AWS - OIDC IAM roles
@ -32,11 +32,16 @@ If your resolvers need additional secrets like CloudFlare API tokens etc. make
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| cert-manager.cainjector.extraArgs[0] | string | `"--logging-format=json"` | |
| cert-manager.cainjector.extraArgs[1] | string | `"--leader-elect=false"` | |
| cert-manager.cainjector.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| cert-manager.cainjector.tolerations[0].effect | string | `"NoSchedule"` | |
| cert-manager.cainjector.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| cert-manager.cainjector.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
| cert-manager.enableCertificateOwnerRef | bool | `true` | |
| cert-manager.enabled | bool | `true` | |
| cert-manager.extraArgs[0] | string | `"--dns01-recursive-nameservers-only"` | |
| cert-manager.extraArgs[0] | string | `"--logging-format=json"` | |
| cert-manager.extraArgs[1] | string | `"--leader-elect=false"` | |
| cert-manager.extraArgs[2] | string | `"--dns01-recursive-nameservers-only"` | |
| cert-manager.global.leaderElection.namespace | string | `"cert-manager"` | |
| cert-manager.ingressShim.defaultIssuerKind | string | `"ClusterIssuer"` | |
| cert-manager.ingressShim.defaultIssuerName | string | `"letsencrypt-dns-prod"` | |
@ -44,10 +49,11 @@ If your resolvers need additional sercrets like CloudFlare API tokens etc. make
| cert-manager.prometheus.servicemonitor.enabled | bool | `false` | |
| cert-manager.startupapicheck.enabled | bool | `false` | |
| cert-manager.tolerations[0].effect | string | `"NoSchedule"` | |
| cert-manager.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| cert-manager.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
| cert-manager.webhook.extraArgs[0] | string | `"--logging-format=json"` | |
| cert-manager.webhook.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| cert-manager.webhook.tolerations[0].effect | string | `"NoSchedule"` | |
| cert-manager.webhook.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| cert-manager.webhook.tolerations[0].key | string | `"node-role.kubernetes.io/control-plane"` | |
| clusterIssuer | object | `{}` | |
| localCA.enabled | bool | `false` | |
| localCA.selfsigning | bool | `true` | |

View File

@ -1,3 +1,4 @@
rules:
- name: prometheus-rules
condition: 'index .Values "cert-manager" "prometheus" "servicemonitor" "enabled"'
url: file://rules/cert-manager-mixin-prometheusRule

View File

@ -1,4 +1,4 @@
configmap: cert-manager-grafana-dashboard
configmap: grafana-dashboard
gzip: true
folder: KubeZero
condition: 'index .Values "cert-manager" "prometheus" "servicemonitor" "enabled"'

View File

@ -1,6 +1,15 @@
{
"version": 1,
"dependencies": [
{
"source": {
"git": {
"remote": "https://github.com/imusmanmalik/cert-manager-mixin.git",
"subdir": ""
}
},
"version": "main"
},
{
"source": {
"git": {
@ -8,16 +17,7 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "release-0.10"
},
{
"source": {
"git": {
"remote": "https://gitlab.com/uneeq-oss/cert-manager-mixin.git",
"subdir": ""
}
},
"version": "master"
"version": "main"
}
],
"legacyImports": true

View File

@ -8,8 +8,8 @@
"subdir": "grafana"
}
},
"version": "199e363523104ff8b3a12483a4e3eca86372b078",
"sum": "/jDHzVAjHB4AOLkJHw1GyATX5ogZ1iMdcJXZAgaG3+g="
"version": "5698c8940b6dadca3f42107b7839557bc041761f",
"sum": "l6fPvh3tW6fWot308w71QY/amrYsFPeitvz1IgJxqQA="
},
{
"source": {
@ -18,8 +18,18 @@
"subdir": "contrib/mixin"
}
},
"version": "b872757492ca70f350abd44eb95d81b95339f581",
"sum": "W/Azptf1PoqjyMwJON96UY69MFugDA4IAYiKURscryc="
"version": "5a53a708d8ab9ef936ac5b8062ffc66c77a2c18f",
"sum": "xuUBd2vqF7asyVDe5CE08uPT/RxAdy8O75EjFJoMXXU="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafana.git",
"subdir": "grafana-mixin"
}
},
"version": "1120f9e255760a3c104b57871fcb91801e934382",
"sum": "MkjR7zCgq6MUZgjDzop574tFKoTX2OBr7DTwm1K+Ofs="
},
{
"source": {
@ -28,8 +38,48 @@
"subdir": "grafonnet"
}
},
"version": "6db00c292d3a1c71661fc875f90e0ec7caa538c2",
"sum": "gF8foHByYcB25jcUOBqP6jxk0OPifQMjPvKY0HaCk6w="
"version": "a1d61cce1da59c71409b99b5c7568511fec661ea",
"sum": "342u++/7rViR/zj2jeJOjshzglkZ1SY+hFNuyCBFMdc="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet-lib.git",
"subdir": "grafonnet-7.0"
}
},
"version": "a1d61cce1da59c71409b99b5c7568511fec661ea",
"sum": "gCtR9s/4D5fxU9aKXg0Bru+/njZhA0YjLjPiASc61FM="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-latest"
}
},
"version": "6ac1593ca787638da223380ff4a3fd0f96e953e1",
"sum": "GxEO83uxgsDclLp/fmlUJZDbSGpeUZY6Ap3G2cgdL1g="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v10.0.0"
}
},
"version": "6ac1593ca787638da223380ff4a3fd0f96e953e1",
"sum": "W7sLuAvMSJPkC7Oo31t45Nz/cUdJV7jzNSJTd3F1daM="
},
{
"source": {
"git": {
"remote": "https://github.com/grafana/grafonnet.git",
"subdir": "gen/grafonnet-v10.4.0"
}
},
"version": "6ac1593ca787638da223380ff4a3fd0f96e953e1",
"sum": "ZSmDT7i/qU9P8ggmuPuJT+jonq1ZEsBRCXycW/H5L/A="
},
{
"source": {
@ -38,8 +88,38 @@
"subdir": "grafana-builder"
}
},
"version": "5fb2525651cc6e5100e081b10ad9fbe7e3595231",
"sum": "0KkygBQd/AFzUvVzezE4qF/uDYgrwUXVpZfINBti0oc="
"version": "7561fd330312538d22b00e0c7caecb4ba66321ea",
"sum": "+z5VY+bPBNqXcmNAV8xbJcbsRA+pro1R3IM7aIY8OlU="
},
{
"source": {
"git": {
"remote": "https://github.com/imusmanmalik/cert-manager-mixin.git",
"subdir": ""
}
},
"version": "72a094ff162bbd93921803994241d73900592c9a",
"sum": "h+YvBTXL5A02165i3yt3SxSAbFftChtXYJ0nYFnOAqo="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/docsonnet.git",
"subdir": "doc-util"
}
},
"version": "6ac6c69685b8c29c54515448eaca583da2d88150",
"sum": "BrAL/k23jq+xy9oA7TWIhUx07dsA/QLm3g7ktCwe//U="
},
{
"source": {
"git": {
"remote": "https://github.com/jsonnet-libs/xtd.git",
"subdir": ""
}
},
"version": "fc2e57a8839902ed4ba6cab5a99d642500f7102b",
"sum": "43waffw1QzvpY4rKcWoo3L7Vpee+DCYexwLDd5cPG0M="
},
{
"source": {
@ -48,18 +128,8 @@
"subdir": ""
}
},
"version": "b538a10c89508f8d12885680cca72a134d3127f5",
"sum": "GLt5T2k4RKg36Gfcaf9qlTfVumDitqotVD0ipz/bPJ4="
},
{
"source": {
"git": {
"remote": "https://github.com/kubernetes-monitoring/kubernetes-mixin.git",
"subdir": "lib/promgrafonnet"
}
},
"version": "62ad10fe9ceb53c6b846871997abbfe8e0bd7cf5",
"sum": "zv7hXGui6BfHzE9wPatHI/AGZa4A2WKo6pq7ZdqBsps="
"version": "a1c276d7a46c4b06fa5d8b4a64441939d398efe5",
"sum": "b/mEai1MvVnZ22YvZlXEO4jWDZledrtJg8eOS1ZUj0M="
},
{
"source": {
@ -68,8 +138,8 @@
"subdir": "jsonnet/kube-state-metrics"
}
},
"version": "e080c3ce73ad514254e38dccb37c93bec6b257ae",
"sum": "U1wzIpTAtOvC1yj43Y8PfvT0JfvnAcMfNH12Wi+ab0Y="
"version": "9ba1c3702142918e09e8eb5ca530e15198624259",
"sum": "msMZyUvcebzRILLzNlTIiSOwa1XgQKtP7jbZTkiqwM0="
},
{
"source": {
@ -78,8 +148,8 @@
"subdir": "jsonnet/kube-state-metrics-mixin"
}
},
"version": "e080c3ce73ad514254e38dccb37c93bec6b257ae",
"sum": "u8gaydJoxEjzizQ8jY8xSjYgWooPmxw+wIWdDxifMAk="
"version": "9ba1c3702142918e09e8eb5ca530e15198624259",
"sum": "qclI7LwucTjBef3PkGBkKxF0mfZPbHnn4rlNWKGtR4c="
},
{
"source": {
@ -88,8 +158,8 @@
"subdir": "jsonnet/kube-prometheus"
}
},
"version": "125fb56d7495e20f504e1537ae949e8261a2f812",
"sum": "QwH53kTe1jWCeXmQe7+U1PBs/a1p4MCTEW3B8IiIxeo="
"version": "76f2e1ef95be0df752037baa040781c5219e1fb3",
"sum": "IgpAgyyBZ7VT2vr9kSYQP/lkZUNQnbqpGh2sYCtUKs0="
},
{
"source": {
@ -98,8 +168,8 @@
"subdir": "jsonnet/mixin"
}
},
"version": "d8ba1c766a141cb35072ae2f2578ec8588c9efcd",
"sum": "qZ4WgiweaE6eeKtFK60QUjLO8sf2L9Q8fgafWvDcyfY=",
"version": "71d9433ba612f4b826ffa38520b23a7985b50db3",
"sum": "gi+knjdxs2T715iIQIntrimbHRgHnpM8IFBJDD1gYfs=",
"name": "prometheus-operator-mixin"
},
{
@ -109,8 +179,8 @@
"subdir": "jsonnet/prometheus-operator"
}
},
"version": "d8ba1c766a141cb35072ae2f2578ec8588c9efcd",
"sum": "yjdwZ+5UXL42EavJleAJmd8Ou6MSDfExvlKAxFCxXVE="
"version": "71d9433ba612f4b826ffa38520b23a7985b50db3",
"sum": "S4LFa0h1AzANixqGMowtwVswVP+y6f+fXloxpO7hMes="
},
{
"source": {
@ -119,8 +189,8 @@
"subdir": "doc/alertmanager-mixin"
}
},
"version": "16fa045db47d68a09a102c7b80b8899c1f57c153",
"sum": "pep+dHzfIjh2SU5pEkwilMCAT/NoL6YYflV4x8cr7vU=",
"version": "14cbe6301c732658d6fe877ec55ad5b738abcf06",
"sum": "IpF46ZXsm+0wJJAPtAre8+yxTNZA57mBqGpBP/r7/kw=",
"name": "alertmanager"
},
{
@ -130,8 +200,8 @@
"subdir": "docs/node-mixin"
}
},
"version": "a2321e7b940ddcff26873612bccdf7cd4c42b6b6",
"sum": "MlWDAKGZ+JArozRKdKEvewHeWn8j2DNBzesJfLVd0dk="
"version": "3accd4cf8286e69d70516abdced6bf186274322a",
"sum": "vWhHvFqV7+fxrQddTeGVKi1e4EzB3VWtNyD8TjSmevY="
},
{
"source": {
@ -140,10 +210,21 @@
"subdir": "documentation/prometheus-mixin"
}
},
"version": "41f1a8125e664985dd30674e5bdf6b683eff5d32",
"sum": "ZjQoYhvgKwJNkg+h+m9lW3SYjnjv5Yx5btEipLhru88=",
"version": "773170f372e0a57949854b74231ee3e09185f728",
"sum": "u/Fpz2MPkezy71/q+c7mF0vc3hE9fWt2W/YbvF0LP/8=",
"name": "prometheus"
},
{
"source": {
"git": {
"remote": "https://github.com/pyrra-dev/pyrra.git",
"subdir": "config/crd/bases"
}
},
"version": "551856d42dff02ec38c5b0ea6a2d99c4cb127e82",
"sum": "bY/Pcrrbynguq8/HaI88cQ3B2hLv/xc+76QILY7IL+g=",
"name": "pyrra"
},
{
"source": {
"git": {
@ -151,19 +232,9 @@
"subdir": "mixin"
}
},
"version": "fb97c9a5ef51849ccb7960abbeb9581ad7f511b9",
"sum": "X+060DnePPeN/87fgj0SrfxVitywTk8hZA9V4nHxl1g=",
"version": "93c79b61825ec00889188e35a58635eee247bc36",
"sum": "HhSSbGGCNHCMy1ee5jElYDm0yS9Vesa7QB2/SHKdjsY=",
"name": "thanos-mixin"
},
{
"source": {
"git": {
"remote": "https://gitlab.com/uneeq-oss/cert-manager-mixin.git",
"subdir": ""
}
},
"version": "eae22f642aaa5d422e4766f6811df2158fc05539",
"sum": "DOg3fzS0OWrjjRPVsKgxID/rk9AC3ESQ4gDELc2RNgM="
}
],
"legacyImports": false

View File

@ -2,7 +2,7 @@ local addMixin = (import 'kube-prometheus/lib/mixin.libsonnet');
local certManagerMixin = addMixin({
name: 'cert-manager',
mixin: (import 'gitlab.com/uneeq-oss/cert-manager-mixin/mixin.libsonnet')
mixin: (import 'github.com/imusmanmalik/cert-manager-mixin/mixin.libsonnet')
});
{ 'cert-manager-mixin-prometheusRule': certManagerMixin.prometheusRules }

View File

@ -17,8 +17,8 @@
"alert": "CertManagerAbsent",
"annotations": {
"description": "New certificates will not be able to be minted, and existing ones can't be renewed until cert-manager is back.",
"runbook_url": "https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerabsent",
"summary": "Cert Manager has dissapeared from Prometheus service discovery."
"runbook_url": "https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagerabsent",
"summary": "Cert Manager has disappeared from Prometheus service discovery."
},
"expr": "absent(up{job=\"cert-manager\"})",
"for": "10m",
@ -36,7 +36,7 @@
"annotations": {
"dashboard_url": "https://grafana.example.com/d/TvuRo2iMk/cert-manager",
"description": "The domain that this cert covers will be unavailable after {{ $value | humanizeDuration }}. Clients using endpoints that this cert protects will start to fail in {{ $value | humanizeDuration }}.",
"runbook_url": "https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertexpirysoon",
"runbook_url": "https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagercertexpirysoon",
"summary": "The cert `{{ $labels.name }}` is {{ $value | humanizeDuration }} from expiry, it should have renewed over a week ago."
},
"expr": "avg by (exported_namespace, namespace, name) (\n certmanager_certificate_expiration_timestamp_seconds - time()\n) < (21 * 24 * 3600) # 21 days in seconds\n",
@ -50,7 +50,7 @@
"annotations": {
"dashboard_url": "https://grafana.example.com/d/TvuRo2iMk/cert-manager",
"description": "This certificate has not been ready to serve traffic for at least 10m. If the cert is being renewed or there is another valid cert, the ingress controller _may_ be able to serve that instead.",
"runbook_url": "https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagercertnotready",
"runbook_url": "https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagercertnotready",
"summary": "The cert `{{ $labels.name }}` is not ready to serve traffic."
},
"expr": "max by (name, exported_namespace, namespace, condition) (\n certmanager_certificate_ready_status{condition!=\"True\"} == 1\n)\n",
@ -64,7 +64,7 @@
"annotations": {
"dashboard_url": "https://grafana.example.com/d/TvuRo2iMk/cert-manager",
"description": "Depending on the rate limit, cert-manager may be unable to generate certificates for up to a week.",
"runbook_url": "https://gitlab.com/uneeq-oss/cert-manager-mixin/-/blob/master/RUNBOOK.md#certmanagerhittingratelimits",
"runbook_url": "https://github.com/imusmanmalik/cert-manager-mixin/blob/main/RUNBOOK.md#certmanagerhittingratelimits",
"summary": "Cert manager hitting LetsEncrypt rate limits."
},
"expr": "sum by (host) (\n rate(certmanager_http_acme_client_request_count{status=\"429\"}[5m])\n) > 0\n",

Some files were not shown because too many files have changed in this diff.