Merge pull request 'v1.24' (#51) from v1.24 into master

Reviewed-on: #51
master
Stefan Reimer 5 days ago
commit f979c02fd7

@ -1,12 +1,14 @@
# Parse version from latest git semver tag
GTAG=$(shell git describe --tags --match v*.*.* 2>/dev/null || git rev-parse --short HEAD 2>/dev/null)
TAG ?= $(shell echo $(GTAG) | awk -F '-' '{ print $$1 "-" $$2 }' | sed -e 's/-$$//')
BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
GIT_TAG=$(shell git describe --tags --match v*.*.* 2>/dev/null || git rev-parse --short HEAD 2>/dev/null)
TAG ?= $(shell echo $(GIT_TAG) | awk -F '-' '{ print $$1 "-" $$2 }' | sed -e 's/-$$//')
ARCH := amd64
ALL_ARCHS := amd64 arm64
# EXTRA_TAGS supposed to be set at the caller, eg. $(shell echo $(TAG) | awk -F '.' '{ print $$1 "." $$2 }')
ifneq ($(TRIVY_REMOTE),)
TRIVY_OPTS := --server $(TRIVY_REMOTE)
TRIVY_OPTS := --server $(TRIVY_REMOTE)
endif
.SILENT: ; # no need for @
@ -20,46 +22,48 @@ help: ## Show Help
grep -E '^[a-zA-Z_-]+:.*?## .*$$' .ci/podman.mk | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
build: ## Build the app
docker build --rm -t $(REGISTRY)/$(IMAGE):$(TAG)-$(ARCH) --build-arg TAG=$(TAG) --build-arg ARCH=$(ARCH) --platform linux/$(ARCH) .
buildah build --rm --layers -t $(IMAGE):$(TAG)-$(ARCH) --build-arg TAG=$(TAG) --build-arg ARCH=$(ARCH) --platform linux/$(ARCH) .
test: rm-test-image ## Execute Dockerfile.test
test -f Dockerfile.test && \
{ docker build --rm -t $(REGISTRY)/$(IMAGE):$(TAG)-test --from=$(REGISTRY)/$(IMAGE):$(TAG) -f Dockerfile.test --platform linux/$(ARCH) . && \
docker run --rm --env-host -t $(REGISTRY)/$(IMAGE):$(TAG)-$(ARCH)-test; } || \
{ buildah build --rm --layers -t $(REGISTRY)/$(IMAGE):$(TAG)-test --from=$(REGISTRY)/$(IMAGE):$(TAG) -f Dockerfile.test --platform linux/$(ARCH) . && \
podman run --rm --env-host -t $(REGISTRY)/$(IMAGE):$(TAG)-$(ARCH)-test; } || \
echo "No Dockerfile.test found, skipping test"
scan: ## Scan image using trivy
echo "Scanning $(REGISTRY)/$(IMAGE):$(TAG)-$(ARCH) using Trivy $(TRIVY_REMOTE)"
trivy image $(TRIVY_OPTS) $(REGISTRY)/$(IMAGE):$(TAG)-$(ARCH)
echo "Scanning $(IMAGE):$(TAG)-$(ARCH) using Trivy $(TRIVY_REMOTE)"
trivy image $(TRIVY_OPTS) localhost/$(IMAGE):$(TAG)-$(ARCH)
# We create new manifest and add TAG-ARCH image
# if manigest exists already, get it and add TAG-ARCH to eg. add arm64 to existing amd64
push: ## push images to registry
for t in $(TAG) latest $(EXTRA_TAGS); \
do echo "creating and pushing: $$t"; \
docker tag $(REGISTRY)/$(IMAGE):$(TAG)-$(ARCH) $(REGISTRY)/$(IMAGE):$${t}-$(ARCH) && \
docker push $(REGISTRY)/$(IMAGE):$${t}-$(ARCH); \
podman manifest exists $(IMAGE):$$t || podman manifest create $(IMAGE):$$t; \
buildah manifest add $(IMAGE):$$t $(REGISTRY)/$(IMAGE):$(TAG)-$(ARCH) && docker manifest push $(IMAGE):$$t $(REGISTRY)/$(IMAGE):$$t; \
# first tag and push all actual images
# create new manifest for each tag and add all available TAG-ARCH before pushing
push: ecr-login ## push images to registry
for t in $(TAG) latest $(EXTRA_TAGS); do \
buildah tag $(IMAGE):$(TAG)-$(ARCH) $(REGISTRY)/$(IMAGE):$${t}-$(ARCH); \
buildah manifest rm $(IMAGE):$$t || true; \
buildah manifest create $(IMAGE):$$t; \
for a in $(ALL_ARCHS); do \
buildah manifest add $(IMAGE):$$t $(REGISTRY)/$(IMAGE):$(TAG)-$$a; \
done; \
buildah manifest push --all $(IMAGE):$$t docker://$(REGISTRY)/$(IMAGE):$$t; \
done
ecr-login: ## log into AWS ECR public
aws ecr-public get-login-password --region $(REGION) | docker login --username AWS --password-stdin $(REGISTRY)
aws ecr-public get-login-password --region $(REGION) | podman login --username AWS --password-stdin $(REGISTRY)
clean: rm-test-image rm-image ## delete local built container and test images
rm-remote-untagged: ## delete all remote untagged images
echo "Removing all untagged images from $(IMAGE) in $(REGION)"
IMAGE_IDS=$$(for image in $$(aws ecr-public describe-images --repository-name $(IMAGE) --region $(REGION) --output json | jq -r '.imageDetails[] | select(.imageTags | not ).imageDigest'); do echo -n "imageDigest=$$image "; done) ; \
[ -n "$$IMAGE_IDS" ] && aws ecr-public batch-delete-image --repository-name $(IMAGE) --region $(REGION) --image-ids $$IMAGE_IDS || echo "No image to remove"
[ -n "$$IMAGE_IDS" ] && aws ecr-public batch-delete-image --repository-name $(IMAGE) --region $(REGION) --image-ids $$IMAGE_IDS || echo "No image to remove"
rm-image:
test -z "$$(docker image ls -q $(IMAGE):$(TAG)-$(ARCH))" || docker image rm -f $(IMAGE):$(TAG)-$(ARCH) > /dev/null
test -z "$$(docker image ls -q $(IMAGE):$(TAG)-$(ARCH))" || podman image rm -f $(IMAGE):$(TAG)-$(ARCH) > /dev/null
test -z "$$(docker image ls -q $(IMAGE):$(TAG)-$(ARCH))" || echo "Error: Removing image failed"
# Ensure we run the tests by removing any previous runs
rm-test-image:
test -z "$$(docker image ls -q $(IMAGE):$(TAG)-$(ARCH)-test)" || docker image rm -f $(IMAGE):$(TAG)-$(ARCH)-test > /dev/null
test -z "$$(docker image ls -q $(IMAGE):$(TAG)-$(ARCH)-test)" || podman image rm -f $(IMAGE):$(TAG)-$(ARCH)-test > /dev/null
test -z "$$(docker image ls -q $(IMAGE):$(TAG)-$(ARCH)-test)" || echo "Error: Removing test image failed"
ci-pull-upstream: ## pull latest shared .ci subtree

@ -3,12 +3,13 @@ ARG ALPINE_VERSION=3.16
FROM alpine:${ALPINE_VERSION}
ARG ALPINE_VERSION
ARG KUBE_VERSION=1.23
ARG KUBE_VERSION=1.24
RUN cd /etc/apk/keys && \
wget "https://cdn.zero-downtime.net/alpine/stefan@zero-downtime.net-61bb6bfb.rsa.pub" && \
echo "@kubezero https://cdn.zero-downtime.net/alpine/v${ALPINE_VERSION}/kubezero" >> /etc/apk/repositories && \
echo "@testing http://dl-cdn.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories && \
echo "@edge-testing http://dl-cdn.alpinelinux.org/alpine/edge/testing" >> /etc/apk/repositories && \
echo "@edge-community http://dl-cdn.alpinelinux.org/alpine/edge/community" >> /etc/apk/repositories && \
apk upgrade -U -a --no-cache && \
apk --no-cache add \
jq \
@ -21,9 +22,9 @@ RUN cd /etc/apk/keys && \
kubeadm@kubezero~=${KUBE_VERSION} \
kubectl@kubezero~=${KUBE_VERSION} \
etcdhelper@kubezero \
etcd-ctl@testing \
restic@testing \
helm@testing
etcd-ctl@edge-testing \
restic@edge-community \
helm@edge-community
RUN helm repo add kubezero https://cdn.zero-downtime.net/charts && \
mkdir -p /var/lib/kubezero

@ -5,11 +5,11 @@ KubeZero is a Kubernetes distribution providing an integrated container platform
# Design philosophy
- Cloud provider agnostic, bare-metal/self-hosted
- Focus on security and simplicity before feature bloat
- No vendor lock in, most components are optional and could be exchanged
- Focus on security and simplicity over feature creep
- No vendor lock in, most components are optional and could be easily exchanged
- Organic Open Source / open and permissive licenses over closed-source solutions
- No premium services / subscriptions required
- Staying and contributing back to upstream projects as much as possible
- Staying up to date and contributing back to upstream projects, like alpine-cloud-images and others
- Corgi approved :dog:
@ -19,7 +19,7 @@ KubeZero is a Kubernetes distribution providing an integrated container platform
# Version / Support Matrix
KubeZero releases track the same *minor* version of Kubernetes.
Any 1.21.X-Y release of Kubezero supports any Kubernetes cluster 1.21.X.
Any 1.24.X-Y release of Kubezero supports any Kubernetes cluster 1.24.X.
KubeZero is distributed as a collection of versioned Helm charts, allowing custom upgrade schedules and module versions as needed.
@ -28,15 +28,15 @@ KubeZero is distributed as a collection of versioned Helm charts, allowing custo
gantt
title KubeZero Support Timeline
dateFormat YYYY-MM-DD
section 1.22
beta :122b, 2022-05-01, 2022-06-01
release :after 122b, 2022-10-28
section 1.23
beta :123b, 2022-08-01, 2022-09-01
release :after 123b, 2023-02-28
release :after 123b, 2023-02-01
section 1.24
beta :124b, 2022-12-01, 2022-12-31
release :after 124b, 2023-05-01
beta :124b, 2022-11-14, 2022-12-31
release :after 124b, 2023-06-01
section 1.25
beta :125b, 2023-03-01, 2023-03-31
release :after 125b, 2023-08-01
```
[Upstream release policy](https://kubernetes.io/releases/)
@ -44,7 +44,7 @@ gantt
# Components
## OS
- all nodes are based on Alpine V3.15
- all nodes are based on Alpine V3.16
- 2 GB encrypted root filesystem
- no 3rd party dependencies at boot ( other than container registries )
- minimal attack surface

@ -56,7 +56,7 @@ render_kubeadm() {
cat ${WORKDIR}/kubeadm/templates/${f}Configuration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm.yaml
done
# hack to "uncloack" the json patches after they go processed by helm
# "uncloak" the json patches after they got processed by helm
for s in apiserver controller-manager scheduler; do
yq eval '.json' ${WORKDIR}/kubeadm/templates/patches/kube-${s}1\+json.yaml > /tmp/_tmp.yaml && \
mv /tmp/_tmp.yaml ${WORKDIR}/kubeadm/templates/patches/kube-${s}1\+json.yaml
@ -65,11 +65,6 @@ render_kubeadm() {
parse_kubezero() {
# remove with 1.24
if [ ! -f ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml ]; then
[ -f ${HOSTFS}/etc/kubernetes/kubezero.yaml ] && cp ${HOSTFS}/etc/kubernetes/kubezero.yaml ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml
fi
export CLUSTERNAME=$(yq eval '.global.clusterName // .clusterName' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
export HIGHAVAILABLE=$(yq eval '.global.highAvailable // .highAvailable // "false"' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
export ETCD_NODENAME=$(yq eval '.etcd.nodeName' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml)
@ -328,6 +323,21 @@ apply_module() {
}
delete_module() {
MODULES=$1
get_kubezero_values
# Always use embedded kubezero chart
helm template $CHARTS/kubezero -f $WORKDIR/kubezero-values.yaml --version ~$KUBE_VERSION --devel --output-dir $WORKDIR
for t in $MODULES; do
_helm delete $t
done
echo "Deleted KubeZero modules: $MODULES. Potential CRDs must be removed manually."
}
# backup etcd + /etc/kubernetes/pki
backup() {
# Display all ENVs, careful this exposes the password !
@ -382,7 +392,8 @@ for t in $@; do
bootstrap) control_plane_node bootstrap;;
join) control_plane_node join;;
restore) control_plane_node restore;;
apply_*) apply_module ${t##apply_};;
apply_*) apply_module "${t##apply_}";;
delete_*) delete_module "${t##delete_}";;
backup) backup;;
debug_shell) debug_shell;;
*) echo "Unknown command: '$t'";;

@ -108,6 +108,7 @@ function _crds() {
# Only apply if there are actually any crds
if [ -s $WORKDIR/crds.yaml ]; then
[ -n "$DEBUG" ] && cat $WORKDIR/crds.yaml
kubectl apply -f $WORKDIR/crds.yaml --server-side --force-conflicts
fi
}
@ -115,7 +116,7 @@ function _crds() {
# helm template | kubectl apply -f -
# confine to one namespace if possible
function apply() {
function render() {
helm template $(chart_location $chart) -n $namespace --name-template $module $targetRevision --skip-crds -f $WORKDIR/values.yaml $API_VERSIONS --kube-version $KUBE_VERSION $@ \
| python3 -c '
#!/usr/bin/python3
@ -128,8 +129,6 @@ for manifest in yaml.safe_load_all(sys.stdin):
manifest["metadata"]["namespace"] = sys.argv[1]
print("---")
print(yaml.dump(manifest))' $namespace > $WORKDIR/helm.yaml
kubectl $action -f $WORKDIR/helm.yaml --server-side --force-conflicts && rc=$? || rc=$?
}
@ -164,13 +163,15 @@ function _helm() {
# Optional pre hook
declare -F ${module}-pre && ${module}-pre
apply
render
kubectl $action -f $WORKDIR/helm.yaml --server-side --force-conflicts && rc=$? || rc=$?
# Optional post hook
declare -F ${module}-post && ${module}-post
elif [ $action == "delete" ]; then
apply
render
kubectl $action -f $WORKDIR/helm.yaml && rc=$? || rc=$?
# Delete dedicated namespace if not kube-system
[ -n "$DELETE_NS" ] && delete_ns $namespace

@ -8,32 +8,34 @@ import yaml
def migrate(values):
"""Actual changes here"""
# migrate ClusterName to clusterName
if "ClusterName" in values:
values["clusterName"] = values["ClusterName"]
values.pop("ClusterName")
# ClusterBackup is enabled on AWS anyways, same with cluster-autoscaler
if "aws" in values["global"]:
deleteKey(values["addons"], "clusterBackup")
deleteKey(values["addons"], "cluster-autoscaler")
# Remove calico and multus
deleteKey(values["network"], "calico")
deleteKey(values["network"], "multus")
# ArgoCD helm changes
if "argocd" in values:
if "server" in values["argocd"]:
if not "configs" in values["argocd"]:
values["argocd"]["configs"] = {}
if not "cm" in values["argocd"]["configs"]:
values["argocd"]["configs"]["cm"] = {}
values["argocd"]["configs"]["cm"]["url"] = values["argocd"]["server"]["config"][
"url"
]
deleteKey(values["argocd"], "server")
# Remove HighAvailableControlplane
try:
values["global"]["highAvailable"] = values["HighAvailableControlplane"]
values.pop("HighAvailableControlplane")
except KeyError:
pass
# Create new clusterwide cloudprovider data if possible
# IamArn: arn:aws:iam::<ACCOUNT_ID>:role/<REGION>.<CLUSTERNAME>.cert-manager
try:
if values["cert-manager"]["IamArn"]:
account_id = values["cert-manager"]["IamArn"].split(":")[4]
region = values["cert-manager"]["IamArn"].split(":")[5].split('.')[0].split('/')[1]
if "global" not in values:
values["global"] = {}
if "aws" not in values["global"]:
values["global"]["aws"] = {}
return values
values["global"]["aws"]["region"] = region
values["global"]["aws"]["accountId"] = account_id
def deleteKey(values, key):
"""Delete key from dictionary if exists"""
try:
values.pop(key)
except KeyError:
pass
@ -44,9 +46,11 @@ class MyDumper(yaml.Dumper):
"""
Required to add additional indent for arrays to match yq behaviour to reduce noise in diffs
"""
def increase_indent(self, flow=False, indentless=False):
return super(MyDumper, self).increase_indent(flow, False)
def str_presenter(dumper, data):
if len(data.splitlines()) > 1: # check for multiline string
return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
@ -93,5 +97,5 @@ yaml.dump(
default_flow_style=False,
indent=2,
sort_keys=False,
Dumper=MyDumper
Dumper=MyDumper,
)

@ -1,11 +1,12 @@
#!/bin/bash -e
VERSION="v1.23"
#VERSION="latest"
VERSION="v1.24"
ARGO_APP=${1:-/tmp/new-kubezero-argoapp.yaml}
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
. $SCRIPT_DIR/libhelm.sh
# shellcheck disable=SC1091
. "$SCRIPT_DIR"/libhelm.sh
[ -n "$DEBUG" ] && set -x
@ -38,6 +39,9 @@ spec:
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
initContainers:
- name: node-upgrade
image: busybox
@ -75,7 +79,7 @@ EOF
control_plane_upgrade() {
TASKS="$1"
echo "Deploy cluster admin task: $TASK"
echo "Deploy cluster admin task: $TASKS"
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
@ -120,6 +124,10 @@ spec:
node-role.kubernetes.io/control-plane: ""
tolerations:
- key: node-role.kubernetes.io/master
operator: Exists
effect: NoSchedule
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
restartPolicy: Never
EOF
@ -138,46 +146,41 @@ waitSystemPodsRunning
argo_used && disable_argo
all_nodes_upgrade "nsenter -m/hostproc/1/ns/mnt mount --make-shared /sys/fs/cgroup; nsenter -m/hostproc/1/ns/mnt mount --make-shared /sys; nsenter -r/host /usr/bin/podman image prune -a -f;"
#all_nodes_upgrade ""
control_plane_upgrade kubeadm_upgrade
echo "Adjust kubezero values as needed: (eg. set cilium cluster id and ensure no IP space overlap !!):"
echo "Adjust kubezero values as needed:"
# shellcheck disable=SC2015
argo_used && kubectl edit app kubezero -n argocd || kubectl edit cm kubezero-values -n kube-system
# Remove multus DS due to label changes, if this fails:
# kubezero-network $ helm template . --set multus.enabled=true | kubectl apply -f -
kubectl delete ds kube-multus-ds -n kube-system || true
# Required due to chart upgrade to 4.X part of prometheus-stack 40.X
kubectl delete daemonset metrics-prometheus-node-exporter -n monitoring || true
# AWS EBS CSI driver change their fsGroupPolicy
kubectl delete CSIDriver ebs.csi.aws.com || true
# Remove calico
#kubectl delete deployment calico-kube-controllers -n kube-system || true
#kubectl delete daemonset calico-node -n kube-system || true
#kubectl delete network-attachment-definitions calico -n kube-system || true
# Delete external-dns deployment as upstream changed strategy to 'recreate'
kubectl delete deployment addons-external-dns -n kube-system || true
# Remove previous cilium config as the helm options are additive only -> fail
kubectl delete configmap cilium-config -n kube-system || true
control_plane_upgrade "apply_network, apply_addons, apply_storage"
kubectl rollout restart daemonset/calico-node -n kube-system
kubectl rollout restart daemonset/cilium -n kube-system
kubectl rollout restart daemonset/kube-multus-ds -n kube-system
kubectl rollout restart daemonset/cilium -n kube-system
echo "Checking that all pods in kube-system are running ..."
waitSystemPodsRunning
echo "Applying remaining KubeZero modules..."
# Delete outdated cert-manager CRDs, otherwise serverside apply will fail
for c in certificaterequests.cert-manager.io certificates.cert-manager.io challenges.acme.cert-manager.io clusterissuers.cert-manager.io issuers.cert-manager.io orders.acme.cert-manager.io; do
kubectl delete crd $c
done
# delete argocd deployments as various immutable things changed, also redis restart fails otherwise
kubectl delete deployment argocd-redis -n argocd || true
kubectl delete deployment argocd-repo-server -n argocd || true
kubectl delete statefulset argocd-application-controller -n argocd || true
control_plane_upgrade "apply_cert-manager, apply_istio, apply_istio-ingress, apply_istio-private-ingress, apply_logging, apply_metrics, apply_argocd"
# Delete prometheus-push gateway due to label changes
kubectl delete deploy -l app=prometheus-pushgateway -n monitoring || true
# delete legace ArgCD controller which is now a statefulSet
kubectl delete deployment argocd-application-controller -n argocd || true
control_plane_upgrade "apply_cert-manager, apply_istio, apply_istio-ingress, apply_istio-private-ingress, apply_logging, apply_metrics, apply_argocd"
# Final step is to commit the new argocd kubezero app
kubectl get app kubezero -n argocd -o yaml | yq 'del(.status) | del(.metadata) | del(.operation) | .metadata.name="kubezero" | .metadata.namespace="argocd"' | yq 'sort_keys(..) | .spec.source.helm.values |= (from_yaml | to_yaml)' > $ARGO_APP
@ -186,6 +189,6 @@ echo "Please commit $ARGO_APP as the updated kubezero/application.yaml for your
echo "Then head over to ArgoCD for this cluster and sync all KubeZero modules to apply remaining upgrades."
echo "<Return> to continue and re-enable ArgoCD:"
read
read -r
argo_used && enable_argo

@ -1,52 +0,0 @@
#!/bin/bash
# Migrate addons and network values from local kubeadm-values.yaml on controllers into CM
# - enable cilium
# Create emtpy CM if not exists yet
kubectl get cm -n kube-system kubezero-values || \
kubectl create configmap -n kube-system kubezero-values
kubectl get cm -n kube-system kubeadm-values || \
kubectl create configmap -n kube-system kubeadm-values
# tweak local kubeadm for upgrade later on
yq eval -i '.global.clusterName = strenv(CLUSTERNAME) |
.global.highAvailable = env(HIGHAVAILABLE)' \
${HOSTFS}/etc/kubernetes/kubeadm-values.yaml
# extract network
yq e '.network |
.cilium.enabled = true |
.calico.enabled = true |
.multus.enabled = true |
.multus.defaultNetworks = ["cilium"] |
.cilium.cluster.name = strenv(CLUSTERNAME) |
{"network": .}' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml > $WORKDIR/network-values.yaml
# get current argo cd values
kubectl get application kubezero -n argocd -o yaml | yq '.spec.source.helm.values' > ${WORKDIR}/argo-values.yaml
# merge all into new CM and set new minimal addons
yq ea '. as $item ireduce ({}; . * $item ) |
.global.clusterName = strenv(CLUSTERNAME) |
.global.highAvailable = env(HIGHAVAILABLE) |
.addons.clusterBackup.image.tag = "v1.23" ' ${WORKDIR}/network-values.yaml $WORKDIR/argo-values.yaml > $WORKDIR/kubezero-pre-values.yaml
# tumble new config through migrate.py
cat $WORKDIR/kubezero-pre-values.yaml | migrate_argo_values.py > $WORKDIR/kubezero-values.yaml
# Update kubezero-values CM
kubectl get cm -n kube-system kubezero-values -o=yaml | \
yq e '.data."values.yaml" |= load_str("/tmp/kubezero/kubezero-values.yaml")' | \
kubectl replace -f -
# update argo app
kubectl get application kubezero -n argocd -o yaml | \
kubezero_chart_version=$(yq .version /charts/kubezero/Chart.yaml) \
yq '.spec.source.helm.values |= load_str("/tmp/kubezero/kubezero-values.yaml") | .spec.source.targetRevision = strenv(kubezero_chart_version)' | \
kubectl apply -f -
# finally remove annotation to allow argo to sync again
kubectl patch app kubezero -n argocd --type json -p='[{"op": "remove", "path": "/metadata/annotations"}]'

@ -0,0 +1,21 @@
#!/bin/bash
# get current values, argo app over cm
get_kubezero_values
# tumble new config through migrate.py
migrate_argo_values.py < "$WORKDIR"/kubezero-values.yaml > "$WORKDIR"/new-kubezero-values.yaml
# Update kubezero-values CM
kubectl get cm -n kube-system kubezero-values -o=yaml | \
yq e '.data."values.yaml" |= load_str("/tmp/kubezero/new-kubezero-values.yaml")' | \
kubectl replace -f -
# update argo app
kubectl get application kubezero -n argocd -o yaml | \
kubezero_chart_version=$(yq .version /charts/kubezero/Chart.yaml) \
yq '.spec.source.helm.values |= load_str("/tmp/kubezero/new-kubezero-values.yaml") | .spec.source.targetRevision = strenv(kubezero_chart_version)' | \
kubectl apply -f -
# finally remove annotation to allow argo to sync again
kubectl patch app kubezero -n argocd --type json -p='[{"op": "remove", "path": "/metadata/annotations"}]'

@ -2,7 +2,7 @@ apiVersion: v2
name: kubeadm
description: KubeZero Kubeadm cluster config
type: application
version: 1.23.11
version: 1.24.9
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -11,4 +11,4 @@ keywords:
maintainers:
- name: Stefan Reimer
email: stefan@zero-downtime.net
kubeVersion: ">= 1.20.0"
kubeVersion: ">= 1.24.0"

@ -1,6 +1,6 @@
# kubeadm
![Version: 1.23.10](https://img.shields.io/badge/Version-1.23.10-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
![Version: 1.24.9](https://img.shields.io/badge/Version-1.24.9-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
KubeZero Kubeadm cluster config
@ -14,7 +14,7 @@ KubeZero Kubeadm cluster config
## Requirements
Kubernetes: `>= 1.20.0`
Kubernetes: `>= 1.24.0`
## Values
@ -44,7 +44,7 @@ Kubernetes: `>= 1.20.0`
## Resources
- https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/troubleshooting-kubeadm/
- https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta2
- https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3
- https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3
- https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/kubelet/config/v1beta1/types.go
- https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/control-plane-flags/

@ -18,7 +18,7 @@
## Resources
- https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/troubleshooting-kubeadm/
- https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta2
- https://godoc.org/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3
- https://pkg.go.dev/k8s.io/kubernetes/cmd/kubeadm/app/apis/kubeadm/v1beta3
- https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/kubelet/config/v1beta1/types.go
- https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/control-plane-flags/

@ -2,8 +2,8 @@ apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: {{ .Chart.Version }}
clusterName: {{ .Values.global.clusterName }}
featureGates:
UnversionedKubeletConfigMap: true
#featureGates:
# NonGracefulFailover: true
controlPlaneEndpoint: {{ .Values.api.endpoint }}
networking:
podSubnet: 10.244.0.0/16

@ -17,8 +17,8 @@ protectKernelDefaults: {{ .Values.protectKernelDefaults }}
tlsCipherSuites: [TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256]
featureGates:
{{- include "kubeadm.featuregates" ( dict "return" "map" ) | nindent 2 }}
# Minimal unit is 50m per pod
podsPerCore: 20
# Minimal unit is 40m per pod
podsPerCore: 25
# cpuCFSQuotaPeriod: 10ms
# Basic OS incl. crio
systemReserved:

@ -1,6 +1,6 @@
{{- /* Feature gates for all control plane components */ -}}
{{- define "kubeadm.featuregates" }}
{{- $gates := list "CustomCPUCFSQuotaPeriod" "KubeletCredentialProviders"}}
{{- $gates := list "CustomCPUCFSQuotaPeriod" "CronJobTimeZone" "NodeOutOfServiceVolumeDetach" }}
{{- if eq .return "csv" }}
{{- range $key := $gates }}
{{- $key }}=true,

@ -1,4 +1,4 @@
apiVersion: kubelet.config.k8s.io/v1alpha1
apiVersion: kubelet.config.k8s.io/v1beta1
kind: CredentialProviderConfig
providers:
- name: amazon-ecr-credential-helper

@ -112,10 +112,12 @@ spec:
tolerations:
- effect: NoSchedule
key: node-role.kubernetes.io/master
- effect: NoSchedule
key: node-role.kubernetes.io/control-plane
containers:
- name: aws-iam-authenticator
image: public.ecr.aws/zero-downtime/aws-iam-authenticator:v0.5.9
image: public.ecr.aws/zero-downtime/aws-iam-authenticator:v0.5.11
args:
- server
- --backend-mode=CRD,MountedFile
@ -131,7 +133,7 @@ spec:
resources:
requests:
memory: 20Mi
memory: 32Mi
cpu: 10m
limits:
memory: 64Mi

@ -2,8 +2,8 @@ apiVersion: v2
name: kubezero-addons
description: KubeZero umbrella chart for various optional cluster addons
type: application
version: 0.6.3
appVersion: v1.23.11
version: 0.7.3
appVersion: v1.24
home: https://kubezero.com
icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
keywords:
@ -11,16 +11,14 @@ keywords:
- fuse-device-plugin
- neuron-device-plugin
- nvidia-device-plugin
- aws-node-termination-handler
- cluster-autoscaler
- sealed-secrets
- external-dns
- aws-node-termination-handler
maintainers:
- name: Stefan Reimer
email: stefan@zero-downtime.net
dependencies:
- name: aws-node-termination-handler
version: 0.18.5
# repository: https://aws.github.io/eks-charts
condition: aws-node-termination-handler.enabled
- name: external-dns
version: 1.11.0
repository: https://kubernetes-sigs.github.io/external-dns/
@ -30,8 +28,20 @@ dependencies:
repository: https://kubernetes.github.io/autoscaler
condition: cluster-autoscaler.enabled
- name: nvidia-device-plugin
version: 0.12.2
version: 0.13.0
# https://github.com/NVIDIA/k8s-device-plugin
repository: https://nvidia.github.io/k8s-device-plugin
condition: nvidia-device-plugin.enabled
kubeVersion: ">= 1.20.0"
- name: sealed-secrets
version: 2.7.1
repository: https://bitnami-labs.github.io/sealed-secrets
condition: sealed-secrets.enabled
- name: aws-node-termination-handler
version: 0.20.1
# repository: https://aws.github.io/eks-charts
condition: aws-node-termination-handler.enabled
- name: aws-eks-asg-rolling-update-handler
version: 1.2.7
# repository: https://twin.github.io/helm-charts
condition: aws-eks-asg-rolling-update-handler.enabled
kubeVersion: ">= 1.24.0"

@ -1,6 +1,6 @@
# kubezero-addons
![Version: 0.6.3](https://img.shields.io/badge/Version-0.6.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.23.11](https://img.shields.io/badge/AppVersion-v1.23.11-informational?style=flat-square)
![Version: 0.7.3](https://img.shields.io/badge/Version-0.7.3-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: v1.24](https://img.shields.io/badge/AppVersion-v1.24-informational?style=flat-square)
KubeZero umbrella chart for various optional cluster addons
@ -14,14 +14,16 @@ KubeZero umbrella chart for various optional cluster addons
## Requirements
Kubernetes: `>= 1.20.0`
Kubernetes: `>= 1.24.0`
| Repository | Name | Version |
|------------|------|---------|
| | aws-node-termination-handler | 0.18.5 |
| | aws-eks-asg-rolling-update-handler | 1.2.7 |
| | aws-node-termination-handler | 0.20.1 |
| https://bitnami-labs.github.io/sealed-secrets | sealed-secrets | 2.7.1 |
| https://kubernetes-sigs.github.io/external-dns/ | external-dns | 1.11.0 |
| https://kubernetes.github.io/autoscaler | cluster-autoscaler | 9.21.0 |
| https://nvidia.github.io/k8s-device-plugin | nvidia-device-plugin | 0.12.2 |
| https://nvidia.github.io/k8s-device-plugin | nvidia-device-plugin | 0.13.0 |
# MetalLB
@ -39,6 +41,34 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| Key | Type | Default | Description |
|-----|------|---------|-------------|
| aws-eks-asg-rolling-update-handler.enabled | bool | `false` | |
| aws-eks-asg-rolling-update-handler.environmentVars[0].name | string | `"CLUSTER_NAME"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[0].value | string | `""` | |
| aws-eks-asg-rolling-update-handler.environmentVars[1].name | string | `"AWS_REGION"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[1].value | string | `"us-west-2"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[2].name | string | `"EXECUTION_INTERVAL"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[2].value | string | `"60"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[3].name | string | `"METRICS"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[3].value | string | `"true"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[4].name | string | `"EAGER_CORDONING"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[4].value | string | `"true"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[5].name | string | `"SLOW_MODE"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[5].value | string | `"true"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[6].name | string | `"AWS_ROLE_ARN"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[6].value | string | `""` | |
| aws-eks-asg-rolling-update-handler.environmentVars[7].name | string | `"AWS_WEB_IDENTITY_TOKEN_FILE"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[7].value | string | `"/var/run/secrets/sts.amazonaws.com/serviceaccount/token"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[8].name | string | `"AWS_STS_REGIONAL_ENDPOINTS"` | |
| aws-eks-asg-rolling-update-handler.environmentVars[8].value | string | `"regional"` | |
| aws-eks-asg-rolling-update-handler.image.tag | string | `"v1.7.0"` | |
| aws-eks-asg-rolling-update-handler.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| aws-eks-asg-rolling-update-handler.resources.limits.memory | string | `"128Mi"` | |
| aws-eks-asg-rolling-update-handler.resources.requests.cpu | string | `"10m"` | |
| aws-eks-asg-rolling-update-handler.resources.requests.memory | string | `"32Mi"` | |
| aws-eks-asg-rolling-update-handler.tolerations[0].effect | string | `"NoSchedule"` | |
| aws-eks-asg-rolling-update-handler.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| aws-eks-asg-rolling-update-handler.tolerations[1].effect | string | `"NoSchedule"` | |
| aws-eks-asg-rolling-update-handler.tolerations[1].key | string | `"node-role.kubernetes.io/control-plane"` | |
| aws-node-termination-handler.deleteLocalData | bool | `true` | |
| aws-node-termination-handler.emitKubernetesEvents | bool | `true` | |
| aws-node-termination-handler.enableProbesServer | bool | `true` | |
@ -54,7 +84,8 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| aws-node-termination-handler.fullnameOverride | string | `"aws-node-termination-handler"` | |
| aws-node-termination-handler.ignoreDaemonSets | bool | `true` | |
| aws-node-termination-handler.jsonLogging | bool | `true` | |
| aws-node-termination-handler.managedAsgTag | string | `"aws-node-termination-handler/managed"` | "aws-node-termination-handler/${ClusterName}" |
| aws-node-termination-handler.logFormatVersion | int | `2` | |
| aws-node-termination-handler.managedTag | string | `"aws-node-termination-handler/managed"` | "aws-node-termination-handler/${ClusterName}" |
| aws-node-termination-handler.metadataTries | int | `0` | |
| aws-node-termination-handler.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| aws-node-termination-handler.podMonitor.create | bool | `false` | |
@ -63,6 +94,8 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| aws-node-termination-handler.taintNode | bool | `true` | |
| aws-node-termination-handler.tolerations[0].effect | string | `"NoSchedule"` | |
| aws-node-termination-handler.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| aws-node-termination-handler.tolerations[1].effect | string | `"NoSchedule"` | |
| aws-node-termination-handler.tolerations[1].key | string | `"node-role.kubernetes.io/control-plane"` | |
| aws-node-termination-handler.useProviderId | bool | `true` | |
| awsNeuron.enabled | bool | `false` | |
| awsNeuron.image.name | string | `"public.ecr.aws/neuron/neuron-device-plugin"` | |
@ -80,30 +113,22 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| cluster-autoscaler.serviceMonitor.interval | string | `"30s"` | |
| cluster-autoscaler.tolerations[0].effect | string | `"NoSchedule"` | |
| cluster-autoscaler.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| cluster-autoscaler.tolerations[1].effect | string | `"NoSchedule"` | |
| cluster-autoscaler.tolerations[1].key | string | `"node-role.kubernetes.io/control-plane"` | |
| clusterBackup.enabled | bool | `false` | |
| clusterBackup.extraEnv | list | `[]` | |
| clusterBackup.image.name | string | `"public.ecr.aws/zero-downtime/kubezero-admin"` | |
| clusterBackup.password | string | `""` | /etc/cloudbender/clusterBackup.passphrase |
| clusterBackup.repository | string | `""` | s3:https://s3.amazonaws.com/${CFN[ConfigBucket]}/k8s/${CLUSTERNAME}/clusterBackup |
| external-dns.enabled | bool | `false` | |
| external-dns.env[0] | object | `{"name":"AWS_ROLE_ARN","value":""}` | "arn:aws:iam::${AWS::AccountId}:role/${AWS::Region}.${ClusterName}.externalDNS" |
| external-dns.env[1].name | string | `"AWS_WEB_IDENTITY_TOKEN_FILE"` | |
| external-dns.env[1].value | string | `"/var/run/secrets/sts.amazonaws.com/serviceaccount/token"` | |
| external-dns.env[2].name | string | `"AWS_STS_REGIONAL_ENDPOINTS"` | |
| external-dns.env[2].value | string | `"regional"` | |
| external-dns.extraVolumeMounts[0].mountPath | string | `"/var/run/secrets/sts.amazonaws.com/serviceaccount/"` | |
| external-dns.extraVolumeMounts[0].name | string | `"aws-token"` | |
| external-dns.extraVolumeMounts[0].readOnly | bool | `true` | |
| external-dns.extraVolumes[0].name | string | `"aws-token"` | |
| external-dns.extraVolumes[0].projected.sources[0].serviceAccountToken.audience | string | `"sts.amazonaws.com"` | |
| external-dns.extraVolumes[0].projected.sources[0].serviceAccountToken.expirationSeconds | int | `86400` | |
| external-dns.extraVolumes[0].projected.sources[0].serviceAccountToken.path | string | `"token"` | |
| external-dns.interval | string | `"3m"` | |
| external-dns.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| external-dns.provider | string | `"inmemory"` | |
| external-dns.sources[0] | string | `"service"` | |
| external-dns.tolerations[0].effect | string | `"NoSchedule"` | |
| external-dns.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| external-dns.tolerations[1].effect | string | `"NoSchedule"` | |
| external-dns.tolerations[1].key | string | `"node-role.kubernetes.io/control-plane"` | |
| external-dns.triggerLoopOnEvent | bool | `true` | |
| forseti.aws.iamRoleArn | string | `""` | "arn:aws:iam::${AWS::AccountId}:role/${AWS::Region}.${ClusterName}.kubezeroForseti" |
| forseti.aws.region | string | `""` | |
@ -128,3 +153,15 @@ Device plugin for [AWS Neuron](https://aws.amazon.com/machine-learning/neuron/)
| nvidia-device-plugin.tolerations[1].effect | string | `"NoSchedule"` | |
| nvidia-device-plugin.tolerations[1].key | string | `"kubezero-workergroup"` | |
| nvidia-device-plugin.tolerations[1].operator | string | `"Exists"` | |
| sealed-secrets.enabled | bool | `false` | |
| sealed-secrets.fullnameOverride | string | `"sealed-secrets-controller"` | |
| sealed-secrets.keyrenewperiod | int | `0` | |
| sealed-secrets.metrics.serviceMonitor.enabled | bool | `false` | |
| sealed-secrets.nodeSelector."node-role.kubernetes.io/control-plane" | string | `""` | |
| sealed-secrets.resources.limits.memory | string | `"128Mi"` | |
| sealed-secrets.resources.requests.cpu | string | `"10m"` | |
| sealed-secrets.resources.requests.memory | string | `"24Mi"` | |
| sealed-secrets.tolerations[0].effect | string | `"NoSchedule"` | |
| sealed-secrets.tolerations[0].key | string | `"node-role.kubernetes.io/master"` | |
| sealed-secrets.tolerations[1].effect | string | `"NoSchedule"` | |
| sealed-secrets.tolerations[1].key | string | `"node-role.kubernetes.io/control-plane"` | |

@ -0,0 +1,8 @@
apiVersion: v2
description: Handles rolling upgrades for AWS ASGs for EKS by replacing outdated nodes
by new nodes.
home: https://github.com/TwiN/aws-eks-asg-rolling-update-handler
maintainers:
- name: TwiN
name: aws-eks-asg-rolling-update-handler
version: 1.2.7

@ -0,0 +1,31 @@
{{/*
Create a default app name.
*/}}
{{- define "aws-eks-asg-rolling-update-handler.name" -}}
{{- .Chart.Name -}}
{{- end -}}
{{/*
Create a default namespace.
*/}}
{{- define "aws-eks-asg-rolling-update-handler.namespace" -}}
{{- .Release.Namespace -}}
{{- end -}}
{{/*
Common labels
*/}}
{{- define "aws-eks-asg-rolling-update-handler.labels" -}}
app.kubernetes.io/name: {{ include "aws-eks-asg-rolling-update-handler.name" . }}
{{- end -}}
{{/*
Create the name of the service account to use.
*/}}
{{- define "aws-eks-asg-rolling-update-handler.serviceAccountName" -}}
{{- if .Values.serviceAccount.create -}}
{{ default (include "aws-eks-asg-rolling-update-handler.name" .) .Values.serviceAccount.name }}
{{- else -}}
{{ default "default" .Values.serviceAccount.name }}
{{- end -}}
{{- end -}}

@ -0,0 +1,15 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ template "aws-eks-asg-rolling-update-handler.name" . }}
labels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 4 }}
roleRef:
kind: ClusterRole
name: {{ template "aws-eks-asg-rolling-update-handler.name" . }}
apiGroup: rbac.authorization.k8s.io
subjects:
- kind: ServiceAccount
name: {{ template "aws-eks-asg-rolling-update-handler.serviceAccountName" . }}
namespace: {{ template "aws-eks-asg-rolling-update-handler.namespace" . }}

@ -0,0 +1,41 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ template "aws-eks-asg-rolling-update-handler.name" . }}
labels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 4 }}
rules:
- apiGroups:
- "*"
resources:
- "*"
verbs:
- get
- list
- watch
- apiGroups:
- "*"
resources:
- nodes
verbs:
- get
- list
- watch
- update
- patch
- apiGroups:
- "*"
resources:
- pods/eviction
verbs:
- get
- list
- create
- apiGroups:
- "*"
resources:
- pods
verbs:
- get
- list

@ -0,0 +1,56 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "aws-eks-asg-rolling-update-handler.name" . }}
namespace: {{ template "aws-eks-asg-rolling-update-handler.namespace" . }}
labels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 4 }}
spec:
replicas: {{ .Values.replicaCount }}
selector:
matchLabels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 6 }}
template:
metadata:
labels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 8 }}
spec:
automountServiceAccountToken: true
serviceAccountName: {{ template "aws-eks-asg-rolling-update-handler.serviceAccountName" . }}
restartPolicy: Always
dnsPolicy: Default
containers:
- name: {{ template "aws-eks-asg-rolling-update-handler.name" . }}
image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
env:
{{- toYaml .Values.environmentVars | nindent 12 }}
{{- with .Values.resources }}
resources:
{{- toYaml . | nindent 12 }}
{{- end }}
volumeMounts:
- name: aws-token
mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
readOnly: true
volumes:
- name: aws-token
projected:
sources:
- serviceAccountToken:
path: token
expirationSeconds: 86400
audience: "sts.amazonaws.com"
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}
{{- end }}

@ -0,0 +1,13 @@
{{ if .Values.serviceAccount.create }}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ template "aws-eks-asg-rolling-update-handler.serviceAccountName" . }}
namespace: {{ template "aws-eks-asg-rolling-update-handler.namespace" . }}
labels:
{{ include "aws-eks-asg-rolling-update-handler.labels" . | indent 4 }}
{{- with .Values.serviceAccount.annotations }}
annotations:
{{- toYaml . | nindent 4 }}
{{- end }}
{{ end }}

@ -0,0 +1,28 @@
replicaCount: 1
image:
repository: twinproduction/aws-eks-asg-rolling-update-handler
tag: v1.4.3
pullPolicy: IfNotPresent
#imagePullSecrets:
#- imagePullSecret
environmentVars:
- name: CLUSTER_NAME
value: "cluster-name" # REPLACE THIS WITH THE NAME OF YOUR EKS CLUSTER
#- name: AUTO_SCALING_GROUP_NAMES
# value: "asg-1,asg-2,asg-3" # REPLACE THESE VALUES FOR THE NAMES OF THE ASGs, if CLUSTER_NAME is provided, this is ignored
#- name: IGNORE_DAEMON_SETS
# value: "true"
#- name: DELETE_LOCAL_DATA
# value: "true"
#- name: AWS_REGION
# value: us-west-2
#- name: ENVIRONMENT
# value: ""
serviceAccount:
create: true
#name: aws-eks-asg-rolling-update-handler
annotations: {}

@ -1,5 +1,5 @@
apiVersion: v2
appVersion: 1.16.5
appVersion: 1.18.1
description: A Helm chart for the AWS Node Termination Handler.
home: https://github.com/aws/eks-charts
icon: https://raw.githubusercontent.com/aws/eks-charts/master/docs/logo/aws.png
@ -22,4 +22,4 @@ sources:
- https://github.com/aws/aws-node-termination-handler/
- https://github.com/aws/eks-charts/
type: application
version: 0.18.5
version: 0.20.1

@ -70,6 +70,7 @@ The configuration in this table applies to all AWS Node Termination Handler mode
| `extraEnv` | Additional environment variables for the _aws-node-termination-handler_ container. | `[]` |
| `probes` | The Kubernetes liveness probe configuration. | _See values.yaml_ |
| `logLevel` | Sets the log level (`info`,`debug`, or `error`) | `info` |
| `logFormatVersion` | Sets the log format version. Available versions: 1, 2. Version 1 refers to the format that has been used through v1.17.3. Version 2 offers more detail for the "event kind" and "reason", especially when operating in Queue Processor mode. | `1` |
| `jsonLogging` | If `true`, use JSON-formatted logs instead of human readable logs. | `false` |
| `enablePrometheusServer` | If `true`, start an http server exposing `/metrics` endpoint for _Prometheus_. | `false` |
| `prometheusServerPort` | Replaces the default HTTP port for exposing _Prometheus_ metrics. | `9092` |
@ -82,6 +83,7 @@ The configuration in this table applies to all AWS Node Termination Handler mode
| `podTerminationGracePeriod` | The time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used, which defaults to 30 seconds if not specified for the pod. | `-1` |
| `nodeTerminationGracePeriod` | Period of time in seconds given to each node to terminate gracefully. Node draining will be scheduled based on this value to optimize the amount of compute time, but still safely drain the node before an event. | `120` |
| `emitKubernetesEvents` | If `true`, Kubernetes events will be emitted when interruption events are received and when actions are taken on Kubernetes nodes. In IMDS Processor mode a default set of annotations with all the node metadata gathered from IMDS will be attached to each event. More information [here](https://github.com/aws/aws-node-termination-handler/blob/main/docs/kubernetes_events.md). | `false` |
| `completeLifecycleActionDelaySeconds` | Pause after draining the node before completing the EC2 Autoscaling lifecycle action. This may be helpful if Pods on the node have Persistent Volume Claims. | -1 |
| `kubernetesEventsExtraAnnotations` | A comma-separated list of `key=value` extra annotations to attach to all emitted Kubernetes events (e.g. `first=annotation,sample.annotation/number=two"`). | `""` |
| `webhookURL` | Posts event data to URL upon instance interruption action. | `""` |
| `webhookURLSecretName` | Pass the webhook URL as a Secret using the key `webhookurl`. | `""` |
@ -110,9 +112,11 @@ The configuration in this table applies to AWS Node Termination Handler in queue
| `awsRegion` | If specified, use the AWS region for AWS API calls, else NTH will try to find the region through the `AWS_REGION` environment variable, IMDS, or the specified queue URL. | `""` |
| `queueURL` | Listens for messages on the specified SQS queue URL. | `""` |
| `workers` | The maximum amount of parallel event processors to handle concurrent events. | `10` |
| `checkASGTagBeforeDraining` | If `true`, check that the instance is tagged with the `managedAsgTag` before draining the node. If `false`, disables calls ASG API. | `true` |
| `managedAsgTag` | The node tag to check if `checkASGTagBeforeDraining` is `true`. | `aws-node-termination-handler/managed` |
| `useProviderId` | If `true`, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname. | `false` |
| `checkTagBeforeDraining` | If `true`, check that the instance is tagged with the `managedTag` before draining the node. | `true` |
| `managedTag` | The node tag to check if `checkTagBeforeDraining` is `true`. | `aws-node-termination-handler/managed` |
| `checkASGTagBeforeDraining` | [DEPRECATED](Use `checkTagBeforeDraining` instead) If `true`, check that the instance is tagged with the `managedAsgTag` before draining the node. If `false`, disables calls ASG API. | `true` |
| `managedAsgTag` | [DEPRECATED](Use `managedTag` instead) The node tag to check if `checkASGTagBeforeDraining` is `true`.
| `useProviderId` | If `true`, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname. | `false` |
### IMDS Mode Configuration

@ -81,6 +81,8 @@ spec:
value: {{ .Values.logLevel | quote }}
- name: JSON_LOGGING
value: {{ .Values.jsonLogging | quote }}
- name: LOG_FORMAT_VERSION
value: {{ .Values.logFormatVersion | quote }}
- name: ENABLE_PROMETHEUS_SERVER
value: {{ .Values.enablePrometheusServer | quote }}
- name: PROMETHEUS_SERVER_PORT
@ -151,6 +153,9 @@ spec:
value: "false"
- name: UPTIME_FROM_FILE
value: {{ .Values.procUptimeFile | quote }}
{{- with .Values.extraEnv }}
{{- toYaml . | nindent 12 }}
{{- end }}
{{- if or .Values.enablePrometheusServer .Values.enableProbesServer }}
ports:
{{- if .Values.enableProbesServer }}

@ -52,7 +52,7 @@ spec:
{{- end }}
containers:
- name: aws-node-termination-handler
{{- with .Values.securityContext }}
{{- with unset .Values.securityContext "runAsUser" }}
securityContext:
{{- toYaml . | nindent 12 }}
{{- end }}
@ -81,6 +81,8 @@ spec:
value: {{ .Values.logLevel | quote }}
- name: JSON_LOGGING
value: {{ .Values.jsonLogging | quote }}
- name: LOG_FORMAT_VERSION
value: {{ .Values.logFormatVersion | quote }}
- name: ENABLE_PROMETHEUS_SERVER
value: {{ .Values.enablePrometheusServer | quote }}
- name: PROMETHEUS_SERVER_PORT
@ -149,6 +151,9 @@ spec:
value: {{ .Values.enableRebalanceDraining | quote }}
- name: ENABLE_SQS_TERMINATION_DRAINING
value: "false"
{{- with .Values.extraEnv }}
{{- toYaml . | nindent 12 }}
{{- end }}
{{- if or .Values.enablePrometheusServer .Values.enableProbesServer }}
ports:
{{- if .Values.enableProbesServer }}

@ -78,14 +78,22 @@ spec:
value: {{ .Values.logLevel | quote }}
- name: JSON_LOGGING
value: {{ .Values.jsonLogging | quote }}
- name: LOG_FORMAT_VERSION
value: {{ .Values.logFormatVersion | quote }}
- name: ENABLE_PROMETHEUS_SERVER
value: {{ .Values.enablePrometheusServer | quote }}
- name: PROMETHEUS_SERVER_PORT
value: {{ .Values.prometheusServerPort | quote }}
# [DEPRECATED] Use CHECK_TAG_BEFORE_DRAINING instead
- name: CHECK_ASG_TAG_BEFORE_DRAINING
value: {{ .Values.checkASGTagBeforeDraining | quote }}
- name: CHECK_TAG_BEFORE_DRAINING
value: {{ .Values.checkTagBeforeDraining | quote }}
# [DEPRECATED] Use MANAGED_TAG instead
- name: MANAGED_ASG_TAG
value: {{ .Values.managedAsgTag | quote }}
- name: MANAGED_TAG
value: {{ .Values.managedTag | quote }}
- name: USE_PROVIDER_ID
value: {{ .Values.useProviderId | quote }}
- name: DRY_RUN
@ -106,6 +114,8 @@ spec:
value: {{ .Values.nodeTerminationGracePeriod | quote }}
- name: EMIT_KUBERNETES_EVENTS
value: {{ .Values.emitKubernetesEvents | quote }}
- name: COMPLETE_LIFECYCLE_ACTION_DELAY_SECONDS
value: {{ .Values.completeLifecycleActionDelaySeconds | quote }}
{{- with .Values.kubernetesEventsExtraAnnotations }}
- name: KUBERNETES_EVENTS_EXTRA_ANNOTATIONS
value: {{ . | quote }}

@ -66,6 +66,9 @@ probes:
# Set the log level
logLevel: info
# Set the log format version
logFormatVersion: 1
# Log messages in JSON format
jsonLogging: false
@ -100,6 +103,9 @@ nodeTerminationGracePeriod: 120
# emitKubernetesEvents If true, Kubernetes events will be emitted when interruption events are received and when actions are taken on Kubernetes nodes. In IMDS Processor mode a default set of annotations with all the node metadata gathered from IMDS will be attached to each event
emitKubernetesEvents: false
# completeLifecycleActionDelaySeconds will pause for the configured duration after draining the node before completing the EC2 Autoscaling lifecycle action. This may be helpful if Pods on the node have Persistent Volume Claims.
completeLifecycleActionDelaySeconds: -1
# kubernetesEventsExtraAnnotations A comma-separated list of key=value extra annotations to attach to all emitted Kubernetes events
# Example: "first=annotation,sample.annotation/number=two"
kubernetesEventsExtraAnnotations: ""
@ -170,13 +176,18 @@ queueURL: ""
# The maximum amount of parallel event processors to handle concurrent events
workers: 10
# If true, check that the instance is tagged with "aws-node-termination-handler/managed" as the key before draining the node
# If false, disables calls to ASG API.
# [DEPRECATED] Use checkTagBeforeDraining instead
checkASGTagBeforeDraining: true
# The tag to ensure is on a node if checkASGTagBeforeDraining is true
# If true, check that the instance is tagged with "aws-node-termination-handler/managed" as the key before draining the node
checkTagBeforeDraining: true
# [DEPRECATED] Use managedTag instead
managedAsgTag: "aws-node-termination-handler/managed"
# The tag to ensure is on a node if checkTagBeforeDraining is true
managedTag: "aws-node-termination-handler/managed"
# If true, fetch node name through Kubernetes node spec ProviderID instead of AWS event PrivateDnsHostname.
useProviderId: false

@ -0,0 +1,36 @@
diff -tuNr charts/aws-eks-asg-rolling-update-handler.orig/templates/deployment.yaml charts/aws-eks-asg-rolling-update-handler/templates/deployment.yaml
--- charts/aws-eks-asg-rolling-update-handler.orig/templates/deployment.yaml 2022-12-16 13:10:26.049272371 +0000
+++ charts/aws-eks-asg-rolling-update-handler/templates/deployment.yaml 2022-12-16 15:56:00.880666339 +0000
@@ -25,7 +25,31 @@
image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
env:
-{{- toYaml .Values.environmentVars | nindent 12 }}
+ {{- toYaml .Values.environmentVars | nindent 12 }}
+ {{- with .Values.resources }}
+ resources:
+ {{- toYaml . | nindent 12 }}
+ {{- end }}
+ volumeMounts:
+ - name: aws-token
+ mountPath: "/var/run/secrets/sts.amazonaws.com/serviceaccount/"
+ readOnly: true
+ volumes:
+ - name: aws-token
+ projected:
+ sources:
+ - serviceAccountToken:
+ path: token
+ expirationSeconds: 86400
+ audience: "sts.amazonaws.com"
+ {{- with .Values.nodeSelector }}
+ nodeSelector:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
+ {{- with .Values.tolerations }}
+ tolerations:
+ {{- toYaml . | nindent 8 }}
+ {{- end }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 8 }}

@ -6,6 +6,7 @@ metadata:
namespace: kube-system
spec:
schedule: "0 * * * *"
concurrencyPolicy: "Replace"
jobTemplate:
spec:
backoffLimit: 1
@ -56,5 +57,7 @@ spec:
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
restartPolicy: Never
{{- end }}

@ -71,6 +71,8 @@ spec:
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
volumes:
- name: aws-token
projected:

@ -4,6 +4,7 @@ set -ex
helm repo update
NTH_VERSION=$(yq eval '.dependencies[] | select(.name=="aws-node-termination-handler") | .version' Chart.yaml)
RUH_VERSION=$(yq eval '.dependencies[] | select(.name=="aws-eks-asg-rolling-update-handler") | .version' Chart.yaml)
rm -rf charts/aws-node-termination-handler
helm pull eks/aws-node-termination-handler --untar --untardir charts --version $NTH_VERSION