docs: more 1.22 upgrade info

Stefan Reimer 2022-04-13 18:02:14 +02:00
parent 98a005e51e
commit 97010c1bea
6 changed files with 91 additions and 182 deletions

View File

@@ -1,9 +1,9 @@
# KubeZero 1.22
-## Release notes
+## What's new - Major themes
-### Custom AMIs
-Starting with 1.22, all KubeZero nodes will boot from custom pre-baked AMIs. These AMIs will be provided and shared by the Zero Down Time for all customers, all sources and build pipeline are freely [available](https://git.zero-downtime.net/ZeroDownTime/alpine-zdt-images).
+### Alpine - Custom AMIs
+Starting with 1.22, all KubeZero nodes will boot from custom pre-baked AMIs. These AMIs will be provided and shared by Zero Down Time for all customers. As usual, all sources and the build pipeline are freely [available](https://git.zero-downtime.net/ZeroDownTime/alpine-zdt-images).
This eliminates *ALL* dependencies at boot time other than container registries. Gone are the days when Ubuntu, SuSE or Github decided to ruin your morning coffee.
@@ -15,25 +15,28 @@ The [external-dns](https://github.com/kubernetes-sigs/external-dns) controller g
Further use of this controller to automate DNS-related configuration, e.g. for Ingress, is planned for upcoming releases.
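For reference, external-dns reacts to hostname annotations on Services and Ingresses. A minimal, illustrative example (the Service name and hostname are placeholders, and this assumes the controller is configured to watch such sources):

```bash
# Hypothetical: ask external-dns to manage a DNS record for an
# existing Service (names are placeholders, not KubeZero defaults)
kubectl annotate service my-app \
  external-dns.alpha.kubernetes.io/hostname=my-app.example.com
```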
-### crun - container runtime
-got migrated from runc to crun, which reduces the memory overhead *per pod* from 16M to 4M, details at [crun intro](https://www.redhat.com/sysadmin/introduction-crun)
+### Container runtime
+CRI-O now uses crun rather than runc, which reduces the memory overhead *per pod* from 16M to 4M; see this [crun intro](https://www.redhat.com/sysadmin/introduction-crun) for details.
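A quick way to spot-check that a node picked up the new runtime (assuming the stock CRI-O config location under `/etc/crio/`):

```bash
# On a node: check CRI-O's configured default OCI runtime
grep -R "default_runtime" /etc/crio/
# expected output: default_runtime = "crun"
```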
-### Version upgrades
-- Istio to 1.13.2
+## Version upgrades
+- Istio to 1.13.2, using the new upstream Helm charts
- aws-termination-handler to 1.16
-- aws-iam-authenticator to 0.5.7
+- aws-iam-authenticator to 0.5.7, required for Kubernetes >= 1.22; this allows using the latest client-side version again
-### Misc
+## Misc
- new metrics and dashboards for openEBS LVM CSI drivers
- new node label `node.kubernetes.io/instance-type` on all nodes containing the EC2 instance type (see the example after this list)
- kubelet root moved to `/var/lib/containers` to ensure ephemeral storage is allocated from the configurable volume rather than the root fs of the worker
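For example, to show each node's instance type via the new label:

```bash
# List nodes with their EC2 instance type as an extra column
kubectl get nodes -L node.kubernetes.io/instance-type
```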
-## Upgrade
+# Upgrade
`(No, really, you MUST read this before you upgrade)`
-*Ensure your Kube context points to the correct cluster !!!*
+- Ensure your Kube context points to the correct cluster! (a quick check is shown below)
+- Ensure any usage of Kiam has been migrated to OIDC providers, as all remaining Kiam components will be deleted as part of the upgrade
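A minimal sanity check before touching anything:

```bash
# Verify kubectl is pointed at the cluster you intend to upgrade
kubectl config current-context
```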
1. Trigger the cluster upgrade:
-`./upgrade_122.sh`
+`./release/v1.22/upgrade_cluster.sh`
2. Upgrade CFN stacks for the control plane and all worker groups
Change the Kubernetes version in the controller config from `1.21.9` to `1.22.8`
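If the version is tracked in a parameter file, a one-liner can do the bump (the file path is an assumption; adjust to your setup):

```bash
# Hypothetical: bump the version string in a controller config file
sed -i 's/1\.21\.9/1.22.8/g' config/controller.yaml
```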

View File

@@ -137,7 +137,13 @@ if [ "$1" == 'upgrade' ]; then
cp ${HOSTFS}/etc/kubernetes/admin.conf ${HOSTFS}/root/.kube/config
fi
-### POST 1.21 specific
+### POST 1.22 specific
+# Remove all remaining kiam
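+# Rendering the chart under the expected release name and piping the
+# manifests to 'kubectl delete' removes every kiam resource; '|| true'
+# keeps the upgrade going if they are already gone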
+helm repo add uswitch https://uswitch.github.io/kiam-helm-charts/charts/
+helm repo update
+helm template uswitch/kiam --name-template kiam --set server.prometheus.servicemonitor.enabled=true --set agent.prometheus.servicemonitor.enabled=true |
+  kubectl delete --namespace kube-system -f - || true
######################
# network
@@ -306,7 +312,7 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
yq eval -M ".clusters[0].cluster.certificate-authority-data = \"$(cat ${HOSTFS}/etc/kubernetes/pki/ca.crt | base64 -w0)\"" ${WORKDIR}/kubeadm/templates/admin-aws-iam.yaml > ${HOSTFS}/etc/kubernetes/admin-aws-iam.yaml
fi
-# Install some basics on bootstrap and join for 1.21 to get new modules in place
+# install / update network and addons
if [[ "$1" =~ "^(bootstrap|join|restore)$" ]]; then
# network
yq eval '.network // ""' ${HOSTFS}/etc/kubernetes/kubezero.yaml > _values.yaml

View File

@@ -1,29 +0,0 @@
#!/bin/bash
set -x
# Allow EFS and EBS Argo apps to be deleted without removing things like storageClasses etc.
# all to be replaced by kubezero-storage
kubectl patch application aws-ebs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
kubectl patch application aws-efs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
# Migrate ZeroDownTime helm repo and fix project permissions
kubectl patch appproject kubezero -n argocd --type=json -p='[{"op": "replace", "path": "/spec/sourceRepos/0", "value": "https://cdn.zero-downtime.net/charts" }]'
kubectl patch appproject kubezero -n argocd --type=json -p='[{"op": "replace", "path": "/spec/destinations", "value": [{"namespace": "*", "server": "https://kubernetes.default.svc"}] }]'
# Delete EBS and EFS Deployments and DaemonSets as we cannot change the labels while moving them to storage.
# This will NOT affect provisioned volumes
kubectl delete deployment ebs-csi-controller -n kube-system
kubectl delete daemonSet ebs-csi-node -n kube-system
# Snapshot controller was removed from EBS chart
kubectl delete statefulset ebs-snapshot-controller -n kube-system
kubectl delete deployment efs-csi-controller -n kube-system
kubectl delete daemonSet efs-csi-node -n kube-system
# Remove calico Servicemonitor in case still around
kubectl delete servicemonitor calico-node -n kube-system
# Upgrade Prometheus stack, requires state metrics to be removed first
kubectl delete deployment metrics-kube-state-metrics -n monitoring
kubectl delete deployment metrics-prometheus-adapter -n monitoring

View File

@@ -4,24 +4,12 @@ YAML=$1
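# $1: path of the Argo Application manifest to migrate (assumed from the usage below)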
# Convert keys
yq eval -i '
.spec.source.repoURL="https://cdn.zero-downtime.net/charts" |
.spec.source.targetRevision="1.21.9-3" |
del(.spec.source.helm.parameters)' $YAML
.spec.source.targetRevision="1.22.8-2"
' $YAML
# Extract values
yq eval '.spec.source.helm.values' $1 > _values.yaml
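# The Helm values are edited in this scratch file and merged back at the end via strload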
+# Remove kiam and calico from Argo
+yq eval -i 'del(.calico) | del(.kiam)' _values.yaml
-# Move storage into module
-yq eval -i '.storage.enabled=true' _values.yaml
-[ $(yq eval 'has("aws-ebs-csi-driver")' _values.yaml) == "true" ] && yq eval -i '.storage.aws-ebs-csi-driver=.aws-ebs-csi-driver' _values.yaml
-[ $(yq eval 'has("aws-efs-csi-driver")' _values.yaml) == "true" ] && yq eval -i '.storage.aws-efs-csi-driver=.aws-efs-csi-driver' _values.yaml
-# Finally remove old helm apps
-yq eval -i 'del(.aws-ebs-csi-driver) | del(.aws-efs-csi-driver)' _values.yaml
# merge _values.yaml back
yq eval -Pi '.spec.source.helm.values |= strload("_values.yaml")' $YAML

View File

@@ -1,125 +0,0 @@
#!/bin/bash -e
VERSION="v1.21.9"
[ -n "$DEBUG" ] && DEBUG=1
# unset any AWS_DEFAULT_PROFILE as it will break aws-iam-auth
unset AWS_DEFAULT_PROFILE
nodes=$(kubectl get nodes -l node-role.kubernetes.io/control-plane -o json | jq .items[].metadata.name -r)
nodes=""
for node in $nodes; do
echo "Deploying node upgrade job on $node..."
cat <<EOF | sed -e "s/__node__/$node/g" | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}-node-__node__
  namespace: kube-system
  labels:
    app: kubezero-upgrade-node
spec:
  hostNetwork: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - node-upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    kubernetes.io/hostname: __node__
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF
kubectl wait pod kubezero-upgrade-${VERSION//.}-node-$node -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
kubectl logs kubezero-upgrade-${VERSION//.}-node-$node -n kube-system -f 2>/dev/null && break
sleep 3
done
kubectl delete pod kubezero-upgrade-${VERSION//.}-node-$node -n kube-system
done
echo "Deploying cluster upgrade job ..."
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  #hostIPC: true
  #hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF
kubectl wait pod kubezero-upgrade-${VERSION//.} -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
kubectl logs kubezero-upgrade-${VERSION//.} -n kube-system -f 2>/dev/null && break
sleep 3
done
kubectl delete pod kubezero-upgrade-${VERSION//.} -n kube-system

View File

@@ -0,0 +1,66 @@
#!/bin/bash -e
VERSION="v1.22.8"
[ -n "$DEBUG" ] && DEBUG=1
# unset any AWS_DEFAULT_PROFILE as it will break aws-iam-auth
unset AWS_DEFAULT_PROFILE
echo "Deploying cluster upgrade job ..."
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  hostIPC: true
  hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF
kubectl wait pod kubezero-upgrade-${VERSION//.} -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
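# The control plane may be restarting while the upgrade runs, so keep
# retrying until the pod's logs can be streamed to completion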
while true; do
kubectl logs kubezero-upgrade-${VERSION//.} -n kube-system -f 2>/dev/null && break
sleep 3
done
kubectl delete pod kubezero-upgrade-${VERSION//.} -n kube-system