docs: more 1.22 upgrade info

This commit is contained in:
Stefan Reimer 2022-04-13 18:02:14 +02:00
parent 98a005e51e
commit 97010c1bea
6 changed files with 91 additions and 182 deletions

View File

@@ -1,9 +1,9 @@
# KubeZero 1.22

-## Release notes
+## What's new - Major themes

-### Custom AMIs
+### Alpine - Custom AMIs
-Starting with 1.22, all KubeZero nodes will boot from custom pre-baked AMIs. These AMIs will be provided and shared by the Zero Down Time for all customers, all sources and build pipeline are freely [available](https://git.zero-downtime.net/ZeroDownTime/alpine-zdt-images).
+Starting with 1.22, all KubeZero nodes will boot from custom pre-baked AMIs. These AMIs will be provided and shared by Zero Down Time for all customers. All sources and the build pipeline are freely [available](https://git.zero-downtime.net/ZeroDownTime/alpine-zdt-images) as usual.

This eliminates *ALL* dependencies at boot time other than container registries. Gone are the days when Ubuntu, SuSE or GitHub decided to ruin your morning coffee.
@@ -15,25 +15,28 @@ The [external-dns](https://github.com/kubernetes-sigs/external-dns) controller g
Further usage of this controller to automate any DNS related configurations, like Ingress etc. is planned for following releases.
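As a rough illustration of where this is heading (not something the current release configures for you), external-dns is usually driven by annotations on Services or Ingresses; the hostname, namespace and ports below are made up for the example:

```bash
# Illustration only - external-dns would create a DNS record matching the annotation.
# Hostname, namespace and selector are hypothetical.
cat <<'EOF' | kubectl apply -f -
apiVersion: v1
kind: Service
metadata:
  name: echo
  namespace: demo
  annotations:
    external-dns.alpha.kubernetes.io/hostname: echo.example.com
spec:
  type: LoadBalancer
  selector:
    app: echo
  ports:
  - port: 80
    targetPort: 8080
EOF
```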
-### crun - container runtime
+### Container runtime
-got migrated from runc to crun, which reduces the memory overhead *per pod* from 16M to 4M, details at [crun intro](https://www.redhat.com/sysadmin/introduction-crun)
+CRI-O now uses crun rather than runc, which reduces the memory overhead *per pod* from 16M to 4M; details at [crun intro](https://www.redhat.com/sysadmin/introduction-crun).
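To double-check which OCI runtime a node actually ended up with, something along these lines should do (paths assume the default CRI-O layout on the new AMIs, which is an assumption):

```bash
# Assumes default CRI-O config locations on the node; adjust paths if the AMI differs.
grep -r default_runtime /etc/crio/ 2>/dev/null   # expected to point at crun
crun --version                                   # confirms the crun binary is present
```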
-### Version upgrades
+## Version upgrades
-- Istio to 1.13.2
+- Istio to 1.13.2, now using the new upstream Helm charts
- aws-termination-handler to 1.16
-- aws-iam-authenticator to 0.5.7
+- aws-iam-authenticator to 0.5.7, required for Kubernetes 1.22 and later; this also allows using the latest authenticator version on the client side again
-### Misc
+## Misc
- new metrics and dashboards for openEBS LVM CSI drivers
- new node label `node.kubernetes.io/instance-type` for all nodes containing the EC2 instance type (example usage below)
+- kubelet root moved to `/var/lib/containers` to ensure ephemeral storage is allocated from the configurable volume rather than the root fs of the worker
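A quick way to put the new instance-type label to work, e.g. for inventory checks or node selection (the instance type is just an example value):

```bash
# Show the label as an extra column for all nodes
kubectl get nodes -L node.kubernetes.io/instance-type

# Or filter nodes by a specific type, e.g. m5.large (example value)
kubectl get nodes -l node.kubernetes.io/instance-type=m5.large
```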
-## Upgrade
+# Upgrade
`(No, really, you MUST read this before you upgrade)`

-*Ensure your Kube context points to the correct cluster !!!*
+- Ensure your Kube context points to the correct cluster! (see the pre-flight checks below)
+- Ensure any usage of Kiam has been migrated to OIDC providers, as any remaining Kiam components will be deleted as part of the upgrade
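A minimal pre-flight sketch covering both points above; the Kiam check simply greps for leftovers and is an approximation, not an official tool:

```bash
# 1. Make sure kubectl talks to the cluster you intend to upgrade
kubectl config current-context

# 2. Look for any remaining Kiam components - this should return nothing
kubectl get pods,deployments,daemonsets -A 2>/dev/null | grep -i kiam || echo "no kiam components found"
```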
1. Trigger the cluster upgrade:
-`./upgrade_122.sh`
+`./release/v1.22/upgrade_cluster.sh`

2. Upgrade the CFN stacks for the control plane and all worker groups.
Change the Kubernetes version in the controller config from `1.21.9` to `1.22.8` (a rough sketch follows below).
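How the version bump looks in practice depends on how your controller config is managed; as a rough sketch, assuming a YAML config with a hypothetical version key:

```bash
# Key name and file path are hypothetical - adjust to your actual controller config.
yq eval -i '.Kubernetes.Version = "1.22.8"' controller-config.yaml
# then update the CFN stacks for the control plane and each worker group with your usual tooling
```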

View File

@@ -137,7 +137,13 @@ if [ "$1" == 'upgrade' ]; then
cp ${HOSTFS}/etc/kubernetes/admin.conf ${HOSTFS}/root/.kube/config
fi

-### POST 1.21 specific
+### POST 1.22 specific

+# Remove all remaining kiam
+helm repo add uswitch https://uswitch.github.io/kiam-helm-charts/charts/
+helm repo update
+helm template uswitch/kiam --name-template kiam --set server.prometheus.servicemonitor.enabled=true --set agent.prometheus.servicemonitor.enabled=true |
+  kubectl delete --namespace kube-system -f - || true

######################
# network
@@ -306,7 +312,7 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
yq eval -M ".clusters[0].cluster.certificate-authority-data = \"$(cat ${HOSTFS}/etc/kubernetes/pki/ca.crt | base64 -w0)\"" ${WORKDIR}/kubeadm/templates/admin-aws-iam.yaml > ${HOSTFS}/etc/kubernetes/admin-aws-iam.yaml
fi

-# Install some basics on bootstrap and join for 1.21 to get new modules in place
+# install / update network and addons
if [[ "$1" =~ "^(bootstrap|join|restore)$" ]]; then
# network
yq eval '.network // ""' ${HOSTFS}/etc/kubernetes/kubezero.yaml > _values.yaml

View File

@@ -1,29 +0,0 @@
#!/bin/bash
set -x

# Allow EFS and EBS Argo apps to be deleted without removing things like storageClasses etc.
# All to be replaced by kubezero-storage
kubectl patch application aws-ebs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
kubectl patch application aws-efs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'

# Migrate ZeroDownTime helm repo and fix project permissions
kubectl patch appproject kubezero -n argocd --type=json -p='[{"op": "replace", "path": "/spec/sourceRepos/0", "value": "https://cdn.zero-downtime.net/charts" }]'
kubectl patch appproject kubezero -n argocd --type=json -p='[{"op": "replace", "path": "/spec/destinations", "value": [{"namespace": "*", "server": "https://kubernetes.default.svc"}] }]'

# Delete EBS and EFS Deployments and DaemonSets as we cannot change the labels while moving them to storage.
# This will NOT affect provisioned volumes.
kubectl delete deployment ebs-csi-controller -n kube-system
kubectl delete daemonSet ebs-csi-node -n kube-system
# Snapshot controller was removed from the EBS chart
kubectl delete statefulset ebs-snapshot-controller -n kube-system

kubectl delete deployment efs-csi-controller -n kube-system
kubectl delete daemonSet efs-csi-node -n kube-system

# Remove the calico ServiceMonitor in case it is still around
kubectl delete servicemonitor calico-node -n kube-system

# Upgrade Prometheus stack, requires kube-state-metrics to be removed first
kubectl delete deployment metrics-kube-state-metrics -n monitoring
kubectl delete deployment metrics-prometheus-adapter -n monitoring

View File

@@ -4,24 +4,12 @@ YAML=$1

# Convert keys
yq eval -i '
-.spec.source.repoURL="https://cdn.zero-downtime.net/charts" |
-.spec.source.targetRevision="1.21.9-3" |
-del(.spec.source.helm.parameters)' $YAML
+.spec.source.targetRevision="1.22.8-2"
+' $YAML

# Extract values
yq eval '.spec.source.helm.values' $1 > _values.yaml

-# Remove kiam and calico from Argo
-yq eval -i 'del(.calico) | del(.kiam)' _values.yaml
-# Move storage into module
-yq eval -i '.storage.enabled=true' _values.yaml
-[ $(yq eval 'has("aws-ebs-csi-driver")' _values.yaml) == "true" ] && yq eval -i '.storage.aws-ebs-csi-driver=.aws-ebs-csi-driver' _values.yaml
-[ $(yq eval 'has("aws-efs-csi-driver")' _values.yaml) == "true" ] && yq eval -i '.storage.aws-efs-csi-driver=.aws-efs-csi-driver' _values.yaml
-# Finally remove old helm apps
-yq eval -i 'del(.aws-ebs-csi-driver) | del(.aws-efs-csi-driver)' _values.yaml

# merge _values.yaml back
yq eval -Pi '.spec.source.helm.values |= strload("_values.yaml")' $YAML
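For reference, a possible invocation of this migration snippet against a checked-out Argo Application manifest; the script and file names below are placeholders, not fixed by this commit:

```bash
# Names are placeholders - point the script at your actual Application manifest.
./migrate_argo.sh kubezero-app.yaml
git diff kubezero-app.yaml   # review the adjusted targetRevision and values before committing
```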

View File

@@ -1,125 +0,0 @@
#!/bin/bash -e

VERSION="v1.21.9"

[ -n "$DEBUG" ] && DEBUG=1

# unset any AWS_DEFAULT_PROFILE as it will break aws-iam-auth
unset AWS_DEFAULT_PROFILE

nodes=$(kubectl get nodes -l node-role.kubernetes.io/control-plane -o json | jq .items[].metadata.name -r)
nodes=""

for node in $nodes; do
  echo "Deploying node upgrade job on $node..."

  cat <<EOF | sed -e "s/__node__/$node/g" | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}-node-__node__
  namespace: kube-system
  labels:
    app: kubezero-upgrade-node
spec:
  hostNetwork: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - node-upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    kubernetes.io/hostname: __node__
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF

  kubectl wait pod kubezero-upgrade-${VERSION//.}-node-$node -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
  while true; do
    kubectl logs kubezero-upgrade-${VERSION//.}-node-$node -n kube-system -f 2>/dev/null && break
    sleep 3
  done
  kubectl delete pod kubezero-upgrade-${VERSION//.}-node-$node -n kube-system
done

echo "Deploying cluster upgrade job ..."

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  #hostIPC: true
  #hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF

kubectl wait pod kubezero-upgrade-${VERSION//.} -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
  kubectl logs kubezero-upgrade-${VERSION//.} -n kube-system -f 2>/dev/null && break
  sleep 3
done
kubectl delete pod kubezero-upgrade-${VERSION//.} -n kube-system

View File

@@ -0,0 +1,66 @@
#!/bin/bash -e

VERSION="v1.22.8"

[ -n "$DEBUG" ] && DEBUG=1

# unset any AWS_DEFAULT_PROFILE as it will break aws-iam-auth
unset AWS_DEFAULT_PROFILE

echo "Deploying cluster upgrade job ..."

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  hostIPC: true
  hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF

kubectl wait pod kubezero-upgrade-${VERSION//.} -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
  kubectl logs kubezero-upgrade-${VERSION//.} -n kube-system -f 2>/dev/null && break
  sleep 3
done
kubectl delete pod kubezero-upgrade-${VERSION//.} -n kube-system