docs: more 1.22 upgrade info

Stefan Reimer 2022-04-13 18:02:14 +02:00
parent 98a005e51e
commit 97010c1bea
6 changed files with 91 additions and 182 deletions

View File

@@ -1,9 +1,9 @@
# KubeZero 1.22
-## Release notes
+## What's new - Major themes
-### Custom AMIs
-Starting with 1.22, all KubeZero nodes will boot from custom pre-baked AMIs. These AMIs will be provided and shared by the Zero Down Time for all customers, all sources and build pipeline are freely [available](https://git.zero-downtime.net/ZeroDownTime/alpine-zdt-images).
+### Alpine - Custom AMIs
+Starting with 1.22, all KubeZero nodes will boot from custom pre-baked AMIs. These AMIs will be provided and shared by Zero Down Time for all customers. As usual, all sources and the build pipeline are freely [available](https://git.zero-downtime.net/ZeroDownTime/alpine-zdt-images).
This eliminates *ALL* dependencies at boot time other than container registries. Gone are the days when Ubuntu, SuSE or Github decided to ruin your morning coffee.
@@ -15,25 +15,28 @@ The [external-dns](https://github.com/kubernetes-sigs/external-dns) controller g
Further use of this controller to automate DNS-related configuration, e.g. for Ingress, is planned for upcoming releases.
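For reference, external-dns reacts to hostname annotations on Services and Ingresses. A minimal, illustrative example (the Service name and hostname are placeholders, and this assumes the controller is configured to watch such sources):

```bash
# Hypothetical: ask external-dns to manage a DNS record for an
# existing Service (names are placeholders, not KubeZero defaults)
kubectl annotate service my-app \
  external-dns.alpha.kubernetes.io/hostname=my-app.example.com
```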
-### crun - container runtime
-got migrated from runc to crun, which reduces the memory overhead *per pod* from 16M to 4M, details at [crun intro](https://www.redhat.com/sysadmin/introduction-crun)
+### Container runtime
+CRI-O now uses crun rather than runc, which reduces the memory overhead *per pod* from 16M to 4M; see this [crun intro](https://www.redhat.com/sysadmin/introduction-crun) for details.
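A quick way to spot-check that a node picked up the new runtime (assuming the stock CRI-O config location under `/etc/crio/`):

```bash
# On a node: check CRI-O's configured default OCI runtime
grep -R "default_runtime" /etc/crio/
# expected output: default_runtime = "crun"
```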
-### Version upgrades
-- Istio to 1.13.2
+## Version upgrades
+- Istio to 1.13.2, using the new upstream Helm charts
- aws-termination-handler to 1.16
-- aws-iam-authenticator to 0.5.7
+- aws-iam-authenticator to 0.5.7, required for Kubernetes >= 1.22; this allows using the latest client-side version again
-### Misc
+## Misc
- new metrics and dashboards for openEBS LVM CSI drivers
- new node label `node.kubernetes.io/instance-type` on all nodes containing the EC2 instance type (see the example after this list)
- kubelet root moved to `/var/lib/containers` to ensure ephemeral storage is allocated from the configurable volume rather than the root fs of the worker
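For example, to show each node's instance type via the new label:

```bash
# List nodes with their EC2 instance type as an extra column
kubectl get nodes -L node.kubernetes.io/instance-type
```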
-## Upgrade
+# Upgrade
`(No, really, you MUST read this before you upgrade)`
-*Ensure your Kube context points to the correct cluster !!!*
+- Ensure your Kube context points to the correct cluster! (a quick check is shown below)
+- Ensure any usage of Kiam has been migrated to OIDC providers, as all remaining Kiam components will be deleted as part of the upgrade
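A minimal sanity check before touching anything:

```bash
# Verify kubectl is pointed at the cluster you intend to upgrade
kubectl config current-context
```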
1. Trigger the cluster upgrade:
-`./upgrade_122.sh`
+`./release/v1.22/upgrade_cluster.sh`
2. Upgrade CFN stacks for the control plane and all worker groups
Change the Kubernetes version in the controller config from `1.21.9` to `1.22.8`
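If the version is tracked in a parameter file, a one-liner can do the bump (the file path is an assumption; adjust to your setup):

```bash
# Hypothetical: bump the version string in a controller config file
sed -i 's/1\.21\.9/1.22.8/g' config/controller.yaml
```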

View File

@@ -137,7 +137,13 @@ if [ "$1" == 'upgrade' ]; then
cp ${HOSTFS}/etc/kubernetes/admin.conf ${HOSTFS}/root/.kube/config
fi
-### POST 1.21 specific
+### POST 1.22 specific
+# Remove all remaining kiam
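+# Rendering the chart under the expected release name and piping the
+# manifests to 'kubectl delete' removes every kiam resource; '|| true'
+# keeps the upgrade going if they are already gone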
+helm repo add uswitch https://uswitch.github.io/kiam-helm-charts/charts/
+helm repo update
+helm template uswitch/kiam --name-template kiam --set server.prometheus.servicemonitor.enabled=true --set agent.prometheus.servicemonitor.enabled=true |
+  kubectl delete --namespace kube-system -f - || true
######################
# network
@@ -306,7 +312,7 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
yq eval -M ".clusters[0].cluster.certificate-authority-data = \"$(cat ${HOSTFS}/etc/kubernetes/pki/ca.crt | base64 -w0)\"" ${WORKDIR}/kubeadm/templates/admin-aws-iam.yaml > ${HOSTFS}/etc/kubernetes/admin-aws-iam.yaml
fi
-# Install some basics on bootstrap and join for 1.21 to get new modules in place
+# install / update network and addons
if [[ "$1" =~ "^(bootstrap|join|restore)$" ]]; then
# network
yq eval '.network // ""' ${HOSTFS}/etc/kubernetes/kubezero.yaml > _values.yaml

View File

@@ -1,29 +0,0 @@
#!/bin/bash
set -x
# Allow EFS and EBS Argo apps to be deleted without removing things like storageClasses etc.
# all to be replaced by kubezero-storage
kubectl patch application aws-ebs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
kubectl patch application aws-efs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
# Migrate ZeroDownTime helm repo and fix project permissions
kubectl patch appproject kubezero -n argocd --type=json -p='[{"op": "replace", "path": "/spec/sourceRepos/0", "value": "https://cdn.zero-downtime.net/charts" }]'
kubectl patch appproject kubezero -n argocd --type=json -p='[{"op": "replace", "path": "/spec/destinations", "value": [{"namespace": "*", "server": "https://kubernetes.default.svc"}] }]'
# Delete EBS and EFS Deployments and DaemonSets as we cannot change the labels while moving them to storage.
# This will NOT affect provisioned volumes
kubectl delete deployment ebs-csi-controller -n kube-system
kubectl delete daemonSet ebs-csi-node -n kube-system
# Snapshot controller was removed from EBS chart
kubectl delete statefulset ebs-snapshot-controller -n kube-system
kubectl delete deployment efs-csi-controller -n kube-system
kubectl delete daemonSet efs-csi-node -n kube-system
# Remove calico Servicemonitor in case still around
kubectl delete servicemonitor calico-node -n kube-system
# Upgrade Prometheus stack, requires state metrics to be removed first
kubectl delete deployment metrics-kube-state-metrics -n monitoring
kubectl delete deployment metrics-prometheus-adapter -n monitoring

View File

@@ -4,24 +4,12 @@ YAML=$1
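# $1: path of the Argo Application manifest to migrate (assumed from the usage below)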
# Convert keys
yq eval -i '
.spec.source.repoURL="https://cdn.zero-downtime.net/charts" |
.spec.source.targetRevision="1.21.9-3" |
del(.spec.source.helm.parameters)' $YAML
.spec.source.targetRevision="1.22.8-2"
' $YAML
# Extract values
yq eval '.spec.source.helm.values' $1 > _values.yaml
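# The Helm values are edited in this scratch file and merged back at the end via strload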
+# Remove kiam and calico from Argo
+yq eval -i 'del(.calico) | del(.kiam)' _values.yaml
-# Move storage into module
-yq eval -i '.storage.enabled=true' _values.yaml
-[ $(yq eval 'has("aws-ebs-csi-driver")' _values.yaml) == "true" ] && yq eval -i '.storage.aws-ebs-csi-driver=.aws-ebs-csi-driver' _values.yaml
-[ $(yq eval 'has("aws-efs-csi-driver")' _values.yaml) == "true" ] && yq eval -i '.storage.aws-efs-csi-driver=.aws-efs-csi-driver' _values.yaml
-# Finally remove old helm apps
-yq eval -i 'del(.aws-ebs-csi-driver) | del(.aws-efs-csi-driver)' _values.yaml
# merge _values.yaml back
yq eval -Pi '.spec.source.helm.values |= strload("_values.yaml")' $YAML

View File

@@ -1,125 +0,0 @@
#!/bin/bash -e
VERSION="v1.21.9"
[ -n "$DEBUG" ] && DEBUG=1
# unset any AWS_DEFAULT_PROFILE as it will break aws-iam-auth
unset AWS_DEFAULT_PROFILE
nodes=$(kubectl get nodes -l node-role.kubernetes.io/control-plane -o json | jq .items[].metadata.name -r)
nodes=""
for node in $nodes; do
echo "Deploying node upgrade job on $node..."
cat <<EOF | sed -e "s/__node__/$node/g" | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}-node-__node__
  namespace: kube-system
  labels:
    app: kubezero-upgrade-node
spec:
  hostNetwork: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - node-upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    kubernetes.io/hostname: __node__
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF
kubectl wait pod kubezero-upgrade-${VERSION//.}-node-$node -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
kubectl logs kubezero-upgrade-${VERSION//.}-node-$node -n kube-system -f 2>/dev/null && break
sleep 3
done
kubectl delete pod kubezero-upgrade-${VERSION//.}-node-$node -n kube-system
done
echo "Deploying cluster upgrade job ..."
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  #hostIPC: true
  #hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF
kubectl wait pod kubezero-upgrade-${VERSION//.} -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
kubectl logs kubezero-upgrade-${VERSION//.} -n kube-system -f 2>/dev/null && break
sleep 3
done
kubectl delete pod kubezero-upgrade-${VERSION//.} -n kube-system

View File

@@ -0,0 +1,66 @@
#!/bin/bash -e
VERSION="v1.22.8"
[ -n "$DEBUG" ] && DEBUG=1
# unset any AWS_DEFAULT_PROFILE as it will break aws-iam-auth
unset AWS_DEFAULT_PROFILE
echo "Deploying cluster upgrade job ..."
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  hostIPC: true
  hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF
kubectl wait pod kubezero-upgrade-${VERSION//.} -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
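# The control plane may be restarting while the upgrade runs, so keep
# retrying until the pod's logs can be streamed to completion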
while true; do
kubectl logs kubezero-upgrade-${VERSION//.} -n kube-system -f 2>/dev/null && break
sleep 3
done
kubectl delete pod kubezero-upgrade-${VERSION//.} -n kube-system