docs: more 1.22 upgrade info

This commit is contained in:
Stefan Reimer 2022-04-13 18:02:14 +02:00
parent 98a005e51e
commit 97010c1bea
6 changed files with 91 additions and 182 deletions

View File

@@ -1,9 +1,9 @@
# KubeZero 1.22

-## Release notes
+## What's new - Major themes

-### Custom AMIs
+### Alpine - Custom AMIs
-Starting with 1.22, all KubeZero nodes will boot from custom pre-baked AMIs. These AMIs will be provided and shared by the Zero Down Time for all customers, all sources and build pipeline are freely [available](https://git.zero-downtime.net/ZeroDownTime/alpine-zdt-images).
+Starting with 1.22, all KubeZero nodes will boot from custom pre-baked AMIs. These AMIs will be provided and shared by Zero Down Time for all customers. All sources and the build pipeline are freely [available](https://git.zero-downtime.net/ZeroDownTime/alpine-zdt-images) as usual.

This eliminates *ALL* dependencies at boot time other than container registries. Gone are the days when Ubuntu, SuSE or GitHub decided to ruin your morning coffee.
@@ -15,25 +15,28 @@ The [external-dns](https://github.com/kubernetes-sigs/external-dns) controller g
Further usage of this controller to automate any DNS related configurations, like Ingress etc. is planned for following releases.
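As a rough illustration of where this is heading (not something the current release configures for you), external-dns is usually driven by annotations on Services or Ingresses; the hostname, namespace and ports below are made up for the example:

```bash
# Illustration only - external-dns would create a DNS record matching the annotation.
# Hostname, namespace and selector are hypothetical.
cat <<'EOF' | kubectl apply -f -
apiVersion: v1
kind: Service
metadata:
  name: echo
  namespace: demo
  annotations:
    external-dns.alpha.kubernetes.io/hostname: echo.example.com
spec:
  type: LoadBalancer
  selector:
    app: echo
  ports:
  - port: 80
    targetPort: 8080
EOF
```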
-### crun - container runtime
+### Container runtime
-got migrated from runc to crun, which reduces the memory overhead *per pod* from 16M to 4M, details at [crun intro](https://www.redhat.com/sysadmin/introduction-crun)
+CRI-O now uses crun rather than runc, which reduces the memory overhead *per pod* from 16M to 4M; details at [crun intro](https://www.redhat.com/sysadmin/introduction-crun).
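To double-check which OCI runtime a node actually ended up with, something along these lines should do (paths assume the default CRI-O layout on the new AMIs, which is an assumption):

```bash
# Assumes default CRI-O config locations on the node; adjust paths if the AMI differs.
grep -r default_runtime /etc/crio/ 2>/dev/null   # expected to point at crun
crun --version                                   # confirms the crun binary is present
```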
-### Version upgrades
+## Version upgrades
-- Istio to 1.13.2
+- Istio to 1.13.2, now using the new upstream Helm charts
- aws-termination-handler to 1.16
-- aws-iam-authenticator to 0.5.7
+- aws-iam-authenticator to 0.5.7, required for Kubernetes 1.22 and later; this also allows using the latest authenticator version on the client side again
-### Misc
+## Misc
- new metrics and dashboards for openEBS LVM CSI drivers
- new node label `node.kubernetes.io/instance-type` for all nodes containing the EC2 instance type (example usage below)
+- kubelet root moved to `/var/lib/containers` to ensure ephemeral storage is allocated from the configurable volume rather than the root fs of the worker
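A quick way to put the new instance-type label to work, e.g. for inventory checks or node selection (the instance type is just an example value):

```bash
# Show the label as an extra column for all nodes
kubectl get nodes -L node.kubernetes.io/instance-type

# Or filter nodes by a specific type, e.g. m5.large (example value)
kubectl get nodes -l node.kubernetes.io/instance-type=m5.large
```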
-## Upgrade
+# Upgrade
`(No, really, you MUST read this before you upgrade)`

-*Ensure your Kube context points to the correct cluster !!!*
+- Ensure your Kube context points to the correct cluster! (see the pre-flight checks below)
+- Ensure any usage of Kiam has been migrated to OIDC providers, as any remaining Kiam components will be deleted as part of the upgrade
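A minimal pre-flight sketch covering both points above; the Kiam check simply greps for leftovers and is an approximation, not an official tool:

```bash
# 1. Make sure kubectl talks to the cluster you intend to upgrade
kubectl config current-context

# 2. Look for any remaining Kiam components - this should return nothing
kubectl get pods,deployments,daemonsets -A 2>/dev/null | grep -i kiam || echo "no kiam components found"
```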
1. Trigger the cluster upgrade:
-`./upgrade_122.sh`
+`./release/v1.22/upgrade_cluster.sh`

2. Upgrade the CFN stacks for the control plane and all worker groups.
Change the Kubernetes version in the controller config from `1.21.9` to `1.22.8` (a rough sketch follows below).
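How the version bump looks in practice depends on how your controller config is managed; as a rough sketch, assuming a YAML config with a hypothetical version key:

```bash
# Key name and file path are hypothetical - adjust to your actual controller config.
yq eval -i '.Kubernetes.Version = "1.22.8"' controller-config.yaml
# then update the CFN stacks for the control plane and each worker group with your usual tooling
```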

View File

@@ -137,7 +137,13 @@ if [ "$1" == 'upgrade' ]; then
cp ${HOSTFS}/etc/kubernetes/admin.conf ${HOSTFS}/root/.kube/config
fi

-### POST 1.21 specific
+### POST 1.22 specific

+# Remove all remaining kiam
+helm repo add uswitch https://uswitch.github.io/kiam-helm-charts/charts/
+helm repo update
+helm template uswitch/kiam --name-template kiam --set server.prometheus.servicemonitor.enabled=true --set agent.prometheus.servicemonitor.enabled=true |
+  kubectl delete --namespace kube-system -f - || true

######################
# network
@@ -306,7 +312,7 @@ elif [[ "$1" =~ "^(bootstrap|restore|join)$" ]]; then
yq eval -M ".clusters[0].cluster.certificate-authority-data = \"$(cat ${HOSTFS}/etc/kubernetes/pki/ca.crt | base64 -w0)\"" ${WORKDIR}/kubeadm/templates/admin-aws-iam.yaml > ${HOSTFS}/etc/kubernetes/admin-aws-iam.yaml
fi

-# Install some basics on bootstrap and join for 1.21 to get new modules in place
+# install / update network and addons
if [[ "$1" =~ "^(bootstrap|join|restore)$" ]]; then
# network
yq eval '.network // ""' ${HOSTFS}/etc/kubernetes/kubezero.yaml > _values.yaml

View File

@@ -1,29 +0,0 @@
#!/bin/bash
set -x

# Allow EFS and EBS Argo apps to be deleted without removing things like storageClasses etc.
# All to be replaced by kubezero-storage
kubectl patch application aws-ebs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'
kubectl patch application aws-efs-csi-driver -n argocd --type=json -p='[{"op": "remove", "path": "/metadata/finalizers"}]'

# Migrate ZeroDownTime helm repo and fix project permissions
kubectl patch appproject kubezero -n argocd --type=json -p='[{"op": "replace", "path": "/spec/sourceRepos/0", "value": "https://cdn.zero-downtime.net/charts" }]'
kubectl patch appproject kubezero -n argocd --type=json -p='[{"op": "replace", "path": "/spec/destinations", "value": [{"namespace": "*", "server": "https://kubernetes.default.svc"}] }]'

# Delete EBS and EFS Deployments and DaemonSets as we cannot change the labels while moving them to storage.
# This will NOT affect provisioned volumes.
kubectl delete deployment ebs-csi-controller -n kube-system
kubectl delete daemonSet ebs-csi-node -n kube-system
# Snapshot controller was removed from the EBS chart
kubectl delete statefulset ebs-snapshot-controller -n kube-system

kubectl delete deployment efs-csi-controller -n kube-system
kubectl delete daemonSet efs-csi-node -n kube-system

# Remove the calico ServiceMonitor in case it is still around
kubectl delete servicemonitor calico-node -n kube-system

# Upgrade Prometheus stack, requires kube-state-metrics to be removed first
kubectl delete deployment metrics-kube-state-metrics -n monitoring
kubectl delete deployment metrics-prometheus-adapter -n monitoring

View File

@@ -4,24 +4,12 @@ YAML=$1

# Convert keys
yq eval -i '
-.spec.source.repoURL="https://cdn.zero-downtime.net/charts" |
-.spec.source.targetRevision="1.21.9-3" |
-del(.spec.source.helm.parameters)' $YAML
+.spec.source.targetRevision="1.22.8-2"
+' $YAML

# Extract values
yq eval '.spec.source.helm.values' $1 > _values.yaml

-# Remove kiam and calico from Argo
-yq eval -i 'del(.calico) | del(.kiam)' _values.yaml
-# Move storage into module
-yq eval -i '.storage.enabled=true' _values.yaml
-[ $(yq eval 'has("aws-ebs-csi-driver")' _values.yaml) == "true" ] && yq eval -i '.storage.aws-ebs-csi-driver=.aws-ebs-csi-driver' _values.yaml
-[ $(yq eval 'has("aws-efs-csi-driver")' _values.yaml) == "true" ] && yq eval -i '.storage.aws-efs-csi-driver=.aws-efs-csi-driver' _values.yaml
-# Finally remove old helm apps
-yq eval -i 'del(.aws-ebs-csi-driver) | del(.aws-efs-csi-driver)' _values.yaml

# merge _values.yaml back
yq eval -Pi '.spec.source.helm.values |= strload("_values.yaml")' $YAML
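For reference, a possible invocation of this migration snippet against a checked-out Argo Application manifest; the script and file names below are placeholders, not fixed by this commit:

```bash
# Names are placeholders - point the script at your actual Application manifest.
./migrate_argo.sh kubezero-app.yaml
git diff kubezero-app.yaml   # review the adjusted targetRevision and values before committing
```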

View File

@@ -1,125 +0,0 @@
#!/bin/bash -e

VERSION="v1.21.9"

[ -n "$DEBUG" ] && DEBUG=1

# unset any AWS_DEFAULT_PROFILE as it will break aws-iam-auth
unset AWS_DEFAULT_PROFILE

nodes=$(kubectl get nodes -l node-role.kubernetes.io/control-plane -o json | jq .items[].metadata.name -r)
nodes=""

for node in $nodes; do
  echo "Deploying node upgrade job on $node..."

  cat <<EOF | sed -e "s/__node__/$node/g" | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}-node-__node__
  namespace: kube-system
  labels:
    app: kubezero-upgrade-node
spec:
  hostNetwork: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - node-upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    kubernetes.io/hostname: __node__
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF

  kubectl wait pod kubezero-upgrade-${VERSION//.}-node-$node -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
  while true; do
    kubectl logs kubezero-upgrade-${VERSION//.}-node-$node -n kube-system -f 2>/dev/null && break
    sleep 3
  done
  kubectl delete pod kubezero-upgrade-${VERSION//.}-node-$node -n kube-system
done

echo "Deploying cluster upgrade job ..."

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  #hostIPC: true
  #hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF

kubectl wait pod kubezero-upgrade-${VERSION//.} -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
  kubectl logs kubezero-upgrade-${VERSION//.} -n kube-system -f 2>/dev/null && break
  sleep 3
done
kubectl delete pod kubezero-upgrade-${VERSION//.} -n kube-system

View File

@@ -0,0 +1,66 @@
#!/bin/bash -e

VERSION="v1.22.8"

[ -n "$DEBUG" ] && DEBUG=1

# unset any AWS_DEFAULT_PROFILE as it will break aws-iam-auth
unset AWS_DEFAULT_PROFILE

echo "Deploying cluster upgrade job ..."

cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade-${VERSION//.}
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  hostIPC: true
  hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args:
    - upgrade
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF

kubectl wait pod kubezero-upgrade-${VERSION//.} -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null
while true; do
  kubectl logs kubezero-upgrade-${VERSION//.} -n kube-system -f 2>/dev/null && break
  sleep 3
done
kubectl delete pod kubezero-upgrade-${VERSION//.} -n kube-system