#!/bin/bash -e

# KubeZero cluster upgrade driver (v1.23).
# Usage: upgrade_cluster.sh [output-path-for-new-argo-app.yaml]
# Requires: kubectl with admin access, yq; libhelm.sh must sit next to this script.

VERSION="v1.23"
#VERSION="latest"

# Where to write the regenerated ArgoCD application manifest (arg 1, with default)
ARGO_APP=${1:-/tmp/new-kubezero-argoapp.yaml}

# Resolve the directory of this script so libhelm.sh is found regardless of CWD
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
. "$SCRIPT_DIR/libhelm.sh"

# Trace every command when DEBUG is set in the environment
[ -n "$DEBUG" ] && set -x
#######################################
# Run a one-shot shell command on every node via a privileged DaemonSet.
# The command runs in a busybox initContainer; the main container just sleeps
# so the rollout can be observed, then the DaemonSet is deleted again.
# Arguments: $1 - shell command string, executed as `sh -x -c "$CMD"` per node
#######################################
all_nodes_upgrade() {
  local CMD="$1"

  echo "Deploy all node upgrade daemonSet(busybox)"
  cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kubezero-all-nodes-upgrade
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  selector:
    matchLabels:
      name: kubezero-all-nodes-upgrade
  template:
    metadata:
      labels:
        name: kubezero-all-nodes-upgrade
    spec:
      # Full host access: the command manipulates host mounts via nsenter
      hostNetwork: true
      hostIPC: true
      hostPID: true
      tolerations:
      - key: node-role.kubernetes.io/master
        operator: Exists
        effect: NoSchedule
      initContainers:
      - name: node-upgrade
        image: busybox
        command: ["/bin/sh"]
        args: ["-x", "-c", "$CMD"]
        volumeMounts:
        - name: host
          mountPath: /host
        - name: hostproc
          mountPath: /hostproc
        securityContext:
          privileged: true
          capabilities:
            add: ["SYS_ADMIN"]
      containers:
      - name: node-upgrade-wait
        image: busybox
        command: ["sleep", "3600"]
      volumes:
      - name: host
        hostPath:
          path: /
          type: Directory
      - name: hostproc
        hostPath:
          path: /proc
          type: Directory
EOF

  # Rollout only completes once the initContainer (our command) succeeded on all nodes
  kubectl rollout status daemonset -n kube-system kubezero-all-nodes-upgrade --timeout 300s
  kubectl delete ds kubezero-all-nodes-upgrade -n kube-system
}
#######################################
# Run kubezero admin task(s) in a privileged pod on a control-plane node.
# Streams the pod logs until completion, then deletes the pod.
# Globals:   VERSION (admin image tag), DEBUG (passed through to the pod)
# Arguments: $1 - comma-separated task list for kubezero.sh (expanded unquoted
#                 into the YAML args array on purpose)
#######################################
control_plane_upgrade() {
  TASKS="$1"

  # NB: was "$TASK" (typo) — printed an empty string instead of the task list
  echo "Deploy cluster admin task: $TASKS"
  cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: kubezero-upgrade
  namespace: kube-system
  labels:
    app: kubezero-upgrade
spec:
  hostNetwork: true
  hostIPC: true
  hostPID: true
  containers:
  - name: kubezero-admin
    image: public.ecr.aws/zero-downtime/kubezero-admin:${VERSION}
    imagePullPolicy: Always
    command: ["kubezero.sh"]
    args: [$TASKS]
    env:
    - name: DEBUG
      value: "$DEBUG"
    - name: NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    volumeMounts:
    - name: host
      mountPath: /host
    - name: workdir
      mountPath: /tmp
    securityContext:
      capabilities:
        add: ["SYS_CHROOT"]
  volumes:
  - name: host
    hostPath:
      path: /
      type: Directory
  - name: workdir
    emptyDir: {}
  nodeSelector:
    node-role.kubernetes.io/control-plane: ""
  tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  restartPolicy: Never
EOF

  kubectl wait pod kubezero-upgrade -n kube-system --timeout 120s --for=condition=initialized 2>/dev/null

  # Retry log streaming until the pod is actually running; break once logs complete
  while true; do
    kubectl logs kubezero-upgrade -n kube-system -f 2>/dev/null && break
    sleep 3
  done
  kubectl delete pod kubezero-upgrade -n kube-system
}
echo "Checking that all pods in kube-system are running ..."
waitSystemPodsRunning

# Pause ArgoCD auto-sync so it does not fight the manual upgrade steps
argo_used && disable_argo

# Per-node prep: make cgroup/sys mounts shared and prune old podman images
all_nodes_upgrade "nsenter -m/hostproc/1/ns/mnt mount --make-shared /sys/fs/cgroup; nsenter -m/hostproc/1/ns/mnt mount --make-shared /sys; nsenter -r/host /usr/bin/podman image prune -a -f;"

control_plane_upgrade kubeadm_upgrade

echo "Adjust kubezero values as needed: (eg. set cilium cluster id and ensure no IP space overlap !!):"
argo_used && kubectl edit app kubezero -n argocd || kubectl edit cm kubezero-values -n kube-system

# Remove multus DS due to label changes, if this fails:
# kubezero-network $ helm template . --set multus.enabled=true | kubectl apply -f -
kubectl delete ds kube-multus-ds -n kube-system || true

# Required due to chart upgrade to 4.X part of prometheus-stack 40.X
kubectl delete daemonset metrics-prometheus-node-exporter -n monitoring || true
# AWS EBS CSI driver change their fsGroupPolicy
kubectl delete CSIDriver ebs.csi.aws.com || true

# Delete external-dns deployment as upstream changed strategy to 'recreate'
kubectl delete deployment addons-external-dns -n kube-system || true

control_plane_upgrade "apply_network, apply_addons, apply_storage"

# Restart CNI daemonsets so they pick up the upgraded configuration
kubectl rollout restart daemonset/calico-node -n kube-system
kubectl rollout restart daemonset/cilium -n kube-system
kubectl rollout restart daemonset/kube-multus-ds -n kube-system

echo "Checking that all pods in kube-system are running ..."
waitSystemPodsRunning

echo "Applying remaining KubeZero modules..."

# Delete outdated cert-manager CRDs, otherwise serverside apply will fail.
# '|| true': a CRD may already be gone, and this script runs under -e.
for c in certificaterequests.cert-manager.io certificates.cert-manager.io challenges.acme.cert-manager.io clusterissuers.cert-manager.io issuers.cert-manager.io orders.acme.cert-manager.io; do
  kubectl delete crd "$c" || true
done

control_plane_upgrade "apply_cert-manager, apply_istio, apply_istio-ingress, apply_istio-private-ingress, apply_logging, apply_metrics, apply_argocd"

# Delete legacy ArgoCD controller which is now a statefulSet
kubectl delete deployment argocd-application-controller -n argocd || true

# Final step is to commit the new argocd kubezero app:
# strip status/operation/metadata, pin name+namespace, sort keys and
# re-normalize the embedded helm values string.
kubectl get app kubezero -n argocd -o yaml | yq 'del(.status) | del(.metadata) | del(.operation) | .metadata.name="kubezero" | .metadata.namespace="argocd"' | yq 'sort_keys(..) | .spec.source.helm.values |= (from_yaml | to_yaml)' > "$ARGO_APP"

echo "Please commit $ARGO_APP as the updated kubezero/application.yaml for your cluster."
echo "Then head over to ArgoCD for this cluster and sync all KubeZero modules to apply remaining upgrades."

echo "<Return> to continue and re-enable ArgoCD:"
read -r
argo_used && enable_argo