fix: rc round of fixes for the 1.23 upgrade flow
This commit is contained in:
parent 048b113b1e
commit c06da7b814
@@ -166,7 +166,7 @@ control_plane_node() {
   else
     # restore latest backup
-    retry 10 60 30 restic restore latest --no-lock -t / --tag $KUBE_VERSION_MINOR
+    retry 10 60 30 restic restore latest --no-lock -t / #Review: Use latest no matter what for now: --tag $KUBE_VERSION_MINOR
 
     # Make last etcd snapshot available
     cp ${WORKDIR}/etcd_snapshot ${HOSTFS}/etc/kubernetes
@@ -382,8 +382,7 @@ for t in $@; do
     bootstrap) control_plane_node bootstrap;;
     join) control_plane_node join;;
     restore) control_plane_node restore;;
-    apply_network) apply_module network;;
-    apply_addons) apply_module addons;;
+    apply_*) apply_module ${t##apply_};;
     backup) backup;;
     debug_shell) debug_shell;;
     *) echo "Unknown command: '$t'";;
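
Note on the collapsed apply_* branch above: bash strips the `apply_` prefix with parameter expansion, so every `apply_<module>` task resolves to `apply_module <module>` without listing each module. A minimal standalone sketch of the expansion (the `apply_module` stub here is illustrative, not the real function from the script):

    # demonstrate ${t##apply_}: removes the leading "apply_" from the task name
    apply_module() { echo "would apply module: $1"; }
    for t in apply_network apply_addons apply_storage; do
      case "$t" in
        apply_*) apply_module "${t##apply_}";;
      esac
    done
    # -> would apply module: network / addons / storage
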
@@ -55,6 +55,21 @@ function enable_argo() {
 }
 
 
+function cntFailedPods() {
+  NS=$1
+
+  NR=$(kubectl get pods -n $NS --field-selector="status.phase!=Succeeded,status.phase!=Running" -o custom-columns="POD:metadata.name" -o json | jq '.items | length')
+  echo $NR
+}
+
+
+function waitSystemPodsRunning() {
+  while true; do
+    [ "$(cntFailedPods kube-system)" -eq 0 ] && break
+    sleep 3
+  done
+}
+
 function argo_app_synced() {
   APP=$1
 
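
The two helpers added above use a kubectl field selector to list pods that are neither Running nor Succeeded and count them with `jq '.items | length'`; `waitSystemPodsRunning` then polls until kube-system reports zero such pods. A hedged usage sketch (the namespace name is just an example, assumes kubectl and jq are installed):

    # count not-yet-healthy pods in an arbitrary namespace, same selector as cntFailedPods
    kubectl get pods -n monitoring \
      --field-selector="status.phase!=Succeeded,status.phase!=Running" \
      -o json | jq '.items | length'

    # block the upgrade flow until kube-system has settled
    waitSystemPodsRunning && echo "kube-system pods are running"
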
@@ -93,7 +108,7 @@ function _crds() {
 
   # Only apply if there are actually any crds
   if [ -s $WORKDIR/crds.yaml ]; then
-    kubectl apply -f $WORKDIR/crds.yaml --server-side
+    kubectl apply -f $WORKDIR/crds.yaml --server-side --force-conflicts
   fi
 }
 
@@ -114,7 +129,7 @@ for manifest in yaml.safe_load_all(sys.stdin):
     print("---")
     print(yaml.dump(manifest))' $namespace > $WORKDIR/helm.yaml
 
-  kubectl $action -f $WORKDIR/helm.yaml && rc=$? || rc=$?
+  kubectl $action -f $WORKDIR/helm.yaml --server-side --force-conflicts && rc=$? || rc=$?
 }
 
 
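
Both applies above now use server-side apply together with --force-conflicts, so fields still owned by an older field manager (for example a previous client-side apply) are taken over instead of aborting the upgrade with a conflict error. A small hedged sketch of the flag combination and of inspecting field ownership (object and file names are illustrative):

    # server-side apply that overrides field-manager conflicts
    kubectl apply -f my-manifest.yaml --server-side --force-conflicts

    # inspect which manager currently owns the fields of an object
    kubectl get deployment my-app -o yaml --show-managed-fields
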
@@ -31,6 +31,9 @@ spec:
       labels:
         name: kubezero-all-nodes-upgrade
     spec:
+      hostNetwork: true
+      hostIPC: true
+      hostPID: true
       tolerations:
       - key: node-role.kubernetes.io/master
         operator: Exists
@@ -43,7 +46,10 @@ spec:
         volumeMounts:
         - name: host
           mountPath: /host
+        - name: hostproc
+          mountPath: /hostproc
         securityContext:
+          privileged: true
          capabilities:
            add: ["SYS_ADMIN"]
      containers:
@@ -55,6 +61,10 @@ spec:
        hostPath:
          path: /
          type: Directory
+      - name: hostproc
+        hostPath:
+          path: /proc
+          type: Directory
 EOF
 
 kubectl rollout status daemonset -n kube-system kubezero-all-nodes-upgrade --timeout 300s
@@ -122,14 +132,18 @@ EOF
   kubectl delete pod kubezero-upgrade -n kube-system
 }
 
+
+echo "Checking that all pods in kube-system are running ..."
+waitSystemPodsRunning
+
 argo_used && disable_argo
 
-all_nodes_upgrade "mount --make-shared /host/sys/fs/cgroup; mount --make-shared /host/sys;"
+all_nodes_upgrade "nsenter -m/hostproc/1/ns/mnt mount --make-shared /sys/fs/cgroup; mount --make-shared /sys; sleep 3;"
 
 control_plane_upgrade kubeadm_upgrade
 
-echo "Adjust kubezero-values as needed: (eg. set cilium cluster id etc):"
-kubectl edit cm kubezero-values -n kube-system
+echo "Adjust kubezero values as needed: (eg. set cilium cluster id and ensure no IP space overlap !!):"
+argo_used && kubectl edit app kubezero -n argocd || kubectl edit cm kubezero-values -n kube-system
 
 # Remove multus DS due to label changes, if this fails:
 # kubezero-network $ helm template . --set multus.enabled=true | kubectl apply -f -
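
The hostPID/hostIPC settings and the /hostproc mount added to the DaemonSet above exist so that the all_nodes_upgrade command can nsenter the mount namespace of the node's PID 1 and flip the cgroup and sysfs mounts to shared propagation on the host itself, rather than only inside the pod's mount namespace. A hedged sketch of that pattern from inside such a privileged pod (assumes the host's /proc is mounted at /hostproc, as in the DaemonSet):

    # enter the host's mount namespace via the mounted /hostproc and mark /sys mounts shared
    nsenter -m/hostproc/1/ns/mnt mount --make-shared /sys/fs/cgroup
    nsenter -m/hostproc/1/ns/mnt mount --make-shared /sys
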
@@ -141,16 +155,25 @@ kubectl delete daemonset metrics-prometheus-node-exporter -n monitoring || true
 # AWS EBS CSI driver change their fsGroupPolicy
 kubectl delete CSIDriver ebs.csi.aws.com || true
 
-control_plane_upgrade "apply_network, apply_addons"
+control_plane_upgrade "apply_network, apply_addons, apply_storage"
 
 kubectl rollout restart daemonset/calico-node -n kube-system
 kubectl rollout restart daemonset/cilium -n kube-system
 kubectl rollout restart daemonset/kube-multus-ds -n kube-system
 
-argo_used && enable_argo
+echo "Checking that all pods in kube-system are running ..."
+waitSystemPodsRunning
+
+echo "Applying remaining KubeZero modules..."
+control_plane_upgrade "apply_cert-manager, apply_istio, apply_istio-ingress, apply_istio-private-ingress, apply_logging, apply_metrics, apply_argocd"
+
 # Final step is to commit the new argocd kubezero app
-kubectl get app kubezero -n argocd -o yaml | yq 'del(.status) | del(.metadata) | .metadata.name="kubezero" | .metadata.namespace="argocd"' | yq 'sort_keys(..) | .spec.source.helm.values |= (from_yaml | to_yaml)' > /tmp/new-kubezero-argoapp.yaml
+kubectl get app kubezero -n argocd -o yaml | yq 'del(.status) | del(.metadata) | del(.operation) | .metadata.name="kubezero" | .metadata.namespace="argocd"' | yq 'sort_keys(..) | .spec.source.helm.values |= (from_yaml | to_yaml)' > $ARGO_APP
 
 echo "Please commit $ARGO_APP as the updated kubezero/application.yaml for your cluster."
 echo "Then head over to ArgoCD for this cluster and sync all KubeZero modules to apply remaining upgrades."
+
+echo "<Return> to continue and re-enable ArgoCD:"
+read
+
+argo_used && enable_argo
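
The export above turns the live Argo CD Application back into a manifest that can be committed to git: yq strips the runtime fields (.status, .metadata and now also .operation), restores the desired name/namespace, sorts keys, and round-trips .spec.source.helm.values through from_yaml | to_yaml so the embedded values string is re-serialized consistently. A hedged sketch of that normalization step alone (app.yaml is an illustrative input file):

    # normalize an Application manifest: sorted keys, re-serialized helm values block
    yq 'sort_keys(..) | .spec.source.helm.values |= (from_yaml | to_yaml)' app.yaml > normalized.yaml
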
@@ -1,7 +1,6 @@
 #!/bin/bash
 
 # Migrate addons and network values from local kubeadm-values.yaml on controllers into CM
-# - remove secrets from addons
 # - enable cilium
 
 # Create emtpy CM if not exists yet
@@ -16,13 +15,6 @@ yq eval -i '.global.clusterName = strenv(CLUSTERNAME) |
   .global.highAvailable = env(HIGHAVAILABLE)' \
   ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml
 
-# extract addons
-yq e '.addons |
-  del .clusterBackup.repository |
-  del .clusterBackup.password |
-  .clusterBackup.image.tag = strenv(KUBE_VERSION) |
-  {"addons": .}' ${HOSTFS}/etc/kubernetes/kubeadm-values.yaml > $WORKDIR/addons-values.yaml
-
 # extract network
 yq e '.network |
   .cilium.enabled = true |
@@ -35,10 +27,11 @@ yq e '.network |
 # get current argo cd values
 kubectl get application kubezero -n argocd -o yaml | yq '.spec.source.helm.values' > ${WORKDIR}/argo-values.yaml
 
-# merge all into new CM
+# merge all into new CM and set new minimal addons
 yq ea '. as $item ireduce ({}; . * $item ) |
   .global.clusterName = strenv(CLUSTERNAME) |
-  .global.highAvailable = env(HIGHAVAILABLE)' $WORKDIR/addons-values.yaml ${WORKDIR}/network-values.yaml $WORKDIR/argo-values.yaml > $WORKDIR/kubezero-pre-values.yaml
+  .global.highAvailable = env(HIGHAVAILABLE) |
+  .addons.clusterBackup.image.tag = "v1.23" ' ${WORKDIR}/network-values.yaml $WORKDIR/argo-values.yaml > $WORKDIR/kubezero-pre-values.yaml
 
 # tumble new config through migrate.py
 cat $WORKDIR/kubezero-pre-values.yaml | migrate_argo_values.py > $WORKDIR/kubezero-values.yaml
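
The merge above relies on yq's eval-all reduce idiom: `. as $item ireduce ({}; . * $item)` folds every input document into one map with a deep merge, after which individual keys are set on the result. A minimal sketch with two hypothetical files (base.yaml, override.yaml):

    # deep-merge two values files and inject one extra key, mirroring the migration's yq ea call
    yq ea '. as $item ireduce ({}; . * $item) |
      .global.clusterName = "example-cluster"' base.yaml override.yaml > merged.yaml
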
@@ -2,7 +2,7 @@ apiVersion: v2
 name: kubezero-network
 description: KubeZero umbrella chart for all things network
 type: application
-version: 0.3.3
+version: 0.3.4
 home: https://kubezero.com
 icon: https://cdn.zero-downtime.net/assets/kubezero/logo-small-64.png
 keywords:
@@ -37,27 +37,27 @@ cilium:
     binPath: "/usr/libexec/cni"
     #-- Ensure this is false if multus is enabled
     exclusive: false
-    # chainingMode: generic-veth
+    chainingMode: generic-veth
 
-  # Until we switch to Cilium
-  #bpf:
-  #  hostLegacyRouting: true
+  bpf:
+    hostLegacyRouting: true
   # tproxy: false
 
   # enableIPv4Masquerade: false
   # enableIdentityMark: false
-  policyEnforcementMode: "audit"
+  policyEnforcementMode: "never"
 
   cluster:
     # This should match the second octet of clusterPoolIPv4PodCIDRList
     # to prevent IP space overlap and easy tracking
-    id: 1
+    # use 240 as default, less likely to clash with 1, do NOT use 244 used by calico until 1.25
+    id: 240
     name: default
 
   ipam:
     operator:
       clusterPoolIPv4PodCIDRList:
-        - 10.1.0.0/16
+        - 10.240.0.0/16
 
   # Should be handled by multus
   nodePort:
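
As the values comment above states, the Cilium cluster id is meant to match the second octet of clusterPoolIPv4PodCIDRList (id 240 pairs with 10.240.0.0/16) so the pod IP ranges of different clusters do not overlap. A hedged sketch of a per-cluster override that keeps the two in sync (the id 7 and the file name are purely illustrative):

    # keep cluster.id and the pod CIDR second octet aligned when overriding per cluster
    yq e '.cilium.cluster.id = 7 |
      .cilium.ipam.operator.clusterPoolIPv4PodCIDRList = ["10.7.0.0/16"]' my-cluster-values.yaml
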
@@ -79,26 +79,29 @@ external-dns:
       value: "0"
 {{- end }}
 
 
-{{- with index .Values "addons" "cluster-autoscaler" }}
 cluster-autoscaler:
-  {{- toYaml . | nindent 2 }}
-  autoDiscovery:
-    clusterName: {{ $.Values.global.clusterName }}
+  enabled: {{ default "false" (index .Values "addons" "cluster-autoscaler" "enabled") }}
 
-  {{- with $.Values.metrics }}
+  {{- with omit (index .Values "addons" "cluster-autoscaler") "enabled" }}
+  {{- toYaml . | nindent 2 }}
+  {{- end }}
+
+  autoDiscovery:
+    clusterName: {{ .Values.global.clusterName }}
+
+  {{- with .Values.metrics }}
   serviceMonitor:
     enabled: {{ .enabled }}
   prometheusRule:
     enabled: {{ .enabled }}
   {{- end }}
 
+  {{- if .Values.global.aws }}
   # AWS
-  {{- if $.Values.global.aws }}
-  awsRegion: {{ $.Values.global.aws.region }}
+  awsRegion: {{ .Values.global.aws.region }}
 
   extraEnv:
-    AWS_ROLE_ARN: "arn:aws:iam::{{ $.Values.global.aws.accountId }}:role/{{ $.Values.global.aws.region }}.{{ $.Values.global.clusterName }}.clusterAutoScaler"
+    AWS_ROLE_ARN: "arn:aws:iam::{{ .Values.global.aws.accountId }}:role/{{ .Values.global.aws.region }}.{{ .Values.global.clusterName }}.clusterAutoScaler"
     AWS_WEB_IDENTITY_TOKEN_FILE: "/var/run/secrets/sts.amazonaws.com/serviceaccount/token"
     AWS_STS_REGIONAL_ENDPOINTS: "regional"
   extraVolumes:
@@ -115,8 +118,6 @@ cluster-autoscaler:
       readOnly: true
   {{- end }}
 
-  {{- end }}
-
 {{- with .Values.addons.fuseDevicePlugin }}
 fuseDevicePlugin:
   {{- toYaml . | nindent 2 }}
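
The reworked template block above always renders cluster-autoscaler.enabled, defaulting it to "false" via default/index (index is needed because the key contains a dash), and splices any remaining per-cluster settings in with omit ... "enabled"; the matching cluster-autoscaler.enabled: false default lands in the addons values in the next hunk. A hedged way to check the rendering locally (run from the relevant chart directory; flags and value paths are illustrative):

    # render the chart and confirm the cluster-autoscaler block toggles with the values flag
    helm template . --set global.clusterName=example \
      --set addons.cluster-autoscaler.enabled=true | grep -A3 'cluster-autoscaler:'
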
@@ -19,11 +19,13 @@ addons:
     enabled: false
   aws-node-termination-handler:
     enabled: false
+  cluster-autoscaler:
+    enabled: false
 
 network:
   enabled: true
   retain: true
-  targetRevision: 0.3.3
+  targetRevision: 0.3.4
   cilium:
     cluster: {}
 