fix: minor upgrade flow fixes

Stefan Reimer 2024-11-13 14:35:50 +00:00
parent aeb509cb08
commit 573dd3ec3e
3 changed files with 17 additions and 6 deletions

==== changed file 1 of 3 ====

@@ -64,7 +64,7 @@ render_kubeadm() {
     cat ${WORKDIR}/kubeadm/templates/${f}Configuration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm.yaml
   done
 
-  if [[ "$phase" =~ ^(bootstrap|restore)$ ]]; then
+  if [[ "$phase" =~ ^(bootstrap|join|restore)$ ]]; then
     cat ${WORKDIR}/kubeadm/templates/InitConfiguration.yaml >> ${HOSTFS}/etc/kubernetes/kubeadm.yaml
   fi
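
The regex change above makes the join phase render InitConfiguration.yaml as well, not only bootstrap and restore. A minimal sketch of the changed check in isolation, assuming $phase is set by the caller as in the surrounding script:

    phase=join
    if [[ "$phase" =~ ^(bootstrap|join|restore)$ ]]; then
      echo "InitConfiguration.yaml gets appended to kubeadm.yaml"
    fi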

==== changed file 2 of 3 ====

@@ -19,6 +19,11 @@ echo "Checking that all pods in kube-system are running ..."
 
 [ "$ARGOCD" == "True" ] && disable_argo
 
+# 1.30 fix for the missing kubeadm socket annotations
+for c in $(kubectl get nodes -l "node-role.kubernetes.io/control-plane=" | grep v1.29 | awk {'print $1}'); do
+  kubectl annotate node $c 'kubeadm.alpha.kubernetes.io/cri-socket=unix:///var/run/crio/crio.sock'
+done
+
 control_plane_upgrade kubeadm_upgrade
 
 echo "Control plane upgraded, <Return> to continue"
@@ -33,13 +38,14 @@ kubectl delete runtimeclass crio || true
 # upgrade modules
 #
-# Preload cilium images to running nodes
-all_nodes_upgrade "chroot /host crictl pull quay.io/cilium/cilium:v1.16.3"
+# Preload cilium images to running nodes, disabled till 1.31
+# all_nodes_upgrade "chroot /host crictl pull quay.io/cilium/cilium:v1.16.3; chroot /host crictl pull ghcr.io/k8snetworkplumbingwg/multus-cni:v3.9.3"
 
 control_plane_upgrade "apply_network, apply_addons, apply_storage, apply_operators"
 
-echo "Checking that all pods in kube-system are running ..."
-waitSystemPodsRunning
+# Disabled during 1.30 due to nvidia runtime deadlock
+#echo "Checking that all pods in kube-system are running ..."
+#waitSystemPodsRunning
 
 echo "Applying remaining KubeZero modules..."

==== changed file 3 of 3 ====

@@ -6,11 +6,12 @@ metadata:
 spec:
   runtimeClassName: nvidia
   containers:
+  #- image: nvcr.io/nvidia/k8s/cuda-sample:vectoradd-cuda12.5.0
   - image: nvidia/cuda:12.5.1-base-ubuntu22.04
     command:
       - "bash"
       - "-c"
-      - "sleep 3600"
+      - "nvidia-smi; sleep 3600"
     imagePullPolicy: IfNotPresent
     name: nvidia-test
     resources:
@@ -18,3 +19,7 @@ spec:
       nvidia.com/gpu: 1
     requests:
       memory: 1024Mi
+  tolerations:
+  - effect: NoSchedule
+    key: kubezero-workergroup
+    operator: Exists
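
The test pod now prints nvidia-smi before sleeping and tolerates the kubezero-workergroup NoSchedule taint so it can be scheduled onto tainted GPU worker groups. A hedged smoke test, assuming the manifest is saved as nvidia-test.yaml and the pod itself is named nvidia-test (both names are assumptions):

    kubectl apply -f nvidia-test.yaml
    kubectl wait --for=condition=Ready pod/nvidia-test --timeout=120s
    kubectl logs pod/nvidia-test | head -n 15   # should show the nvidia-smi table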